Created
July 13, 2019 06:17
-
-
Save ff6347/199b55dc87f527a7b01dbeb742cb608f to your computer and use it in GitHub Desktop.
Baby Parse Edited for Extendscript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Baby Parse | |
| v0.4.1 | |
| https://github.com/Rich-Harris/BabyParse | |
| Created by Rich Harris | |
| Maintained by Matt Holt | |
| Based on Papa Parse v4.0.7 by Matt Holt | |
| https://github.com/mholt/PapaParse | |
| Minor changs to make it work with extendscript | |
| */ | |
| // (function(global) | |
| // { | |
| // A configuration object from which to draw default settings | |
| var DEFAULTS = { | |
| delimiter: "", // empty: auto-detect | |
| newline: "", // empty: auto-detect | |
| header: false, | |
| dynamicTyping: false, | |
| preview: 0, | |
| step: undefined, | |
| comments: false, | |
| complete: undefined, | |
| skipEmptyLines: false, | |
| fastMode: false | |
| }; | |
| var Baby = {}; | |
| Baby.parse = CsvToJson; | |
| Baby.unparse = JsonToCsv; | |
| Baby.RECORD_SEP = String.fromCharCode(30); | |
| Baby.UNIT_SEP = String.fromCharCode(31); | |
| Baby.BYTE_ORDER_MARK = "\ufeff"; | |
| Baby.BAD_DELIMITERS = ["\r", "\n", "\"", Baby.BYTE_ORDER_MARK]; | |
| Baby.DefaultDelimiter = ","; // Used if not specified and detection fails | |
| Baby.Parser = Parser; // For testing/dev only | |
| Baby.ParserHandle = ParserHandle; // For testing/dev only | |
| function CsvToJson(_input, _config) | |
| { | |
| var config = copyAndValidateConfig(_config); | |
| var ph = new ParserHandle(config); | |
| var results = ph.parse(_input); | |
| if (isFunction(config.complete)) | |
| config.complete(results); | |
| return results; | |
| } | |
| function JsonToCsv(_input, _config) | |
| { | |
| var _output = ""; | |
| var _fields = []; | |
| // Default configuration | |
| var _quotes = false; // whether to surround every datum with quotes | |
| var _delimiter = ","; // delimiting character | |
| var _newline = "\r\n"; // newline character(s) | |
| unpackConfig(); | |
| if (typeof _input === 'string') | |
| _input = JSON.parse(_input); | |
| if (_input instanceof Array) | |
| { | |
| if (!_input.length || _input[0] instanceof Array) | |
| return serialize(null, _input); | |
| else if (typeof _input[0] === 'object') | |
| return serialize(objectKeys(_input[0]), _input); | |
| } | |
| else if (typeof _input === 'object') | |
| { | |
| if (typeof _input.data === 'string') | |
| _input.data = JSON.parse(_input.data); | |
| if (_input.data instanceof Array) | |
| { | |
| if (!_input.fields) | |
| _input.fields = _input.data[0] instanceof Array | |
| ? _input.fields | |
| : objectKeys(_input.data[0]); | |
| if (!(_input.data[0] instanceof Array) && typeof _input.data[0] !== 'object') | |
| _input.data = [_input.data]; // handles input like [1,2,3] or ["asdf"] | |
| } | |
| return serialize(_input.fields || [], _input.data || []); | |
| } | |
| // Default (any valid paths should return before this) | |
| throw "exception: Unable to serialize unrecognized input"; | |
| function unpackConfig() | |
| { | |
| if (typeof _config !== 'object') | |
| return; | |
| if (typeof _config.delimiter === 'string' | |
| && _config.delimiter.length == 1 | |
| && Baby.BAD_DELIMITERS.indexOf(_config.delimiter) == -1) | |
| { | |
| _delimiter = _config.delimiter; | |
| } | |
| if (typeof _config.quotes === 'boolean' | |
| || _config.quotes instanceof Array) | |
| _quotes = _config.quotes; | |
| if (typeof _config.newline === 'string') | |
| _newline = _config.newline; | |
| } | |
| // Turns an object's keys into an array | |
| function objectKeys(obj) | |
| { | |
| if (typeof obj !== 'object') | |
| return []; | |
| var keys = []; | |
| for (var key in obj) | |
| keys.push(key); | |
| return keys; | |
| } | |
| // The double for loop that iterates the data and writes out a CSV string including header row | |
| function serialize(fields, data) | |
| { | |
| var csv = ""; | |
| if (typeof fields === 'string') | |
| fields = JSON.parse(fields); | |
| if (typeof data === 'string') | |
| data = JSON.parse(data); | |
| var hasHeader = fields instanceof Array && fields.length > 0; | |
| var dataKeyedByField = !(data[0] instanceof Array); | |
| // If there a header row, write it first | |
| if (hasHeader) | |
| { | |
| for (var i = 0; i < fields.length; i++) | |
| { | |
| if (i > 0) | |
| csv += _delimiter; | |
| csv += safe(fields[i], i); | |
| } | |
| if (data.length > 0) | |
| csv += _newline; | |
| } | |
| // Then write out the data | |
| for (var row = 0; row < data.length; row++) | |
| { | |
| var maxCol = hasHeader ? fields.length : data[row].length; | |
| for (var col = 0; col < maxCol; col++) | |
| { | |
| if (col > 0) | |
| csv += _delimiter; | |
| var colIdx = hasHeader && dataKeyedByField ? fields[col] : col; | |
| csv += safe(data[row][colIdx], col); | |
| } | |
| if (row < data.length - 1) | |
| csv += _newline; | |
| } | |
| return csv; | |
| } | |
| // Encloses a value around quotes if needed (makes a value safe for CSV insertion) | |
| function safe(str, col) | |
| { | |
| if (typeof str === "undefined" || str === null) | |
| return ""; | |
| str = str.toString().replace(/"/g, '""'); | |
| var needsQuotes = (typeof _quotes === 'boolean' && _quotes) | |
| || (_quotes instanceof Array && _quotes[col]) | |
| || hasAny(str, Baby.BAD_DELIMITERS) | |
| || str.indexOf(_delimiter) > -1 | |
| || str.charAt(0) == ' ' | |
| || str.charAt(str.length - 1) == ' '; | |
| return needsQuotes ? '"' + str + '"' : str; | |
| } | |
| function hasAny(str, substrings) | |
| { | |
| for (var i = 0; i < substrings.length; i++) | |
| if (str.indexOf(substrings[i]) > -1) | |
| return true; | |
| return false; | |
| } | |
| } | |
| // Use one ParserHandle per entire CSV file or string | |
| function ParserHandle(_config) | |
| { | |
| // One goal is to minimize the use of regular expressions... | |
| var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i; | |
| var self = this; | |
| var _stepCounter = 0; // Number of times step was called (number of rows parsed) | |
| var _input; // The input being parsed | |
| var _parser; // The core parser being used | |
| var _paused = false; // Whether we are paused or not | |
| var _delimiterError; // Temporary state between delimiter detection and processing results | |
| var _fields = []; // Fields are from the header row of the input, if there is one | |
| var _results = { // The last results returned from the parser | |
| data: [], | |
| errors: [], | |
| meta: {} | |
| }; | |
| if (isFunction(_config.step)) | |
| { | |
| var userStep = _config.step; | |
| _config.step = function(results) | |
| { | |
| _results = results; | |
| if (needsHeaderRow()) | |
| processResults(); | |
| else // only call user's step function after header row | |
| { | |
| processResults(); | |
| // It's possbile that this line was empty and there's no row here after all | |
| if (_results.data.length == 0) | |
| return; | |
| _stepCounter += results.data.length; | |
| if (_config.preview && _stepCounter > _config.preview) | |
| _parser.abort(); | |
| else | |
| userStep(_results, self); | |
| } | |
| }; | |
| } | |
| this.parse = function(input) | |
| { | |
| if (!_config.newline) | |
| _config.newline = guessLineEndings(input); | |
| _delimiterError = false; | |
| if (!_config.delimiter) | |
| { | |
| var delimGuess = guessDelimiter(input); | |
| if (delimGuess.successful) | |
| _config.delimiter = delimGuess.bestDelimiter; | |
| else | |
| { | |
| _delimiterError = true; // add error after parsing (otherwise it would be overwritten) | |
| _config.delimiter = Baby.DefaultDelimiter; | |
| } | |
| _results.meta.delimiter = _config.delimiter; | |
| } | |
| var parserConfig = copy(_config); | |
| if (_config.preview && _config.header) | |
| parserConfig.preview++; // to compensate for header row | |
| _input = input; | |
| _parser = new Parser(parserConfig); | |
| _results = _parser.parse(_input); | |
| processResults(); | |
| if (isFunction(_config.complete) && !_paused && (!self.streamer || self.streamer.finished())) | |
| _config.complete(_results); | |
| return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } }); | |
| }; | |
| this.pause = function() | |
| { | |
| _paused = true; | |
| _parser.abort(); | |
| _input = _input.substr(_parser.getCharIndex()); | |
| }; | |
| this.resume = function() | |
| { | |
| _paused = false; | |
| _parser = new Parser(_config); | |
| _parser.parse(_input); | |
| if (!_paused) | |
| { | |
| if (self.streamer && !self.streamer.finished()) | |
| self.streamer.resume(); // more of the file yet to come | |
| else if (isFunction(_config.complete)) | |
| _config.complete(_results); | |
| } | |
| }; | |
| this.abort = function() | |
| { | |
| _parser.abort(); | |
| if (isFunction(_config.complete)) | |
| _config.complete(_results); | |
| _input = ""; | |
| }; | |
| function processResults() | |
| { | |
| if (_results && _delimiterError) | |
| { | |
| addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '"+Baby.DefaultDelimiter+"'"); | |
| _delimiterError = false; | |
| } | |
| if (_config.skipEmptyLines) | |
| { | |
| for (var i = 0; i < _results.data.length; i++) | |
| if (_results.data[i].length == 1 && _results.data[i][0] == "") | |
| _results.data.splice(i--, 1); | |
| } | |
| if (needsHeaderRow()) | |
| fillHeaderFields(); | |
| return applyHeaderAndDynamicTyping(); | |
| } | |
| function needsHeaderRow() | |
| { | |
| return _config.header && _fields.length == 0; | |
| } | |
| function fillHeaderFields() | |
| { | |
| if (!_results) | |
| return; | |
| for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) | |
| for (var j = 0; j < _results.data[i].length; j++) | |
| _fields.push(_results.data[i][j]); | |
| _results.data.splice(0, 1); | |
| } | |
| function applyHeaderAndDynamicTyping() | |
| { | |
| if (!_results || (!_config.header && !_config.dynamicTyping)) | |
| return _results; | |
| for (var i = 0; i < _results.data.length; i++) | |
| { | |
| var row = {}; | |
| for (var j = 0; j < _results.data[i].length; j++) | |
| { | |
| if (_config.dynamicTyping) | |
| { | |
| var value = _results.data[i][j]; | |
| if (value == "true") | |
| _results.data[i][j] = true; | |
| else if (value == "false") | |
| _results.data[i][j] = false; | |
| else | |
| _results.data[i][j] = tryParseFloat(value); | |
| } | |
| if (_config.header) | |
| { | |
| if (j >= _fields.length) | |
| { | |
| if (!row["__parsed_extra"]) | |
| row["__parsed_extra"] = []; | |
| row["__parsed_extra"].push(_results.data[i][j]); | |
| } | |
| else | |
| row[_fields[j]] = _results.data[i][j]; | |
| } | |
| } | |
| if (_config.header) | |
| { | |
| _results.data[i] = row; | |
| if (j > _fields.length) | |
| addError("FieldMismatch", "TooManyFields", "Too many fields: expected " + _fields.length + " fields but parsed " + j, i); | |
| else if (j < _fields.length) | |
| addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + _fields.length + " fields but parsed " + j, i); | |
| } | |
| } | |
| if (_config.header && _results.meta) | |
| _results.meta.fields = _fields; | |
| return _results; | |
| } | |
| function guessDelimiter(input) | |
| { | |
| var delimChoices = [",", "\t", "|", ";", Baby.RECORD_SEP, Baby.UNIT_SEP]; | |
| var bestDelim, bestDelta, fieldCountPrevRow; | |
| for (var i = 0; i < delimChoices.length; i++) | |
| { | |
| var delim = delimChoices[i]; | |
| var delta = 0, avgFieldCount = 0; | |
| fieldCountPrevRow = undefined; | |
| var preview = new Parser({ | |
| delimiter: delim, | |
| preview: 10 | |
| }).parse(input); | |
| for (var j = 0; j < preview.data.length; j++) | |
| { | |
| var fieldCount = preview.data[j].length; | |
| avgFieldCount += fieldCount; | |
| if (typeof fieldCountPrevRow === 'undefined') | |
| { | |
| fieldCountPrevRow = fieldCount; | |
| continue; | |
| } | |
| else if (fieldCount > 1) | |
| { | |
| delta += Math.abs(fieldCount - fieldCountPrevRow); | |
| fieldCountPrevRow = fieldCount; | |
| } | |
| } | |
| avgFieldCount /= preview.data.length; | |
| if ((typeof bestDelta === 'undefined' || delta < bestDelta) | |
| && avgFieldCount > 1.99) | |
| { | |
| bestDelta = delta; | |
| bestDelim = delim; | |
| } | |
| } | |
| _config.delimiter = bestDelim; | |
| return { | |
| successful: !!bestDelim, | |
| bestDelimiter: bestDelim | |
| } | |
| } | |
| function guessLineEndings(input) | |
| { | |
| input = input.substr(0, 1024*1024); // max length 1 MB | |
| var r = input.split('\r'); | |
| if (r.length == 1) | |
| return '\n'; | |
| var numWithN = 0; | |
| for (var i = 0; i < r.length; i++) | |
| { | |
| if (r[i][0] == '\n') | |
| numWithN++; | |
| } | |
| return numWithN >= r.length / 2 ? '\r\n' : '\r'; | |
| } | |
| function tryParseFloat(val) | |
| { | |
| var isNumber = FLOAT.test(val); | |
| return isNumber ? parseFloat(val) : val; | |
| } | |
| function addError(type, code, msg, row) | |
| { | |
| _results.errors.push({ | |
| type: type, | |
| code: code, | |
| message: msg, | |
| row: row | |
| }); | |
| } | |
| } | |
| // The core parser implements speedy and correct CSV parsing | |
| function Parser(config) | |
| { | |
| // Unpack the config object | |
| config = config || {}; | |
| var delim = config.delimiter; | |
| var newline = config.newline; | |
| var comments = config.comments; | |
| var step = config.step; | |
| var preview = config.preview; | |
| var fastMode = config.fastMode; | |
| // Delimiter must be valid | |
| if (typeof delim !== 'string' | |
| || delim.length != 1 | |
| || Baby.BAD_DELIMITERS.indexOf(delim) > -1) | |
| delim = ","; | |
| // Comment character must be valid | |
| if (comments === delim) | |
| throw "Comment character same as delimiter"; | |
| else if (comments === true) | |
| comments = "#"; | |
| else if (typeof comments !== 'string' | |
| || Baby.BAD_DELIMITERS.indexOf(comments) > -1) | |
| comments = false; | |
| // Newline must be valid: \r, \n, or \r\n | |
| if (newline != '\n' && newline != '\r' && newline != '\r\n') | |
| newline = '\n'; | |
| // We're gonna need these at the Parser scope | |
| var cursor = 0; | |
| var aborted = false; | |
| this.parse = function(input) | |
| { | |
| // For some reason, in Chrome, this speeds things up (!?) | |
| if (typeof input !== 'string') | |
| throw "Input must be a string"; | |
| // We don't need to compute some of these every time parse() is called, | |
| // but having them in a more local scope seems to perform better | |
| var inputLen = input.length, | |
| delimLen = delim.length, | |
| newlineLen = newline.length, | |
| commentsLen = comments.length; | |
| var stepIsFunction = typeof step === 'function'; | |
| // Establish starting state | |
| cursor = 0; | |
| var data = [], errors = [], row = []; | |
| if (!input) | |
| return returnable(); | |
| if (fastMode) | |
| { | |
| // Fast mode assumes there are no quoted fields in the input | |
| var rows = input.split(newline); | |
| for (var i = 0; i < rows.length; i++) | |
| { | |
| if (comments && rows[i].substr(0, commentsLen) == comments) | |
| continue; | |
| if (stepIsFunction) | |
| { | |
| data = [ rows[i].split(delim) ]; | |
| doStep(); | |
| if (aborted) | |
| return returnable(); | |
| } | |
| else | |
| data.push(rows[i].split(delim)); | |
| if (preview && i >= preview) | |
| { | |
| data = data.slice(0, preview); | |
| return returnable(true); | |
| } | |
| } | |
| return returnable(); | |
| } | |
| var nextDelim = input.indexOf(delim, cursor); | |
| var nextNewline = input.indexOf(newline, cursor); | |
| // Parser loop | |
| for (;;) | |
| { | |
| // Field has opening quote | |
| if (input[cursor] == '"') | |
| { | |
| // Start our search for the closing quote where the cursor is | |
| var quoteSearch = cursor; | |
| // Skip the opening quote | |
| cursor++; | |
| for (;;) | |
| { | |
| // Find closing quote | |
| var quoteSearch = input.indexOf('"', quoteSearch+1); | |
| if (quoteSearch === -1) | |
| { | |
| // No closing quote... what a pity | |
| errors.push({ | |
| type: "Quotes", | |
| code: "MissingQuotes", | |
| message: "Quoted field unterminated", | |
| row: data.length, // row has yet to be inserted | |
| index: cursor | |
| }); | |
| return finish(); | |
| } | |
| if (quoteSearch === inputLen-1) | |
| { | |
| // Closing quote at EOF | |
| row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"')); | |
| data.push(row); | |
| if (stepIsFunction) | |
| doStep(); | |
| return returnable(); | |
| } | |
| // If this quote is escaped, it's part of the data; skip it | |
| if (input[quoteSearch+1] == '"') | |
| { | |
| quoteSearch++; | |
| continue; | |
| } | |
| if (input[quoteSearch+1] == delim) | |
| { | |
| // Closing quote followed by delimiter | |
| row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"')); | |
| cursor = quoteSearch + 1 + delimLen; | |
| nextDelim = input.indexOf(delim, cursor); | |
| nextNewline = input.indexOf(newline, cursor); | |
| break; | |
| } | |
| if (input.substr(quoteSearch+1, newlineLen) === newline) | |
| { | |
| // Closing quote followed by newline | |
| row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"')); | |
| saveRow(quoteSearch + 1 + newlineLen); | |
| nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field | |
| if (stepIsFunction) | |
| { | |
| doStep(); | |
| if (aborted) | |
| return returnable(); | |
| } | |
| if (preview && data.length >= preview) | |
| return returnable(true); | |
| break; | |
| } | |
| } | |
| continue; | |
| } | |
| // Comment found at start of new line | |
| if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) | |
| { | |
| if (nextNewline == -1) // Comment ends at EOF | |
| return returnable(); | |
| cursor = nextNewline + newlineLen; | |
| nextNewline = input.indexOf(newline, cursor); | |
| nextDelim = input.indexOf(delim, cursor); | |
| continue; | |
| } | |
| // Next delimiter comes before next newline, so we've reached end of field | |
| if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) | |
| { | |
| row.push(input.substring(cursor, nextDelim)); | |
| cursor = nextDelim + delimLen; | |
| nextDelim = input.indexOf(delim, cursor); | |
| continue; | |
| } | |
| // End of row | |
| if (nextNewline !== -1) | |
| { | |
| row.push(input.substring(cursor, nextNewline)); | |
| saveRow(nextNewline + newlineLen); | |
| if (stepIsFunction) | |
| { | |
| doStep(); | |
| if (aborted) | |
| return returnable(); | |
| } | |
| if (preview && data.length >= preview) | |
| return returnable(true); | |
| continue; | |
| } | |
| break; | |
| } | |
| return finish(); | |
| // Appends the remaining input from cursor to the end into | |
| // row, saves the row, calls step, and returns the results. | |
| function finish() | |
| { | |
| row.push(input.substr(cursor)); | |
| data.push(row); | |
| cursor = inputLen; // important in case parsing is paused | |
| if (stepIsFunction) | |
| doStep(); | |
| return returnable(); | |
| } | |
| // Appends the current row to the results. It sets the cursor | |
| // to newCursor and finds the nextNewline. The caller should | |
| // take care to execute user's step function and check for | |
| // preview and end parsing if necessary. | |
| function saveRow(newCursor) | |
| { | |
| data.push(row); | |
| row = []; | |
| cursor = newCursor; | |
| nextNewline = input.indexOf(newline, cursor); | |
| } | |
| // Returns an object with the results, errors, and meta. | |
| function returnable(stopped) | |
| { | |
| return { | |
| data: data, | |
| errors: errors, | |
| meta: { | |
| delimiter: delim, | |
| linebreak: newline, | |
| aborted: aborted, | |
| truncated: !!stopped | |
| } | |
| }; | |
| } | |
| // Executes the user's step function and resets data & errors. | |
| function doStep() | |
| { | |
| step(returnable()); | |
| data = [], errors = []; | |
| } | |
| }; | |
| // Sets the abort flag | |
| this.abort = function() | |
| { | |
| aborted = true; | |
| }; | |
| // Gets the cursor position | |
| this.getCharIndex = function() | |
| { | |
| return cursor; | |
| }; | |
| } | |
| // Replaces bad config values with good, default ones | |
| function copyAndValidateConfig(origConfig) | |
| { | |
| if (typeof origConfig !== 'object') | |
| origConfig = {}; | |
| var config = copy(origConfig); | |
| if (typeof config.delimiter !== 'string' | |
| || config.delimiter.length != 1 | |
| || Baby.BAD_DELIMITERS.indexOf(config.delimiter) > -1) | |
| config.delimiter = DEFAULTS.delimiter; | |
| if (config.newline != '\n' | |
| && config.newline != '\r' | |
| && config.newline != '\r\n') | |
| config.newline = DEFAULTS.newline; | |
| if (typeof config.header !== 'boolean') | |
| config.header = DEFAULTS.header; | |
| if (typeof config.dynamicTyping !== 'boolean') | |
| config.dynamicTyping = DEFAULTS.dynamicTyping; | |
| if (typeof config.preview !== 'number') | |
| config.preview = DEFAULTS.preview; | |
| if (typeof config.step !== 'function') | |
| config.step = DEFAULTS.step; | |
| if (typeof config.complete !== 'function') | |
| config.complete = DEFAULTS.complete; | |
| if (typeof config.skipEmptyLines !== 'boolean') | |
| config.skipEmptyLines = DEFAULTS.skipEmptyLines; | |
| if (typeof config.fastMode !== 'boolean') | |
| config.fastMode = DEFAULTS.fastMode; | |
| return config; | |
| } | |
| function copy(obj) | |
| { | |
| if (typeof obj !== 'object') | |
| return obj; | |
| var cpy = obj instanceof Array ? [] : {}; | |
| for (var key in obj) | |
| cpy[key] = copy(obj[key]); | |
| return cpy; | |
| } | |
| function isFunction(func) | |
| { | |
| return typeof func === 'function'; | |
| } | |
| // export to Node... | |
| if ( typeof module !== 'undefined' && module.exports ) { | |
| module.exports = Baby; | |
| } | |
| // ...or as AMD module... | |
| else if ( typeof define === 'function' && define.amd ) { | |
| define( function () { return Baby; }); | |
| } | |
| // ...or as browser global | |
| // else { | |
| // global.Baby = Baby; | |
| // } | |
| // })(typeof window !== 'undefined' ? window : this); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment