Last active
November 19, 2024 05:14
-
-
Save westc/15caf03f6c945564a126 to your computer and use it in GitHub Desktop.
Simple CSV Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Parses a string as if it was the contents of a CSV file. | |
| * @param {string} strCSV | |
| * @param {?parseCSV__Options=} options | |
| * @return {*[][]} | |
| * An array of arrays where each value comes from `strCSV`. | |
| */ | |
| function parseCSV(strCSV, options) { | |
| // Get and validate the delimiter. | |
| const delimiter = options?.delimiter ?? ','; | |
| if (/^(?:[\r\n]|[^]{2,}|)$/.test(delimiter)) { | |
| throw new Error('Delimiter must be exactly one character and cannot be the double-quote or the linefeed or the newline character.'); | |
| } | |
| // Get the parseValue function. | |
| const parseValue = options?.parseValue; | |
| // Parse every part of the CSV. | |
| let row = []; | |
| const rows = [row]; | |
| const rgx = new RegExp(`(${delimiter})|(\r\n?|\n)|"((?:[^"]+|"")*)"|([^\r\n${delimiter}]+)`, 'g'); | |
| let lastWasDelim = 2; | |
| for (let {1: isDelim, 2: isNewRow, 3: quoted, 4: nonQuoted, index} of strCSV.matchAll(rgx)) { | |
| if (isNewRow || isDelim) { | |
| if (lastWasDelim) { | |
| row.push(''); | |
| } | |
| if (isNewRow) { | |
| rows.push(row = []); | |
| } | |
| lastWasDelim = isNewRow ? 2 : 1; | |
| } | |
| else { | |
| if (!lastWasDelim) { | |
| throw new Error(`Unexpected cell value starting at character ${index}`); | |
| } | |
| if (!nonQuoted) { | |
| nonQuoted = nonQuoted ?? quoted.replaceAll('""', '"'); | |
| } | |
| row.push(parseValue ? parseValue(nonQuoted, rows.length - 1, row.length) : nonQuoted); | |
| lastWasDelim = false; | |
| } | |
| } | |
| // If the last character was a comma then make sure to add an empty string | |
| // value and parse it if necessary. | |
| if (lastWasDelim === 1) { | |
| row.push(parseValue ? parseValue('', rows.length - 1, row.length) : ''); | |
| } | |
| // The array of arrays of values. | |
| return rows; | |
| } | |
| /** | |
| * @typedef parseCSV__Options | |
| * @property {string} delimiter | |
| * Defaults to `","`. The character that delimits each cell in a row. | |
| * @property {?((string: string, rowIndex: number, colIndex: number) => any)=} parseValue | |
| * If defined this function will be called for each cell's string to determine | |
| * the appropriate value for this cell. | |
| */ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Converts a an array of arrays of values into a CSV string. | |
| * @param {*[][]} rows | |
| * The array of arrays of values that will be turned into a CSV string. | |
| * @param {?toCSV__Options=} options | |
| * Optional options that alter how `rows` is turned into a CSV string. | |
| * @returns {string} | |
| * The CSV string version of `rows`. | |
| */ | |
| function toCSV(rows, options) { | |
| const delimiter = options?.delimiter ?? ','; | |
| if (/^(?:[\r\n]|[^]{2,}|)$/.test(delimiter)) { | |
| throw new Error('Delimiter must be exactly one character and cannot be the double-quote or the linefeed or the newline character.'); | |
| } | |
| const toValue = options?.toValue; | |
| const rgxNeedsQuoting = new RegExp(`[${delimiter}"\r\n]`); | |
| return rows.map( | |
| (row, rowIndex) => row.map((value, colIndex) => { | |
| if (toValue) { | |
| value = toValue(value, rowIndex, colIndex); | |
| } | |
| if ('string' !== typeof value) { | |
| value = '' + value; | |
| } | |
| return rgxNeedsQuoting.test(value) | |
| ? `"${value.replaceAll('"','""')}"` | |
| : value; | |
| }).join(delimiter) | |
| ).join('\n'); | |
| } | |
| /** | |
| * @typedef toCSV__Options | |
| * @property {string} delimiter | |
| * Defaults to `","`. The character that delimits each cell in a row. | |
| * @property {?((string: string, rowIndex: number, colIndex: number) => any)=} toValue | |
| * If defined this function will be called for each value to get the values | |
| * that will appear in the CSV contents. | |
| */ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Converts a an array of arrays of values into a CSV string. | |
| * @param {Record<string,*>[]} records | |
| * The array of arrays of values that will be turned into a CSV string. | |
| * @param {?toCSVTable__Options=} options | |
| * Optional options that alter how `rows` is turned into a CSV string. | |
| * @returns {string} | |
| * The CSV string version of `rows`. | |
| */ | |
| function toCSVTable(records, options) { | |
| const toValue = options?.toValue; | |
| let headers = options?.headers; | |
| if (!Array.isArray(headers)) { | |
| const fixHeaders = headers; | |
| headers = []; | |
| for (const record of records) { | |
| for (const rowHeader of Object.keys(record)) { | |
| if (!headers.includes(rowHeader)) { | |
| headers.push(rowHeader); | |
| } | |
| } | |
| } | |
| if ('function' === typeof fixHeaders) { | |
| headers = fixHeaders(headers); | |
| } | |
| } | |
| const headerCount = headers.length; | |
| const recordCount = records.length; | |
| const newRows = [headers]; | |
| for (let recordIndex = 0; recordIndex < recordCount; recordIndex++) { | |
| const record = records[recordIndex]; | |
| const newRow = []; | |
| for (let headerIndex = 0; headerIndex < headerCount; headerIndex++) { | |
| const header = headers[headerIndex]; | |
| const value = record[headers[headerIndex]]; | |
| newRow.push(toValue ? toValue(value, recordIndex, headerIndex, header) : value); | |
| } | |
| newRows.push(newRow); | |
| } | |
| return toCSV(newRows, {delimiter: options?.delimiter}); | |
| } | |
| /** | |
| * @typedef toCSVTable__Options | |
| * @property {(headers: string[]) => string[]|string[]} headers | |
| * Defaults to `","`. The character that delimits each cell in a row. | |
| * @property {string} delimiter | |
| * Defaults to `","`. The character that delimits each cell in a row. | |
| * @property {?((string: string, rowIndex: number, colIndex: number, header: string) => any)=} toValue | |
| * If defined this function will be called for each value to get the values | |
| * that will appear in the CSV contents. | |
| */ |
Author
Hi @radiocity, I see what you mean. I think my issue with your example is that it really isn't valid CSV. To make that valid it should really look like this anyway:
Lorem,ipsum, dolor, sit
Lorem,"ipsum""","dolor,""", sit
With that said, I will think about making the solution a bit more robust to handle your example as well. Thanks.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Parse fails with: