Last active
February 13, 2023 09:19
-
-
Save atomkirk/eccb66f77b306d0d1fcecb2c605bd22e to your computer and use it in GitHub Desktop.
parse csv with javascript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import parseCsv from 'zipbooks/utils/parse-csv' | |
import { module, test } from 'qunit' | |
// Unit tests for the parse-csv utility: one case for the raw array-of-rows
// output and one for the `headers: true` array-of-objects output.
module('Unit | Utility | parse-csv', function(_hooks) {
  // Shared fixture: a header line, two plain rows, and one row whose first
  // field is quoted because it contains a comma.
  const csvText = 'name,age\nadam,31\ntim,32\n"St, clair",26'

  test('parses csv successfully', function(assert) {
    const expectedJson = '[["name","age"],["adam","31"],["tim","32"],["St, clair","26"]]'
    const rows = parseCsv(csvText)

    assert.equal(JSON.stringify(rows), expectedJson)
  })

  test('parses with header', function(assert) {
    const expectedJson = '[{"name":"adam","age":"31"},{"name":"tim","age":"32"},{"name":"St, clair","age":"26"}]'
    const records = parseCsv(csvText, {headers: true})

    assert.equal(JSON.stringify(records), expectedJson)
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// extracted from: https://stackoverflow.com/a/14991797/798055
/**
 * Parse a CSV string into rows of string fields.
 *
 * Handles double-quoted fields (which may contain commas and line breaks),
 * doubled quotes (`""`) as an escaped quote inside a quoted field, and
 * CRLF, LF or CR as row terminators outside quoted fields.
 *
 * @param {string} str - The CSV text to parse.
 * @param {Object} [opts={}] - Parsing options.
 * @param {boolean} [opts.headers] - When truthy, the first row is used as
 *   the list of keys and every following row is returned as an object
 *   mapping header name to field value. Values in columns that have no
 *   corresponding header are ignored.
 * @returns {Array<Array<string>>|Array<Object>} An array of rows (each an
 *   array of strings), or an array of objects when `opts.headers` is truthy.
 */
export default function parseCsv(str, opts = {}) {
  const rows = [];
  let inQuotes = false; // true means we're inside a quoted field
  let row = 0;
  let col = 0;

  // Walk the input one character at a time, tracking the current row and
  // column of the output array.
  for (let i = 0; i < str.length; i++) {
    const cc = str[i]; // current character
    const nc = str[i + 1]; // next character (undefined at end of input)

    // Lazily create the current row and field so empty lines/fields that
    // are followed by more input still show up in the result.
    if (rows[row] === undefined) {
      rows[row] = [];
    }
    if (rows[row][col] === undefined) {
      rows[row][col] = '';
    }

    // A doubled quote inside a quoted field is an escaped literal quote:
    // emit one quote and skip the second.
    if (cc === '"' && inQuotes && nc === '"') {
      rows[row][col] += cc;
      i++;
      continue;
    }

    // A lone quote opens or closes a quoted field.
    if (cc === '"') {
      inQuotes = !inQuotes;
      continue;
    }

    // An unquoted comma ends the current field. The next field is created
    // right away so that a trailing comma at the very end of the input
    // still yields an empty final field instead of being dropped.
    if (cc === ',' && !inQuotes) {
      col++;
      rows[row][col] = '';
      continue;
    }

    // An unquoted CRLF ends the row; consume both characters.
    if (cc === '\r' && nc === '\n' && !inQuotes) {
      row++;
      col = 0;
      i++;
      continue;
    }

    // An unquoted bare LF or CR also ends the row.
    if ((cc === '\n' || cc === '\r') && !inQuotes) {
      row++;
      col = 0;
      continue;
    }

    // Anything else is field content.
    rows[row][col] += cc;
  }

  if (!opts.headers) {
    return rows;
  }

  // Header mode: the first row names the keys, the rest become objects.
  const header = rows[0] || [];
  return rows.slice(1).map((fields) => {
    const record = {};
    fields.forEach((value, index) => {
      // Skip values in columns beyond the header row; they would otherwise
      // be stored under the meaningless key "undefined".
      if (index < header.length) {
        record[header[index]] = value;
      }
    });
    return record;
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment