Last active
December 13, 2019 09:49
-
-
Save tenthree/b44a55c65b96a8eadeed18ab1ed6e34a to your computer and use it in GitHub Desktop.
convert simple csv formatted data to json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs') | |
const NUM_EXP = /^(\+|-)?(\d+)(\.\d+)?$/ | |
const PERCENT_EXP = /^(\+|-)?(\d+)(\.\d+)?%$/ | |
const CSV_COLUMN_EXP = /^"(.*)"$/g | |
const NULL_VALUES = [ '' ] | |
const CSV_OPTIONS = { | |
flip: false, | |
newline: '\r\n', | |
beginRow: 1, | |
fields: [/* String */], | |
insert: {/* field: value */}, | |
alias: {/* value: newValue | field: {value: newValue } */}, | |
required: [/* fields */], | |
filter: (row) => true, // function (row) { return Boolean } | |
groupBy: '', // field | |
sortOn: '', // field | |
sortByDesc: false | |
} | |
function stripQuotationMark (value = '') { | |
if (CSV_COLUMN_EXP.test(value)) { | |
value = value.replace(CSV_COLUMN_EXP, '$1') | |
} | |
return value | |
} | |
function parseValue (field, value, alias = {}) { | |
value = stripQuotationMark(value.toString().trim()) | |
if (NUM_EXP.test(value)) { | |
return Number(value) | |
} else if (PERCENT_EXP.test(value)) { | |
return Number((Number(value.replace(/%$/, '')) / 100).toFixed(4)) | |
} else if (NULL_VALUES.indexOf(value) !== -1) { | |
return null | |
} | |
return value | |
} | |
function aliasValue (field, value, alias = {}) { | |
let subAlias = alias[field] | |
// no sub-alias or invalid setting | |
if (!subAlias || subAlias !== Object(subAlias)) { | |
return (typeof alias[value] !== 'undefined') ? alias[value] : value | |
} | |
// sub-alias on the specified field | |
return (typeof subAlias[value] !== 'undefined') ? subAlias[value] : value | |
} | |
function log (path, options = {}) { | |
let { beginRow, fields, insert, alias, required, groupBy, sortOn, sortByDesc } = options | |
let log = `\n[csv]` | |
let indent = ' ' | |
let insertList = Object.keys(insert).map(prop => `${prop}:${insert[prop]}`) | |
let aliasList = Object.keys(alias).map(prop => `${prop}:${alias[prop]}`) | |
log += `\n${indent}[source] ${path}` | |
log += `\n${indent}[beginRow] ${beginRow}` | |
if (fields.length) { | |
log += `\n${indent}[fields] ${fields.filter(field => !!field)}` | |
} | |
if (insertList.length) { | |
log += `\n${indent}[insert] ${insertList}` | |
} | |
if (aliasList.length) { | |
log += `\n${indent}[alias] ${aliasList}` | |
} | |
if (required.length) { | |
log += `\n${indent}[required] ${required}` | |
} | |
if (groupBy) { | |
log += `\n${indent}[groupBy] ${groupBy}` | |
} | |
if (sortOn) { | |
log += `\n${indent}[sortOn] ${sortOn}` | |
} | |
if (sortByDesc) { | |
log += `\n${indent}[sortByDesc] ${sortByDesc}` | |
} | |
console.log(log) | |
} | |
function flipArray2d (arr) { | |
return arr[0].map((col, index) => arr.map(row => row[index])).slice() | |
} | |
function parseCsvColumns (str = '') { | |
return str.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/) | |
} | |
function parseCSV (path, options = {}) { | |
options = Object.assign({}, CSV_OPTIONS, options) | |
let { beginRow, flip, fields, insert, alias, required, filter, groupBy, sortOn, sortByDesc } = options | |
// log | |
log(path, options) | |
// read raw data | |
let data = fs | |
.readFileSync(path, 'utf-8') | |
.split(options.newline) | |
.map(row => parseCsvColumns(row)) | |
// flip raw data | |
if (flip) { | |
data = flipArray2d(data) | |
} | |
// if there is no custom fields in options, | |
// use first-row values as name of fields | |
if (beginRow > 0 && (!fields || !fields.length)) { | |
fields = data[0] | |
} | |
// slice from beginRow and process data | |
let json = data.slice(beginRow) | |
.map(row => { | |
return row | |
.reduce((obj, value, index) => { | |
let field = fields[index] | |
if (!field) { | |
return obj | |
} | |
obj[field] = aliasValue(field, parseValue(field, value, alias), alias) | |
return obj | |
}, { ...insert }) | |
}) | |
.filter(row => { | |
// filter by required fields | |
return !required.some(f => (row[f] === null || typeof row[f] === 'undefined')) | |
}) | |
// custom filter function | |
if (filter) { | |
json = json.filter(filter) | |
} | |
// groupBy | |
if (groupBy && Array.isArray(json)) { | |
json = json.reduce((obj, row) => { | |
let groupName = row[groupBy] | |
if (typeof obj[groupName] === 'undefined') { | |
obj[groupName] = [] | |
} | |
obj[groupName].push(row) | |
return obj | |
}, {}) | |
} | |
// sortOn | |
if (sortOn && typeof json === 'object') { | |
let sortHandler = !sortByDesc | |
? (a, b) => a[sortOn] - b[sortOn] | |
: (a, b) => b[sortOn] - a[sortOn] | |
if (Array.isArray(json)) { | |
// sort array | |
json.sort(sortHandler) | |
} else { | |
// sort array by group in object after groupby processing | |
for (let groupName in json) { | |
Array.isArray(json[groupName]) && json[groupName].sort(sortHandler) | |
} | |
} | |
} | |
return JSON.parse(JSON.stringify(json)) | |
} | |
// -------------------------------------------------- | |
// Example | |
// -------------------------------------------------- | |
const CSV_SRC = './src/data' | |
const JSON_DEST = './src/data' | |
let data = parseCSV(`${CSV_SRC}/source.csv`, { | |
fields: [ 'year', 'order', 'party', 'proportion', 'seats', 'grants' ], | |
alias: { | |
// value: newValue | |
'民主進步黨': '民進黨', | |
'中國國民黨': '國民黨', | |
// field: { value: newValue } | |
'enable': { 'Y': true, 'N': false } | |
}, | |
groupBy: 'year', | |
sortOn: 'order', | |
sortByDesc: false | |
}) | |
// output | |
fs.writeFile(`${JSON_DEST}/result.json`, JSON.stringify(data), (err) => { | |
err && console.log(err) | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment