Skip to content

Instantly share code, notes, and snippets.

@pstaender
Created February 1, 2016 12:08
Show Gist options
  • Save pstaender/97d6f3555360df6d0726 to your computer and use it in GitHub Desktop.
Save pstaender/97d6f3555360df6d0726 to your computer and use it in GitHub Desktop.
Examples of using CoffeeScript to convert and process large files
#!/usr/bin/env coffee
# Converts JSON (read from --file or from stdin) to CSV on stdout.
json2csv = require('json2csv')
JSONStream = require('JSONStream')
expandHomeDir = require('expand-home-dir')
_ = require('lodash')
# { camelize, underscored } = require('underscore.string')
fs = require('fs')
# Command-line options, parsed with yargs.
# All defaults are given as strings, including the boolean-like `header` and
# `flatten` options ('true' / 'false'); the code further down compares against
# those strings.
# `jsonPath` has no default and is therefore undefined unless passed.
options = require('yargs')
  .help('h')
  .alias('h', 'help')
  .describe('header', 'display csv header [true,false,only]')
  .default('header', 'true')
  .describe('fields', 'comma separated fields (e.g. `id,name,owner.login` or `*` for all)')
  .default('fields', '*')
  .describe('file', 'path to json file')
  .default('file', '')
  .describe('flatten', 'flatten JSON, so user.login.name is possible as column')
  .default('flatten', 'false')
  .describe('delimiter', 'csv delimiter')
  .default('delimiter', ',')
  .describe('quote', 'csv quote')
  .default('quote', '"')
  .alias('quote', 'terminator')
  .describe('jsonPath', 'path of parsing (e.g. `items`)')
  .epilogue("""
    converts json data to csv
    Data can be given by a file or pipe
    1) cat data.json | coffee json_to_csv.coffee > data.csv
    or
    2) coffee json_to_csv.coffee --file=data.json > data.csv
    """)
  .argv
# Unpack the parsed options into the local names used by the rest of the script.
{ delimiter, fields, file, quote, jsonPath, header, flatten } = options
# Flattening is on for every value except false / 'false'.
# Going through String() makes a boolean false (what yargs yields for
# `--no-flatten`) disable flattening as well; compared directly against the
# string 'false' it would have counted as "enabled".
flatten = String(flatten) isnt 'false'
# Choose the input: the file given via --file, otherwise stdin.
if file.trim()
  file = expandHomeDir(file)
  # fs.lstatSync throws when the path cannot be stat'ed (it never returns a
  # falsy value), so the failure has to be caught to reach the error message.
  try
    fs.lstatSync(file)
  catch err
    console.error("File '#{file}' doesnt exists / isnt readable")
    process.exit(1)
  stream = fs.createReadStream(file)
else
  # stream via stdin
  stream = process.stdin
# Streaming JSON parser. `--jsonPath` is split on ',' into the path segments
# handed to JSONStream.parse; without it JSONStream.parse() is called with no path.
parser = if jsonPath then JSONStream.parse(jsonPath.split(',')) else JSONStream.parse()
# When the header is switched off it counts as "already processed", so the
# first chunk is printed as plain rows. String() also covers a boolean false
# (what yargs yields for `--no-header`), not only the string 'false'.
headerIsProcessed = String(header) is 'false'
# Converts the first chunk of parsed data with the CSV column titles enabled.
#
# data            - array of row objects from the first parser chunk
# header          - value of the `--header` option; with 'only' just the first
#                   line of the generated csv is printed and the process exits
# json2csvOptions - options object passed on to json2csv; it is mutated here
#                   (data, hasCSVColumnTitle and possibly fields are set)
processHeader = (data, header, json2csvOptions) ->
  # For 'only' the first row is enough to obtain the title line.
  # `data?[0]` avoids a TypeError on a missing chunk; json2csv then reports it.
  json2csvOptions.data = if header is 'only' then data?[0] else data
  json2csvOptions.hasCSVColumnTitle = true
  if fields isnt '*' and fields.trim().length > 0
    # specific fields
    json2csvOptions.fields = fields.split(delimiter)
  json2csv json2csvOptions, (err, csv) ->
    if err
      # csv is not usable on failure; report the problem instead of
      # crashing on csv.split below
      console.error(err.message or err)
      return process.exit(1)
    if header is 'only'
      console.log(csv.split('\n')[0])
      process.exit(0)
    else
      console.log(csv)
# Convert every chunk emitted by the parser to CSV and print it to stdout.
# The first chunk goes through processHeader unless the header is disabled.
parser
  .on 'data', (data) ->
    # nothing to convert for a missing chunk (e.g. a JSON null)
    return unless data?
    # json2csv works on rows; wrap a single value into an array of length 1
    data = [data] unless Array.isArray(data)
    # `del` and `quotes` are json2csv's names for the column delimiter and
    # the quotation mark; without them --delimiter / --quote had no effect
    # on the generated csv.
    json2csvOptions = { flatten, del: delimiter, quotes: quote }
    # same condition as in processHeader, so an empty --fields means "all"
    # for the rows as well as for the header
    if fields isnt '*' and fields.trim().length > 0
      json2csvOptions.fields = fields.split(delimiter)
    unless headerIsProcessed
      headerIsProcessed = true
      processHeader(data, header, json2csvOptions)
    else
      # print out rows
      json2csvOptions.data = data
      json2csvOptions.hasCSVColumnTitle = false
      json2csv json2csvOptions, (err, csv) ->
        if err
          # report the failure instead of printing "undefined"
          console.error(err.message or err)
          return process.exit(1)
        console.log csv

# Start the conversion by feeding the chosen input into the parser.
stream
  .pipe(parser)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment