Skip to content

Instantly share code, notes, and snippets.

@peter
Last active January 27, 2023 11:11
Show Gist options
  • Save peter/a613ba3f5f70bc6b36c73c0d91614083 to your computer and use it in GitHub Desktop.
Save peter/a613ba3f5f70bc6b36c73c0d91614083 to your computer and use it in GitHub Desktop.
Simple node script that provides a jq alternative for processing JSON on the command line
#!/usr/bin/env node --max-old-space-size=4096
// Simple node script that provides a jq alternative for processing JSON on the command line.
// Supports newline separated JSON (JSON lines) as well as JSON data.
// Also supports log data where some lines are JSON and some aren't and log lines where the
// beginning of the line is text and the end of the line is JSON, i.e.:
//
// 2022-10-18T14:07:53.960Z [INFO ] starting server with config: {"port":3000}
//
// USAGE:
//
// With pipe (can break for large amounts of JSON data)
//
// cat some-data.json | json <javascript-code>
//
// With a file path (works better for large amounts of data):
//
// json <javascript-code> <file-path>
//
// The JSON data is available in a `data` variable. If the JavaScript code argument starts with a "." then that is equivalent to starting it with "data.".
//
// You can use map and filter and useful lodash functions like get:
//
// cat some-data.json | json ".Items.map(i => ({id: get(i, 'id.S'), updatedAt: get(i, 'updatedAt.N')})).filter(i => new Date(Number(i.updatedAt)) < new Date())"
//
// You can access the JSON data with the data variable:
//
// cat some-data.json | json "Object.keys(data)"
//
// Split complex processing with pipes if it helps readability:
//
// cat some-data.json \
// | json ".Items.map(i => ({id: get(i, 'id.S'), updatedAt: get(i, 'updatedAt.N')}))" \
// | json ".filter(i => new Date(Number(i.updatedAt)) < new Date())"
//
// Easily print lengths of arrays or keys of objects etc:
//
// cat some-data.json | json ".Items.length"
//
// Pretty print (echo) JSON data:
//
// cat some-data.json | json .
//
// Raw output (i.e. a raw string/number or line separated values etc.)
//
// cat some-data.json | RAW=true json '.filter(l => l.message && l.message.includes("Request failed")).map(l => l.status).join("\n")'
const fs = require('fs')
const readline = require('readline')
const _ = require('lodash')
Object.assign(global, require('lodash'))
const R = require('ramda')
const { diff } = require('object-diffy')
// https://stackoverflow.com/questions/1248302/how-to-get-the-size-of-a-javascript-object
/**
 * Roughly estimates the size in bytes of a value by walking everything reachable from it.
 *
 * Booleans count as 4 bytes, numbers as 8 bytes and strings as 2 bytes per UTF-16 code unit.
 * Property names and container overhead are not counted. Each object is visited only once,
 * so shared references and cycles are handled.
 *
 * @param {*} object - The value to measure.
 * @returns {number} The estimated size in bytes.
 */
function roughSizeOfObject(object) {
  // A Set gives constant-time "already visited" checks; scanning an array with
  // indexOf made the traversal quadratic in the number of objects.
  const visited = new Set();
  const stack = [object];
  let bytes = 0;
  while (stack.length) {
    const value = stack.pop();
    if (typeof value === 'boolean') {
      bytes += 4;
    } else if (typeof value === 'string') {
      bytes += value.length * 2;
    } else if (typeof value === 'number') {
      bytes += 8;
    } else if (typeof value === 'object' && !visited.has(value)) {
      // typeof null is 'object' as well; for...in over null simply iterates nothing.
      visited.add(value);
      for (const key in value) {
        stack.push(value[key]);
      }
    }
  }
  return bytes;
}
/**
 * Synchronously reads everything available on standard input (file descriptor 0).
 *
 * @returns {string} The stdin contents decoded as a string.
 */
function readStdIn() {
  const STDIN_FD = 0
  const stdinBuffer = fs.readFileSync(STDIN_FD)
  return stdinBuffer.toString()
}
/**
 * Parses one line of input, taking into account a multi-line JSON document that may be in progress.
 *
 * Handled cases:
 * - No document in progress and the line contains "{" and ends with "}": everything from the
 *   first "{" is parsed as JSON; any text before it is stored in `_line` (unless the document
 *   already has a truthy `_line`).
 * - The line is exactly "{": a new multi-line document is started.
 * - A document is in progress: the line is appended to it, and when the line is exactly "}"
 *   the accumulated lines are parsed as one JSON document.
 * - Anything else: the line is wrapped as `{ _line: line }`.
 *
 * @param {string} line - The line to parse.
 * @param {string[]} openLines - Lines of the multi-line document collected so far (not mutated).
 * @returns {{openLines: string[], parsedLine?: Object, error?: string}} The updated buffer of
 *   open lines, the parsed entry when one was completed, and an error message when parsing threw.
 */
function parseLine(line, openLines) {
  const result = { openLines: [...openLines] }
  try {
    const openIndex = line.indexOf('{')
    if (openLines.length === 0 && openIndex >= 0 && line.endsWith('}')) {
      const doc = JSON.parse(line.substring(openIndex))
      if (openIndex > 0 && !doc._line) doc._line = line.substring(0, openIndex)
      result.parsedLine = doc
    } else if (line === '{') {
      result.openLines = [line]
    } else if (openLines.length > 0) {
      result.openLines.push(line)
      if (line === '}') {
        // Parse the copy that includes the closing brace just pushed; the `openLines`
        // argument does not contain it, so parsing that always failed.
        result.parsedLine = JSON.parse(result.openLines.join('\n'))
        result.openLines = []
      }
    } else {
      result.parsedLine = { _line: line }
    }
  } catch (err) {
    // Fall back to treating the line as plain text and drop any half-collected document.
    result.error = `Error thrown parsing line: ${line} - ${err.stack}`
    result.openLines = []
    result.parsedLine = { _line: line }
  }
  return result
}
/**
 * Loads the input data, preferring a single JSON document and falling back to line-by-line parsing.
 *
 * The whole input (the file at `filePath`, or stdin when no path is given) is first parsed as
 * one JSON document. If that throws, the input is processed line by line with `parseLine` and
 * the parsed entries are collected into an array.
 *
 * @param {string} [filePath] - Path of the file to read; stdin is read when omitted.
 * @returns {Promise<*>} The parsed JSON document, or an array of the entries parsed per line.
 * @throws {Error} When the line-by-line fallback itself throws.
 */
async function jsonIn(filePath) {
  let textInput
  try {
    if (filePath) {
      // Read and parse the file directly instead of require(filePath): require() resolves
      // relative paths against this script's directory rather than the working directory,
      // and evaluates files without a .json extension as JavaScript.
      const fileText = fs.readFileSync(filePath, 'utf8')
      // Drop a leading byte order mark, which JSON.parse rejects.
      const jsonText = fileText.charCodeAt(0) === 0xFEFF ? fileText.slice(1) : fileText
      return JSON.parse(jsonText)
    }
    textInput = readStdIn()
    return JSON.parse(textInput)
  } catch (jsonErr) {
    try {
      const lines = []
      let openLines = []
      let nErrors = 0
      let lineCount = 0
      const processLine = (line) => {
        lineCount += 1
        const result = parseLine(line, openLines)
        if (result.error) {
          // Diagnostics go to stderr so they do not end up inside the JSON written to stdout.
          console.error(result.error)
          nErrors += 1
        }
        if (result.parsedLine) lines.push(result.parsedLine)
        openLines = result.openLines
      }
      if (filePath) {
        // Stream the file so it is consumed one line at a time.
        const rl = readline.createInterface({
          input: fs.createReadStream(filePath),
          crlfDelay: Infinity
        })
        for await (const line of rl) {
          processLine(line)
        }
      } else {
        // Reuse the stdin text if it was already read before JSON.parse failed.
        textInput = textInput || readStdIn()
        const textLines = textInput.trim().split('\n').map(l => l.trim()).filter(Boolean)
        for (const line of textLines) {
          processLine(line)
        }
      }
      // lineCount is tracked in both branches; textLines only exists in the stdin branch.
      if (nErrors > 0) console.error(`Failed to parse ${nErrors}/${lineCount} lines due to errors`)
      return lines
    } catch (linesErr) {
      console.log(jsonErr.stack)
      console.log(linesErr.stack)
      throw new Error('Could not parse input as JSON or as JSON lines')
    }
  }
}
/**
 * Writes `data` to stdout as pretty-printed JSON with a four-space indent.
 *
 * @param {*} data - The value to serialize and print.
 */
function printJson(data) {
  const INDENT = 4
  const prettyText = JSON.stringify(data, null, INDENT)
  console.log(prettyText)
}
/**
 * Writes each element of `data` to stdout as compact JSON, one element per line.
 *
 * @param {Iterable<*>} data - The values to print.
 */
function printJsonLines(data) {
  for (const entry of data) {
    const serialized = JSON.stringify(entry)
    console.log(serialized)
  }
}
/**
 * Returns the JavaScript expression given as the first command line argument.
 *
 * A missing or empty argument, or a lone ".", yields "data". An argument starting
 * with "." gets "data" prepended (jq-like dot syntax). Anything else is returned unchanged.
 *
 * @returns {string} The expression to evaluate.
 */
function getCodeArg() {
  const rawCode = process.argv[2]
  if (!rawCode || rawCode === '.') return 'data'
  return rawCode.startsWith('.') ? 'data' + rawCode : rawCode
}
// Helper functions
/**
 * Groups `data` with lodash `groupBy` and returns the size of each group, largest first.
 *
 * @param {*} data - The collection to group.
 * @param {*} fn - The iteratee handed to lodash `groupBy`.
 * @returns {Array<[string, number]>} `[groupKey, count]` pairs sorted by descending count.
 */
function countBy(data, fn) {
  const groups = _.groupBy(data, fn)
  const sizeByKey = _.mapValues(groups, (members) => members.length)
  const pairs = Object.entries(sizeByKey)
  return pairs.sort((left, right) => right[1] - left[1])
}
/**
 * Entry point: loads the input, evaluates the expression from the command line and prints the result.
 *
 * Output format is selected through environment variables:
 * - RAW=true prints the result as-is with console.log.
 * - JSONL=true prints array results as one JSON value per line.
 * - Otherwise the result is pretty-printed as JSON.
 */
async function main() {
  const code = getCodeArg()
  const filePath = process.argv[3]
  const data = await jsonIn(filePath)
  // Direct eval: the expression runs with this function's local bindings in scope, which is
  // how it reaches `data`. It executes whatever was passed as the command line argument.
  const processedData = eval(code)
  if (process.env.RAW === 'true') {
    console.log(processedData);
    return
  }
  if (process.env.JSONL === 'true' && Array.isArray(processedData)) {
    printJsonLines(processedData)
    return
  }
  printJson(processedData)
}
// Run the script. Handle rejections explicitly so a failure is reported on stderr and the
// process ends with a non-zero exit code instead of leaving the promise floating.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment