-
-
Save peter/a613ba3f5f70bc6b36c73c0d91614083 to your computer and use it in GitHub Desktop.
Simple node script that provides a jq alternative for processing JSON on the command line
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node --max-old-space-size=4096
// Simple node script that provides a jq alternative for processing JSON on the command line.
// Supports newline separated JSON (JSON lines) as well as JSON data.
// Also supports log data where some lines are JSON and some aren't, and log lines where the
// beginning of the line is text and the end of the line is JSON, i.e.:
//
// 2022-10-18T14:07:53.960Z [INFO ] starting server with config: {"port":3000}
//
// USAGE:
//
// With a pipe (can break for large amounts of JSON data):
//
// cat some-data.json | json <javascript-code>
//
// With a file path (works better for large amounts of data):
//
// json <javascript-code> <file-path>
//
// The JSON data is available in a `data` variable. If the JavaScript code argument starts with a "." then that is equivalent to starting it with "data.".
//
// You can use map and filter and useful lodash functions like get:
//
// cat some-data.json | json ".Items.map(i => ({id: get(i, 'id.S'), updatedAt: get(i, 'updatedAt.N')})).filter(i => new Date(Number(i.updatedAt)) < new Date())"
//
// You can access the JSON data with the data variable:
//
// cat some-data.json | json "Object.keys(data)"
//
// Split complex processing with pipes if it helps readability:
//
// cat some-data.json \
// | json ".Items.map(i => ({id: get(i, 'id.S'), updatedAt: get(i, 'updatedAt.N')}))" \
// | json ".filter(i => new Date(Number(i.updatedAt)) < new Date())"
//
// Easily print lengths of arrays or keys of objects etc.:
//
// cat some-data.json | json ".Items.length"
//
// Pretty print (echo) JSON data:
//
// cat some-data.json | json .
//
// Raw output (i.e. a raw string/number or line separated values etc.):
//
// cat some-data.json | RAW=true json '.filter(l => l.message && l.message.includes("Request failed")).map(l => l.status).join("\n")'
const fs = require('fs')
const path = require('path')
const readline = require('readline')
const _ = require('lodash')
Object.assign(global, require('lodash'))
const R = require('ramda')
const { diff } = require('object-diffy')
// https://stackoverflow.com/questions/1248302/how-to-get-the-size-of-a-javascript-object | |
// Returns a rough estimate, in bytes, of the memory taken by `object`.
//
// Walks the value graph iteratively and sums fixed per-type costs:
//   boolean -> 4 bytes, number -> 8 bytes, string -> 2 bytes per UTF-16 code unit.
// Property names, null, undefined, functions and other types contribute nothing.
// Every object/array is visited only once, so shared references are not double
// counted and cyclic structures terminate.
function roughSizeOfObject(object) {
  // A Set gives constant-time "already visited" checks; scanning an array with
  // indexOf made the walk quadratic in the number of nested objects.
  const visited = new Set();
  const stack = [object];
  let bytes = 0;

  while (stack.length) {
    const value = stack.pop();
    const type = typeof value;

    if (type === 'boolean') {
      bytes += 4;
    } else if (type === 'string') {
      bytes += value.length * 2;
    } else if (type === 'number') {
      bytes += 8;
    } else if (type === 'object' && !visited.has(value)) {
      visited.add(value);
      // for...in over null iterates nothing, so null adds no bytes.
      for (const key in value) {
        stack.push(value[key]);
      }
    }
  }

  return bytes;
}
// Synchronously reads everything from standard input (file descriptor 0)
// and returns it as a string.
function readStdIn() {
  const STDIN_FD = 0
  const stdinBytes = fs.readFileSync(STDIN_FD)
  return stdinBytes.toString()
}
// Parses a single line of input, carrying multi-line JSON state between calls.
//
// Parameters:
//   line      - the current line of text.
//   openLines - lines collected so far for a multi-line JSON document that
//               started with a bare "{" line (empty when no document is open).
//               This array is not mutated.
//
// Returns an object with:
//   openLines  - the updated multi-line buffer to pass to the next call.
//   parsedLine - (optional) the value produced by this line:
//                  * the parsed JSON when the line contains "{" and ends with "}";
//                    any text before the "{" is stored in `_line` unless the
//                    document already has a truthy `_line`;
//                  * the parsed multi-line document when a bare "}" closes it;
//                  * `{ _line: line }` for plain text or when parsing fails.
//   error      - (optional) a message with the stack when JSON.parse threw.
function parseLine(line, openLines) {
  const result = { openLines: [...openLines] }
  try {
    const openIndex = line.indexOf('{')
    if (openLines.length === 0 && openIndex >= 0 && line.endsWith('}')) {
      // Single-line JSON, optionally preceded by free text (e.g. a log prefix).
      const doc = JSON.parse(line.substring(openIndex))
      if (openIndex > 0 && !doc._line) doc._line = line.substring(0, openIndex)
      result.parsedLine = doc
    } else if (line === '{') {
      // A bare "{" starts (or restarts) a multi-line document.
      result.openLines = [line]
    } else if (openLines.length > 0) {
      result.openLines.push(line)
      if (line === '}') {
        // Parse the buffer that includes the closing brace just pushed; the
        // incoming `openLines` lacks it and would always fail to parse.
        result.parsedLine = JSON.parse(result.openLines.join('\n'))
        result.openLines = []
      }
    } else {
      result.parsedLine = { _line: line }
    }
  } catch (err) {
    // Keep the raw text so the line is not lost, and drop any open document.
    result.error = `Error thrown parsing line: ${line} - ${err.stack}`
    result.openLines = []
    result.parsedLine = { _line: line }
  }
  return result
}
// Loads the input data, either from `filePath` or, when no path is given, from stdin.
//
// First tries to treat the whole input as a single JSON document. If that
// throws, falls back to line-by-line parsing via parseLine(), which yields an
// array with one entry per parsed line/document.
//
// Parameters:
//   filePath - optional path of the file to read; stdin is used when falsy.
//
// Returns (as a promise) the parsed JSON document, or an array of parsed lines.
// Throws an Error when the line-by-line fallback itself fails.
async function jsonIn(filePath) {
  let textInput
  try {
    if (filePath) {
      // require() resolves relative paths against this script's directory, not
      // the working directory. Resolve first so this reads the same file as the
      // createReadStream() fallback below.
      return require(path.resolve(filePath))
    }
    textInput = readStdIn()
    // NOTE: I've found JSON.parse intermittently errors out for data sizes around 15 MB but require(filePath) can handle more?
    return JSON.parse(textInput)
  } catch (jsonErr) {
    try {
      const lines = []
      let openLines = []
      let nErrors = 0
      let lineCount = 0
      const processLine = (line) => {
        lineCount += 1
        const result = parseLine(line, openLines)
        if (result.error) {
          // Was `console.log(error)`: `error` is not defined in this scope.
          console.log(result.error)
          nErrors += 1
        }
        if (result.parsedLine) lines.push(result.parsedLine)
        openLines = result.openLines
      }
      if (filePath) {
        // Stream the file so large inputs are not held in memory as one string.
        const rl = readline.createInterface({
          input: fs.createReadStream(filePath),
          crlfDelay: Infinity
        })
        for await (const line of rl) {
          processLine(line)
        }
      } else {
        // Reuse the text already read above unless reading stdin itself failed.
        textInput = textInput || readStdIn()
        const textLines = textInput.trim().split('\n').map(l => l.trim()).filter(Boolean)
        for (const line of textLines) {
          processLine(line)
        }
      }
      // lineCount is tracked for both branches; `textLines` only exists in the stdin branch.
      if (nErrors > 0) console.log(`Failed to parse ${nErrors}/${lineCount} lines due to errors`)
      return lines
    } catch (linesErr) {
      console.log(jsonErr.stack)
      console.log(linesErr.stack)
      throw new Error('Could not parse input as JSON or as JSON lines')
    }
  }
}
// Writes `data` to stdout as pretty-printed JSON using a 4-space indent.
function printJson(data) {
  const INDENT = 4
  const rendered = JSON.stringify(data, null, INDENT)
  console.log(rendered)
}
// Writes each item of the iterable `data` to stdout as compact JSON,
// one item per line (JSON lines format).
function printJsonLines(data) {
  for (const entry of data) {
    const serialized = JSON.stringify(entry)
    console.log(serialized)
  }
}
// Returns the JavaScript expression to evaluate, taken from the first CLI
// argument. Defaults to 'data' when the argument is missing or empty.
// Supports jq-like dot syntax: "." means 'data' and ".foo" means 'data.foo'.
function getCodeArg() {
  const rawArg = process.argv[2] || 'data'
  if (!rawArg.startsWith('.')) {
    return rawArg
  }
  return rawArg === '.' ? 'data' : `data${rawArg}`
}
// Helper functions | |
// Groups `data` with lodash's groupBy using `fn`, then returns an array of
// [groupKey, count] pairs sorted by count, largest first.
function countBy(data, fn) {
  const grouped = _.groupBy(data, fn)
  const sizes = _.mapValues(grouped, (members) => members.length)
  const pairs = Object.entries(sizes)
  pairs.sort(([, leftCount], [, rightCount]) => rightCount - leftCount)
  return pairs
}
// Entry point: loads the input, evaluates the user's expression against it and prints the result.
//
// Command line: argv[2] is the JavaScript expression (see getCodeArg), argv[3] is an optional file path.
// Environment:
//   RAW=true   - print the result with console.log instead of as JSON.
//   JSONL=true - when the result is an array, print one compact JSON value per line.
// Otherwise the result is pretty-printed as JSON.
async function main() {
const code = getCodeArg()
const filePath = process.argv[3]
// Must stay named `data`: the evaluated expression refers to the input by this name.
const data = await jsonIn(filePath)
// Direct eval runs the expression in this function's scope, so it can see `data`
// as well as the module-level names (e.g. `_`, `R`, `diff`, `countBy`).
// NOTE(review): this executes arbitrary code taken from the command line. That is
// the purpose of the tool, but the expression must never come from an untrusted source.
const processedData = eval(code)
if (process.env.RAW === 'true') {
console.log(processedData);
} else if (process.env.JSONL === 'true' && Array.isArray(processedData)) {
printJsonLines(processedData)
} else {
printJson(processedData)
}
}
// Run the CLI. main() returns a promise, so handle a rejection explicitly:
// report the failure on stderr and exit non-zero so shell pipelines can detect it.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment