Last active
March 11, 2021 00:18
-
-
Save bsidhom/ba93423467cbb444319e6487888a50a3 to your computer and use it in GitHub Desktop.
Turn a top-level JSON array into a stream of objects using jq
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: jq --stream --null-input --from-file stream_array.jq
#
# Or, as a one-liner:
# jq --stream -n 'def end_array_or_object: length == 1 and (first | length == 1); def outer_primitive: length == 2 and (first | length == 0); def end_element: length > 0 and (last | (end_array_or_object or outer_primitive)); foreach (inputs | del(.[0][0])) as $path ([]; if end_element then [$path] else . + [$path] end; if end_element then fromstream(.[]) else empty end)'
#
# This is useful for processing large JSON files/indefinite streams of elements
# that happen to be wrapped in an outer array.
#
# The process is:
# - Stream the path elements (via `jq --stream`).
# - Remove the leading path element from all entries. This effectively drops
#   the outer array.
# - Collect the remaining stream elements into complete objects. Output each
#   object as it completes.
#   - This collection/output happens in a foreach reduction, which is why we
#     need `--null-input`.
# Input: one stream event whose leading (outer-array) path component has
# already been removed.
# True when the event is a closing marker -- a bare `[path]` with no value --
# and that path has exactly one component. Such an event ends a non-empty
# array or object that is a direct element of the outer array.
def end_array_or_object: length == 1 and (first | length == 1);
# Input: one stream event whose leading (outer-array) path component has
# already been removed.
# True for `[[], value]`: a value with an empty remaining path. This is an
# element of the outer array that arrives as a single event, i.e. a scalar or
# an empty array/object.
def outer_primitive: length == 2 and (first | length == 0);
# Input: the array of stream events accumulated so far (the foreach state).
# True when that buffer is non-empty and its most recent event completes an
# element of the outer array: either a depth-1 closing marker or a
# single-event value with an empty path.
def end_element: length > 0 and (last | (end_array_or_object or outer_primitive));
# Main pipeline: read stream events, strip the outer array index from each
# path, buffer events until one outer element is complete, then rebuild and
# emit that element. Must be run with `--stream --null-input` so that `inputs`
# yields every stream event.
foreach (
    inputs
    # An empty top-level array (or object) is streamed as the single event
    # `[[], []]` (or `[[], {}]`). It has no elements, so drop it here;
    # otherwise it would look like a single-event element and be emitted as a
    # spurious `[]`/`{}`. Top-level scalars are still passed through unchanged.
    # (The one-liner in the header comment does not include this filter.)
    | select(. != [[], []] and . != [[], {}])
    # Remove the outer array index from the path.
    | del(.[0][0])
    # The outer end-of-array marker becomes `[[]]`. It matches neither
    # predicate used by end_element and fromstream ignores it, so it does not
    # need to be filtered out.
) as $path (
    # State: the stream events belonging to the element currently being built.
    [];
    if end_element then
        # The previous event completed an element (already emitted), so this
        # event starts a fresh buffer.
        [$path]
    else
        # Still inside the current element: append this event.
        . + [$path]
    end;
    if end_element then
        # The buffer now holds one complete element; rebuild and emit it.
        fromstream(.[])
    else
        # Element not complete yet; emit nothing.
        empty
    end
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment