|
import argparse |
|
import csv |
|
from collections import OrderedDict |
|
from tempfile import TemporaryFile |
|
|
|
# Requires yajl and cffi. On macOS: brew install yajl; pip install cffi
# See https://github.com/ICRAR/ijson/pull/1
|
import ijson.backends.yajl2_cffi as ijson |
|
|
|
|
|
def build_row(obj, row, path):
    """Flatten a nested JSON-like value into ``row``, in place.

    Dicts and lists are walked recursively. Every non-container value
    reached is stored in ``row`` under the '/'-joined sequence of dict keys
    and list indices that lead to it (each converted with ``str``).
    Empty dicts and lists contribute no entries.

    Args:
        obj: the value to flatten (dict, list, or any leaf value).
        row: dict that receives the flattened ``path -> value`` entries.
        path: tuple of string components leading to ``obj``; pass ``()``
            for the root.

    Returns:
        None; ``row`` is mutated.
    """
    is_mapping = isinstance(obj, dict)

    # Leaf value: record it under the accumulated path and stop descending.
    if not is_mapping and not isinstance(obj, list):
        row['/'.join(path)] = obj
        return

    # Dicts yield (key, value) pairs, lists yield (index, value) pairs.
    children = obj.items() if is_mapping else enumerate(obj)
    for name, child in children:
        build_row(child, row, path + (str(name),))
|
|
|
|
|
def main():
    """Convert a JSON array file into a CSV file, one row per array element.

    Command-line arguments:
        input_file: path of the JSON file to read. The elements of its
            top-level array are streamed one at a time with ijson.
        output_file: path of the CSV file to write.

    Each element is flattened with ``build_row`` into ``path -> value``
    pairs. The full set of columns is only known once every element has
    been seen, so the conversion runs in two passes: rows are first spilled
    to a temporary CSV file while the column names are collected, then
    copied to the output file under a header of the sorted column names.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='the input JSON file')
    parser.add_argument('output_file', help='the output CSV file')
    args = parser.parse_args()

    # Column names in first-seen order; only the keys are ever used.
    fieldnames = OrderedDict()

    # The spill file is internal, so pin it to UTF-8: the locale default
    # encoding may be unable to represent characters found in the JSON.
    with TemporaryFile('w+', newline='', encoding='utf-8') as tmpfile:
        tmp_writer = csv.writer(tmpfile)

        # Pass 1: flatten each element and spill it to the temporary file.
        with open(args.input_file, 'rb') as json_file:
            for entry in ijson.items(json_file, 'item'):
                row = {}
                build_row(entry, row, ())

                fieldnames.update(row)
                # Columns are written in the order known so far. Earlier
                # rows are therefore shorter; DictReader pads the missing
                # trailing columns when the file is read back.
                tmp_writer.writerow(
                    [row.get(fieldname) for fieldname in fieldnames])

        # Pass 2: re-read the spilled rows now that all columns are known.
        tmpfile.seek(0)
        reader = csv.DictReader(tmpfile, fieldnames=list(fieldnames))

        # newline='' is required by the csv module; without it, platforms
        # that translate '\n' (e.g. Windows) emit '\r\r\n' row endings.
        with open(args.output_file, 'w', newline='') as csv_file:
            out_writer = csv.DictWriter(csv_file, fieldnames=sorted(fieldnames))
            out_writer.writeheader()
            out_writer.writerows(reader)
|
|
|
|
|
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()