Last active
October 20, 2021 16:32
-
-
Save zelaznik/968f74ce4b7f32d5fd46360a406b032f to your computer and use it in GitHub Desktop.
Convert JSON to CSV in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import csv | |
import io | |
import json | |
import pdb | |
import sys | |
from collections import OrderedDict | |
def serialize_cell(cell):
    """Return a representation of *cell* suitable for a single CSV field.

    Containers (``dict`` and ``list``) are encoded as JSON text so their
    structure survives inside one field. Any other value is returned
    unchanged and left for the ``csv`` writer to stringify.
    """
    if isinstance(cell, (dict, list)):
        return json.dumps(cell)
    return cell
def post_process(row, prefix=()):
    """Flatten one (possibly nested) JSON object into a single-level mapping.

    Nested objects are expanded recursively and each leaf is stored under the
    dot-joined path of the keys leading to it, so ``{"a": {"b": 1}}`` becomes
    ``{"a.b": 1}``. Leaf values are passed through ``serialize_cell``.

    Args:
        row: Mapping of string keys to JSON values.
        prefix: Tuple of ancestor keys already traversed; used by the
            recursion, callers normally leave the default.

    Returns:
        An ``OrderedDict`` of flattened key -> cell value, in the order the
        leaves were encountered.
    """
    new_row = OrderedDict()
    for key, value in row.items():
        path = prefix + (key,)
        # Only recurse into non-empty objects: an empty one has no leaves, so
        # recursing would silently drop the key (and its column) altogether.
        # Falling through keeps it as a "{}" cell instead.
        if isinstance(value, dict) and value:
            new_row.update(post_process(value, path))
        else:
            new_row[".".join(path)] = serialize_cell(value)
    return new_row
def get_master_keyset(rows):
    """Return every key that appears in at least one row, as a sorted tuple.

    Args:
        rows: Iterable of mappings (or any iterables of hashable keys).

    Returns:
        Tuple of the distinct keys in ascending order; ``()`` when *rows* is
        empty.
    """
    # A single union call avoids rebuilding the aggregate set once per row.
    return tuple(sorted(set().union(*rows)))
def json_string_to_csv(raw):
    """Convert a JSON array of objects into CSV text.

    Every object must have the same top-level keys in the same order. Nested
    objects are flattened into dot-separated column names by ``post_process``.
    Columns that are top-level keys come first, in their original JSON order;
    flattened nested columns follow, grouped by the position of their
    top-level parent and then alphabetically.

    Args:
        raw: JSON text whose top-level value is an array of objects.

    Returns:
        The CSV document (header row plus one row per object) as a string.
        An empty array yields an empty string.

    Raises:
        RuntimeError: If the objects do not all share the same top-level keys.
    """
    as_json = json.loads(raw, object_pairs_hook=OrderedDict)
    # An empty array has no keys to build a header from; without this guard
    # the single-element unpacking below fails with an unrelated ValueError.
    if isinstance(as_json, list) and not as_json:
        return ""

    key_sets = {tuple(row) for row in as_json}
    if len(key_sets) > 1:
        raise RuntimeError("Inconsistent keys across rows")
    (key_set,) = key_sets
    # Position lookup table, so ordering does not rescan the tuple per column.
    position = {key: index for index, key in enumerate(key_set)}

    processed = [post_process(row) for row in as_json]
    master_keyset = get_master_keyset(processed)

    def top_level_index(key):
        """Return the position of the top-level key that *key* belongs to."""
        if key in position:
            return position[key]
        # Splitting on the first "." breaks when a top-level key itself
        # contains a dot, so match against the real top-level keys and take
        # the longest one that is a dotted ancestor of this column.
        parents = [top for top in key_set if key.startswith(top + ".")]
        return position[max(parents, key=len)]

    def orderer(key):
        return (key not in position, top_level_index(key), key)

    sorted_keyset = tuple(sorted(master_keyset, key=orderer))
    buffer = io.StringIO()
    dict_writer = csv.DictWriter(buffer, sorted_keyset)
    dict_writer.writeheader()
    dict_writer.writerows(processed)
    return buffer.getvalue()
def main():
    """Read a JSON array of objects from stdin and write it to stdout as CSV."""
    raw_as_json = sys.stdin.read()
    raw_as_csv = json_string_to_csv(raw_as_json)
    # The CSV text already ends with its own line terminator; print() would
    # append one more newline and leave a stray blank line at the end.
    sys.stdout.write(raw_as_csv)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment