Last active
March 2, 2022 08:31
-
-
Save tankala/2f0ac895e063ddaef239b9dca16f11c8 to your computer and use it in GitHub Desktop.
Write/Convert Nested JSON data to CSV for all keys or specific keys. For more details read my article https://blog.tanka.la/2020/03/29/write-convert-nested-json-data-to-csv-for-specific-subset-keysheaders/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"_id": "5e7f55e2f065ef934a048d32", | |
"index": 0, | |
"guid": "88647c07-bf2a-42c3-8e23-cec1ed731f2f", | |
"isActive": false, | |
"balance": "$1,254.74", | |
"picture": "http://placehold.it/32x32", | |
"age": 40, | |
"eyeColor": "green", | |
"name": "Laverne Robles", | |
"gender": "female", | |
"company": "COMVEY", | |
"email": "[email protected]", | |
"phone": "+1 (898) 413-3041", | |
"address": "783 Livingston Street, Hillsboro, District Of Columbia, 1897", | |
"about": "Anim ut sunt consectetur irure irure sit consectetur et laboris minim. Et tempor excepteur aute consequat in sunt aliquip in. Proident non et reprehenderit consequat ea adipisicing. Nisi excepteur Lorem irure pariatur fugiat occaecat labore officia. Quis consectetur commodo velit aliquip cupidatat et esse.\r\n", | |
"registered": "2019-04-13T02:41:28 -06:-30", | |
"latitude": 85.172936, | |
"longitude": 40.103047, | |
"tags": [ | |
"anim", | |
"cupidatat", | |
"excepteur", | |
"ea", | |
"do", | |
"ipsum", | |
"consectetur" | |
], | |
"friends": [ | |
{ | |
"1": "Dawn Lynch" | |
}, | |
{ | |
"2": "Rhodes Pacheco" | |
} | |
], | |
"greeting": "Hello, Laverne Robles! You have 4 unread messages.", | |
"favoriteFruit": "banana" | |
}, | |
{ | |
"_id": "5e7f55e272c1af3e99b6bf78", | |
"index": 1, | |
"guid": "0cb01bc1-c468-4bd4-9d1f-fcf52538f095", | |
"isActive": false, | |
"balance": "$2,697.51", | |
"picture": "http://placehold.it/32x32", | |
"age": 23, | |
"eyeColor": "brown", | |
"name": "Holder Hickman", | |
"gender": "male", | |
"company": "XINWARE", | |
"email": "[email protected]", | |
"phone": "+1 (934) 547-3070", | |
"address": "671 Claver Place, Robinette, Vermont, 710", | |
"about": "Ea et do qui aliqua laborum sunt mollit enim dolore aliqua. Elit eiusmod nulla reprehenderit aliqua irure consectetur culpa. Laborum cillum dolore culpa Lorem consequat sunt officia ut id dolore reprehenderit sint et.\r\n", | |
"registered": "2016-05-10T08:20:36 -06:-30", | |
"latitude": 10.192942, | |
"longitude": 74.879352, | |
"tags": [ | |
"laboris", | |
"laborum", | |
"ullamco", | |
"laborum", | |
"occaecat", | |
"ad", | |
"mollit" | |
], | |
"friends": [ | |
{ | |
"1": "Ryan Ellis" | |
} | |
], | |
"greeting": "Hello, Holder Hickman! You have 5 unread messages.", | |
"favoriteFruit": "strawberry" | |
}, | |
{ | |
"_id": "5e7f55e235816416478ec377", | |
"index": 2, | |
"guid": "247a0687-b1f8-4b38-86b2-0eacb3fdab28", | |
"isActive": true, | |
"balance": "$2,269.84", | |
"picture": "http://placehold.it/32x32", | |
"age": 37, | |
"eyeColor": "blue", | |
"name": "Minnie Woods", | |
"gender": "female", | |
"company": "EPLOSION", | |
"email": "[email protected]", | |
"phone": "+1 (806) 460-3043", | |
"address": "197 Ocean Court, Blue, Federated States Of Micronesia, 7113", | |
"about": "Ex officia occaecat incididunt commodo Lorem. Minim amet fugiat labore qui. Eu culpa culpa magna voluptate aute est tempor amet velit id est est. Laborum proident esse excepteur voluptate reprehenderit. Laborum ullamco commodo qui Lorem veniam id reprehenderit.\r\n", | |
"registered": "2018-06-18T06:37:45 -06:-30", | |
"latitude": 80.884237, | |
"longitude": -171.336369, | |
"tags": [ | |
"Lorem", | |
"laborum", | |
"mollit", | |
"consequat", | |
"dolore", | |
"nostrud", | |
"anim" | |
], | |
"friends": [ | |
{ | |
"1": "Jolene Franco" | |
}, | |
{ | |
"2": "Gardner Boyle" | |
}, | |
{ | |
"3": "Alisa Carlson" | |
} | |
], | |
"greeting": "Hello, Minnie Woods! You have 2 unread messages.", | |
"favoriteFruit": "banana" | |
}, | |
{ | |
"_id": "5e7f55e222ba00311b60b946", | |
"index": 3, | |
"guid": "f80d3b7f-28c0-47a6-ad53-6d4d28671a29", | |
"isActive": false, | |
"balance": "$3,801.17", | |
"picture": "http://placehold.it/32x32", | |
"age": 37, | |
"name": "Ford Melton", | |
"gender": "male", | |
"company": "DIGIFAD", | |
"email": "[email protected]", | |
"phone": "+1 (881) 494-2384", | |
"address": "650 Nassau Avenue, Shasta, Georgia, 7589", | |
"about": "Consequat sint id sunt cupidatat quis proident. Elit sit ipsum aliqua amet. Anim qui incididunt culpa anim eu culpa irure deserunt non. Sint occaecat deserunt non minim ut in id voluptate ullamco laborum consequat sunt excepteur ex.\r\n", | |
"registered": "2019-03-04T02:35:04 -06:-30", | |
"latitude": -25.65117, | |
"longitude": 115.060646, | |
"tags": [ | |
"anim", | |
"magna", | |
"deserunt", | |
"eu", | |
"cupidatat", | |
"ullamco", | |
"duis" | |
], | |
"friends": [ | |
{ | |
"1": "Thornton Gilmore" | |
}, | |
{ | |
"2": "Diana Everett" | |
}, | |
{ | |
"3": "Le Reynolds" | |
} | |
], | |
"greeting": "Hello, Ford Melton! You have 5 unread messages.", | |
"favoriteFruit": "apple" | |
}, | |
{ | |
"_id": "5e7f55e26964f0e4a752656c", | |
"index": 4, | |
"guid": "9161a683-9640-4472-a5b9-a673f7ec4c68", | |
"isActive": true, | |
"balance": "$3,823.16", | |
"picture": "http://placehold.it/32x32", | |
"age": 35, | |
"name": "Foley Lucas", | |
"gender": "male", | |
"company": "PHOLIO", | |
"email": "[email protected]", | |
"phone": "+1 (824) 468-3510", | |
"address": "426 Kingsland Avenue, Cucumber, Puerto Rico, 540", | |
"about": "Enim dolore aliquip proident et excepteur consectetur non minim excepteur qui. Labore quis minim aute laborum ad. Dolore velit sit officia nostrud tempor ullamco magna non reprehenderit consectetur sint nulla. Eiusmod nisi ad exercitation nulla velit fugiat. Tempor laborum elit veniam aliqua incididunt quis magna ex adipisicing magna. Culpa mollit voluptate dolor labore nisi Lorem incididunt. Sit labore qui labore esse eu nulla consectetur exercitation duis et in excepteur commodo.\r\n", | |
"registered": "2018-09-21T08:17:37 -06:-30", | |
"latitude": 77.763657, | |
"longitude": 85.236121, | |
"tags": [ | |
"aliqua", | |
"velit", | |
"mollit", | |
"aute", | |
"dolor", | |
"exercitation", | |
"dolore" | |
], | |
"greeting": "Hello, Foley Lucas! You have 9 unread messages.", | |
"favoriteFruit": "strawberry" | |
} | |
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json # For JSON loading | |
import csv # For CSV dict writer | |
def get_leaves(item, key=None, key_prefix=""): | |
""" | |
This function converts nested dictionary structure to flat | |
""" | |
if isinstance(item, dict): | |
leaves = {} | |
"""Iterates the dictionary and go to leaf node after that calls to get_leaves function recursively to go to leaves level""" | |
for item_key in item.keys(): | |
"""Some times leaves and parents or some other leaves might have same key that's why adding leave node key to distinguish""" | |
temp_key_prefix = ( | |
item_key if (key_prefix == "") else (key_prefix + "_" + str(item_key)) | |
) | |
leaves.update(get_leaves(item[item_key], item_key, temp_key_prefix)) | |
return leaves | |
elif isinstance(item, list): | |
leaves = {} | |
elements = [] | |
"""Iterates the list and go to leaf node after that if it is leave then simply add value to current key's list or | |
calls to get_leaves function recursively to go to leaves level""" | |
for element in item: | |
if isinstance(element, dict) or isinstance(element, list): | |
leaves.update(get_leaves(element, key, key_prefix)) | |
else: | |
elements.append(element) | |
if len(elements) > 0: | |
leaves[key] = elements | |
return leaves | |
else: | |
return {key_prefix: item} | |
with open("data.json") as f_input, open("output.csv", "w", newline="") as f_output: | |
json_data = json.load(f_input, strict=False) | |
"""'First parse all entries to get the unique fieldnames why because already we have file in RAM level and | |
if we put each dictionary after parsing in list or some data structure it will crash your system due to memory constraint | |
that's why first we will get the keys first then we convert each dictionary and put it to CSV""" | |
fieldnames = set() | |
for entry in json_data: | |
fieldnames.update(get_leaves(entry).keys()) | |
csv_output = csv.DictWriter(f_output, delimiter=";", fieldnames=sorted(fieldnames)) | |
csv_output.writeheader() | |
csv_output.writerows(get_leaves(entry) for entry in json_data) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json # For JSON loading | |
import csv # For CSV dict writer | |
def get_leaves(item, key=None, key_prefix=""): | |
""" | |
This function converts nested dictionary structure to flat | |
""" | |
if isinstance(item, dict): | |
leaves = {} | |
"""Iterates the dictionary and go to leaf node after that calls to get_leaves function recursively to go to leaves level""" | |
for item_key in item.keys(): | |
"""Some times leaves and parents or some other leaves might have same key that's why adding leave node key to distinguish""" | |
temp_key_prefix = ( | |
item_key if (key_prefix == "") else (key_prefix + "_" + str(item_key)) | |
) | |
leaves.update(get_leaves(item[item_key], item_key, temp_key_prefix)) | |
return leaves | |
elif isinstance(item, list): | |
leaves = {} | |
elements = [] | |
"""Iterates the list and go to leaf node after that if it is leave then simply add value to current key's list or | |
calls to get_leaves function recursively to go to leaves level""" | |
for element in item: | |
if isinstance(element, dict) or isinstance(element, list): | |
leaves.update(get_leaves(element, key, key_prefix)) | |
else: | |
elements.append(element) | |
if len(elements) > 0: | |
leaves[key] = elements | |
return leaves | |
else: | |
return {key_prefix: item} | |
field_names = [ | |
"age", | |
"balance", | |
"company", | |
"eyeColor", | |
"favoriteFruit", | |
"friends_1", | |
"friends_2", | |
"friends_3", | |
"gender", | |
"greeting", | |
"isActive", | |
"latitude", | |
"longitude", | |
"registered", | |
"tags" | |
] | |
with open("data.json") as f_input, open("output.csv", "w", newline="") as f_output: | |
json_data = json.load(f_input, strict=False) | |
'''If dictionary have keys more than we mentioned in fieldnames it will give error | |
to ignore the keys which we don't have we need to use extrasaction="ignore"''' | |
csv_output = csv.DictWriter(f_output, delimiter=";", fieldnames=field_names, extrasaction="ignore") | |
csv_output.writeheader() | |
csv_output.writerows(get_leaves(entry) for entry in json_data) |
Looking for a generic solution to convert nested JSON file to CSV in Python
Looking for a generic solution to convert nested JSON file to CSV in Python
Here you want to run for Data field right. You need to tweak the code take the Data field value and proceed further.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Provided code is not working for shared nested JSON data
{
"Response": "Success",
"Message": "",
"HasWarning": false,
"Type": 100,
"RateLimit": {},
"Data": {
"Aggregated": false,
"TimeFrom": 1234567800,
"TimeTo": 1234567900,
"Data": [
{
"id": 11,
"symbol": "AAA",
"time": 1234567800,
"block_time": 123.282828282828,
"block_size": 1212121,
"current_supply": 10101010
},
{
"id": 12,
"symbol": "BBB",
"time": 1234567900,
"block_time": 234.696969696969,
"block_size": 1313131,
"current_supply": 20202020
},
]
}
}