Skip to content

Instantly share code, notes, and snippets.

@atraining
Created December 9, 2021 08:01
Show Gist options
  • Save atraining/1ad6bb196a9be519acd12194595f0f8f to your computer and use it in GitHub Desktop.
Save atraining/1ad6bb196a9be519acd12194595f0f8f to your computer and use it in GitHub Desktop.
Combine multiple JSON files into one CSV
import glob
import json
from copy import deepcopy
import pandas
def cross_join(left, right):
    """Return the Cartesian product of two lists of row dicts.

    Every row of ``left`` is combined with every row of ``right``; the
    output is ordered by left row first, then right row. When both rows
    carry the same key, the value from the right row wins.

    Args:
        left: List of dict rows.
        right: List of dict rows to combine with ``left``.

    Returns:
        A new list of merged rows, each a deep copy that shares no mutable
        state with the inputs. If ``right`` is empty, ``left`` itself (the
        same list object) is returned, so an empty right-hand side leaves
        the existing rows intact instead of wiping them out.
    """
    if not right:
        return left
    # A single deep copy of the merged row is enough to detach it from both
    # inputs; copying the left row first and the merged row again is wasted
    # work.
    return [
        deepcopy({**left_row, **right_row})
        for left_row in left
        for right_row in right
    ]
def flatten_list(data):
    """Lazily yield the non-list items of an arbitrarily nested list.

    Items come out in their original left-to-right order. Only ``list``
    instances are descended into; every other value (dicts, tuples,
    strings, ...) is yielded as-is.
    """
    # Stack of iterators: the top one is the list currently being walked.
    pending = [iter(data)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend; the outer iterator resumes where it stopped once
                # this nested list is exhausted.
                pending.append(iter(item))
                break
            yield item
        else:
            pending.pop()
def json_to_dataframe(data_in):
    """Flatten a decoded JSON value into a ``pandas.DataFrame``.

    Nested dict keys are joined with ``'.'`` to form column names. Each
    element of a list produces its own row(s), and the rows coming from
    sibling keys of the same dict are cross-joined, so scalar fields are
    repeated on every row generated by a list next to them.

    Args:
        data_in: The decoded JSON document (dict, list or scalar).

    Returns:
        A DataFrame with one column per flattened key path.
    """
    def flatten_json(data, prev_heading=''):
        """Return the list of flat row dicts that represent ``data``."""
        if isinstance(data, dict):
            # Start from one empty row and widen it key by key.
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            # An empty list yields no rows; cross_join then leaves the
            # parent's rows untouched, so that key adds no column.
            rows = []
            for item in data:
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
        else:
            # Headings are built as '.a.b'; drop the leading dot.
            rows = [{prev_heading[1:]: data}]
        return rows

    return pandas.DataFrame(flatten_json(data_in))
if __name__ == '__main__':
    # Flatten every *.json file in the current directory and write all the
    # resulting rows to a single CSV file.
    #
    # glob returns matches in arbitrary order; sort them so the row order of
    # the output is reproducible between runs and machines.
    json_files = sorted(glob.glob("*.json"))
    if not json_files:
        # pandas.concat raises an opaque ValueError on an empty list, so
        # stop with a clear message instead.
        raise SystemExit("No *.json files found in the current directory.")

    df_list = []
    for json_file in json_files:
        # Binary mode: json.load detects the UTF-8/16/32 encoding itself.
        with open(json_file, "rb") as file:
            df_list.append(json_to_dataframe(json.load(file)))

    merged_df = pandas.concat(df_list, axis=0, ignore_index=True)
    merged_df.to_csv('all.csv', encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment