Created
December 9, 2021 08:01
-
-
Save atraining/1ad6bb196a9be519acd12194595f0f8f to your computer and use it in GitHub Desktop.
Combine multiple json files into one csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import json | |
from copy import deepcopy | |
import pandas | |
def cross_join(left, right):
    """Combine every row of ``left`` with every row of ``right``.

    Both arguments are lists of dict rows. Each output row is an independent
    deep copy of a left row updated with the items of a right row, so a key
    present in both takes the right-hand value. Output is ordered left-major.

    When ``right`` is empty there is nothing to combine with, and ``left``
    itself (the same list object, not a copy) is returned.
    """
    if not right:
        return left

    combined = []
    for base in left:
        for extra in right:
            merged = deepcopy(base)
            merged.update(extra)
            # Copy once more so values taken from ``extra`` are not shared
            # between output rows.
            combined.append(deepcopy(merged))
    return combined
def flatten_list(data):
    """Lazily yield the items of ``data``, descending into nested lists.

    Only ``list`` instances are expanded; every other item (including tuples
    and strings) is yielded as-is, in its original order.
    """
    for item in data:
        if not isinstance(item, list):
            yield item
            continue
        # Nested list: emit its flattened contents in place.
        for inner in flatten_list(item):
            yield inner
def json_to_dataframe(data_in):
    """Flatten a decoded JSON value into a ``pandas.DataFrame``.

    Nested dict keys are joined with ``.`` to form column names. Each element
    of a list contributes its own rows, and the rows produced for the
    different keys of one dict are combined with ``cross_join``.

    Args:
        data_in: Decoded JSON data (dicts, lists and scalar values).

    Returns:
        A ``pandas.DataFrame`` built from the flattened rows.
    """

    def flatten_json(data, prev_heading=''):
        """Return ``data`` as a list of flat ``{heading: value}`` rows."""
        if isinstance(data, dict):
            # Start from a single empty row and widen it key by key.
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
            return rows

        if isinstance(data, list):
            # List elements keep the parent's heading; each one adds rows.
            rows = []
            for item in data:
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
            return rows

        # Scalar leaf: drop the leading '.' that was added while descending.
        return [{prev_heading[1:]: data}]

    return pandas.DataFrame(flatten_json(data_in))
if __name__ == '__main__':
    # Convert every *.json file in the current directory to a DataFrame and
    # write them, stacked row-wise, to all.csv.
    df_list = []
    # glob returns matches in arbitrary order; sort them so the row order of
    # the output file is reproducible.
    for json_file in sorted(glob.glob("*.json")):
        with open(json_file, "rb") as file:
            df_list.append(json_to_dataframe(json.load(file)))

    if not df_list:
        # pandas.concat raises ValueError on an empty list, so stop here with
        # a clear message instead.
        raise SystemExit("No *.json files found in the current directory; nothing to combine.")

    merged_df = pandas.concat(df_list, axis=0, ignore_index=True)
    merged_df.to_csv('all.csv', encoding='utf-8')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment