Skip to content

Instantly share code, notes, and snippets.

@ababilinski
Last active May 13, 2025 16:10
Show Gist options
  • Select an option

  • Save ababilinski/10aa3939218e71922c0d8096d98c883d to your computer and use it in GitHub Desktop.

Select an option

Save ababilinski/10aa3939218e71922c0d8096d98c883d to your computer and use it in GitHub Desktop.
convert-json-to-csv
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [
"-6Z_2b5d9Oni"
],
"name": "convert-json-to-csv",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ababilinski/10aa3939218e71922c0d8096d98c883d/convert-json-to-csv.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"# @title Upload Json Files\n",
"# @markdown Adds files to the input folder and validates the json data\n",
"\n",
"import shutil\n",
"import os\n",
"from google.colab import files\n",
"import json\n",
"import pandas as pd\n",
"\n",
"# Clear directory\n",
"def clear_directory(dir_path):\n",
" if os.path.exists(dir_path):\n",
" shutil.rmtree(dir_path)\n",
" os.makedirs(dir_path)\n",
"\n",
"def check_json(file_path):\n",
" with open(file_path, \"r\") as file:\n",
" data = json.load(file)\n",
" return True if isinstance(data, dict) else False\n",
"\n",
"# Creating and clearing directories\n",
"input_dir = 'Input'\n",
"output_dir = 'Converted'\n",
"clear_directory(input_dir)\n",
"clear_directory(output_dir)\n",
"\n",
"# Upload files and move them to the Input Folder\n",
"os.makedirs(input_dir, exist_ok=True)\n",
"uploaded = files.upload()\n",
"for name, data in uploaded.items():\n",
" with open(os.path.join(input_dir, name), 'wb') as f:\n",
" f.write(data)\n",
"\n",
"# Delete uploaded files after moving them to the input folder\n",
"for files in uploaded.keys():\n",
" os.remove(files)\n",
"\n",
"# Delete invalid files from input folder\n",
"for file in os.listdir(input_dir):\n",
" file_path = os.path.join(input_dir, file)\n",
" if not check_json(file_path):\n",
" print(f\"Warning: {file_path} is not a valid JSON file. Deleting file.\")\n",
" os.remove(file_path)"
],
"metadata": {
"id": "rhF-vB2NWCbU"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title Convert Files\n",
"#@markdown Processing JSON to CSV(s): This block will read the JSON, process it, and store the CSV files in the output folder.\n",
"\n",
"\n",
"\n",
"import shutil\n",
"import os\n",
"from google.colab import files\n",
"import json\n",
"import pandas as pd\n",
"\n",
"#@markdown **Settings:**\n",
"Root_Variable_Handling = 'Write to .txt file' #@param ['Ignore', 'Write to .txt file', 'Put in separate CSV file']\n",
"root_setting = 'ignore'\n",
"if Root_Variable_Handling == 'Write to .txt file':\n",
" root_setting = 'txt'\n",
"elif Root_Variable_Handling == 'Put in separate CSV file':\n",
" root_setting = 'csv'\n",
"else:\n",
" root_setting = 'ignore'\n",
"\n",
"def flatten_json(y, name=''):\n",
" out = {}\n",
" if isinstance(y, dict):\n",
" for key, value in y.items():\n",
" out.update(flatten_json(value, name + key + '_'))\n",
" else:\n",
" out[name[:-1]] = y\n",
" return out\n",
"\n",
"def process_data(data, root_handling, file_name_without_extension):\n",
" root_variables = {}\n",
" processed_data = {}\n",
"\n",
" for key, value in data.items():\n",
" if isinstance(value, dict):\n",
" processed_data[key] = [value]\n",
" elif isinstance(value, list):\n",
" processed_data[key] = value\n",
" else:\n",
" root_variables[key] = value\n",
"\n",
" # Handling root variables based on user choice\n",
" if root_variables:\n",
" if root_handling == 'txt':\n",
" with open(os.path.join(output_dir, f\"{file_name_without_extension}_root_variables.txt\"), \"w\") as file:\n",
" for key, value in root_variables.items():\n",
" file.write(f\"{key}: {value}\\n\")\n",
" elif root_handling == 'csv':\n",
" pd.DataFrame([root_variables]).to_csv(os.path.join(output_dir, f\"{file_name_without_extension}_root_variables.csv\"), index=False)\n",
"\n",
" return {key: pd.DataFrame([flatten_json(item) for item in value]) for key, value in processed_data.items()}\n",
"\n",
"def create_csvs_from_json(root_handling):\n",
" # Process each file in input directory\n",
" for file_name in os.listdir(input_dir):\n",
" file_path = os.path.join(input_dir, file_name)\n",
" file_name_without_extension = file_name.rsplit('.', 1)[0]\n",
" # Read the JSON file and convert it to a data frame\n",
" with open(file_path, \"r\") as file:\n",
" data = json.load(file)\n",
" # Process the JSON file\n",
" data_frames = process_data(data, root_handling,file_name_without_extension)\n",
"\n",
" # Saving data frames as CSV files\n",
" for key, df in data_frames.items():\n",
" csv_file_name = os.path.join(output_dir, file_name.rsplit('.', 1)[0] + f\"_{key}.csv\")\n",
" df.to_csv(csv_file_name, index=False)\n",
"\n",
"create_csvs_from_json(root_setting)\n"
],
"metadata": {
"id": "4-m_6wIUYo4J"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title Preview Data\n",
"#@markdown Previews the table and csv file information\n",
"Max_Preview_Rows = 5 # @param {type:\"integer\"}\n",
"import pandas as pd\n",
"from google.colab import files\n",
"\n",
"\n",
"def preview_large_csv(file_path, chunksize=1000):\n",
" try:\n",
" chunk = next(pd.read_csv(file_path, chunksize=chunksize,dtype=str, nrows=Max_Preview_Rows))\n",
" return chunk\n",
" except StopIteration:\n",
" print(\"File is empty.\")\n",
" except Exception as e:\n",
" print(f\"Error reading file: {e}\")\n",
"\n",
"# Function to upload and display a CSV file\n",
"def display_full_csv(file_path):\n",
" # Read the uploaded file into a DataFrame\n",
" df = pd.read_csv(file_path)\n",
"\n",
" # Setting display options to show all rows and columns\n",
" pd.set_option('display.max_rows', None)\n",
" pd.set_option('display.max_columns', None)\n",
"\n",
" # Display the DataFrame\n",
" return df\n",
"def get_row_count(file_path):\n",
" df = pd.read_csv(file_path)\n",
" # Setting display options to show all rows and columns\n",
" pd.set_option('display.max_rows', None)\n",
" pd.set_option('display.max_columns', None)\n",
" return df.shape[0]\n",
"\n",
"\n",
"for file_name in os.listdir(output_dir):\n",
" file_path = os.path.join(output_dir, file_name)\n",
" # Display the CSV file as a table\n",
" df_table = preview_large_csv(file_path)\n",
" row_count = get_row_count(file_path)\n",
" print(f\"File: {file_name} -\\033[1m [rows: {row_count}] \\033[0m\")\n",
" display(df_table)\n",
" print(\"-\"*50 + \"\\n\") # Separator between file previews\n"
],
"metadata": {
"cellView": "form",
"id": "heIiEfccbJYL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title Download Data\n",
"#@markdown Downloads the single csv file or a zip.\n",
"#@markdown > a `.zip` is downloaded only if multiple root objects were in the json file\n",
"\n",
"import os\n",
"import zipfile\n",
"\n",
"def download_csv_or_zip():\n",
" # Listing all files in the output directory\n",
" files_to_download = [f for f in os.listdir(output_dir) if f.endswith('.csv') or f.endswith('.txt')]\n",
"\n",
" # If there's only one file, download it directly\n",
" if len(files_to_download) == 1:\n",
" files.download(os.path.join(output_dir, files_to_download[0]))\n",
"\n",
" # If there are multiple files, zip them and download the zip file\n",
" elif len(files_to_download) > 1:\n",
" zip_filename = file_name.rsplit('.', 1)[0] + f\"_converted.zip\"\n",
" with zipfile.ZipFile(zip_filename, 'w') as output_zip:\n",
" for file in files_to_download:\n",
" output_zip.write(os.path.join(output_dir, file), file)\n",
" files.download(zip_filename)\n",
"\n",
" else:\n",
" print(\"No files to download.\")\n",
"\n",
"download_csv_or_zip()\n"
],
"metadata": {
"id": "WZkX01YHa5c9",
"cellView": "form"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment