Skip to content

Instantly share code, notes, and snippets.

@mmaous
Created April 25, 2024 15:40
Show Gist options
  • Save mmaous/3fd751e7e6660857b493fb773ef2ccdc to your computer and use it in GitHub Desktop.
Save mmaous/3fd751e7e6660857b493fb773ef2ccdc to your computer and use it in GitHub Desktop.
A Python script that checks JSON data for duplicate IDs and converts it to Excel (XLSX) and CSV formats.
import pandas as pd
import json
# Column whose values are expected to be unique across records.
unique_key = 'id'


def check_duplicates(json_file, key=None):
    """Print and return the key values that occur more than once in a JSON file.

    Args:
        json_file: Path to a UTF-8 encoded JSON file whose content
            ``pandas.DataFrame`` accepts (e.g. a list of flat objects).
        key: Column to check for repeated values. Defaults to the
            module-level ``unique_key``.

    Returns:
        A list holding the key value of every row that takes part in a
        duplicate, in row order (so each duplicated value appears at least
        twice). Empty when there are no duplicates or no records.

    Raises:
        KeyError: Raised by pandas when the records are non-empty but have
            no ``key`` column.
    """
    if key is None:
        key = unique_key

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    df = pd.DataFrame(data)
    if df.empty:
        # No rows or no columns: nothing can be duplicated, and asking pandas
        # for a missing column would raise KeyError.
        return []

    # keep=False flags every occurrence of a repeated value, not just the
    # second and later ones.
    duplicates = df.duplicated(subset=key, keep=False)
    repeated = df.loc[duplicates, key]
    if duplicates.any():
        print("Duplicates found for IDs:")
        print(repeated)
    return repeated.tolist()
def json_to_excel(json_file, excel_filename):
    """Convert a JSON file to an Excel workbook.

    Args:
        json_file: Path to a UTF-8 encoded JSON file whose content
            ``pandas.DataFrame`` accepts (e.g. a list of flat objects).
        excel_filename: Destination path of the workbook to write.

    Raises:
        ImportError: Raised by pandas when no Excel writer engine
            (e.g. openpyxl) is installed.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    df = pd.DataFrame(data)
    # index=False keeps the DataFrame's row numbers out of the sheet.
    df.to_excel(excel_filename, index=False)
def json_to_csv(json_file, csv_filename):
    """Convert a JSON file to a CSV file.

    Args:
        json_file: Path to a UTF-8 encoded JSON file whose content
            ``pandas.DataFrame`` accepts (e.g. a list of flat objects).
        csv_filename: Destination path of the CSV file to write.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    df = pd.DataFrame(data)
    # index=False keeps the DataFrame's row numbers out of the output.
    df.to_csv(csv_filename, index=False)
# Input document; point this at your own JSON file.
json_file = 'data.json'

# Report repeated IDs before exporting anything.
check_duplicates(json_file)

# Export the records as an Excel workbook.
json_to_excel(json_file=json_file, excel_filename='output.xlsx')

# Export the same records as a CSV file.
json_to_csv(json_file=json_file, csv_filename='output.csv')

print("Conversion completed.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment