Created
April 25, 2024 15:40
-
-
Save mmaous/3fd751e7e6660857b493fb773ef2ccdc to your computer and use it in GitHub Desktop.
A Python script that converts JSON data to Excel (XLSX) and CSV formats, reporting any duplicate IDs before exporting the data.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import json | |
# Name of the record field whose values are compared to detect duplicates.
unique_key = 'id'
def check_duplicates(json_file, key=None):
    """Print and return the key values that occur in more than one record.

    Args:
        json_file: Path to a UTF-8 encoded JSON file. The parsed content is
            handed to ``pd.DataFrame`` (for example a list of objects).
        key: Column whose values are checked for repeats. When ``None``
            (the default) the module-level ``unique_key`` is used.

    Returns:
        A list holding the ``key`` value of every row that shares its value
        with at least one other row, in row order. The list is empty when
        nothing is duplicated or when the file yields an empty DataFrame.

    Raises:
        KeyError: Propagated from pandas when the data is non-empty but has
            no ``key`` column.
    """
    if key is None:
        # Looked up at call time so the default follows the module setting.
        key = unique_key

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Load JSON data into a pandas DataFrame
    df = pd.DataFrame(data)
    if df.empty:
        # An empty frame has no key column, so duplicated() would raise.
        return []

    # keep=False flags every member of a duplicate group, not just the
    # later occurrences.
    duplicates = df.duplicated(subset=key, keep=False)
    if not duplicates.any():
        return []

    print("Duplicates found for IDs:")
    print(df[duplicates][key])
    return df.loc[duplicates, key].tolist()
def json_to_excel(json_file, excel_filename):
    """Convert a JSON file into an Excel workbook.

    Args:
        json_file: Path to a UTF-8 encoded JSON file. The parsed content is
            handed to ``pd.DataFrame`` (for example a list of objects).
        excel_filename: Destination path passed to ``DataFrame.to_excel``.

    Note:
        Writing relies on whichever Excel writer engine pandas has
        available for the target extension.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Load JSON data into a pandas DataFrame
    df = pd.DataFrame(data)

    # index=False keeps the DataFrame's row index out of the sheet.
    df.to_excel(excel_filename, index=False)
def json_to_csv(json_file, csv_filename):
    """Convert a JSON file into a CSV file.

    Args:
        json_file: Path to a UTF-8 encoded JSON file. The parsed content is
            handed to ``pd.DataFrame`` (for example a list of objects).
        csv_filename: Destination path passed to ``DataFrame.to_csv``.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Load JSON data into a pandas DataFrame
    df = pd.DataFrame(data)

    # index=False keeps the DataFrame's row index out of the output.
    df.to_csv(csv_filename, index=False)
# Example JSON file (replace 'data.json' with your file path)
# NOTE(review): the calls below run whenever this file is executed or
# imported; consider an `if __name__ == "__main__":` guard if it is ever
# used as a module.
json_file = 'data.json'

# Report duplicate IDs; the conversions below run regardless of the result
check_duplicates(json_file)

# Convert JSON data to Excel format
json_to_excel(json_file, 'output.xlsx')

# Convert JSON data to CSV format
json_to_csv(json_file, 'output.csv')

print("Conversion completed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment