Skip to content

Instantly share code, notes, and snippets.

@ThomasRohde
Last active March 10, 2025 10:09
Show Gist options
  • Save ThomasRohde/36183cd7507c849f71efdd91cc503439 to your computer and use it in GitHub Desktop.
Save ThomasRohde/36183cd7507c849f71efdd91cc503439 to your computer and use it in GitHub Desktop.
Load a JSON file from the command line and estimate its schema.
# /// script
# description = "Load a JSON file from the command line and estimate its schema."
# authors = ["Script-Magic AI Generator"]
# date = "2023-10-07"
# requires-python = ">=3.9"
# dependencies = [
# "jsonschema"
# ]
# tags = ["json", "schema", "data analysis"]
# ///
# Generated from the prompt: "Load a JSON {{file}} from the command line and assuming it is an array of objects use best effort to estimate the schema and output the schema to the command line"
import json
import argparse
from collections import defaultdict
def estimate_schema(data):
    """Estimate a field-to-types schema for an array of JSON objects.

    Args:
        data: Iterable of dicts (decoded JSON objects).

    Returns:
        A dict mapping every key seen in any object to an alphabetically
        sorted list of the Python type names (``type(value).__name__``)
        observed for that key.

    Raises:
        ValueError: If an element of ``data`` is not a dict.
    """
    # One set of observed type names per field.
    schema = defaultdict(set)
    for index, obj in enumerate(data):
        # Fail with a message that pinpoints the bad element instead of an
        # opaque AttributeError from calling .items() on a non-dict.
        if not isinstance(obj, dict):
            raise ValueError(
                f"Element at index {index} is not a JSON object "
                f"(got {type(obj).__name__})."
            )
        for key, value in obj.items():
            schema[key].add(type(value).__name__)
    # Sets have no stable order; sort so the output is reproducible.
    return {key: sorted(names) for key, names in schema.items()}
def main():
    """Command-line entry point: print the estimated schema of a JSON file.

    Parses one positional argument (the path to the JSON file), loads it,
    checks that the top-level value is an array, and prints the schema
    returned by ``estimate_schema`` as indented JSON on stdout.

    On any failure an error message is written to stderr and the process
    exits with status 1, so shell pipelines can detect the error and the
    message never mixes with the schema output.
    """
    # Local import keeps this function self-contained; the file's top-level
    # imports do not include sys.
    import sys

    parser = argparse.ArgumentParser(description="Estimate schema from JSON file.")
    parser.add_argument('file', type=str, help="Path to the JSON file.")
    args = parser.parse_args()

    try:
        # Binary mode lets json.load detect UTF-8/16/32 (including a BOM)
        # instead of depending on the platform's default text encoding.
        with open(args.file, 'rb') as f:
            data = json.load(f)
        # Ensure the data is a list
        if not isinstance(data, list):
            raise ValueError("The JSON data should be an array of objects.")
        schema = estimate_schema(data)
    except FileNotFoundError:
        print(f"Error: File '{args.file}' not found.", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: '{args.file}' is not a valid JSON file.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything else (permission errors,
        # undecodable bytes, malformed elements) and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Print the estimated schema
    print(json.dumps(schema, indent=4))
# Run the CLI only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment