Last active
April 26, 2024 08:33
-
-
Save FeepingCreature/491763b314b8ac0f9f5411312fb4c752 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Helper script that classifies the structure of a JSON object.
# Useful for getting an overview of novel JSON data.
# Created largely by Claude 3 Opus.
import json
import sys
class JSONType:
    """Base class for every node of an inferred JSON structure.

    Two nodes are "similar" when they can be folded into a single node;
    ``merge`` performs that folding in place.
    """

    def is_similar(self, other):
        """Return True if *other* is an instance of this node's class.

        Instances of subclasses of this node's class also count as similar.
        """
        return isinstance(other, self.__class__)

    def merge(self, other):
        """Fold *other* into this node in place.

        The base implementation carries no state, so there is nothing to
        combine; it only checks that the two nodes are compatible.

        Raises:
            TypeError: if *other* is not similar to this node.
        """
        # An explicit raise is used instead of ``assert`` so the check
        # still runs when Python is started with -O.
        if not self.is_similar(other):
            raise TypeError(f"cannot merge {other!r} into {self!r}")
class JSONObject(JSONType):
    """A JSON object described by its named fields.

    ``fields`` maps each key to either a single ``JSONType`` or, once
    several incompatible shapes have been recorded for that key, a list of
    alternative types. A ``JSONUndefined`` alternative records that the key
    was missing from at least one of the merged objects.
    """

    def __init__(self, fields=None):
        """Create an object node.

        Args:
            fields: optional mapping of key -> ``JSONType``. A falsy value
                (``None`` or an empty dict) yields a fresh empty mapping.

        Raises:
            TypeError: if any supplied value is not a ``JSONType``.
        """
        self.fields = fields or {}
        for field in self.fields.values():
            # Explicit raise rather than ``assert`` so the check survives -O.
            if not isinstance(field, JSONType):
                raise TypeError(
                    f"field types must be JSONType instances, got {field!r}"
                )

    def is_similar(self, other):
        """Return True if *other* is an object whose keys nest with ours.

        Two objects are similar when one key set is a subset of the other
        (so an empty object is similar to every object).
        """
        if not isinstance(other, JSONObject):
            return False
        mine = self.fields.keys()
        theirs = other.fields.keys()
        return mine <= theirs or theirs <= mine

    def merge(self, other):
        """Fold the fields of *other* into this object in place.

        Keys present on only one side gain a ``JSONUndefined`` alternative.
        For shared keys, each incoming type is merged into the first
        similar existing alternative, or appended as a new alternative.
        """
        # Keys that only we have are optional from now on. The key-set
        # difference is a new set, so updating self.fields here is safe.
        for key in self.fields.keys() - other.fields.keys():
            self._mark_optional(key)

        for key, value in other.fields.items():
            incoming = value if isinstance(value, list) else [value]
            for other_type in incoming:
                if key not in self.fields:
                    # First sighting of this key: every object merged so
                    # far lacked it, so start from "undefined" and let the
                    # matching logic below add the real type.
                    self.fields[key] = JSONUndefined()
                existing = self.fields[key]
                if not isinstance(existing, list):
                    existing = [existing]
                for existing_value in existing:
                    if existing_value.is_similar(other_type):
                        existing_value.merge(other_type)
                        break
                else:
                    # No existing alternative could absorb this type.
                    self.add_to_field(key, other_type)

    def _mark_optional(self, key):
        """Add a ``JSONUndefined`` alternative to *key* unless it has one."""
        current = self.fields[key]
        alternatives = current if isinstance(current, list) else [current]
        # Avoid stacking one more "undefined" on every merge.
        if not any(isinstance(alt, JSONUndefined) for alt in alternatives):
            self.add_to_field(key, JSONUndefined())

    def add_to_field(self, key, type):
        """Record *type* as a (further) alternative for *key*.

        A key with a single type is promoted to a list of alternatives the
        first time a second type is added.
        """
        # NOTE: the parameter name shadows the ``type`` builtin; it is kept
        # so existing keyword callers continue to work.
        if key not in self.fields:
            self.fields[key] = type
        elif isinstance(self.fields[key], list):
            self.fields[key].append(type)
        else:
            self.fields[key] = [self.fields[key], type]

    def __repr__(self):
        fields_str = ', '.join(f"{k} => {v}" for k, v in self.fields.items())
        return f"JSONObject({fields_str})"
class JSONMap(JSONType):
    """A JSON object treated as a homogeneous mapping.

    ``field_type`` is the single type shared by all values, or ``None``
    when no value type is known yet.
    """

    def __init__(self, field_type=None):
        """Create a map node whose values all have type *field_type*."""
        self.field_type = field_type

    def __repr__(self):
        return f"JSONMap(=> {self.field_type})"

    def is_similar(self, other):
        """Return True if *other* is a map with a compatible value type.

        A map whose value type is still unknown (``None``) is compatible
        with any other map.
        """
        if not isinstance(other, JSONMap):
            return False
        # Without this guard a default-constructed JSONMap() would raise
        # AttributeError on ``None.is_similar``.
        if self.field_type is None or other.field_type is None:
            return True
        return self.field_type.is_similar(other.field_type)

    def merge(self, other):
        """Fold the value type of *other* into this map in place."""
        if other.field_type is None:
            # The other map carries no information to merge.
            return
        if self.field_type is None:
            self.field_type = other.field_type
        else:
            self.field_type.merge(other.field_type)
class JSONArray(JSONType):
    """A JSON array, described by the distinct element types seen in it.

    ``elements`` holds one representative node per group of mutually
    similar elements.
    """

    def __init__(self, elements=None):
        """Create an array node; a falsy *elements* yields a new empty list."""
        self.elements = elements if elements else []

    def is_similar(self, other):
        """Return True for any other array, whatever its element types."""
        return isinstance(other, JSONArray)

    def merge(self, other):
        """Fold the element types of *other* into this array in place.

        Each incoming element is merged into the first similar element
        already present; if none is similar it is appended.
        """
        for incoming in other.elements:
            for known in self.elements:
                if known.is_similar(incoming):
                    known.merge(incoming)
                    break
            else:
                # Loop finished without a match: this is a new element type.
                self.elements.append(incoming)

    def __repr__(self):
        elements_str = ', '.join(map(repr, self.elements))
        return f"JSONArray([{elements_str}])"
class JSONString(JSONType):
    """Leaf node for a JSON string value."""

    def __repr__(self):
        return "string"
class JSONNumber(JSONType):
    """Leaf node for a JSON number; base class of the integer and float nodes."""

    def __repr__(self):
        return "number"
class JSONInteger(JSONNumber):
    """Leaf node for a JSON number that is an integer."""

    def __repr__(self):
        return "integer"
class JSONFloat(JSONNumber):
    """Leaf node for a JSON number that is a float."""

    def __repr__(self):
        return "float"
class JSONBool(JSONType):
    """Leaf node for a JSON boolean value."""

    def __repr__(self):
        return "bool"
class JSONNull(JSONType):
    """Leaf node for the JSON ``null`` value."""

    def __repr__(self):
        return "null"
class JSONUndefined(JSONType):
    """Leaf node standing in for a missing or unclassifiable value."""

    def __repr__(self):
        return "undefined"
def extract_json_structure(data):
    """Recursively classify decoded JSON *data* into a ``JSONType`` tree.

    Args:
        data: a value as produced by ``json.loads`` (dict, list, str, int,
            float, bool or None).

    Returns:
        A ``JSONMap`` for a dict whose values all collapse into one type, a
        ``JSONObject`` for any other dict, a ``JSONArray`` for a list, and
        the matching leaf node for scalars. Values of any other Python type
        yield ``JSONUndefined``.
    """
    if isinstance(data, dict):
        obj = JSONObject()
        for key, value in data.items():
            obj.fields[key] = extract_json_structure(value)
        # Is the object actually a map? Merge all field types together; if
        # they collapse into a single type, the keys are treated as data
        # rather than as a fixed schema. The merge mutates the field nodes
        # themselves, so the surviving element already describes them all.
        field_types = JSONArray()
        for field_type in obj.fields.values():
            field_types.merge(JSONArray([field_type]))
        if len(field_types.elements) == 1:
            return JSONMap(field_types.elements[0])
        return obj
    if isinstance(data, list):
        arr = JSONArray()
        for item in data:
            arr.merge(JSONArray([extract_json_structure(item)]))
        return arr
    if isinstance(data, str):
        return JSONString()
    # bool must be tested before int: bool is a subclass of int, so the
    # int check would otherwise classify true/false as integers.
    if isinstance(data, bool):
        return JSONBool()
    if isinstance(data, int):
        return JSONInteger()
    if isinstance(data, float):
        return JSONFloat()
    if data is None:
        return JSONNull()
    return JSONUndefined()
def main():
    """Read one JSON document from stdin and print its inferred structure."""
    json_data = sys.stdin.read()
    data = json.loads(json_data)
    structure = extract_json_structure(data)
    print(structure)


# Only run when executed as a script, so the classes above can be imported
# without blocking on stdin.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment