Last active
February 1, 2024 20:01
-
-
Save AlmightyOatmeal/eadde0393f3a04c6bc7a618f93ca240e to your computer and use it in GitHub Desktop.
Pretty-print a dictionary/list structure in JSON while having the option to use a custom JSON encoder to help with special objects that don't support serialization, such as converting 'datetime' objects to an ISO-formatted string.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import decimal
import json
import logging
import re
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes dates/times, decimals and arbitrary iterables."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for an object the stock encoder rejected.

        Conversions performed:
        - ``datetime.datetime`` / ``datetime.date`` / ``datetime.time`` -> ISO-formatted string.
        - ``decimal.Decimal`` -> ``float`` when it has a fractional part, a negative exponent or is
          NaN/Infinity; ``int`` otherwise.
        - Any other iterable (set, generator, ...) -> ``list``.

        Everything else is handed to ``json.JSONEncoder.default()``, which raises ``TypeError``.

        :param obj: Object to serialize.
        :type obj: object
        :return: Serializable replacement for ``obj``.
        :rtype: str, int, float or list
        """
        if isinstance(obj, (datetime.datetime, datetime.time, datetime.date)):
            return obj.isoformat()

        if isinstance(obj, decimal.Decimal):
            text = str(obj)
            # The string form alone is not enough to pick a type: exponent notation such as
            # '1E+2' or '1E-7' and the special values 'NaN'/'Infinity' contain no '.' but are
            # not valid input for int(). The is_finite() check must come first because the
            # exponent of a non-finite Decimal is a letter, not a number.
            if '.' in text or not obj.is_finite() or obj.as_tuple().exponent < 0:
                return float(text)
            return int(obj)

        # Only the iterability probe is guarded, so a TypeError raised elsewhere is not masked.
        try:
            iterable = iter(obj)
        except TypeError:
            pass
        else:
            return list(iterable)

        return super().default(obj)
def json_string_hook(obj):
    """JSON deserializer hook that stringifies scalar values to smooth over inconsistent data.

    Current behavior:
    - A value is replaced by ``str(value)`` when it has no ``__iter__`` attribute
      (numbers, booleans, ``None``); strings, lists and dicts are left as they are.
    - Values whose key contains the word 'date' (case-insensitive) are never touched.

    :param obj: json.loads() dict (anything accepted by ``dict()``).
    :type obj: dict
    :return: Updated dictionary
    :rtype: dict
    """
    converted = {}
    for key, value in dict(obj).items():
        leave_untouched = 'date' in str(key).lower() or hasattr(value, '__iter__')
        converted[key] = value if leave_untouched else str(value)
    return converted
def json_pretty(data, encoder=CustomJSONEncoder):
    """Render a Python structure as a human-readable JSON string.

    Output uses sorted keys, a 4-space indent and ASCII-only escaping.

    :param data: Python iter object like dict, list, set, array, tuple, etc.
    :type data: dict, list, set, array, tuple
    :param encoder: (optional) Custom JSON encoder class that's an extension of `json.JSONEncoder`.
        (default: CustomJSONEncoder)
    :type encoder: json.JSONEncoder
    :return: Pretty-printed JSON string.
    :rtype: str
    """
    dump_options = {
        'sort_keys': True,
        'indent': 4,
        'separators': (',', ': '),
        'ensure_ascii': True,
        'cls': encoder,
    }
    return json.dumps(data, **dump_options)
def json_min(data, encoder=CustomJSONEncoder):
    """Render a Python structure as a compact JSON string with no optional whitespace.

    :param data: Python iter object like dict, list, set, array, tuple, etc.
    :type data: dict, list, set, array, tuple
    :param encoder: (optional) Custom JSON encoder class that's an extension of `json.JSONEncoder`.
        (default: CustomJSONEncoder)
    :type encoder: json.JSONEncoder
    :return: Minified JSON string.
    :rtype: str
    """
    # Item and key separators without the default trailing spaces.
    compact_separators = (',', ":")
    return json.dumps(data, cls=encoder, separators=compact_separators)
# For each structural character (the last non-whitespace, non-quote character seen outside a
# string), the characters that may follow the closing quote of the next string. These are tuples;
# a single-element tuple needs its trailing comma. Built once at import time rather than per call.
_EXPECTED_AFTER_QUOTE = {
    "[": (",", "]"),
    "]": ("[", ","),
    "{": (":",),
    "}": (",", "{", "]"),
    ":": (",", "}"),
    ",": (":", "{", "}", "[", "]"),
}
# Finds the next non-whitespace character; compiled once at import time.
_NONWHITE_RE = re.compile(r'\S')


def fix_broken_json(input_str):
    """Fix broken JSON quotes by escaping double quotes that appear unescaped inside string values.

    The input is scanned character by character. When a double quote is met while inside a string,
    the next non-whitespace character decides its role: if that character is one that may follow a
    string in the current context (see ``_EXPECTED_AFTER_QUOTE``), or the input ends there, the
    quote closes the string; otherwise a backslash is inserted in front of it.

    A quote directly preceded by a backslash is always treated as already escaped.

    :param input_str: Broken JSON string.
    :type input_str: str
    :return: Fixed JSON string
    :rtype: str
    """
    pieces = []
    in_string = False
    prev = None
    prev_structural = None

    for char_pos, char in enumerate(input_str):
        # Compare by value; identity (`is`) only happens to work for cached one-character strings.
        if char == '"' and prev != '\\':
            if in_string:
                following = _NONWHITE_RE.search(input_str, char_pos + 1)
                # No match means only whitespace (or nothing) is left, so the quote must be the
                # closing one; escaping it would leave the string unterminated.
                if following is None or following.group() in _EXPECTED_AFTER_QUOTE.get(prev_structural, ''):
                    in_string = False
                else:
                    pieces.append('\\')
            else:
                in_string = True
        elif not in_string and char.strip():
            # Remember the last non-whitespace, non-quote character seen outside of a string.
            prev_structural = char

        pieces.append(char)
        prev = char

    return ''.join(pieces)
def sub_json_parser(obj):
    """Try to parse JSON values from a dictionary or list of dictionaries.

    NOTE: Dictionary values are not descended into; only the root level of each dictionary is
    examined, for things such as JSON stored in a database table. Lists, sets and tuples are
    walked element by element and always come back as a list.

    A string value is treated as a JSON candidate when it contains a curly brace. It is parsed
    as-is first, then once more after running it through ``fix_broken_json()``. If both attempts
    fail, the failure is logged at DEBUG level and the original string is kept.

    :param obj: Dictionary or list of dictionaries.
    :type obj: dict or list
    :return: New dict or list with parsable values decoded; any other object is returned untouched.
    :rtype: dict or list
    """
    if isinstance(obj, (list, set, tuple)):
        return [sub_json_parser(item) for item in obj]

    if not isinstance(obj, dict):
        return obj

    # Build a fresh dictionary so the caller's object is never modified.
    parsed = {}
    for key, value in obj.items():
        # Non-strings, and strings without a curly brace, cannot be the JSON we look for.
        if not isinstance(value, str) or '{' not in value:
            parsed[key] = value
            continue

        # Deliberately best-effort: any failure falls through to the next strategy.
        try:
            parsed[key] = json.loads(value)
        except Exception:
            try:
                parsed[key] = json.loads(fix_broken_json(value))
            except Exception:
                logging.getLogger(__name__).debug(
                    'Unable to fix broken json key=%s, value=%s', key, value
                )
                parsed[key] = value

    return parsed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment