Skip to content

Instantly share code, notes, and snippets.

@jannismain
Last active October 25, 2024 15:50
Show Gist options
  • Save jannismain/e96666ca4f059c3e5bc28abb711b5c92 to your computer and use it in GitHub Desktop.
Save jannismain/e96666ca4f059c3e5bc28abb711b5c92 to your computer and use it in GitHub Desktop.
A JSON Encoder in Python, that puts small lists on single lines.
#!/usr/bin/env python3
from __future__ import annotations
import json
class CompactJSONEncoder(json.JSONEncoder):
    """A JSON Encoder that puts small containers on single lines.

    A list, tuple or dict is written on one line when it contains only
    primitives, has at most ``MAX_ITEMS`` items and its textual width does not
    exceed ``MAX_WIDTH``. Every other container is broken up, one item per
    line, using the configured ``indent``.

    Use it through the standard entry points::

        json.dumps(data, cls=CompactJSONEncoder)
        json.dump(data, stream, cls=CompactJSONEncoder)
    """

    CONTAINER_TYPES = (list, tuple, dict)
    """Container datatypes include primitives or other containers."""

    MAX_WIDTH = 70
    """Maximum width of a container that might be put on a single line."""

    MAX_ITEMS = 10
    """Maximum number of items in container that might be put on single line."""

    def __init__(self, *args, **kwargs):
        """Create the encoder; accepts the same options as ``json.JSONEncoder``.

        When ``indent`` is missing or ``None`` it defaults to 4.
        """
        # using this class without indentation is pointless
        if kwargs.get("indent") is None:
            kwargs["indent"] = 4
        super().__init__(*args, **kwargs)
        # Current nesting depth; drives the prefix returned by `indent_str`.
        self.indentation_level = 0

    def encode(self, o):
        """Encode JSON object *o* with respect to single line lists.

        Returns:
            The JSON text for *o* as a ``str``.

        Raises:
            TypeError: If *o* (or a nested value) is not serializable.
            ValueError: If a NaN/Infinity float is met while ``allow_nan`` is
                false.
        """
        if isinstance(o, (list, tuple)):
            return self._encode_list(o)
        if isinstance(o, dict):
            return self._encode_object(o)
        # Finite floats use the compact "g" format (may be scientific notation).
        # NOTE: "g" keeps only six significant digits, so values that need more
        # precision are rounded in the output.
        # NaN and +/-Infinity fail this range test on purpose: format(o, "g")
        # would emit "nan"/"inf", which are not JSON tokens, so they are left to
        # json.dumps below, which honours `allow_nan`.
        if isinstance(o, float) and float("-inf") < o < float("inf"):
            return format(o, "g")
        return json.dumps(
            o,
            skipkeys=self.skipkeys,
            ensure_ascii=self.ensure_ascii,
            check_circular=self.check_circular,
            allow_nan=self.allow_nan,
            sort_keys=self.sort_keys,
            indent=self.indent,
            separators=(self.item_separator, self.key_separator),
            default=self.default,
        )

    def _encode_list(self, o):
        """Encode the list or tuple *o* as a JSON array."""
        if self._put_on_single_line(o):
            return "[" + ", ".join(self.encode(el) for el in o) + "]"
        self.indentation_level += 1
        try:
            output = [self.indent_str + self.encode(el) for el in o]
        finally:
            # Restore the depth even if an element cannot be encoded, so a
            # reused encoder instance does not keep a stale indentation.
            self.indentation_level -= 1
        return "[\n" + ",\n".join(output) + "\n" + self.indent_str + "]"

    def _encode_object(self, o):
        """Encode the dict *o* as a JSON object."""
        if not o:
            return "{}"

        # ensure keys are converted to strings
        o = {self._key_to_str(k): v for k, v in o.items()}

        if self.sort_keys:
            o = dict(sorted(o.items(), key=lambda x: x[0]))

        if self._put_on_single_line(o):
            return (
                "{ "
                + ", ".join(
                    f"{self._encode_key(k)}: {self.encode(el)}" for k, el in o.items()
                )
                + " }"
            )

        self.indentation_level += 1
        try:
            output = [
                f"{self.indent_str}{self._encode_key(k)}: {self.encode(v)}"
                for k, v in o.items()
            ]
        finally:
            # See _encode_list: never leave the depth incremented on error.
            self.indentation_level -= 1
        return "{\n" + ",\n".join(output) + "\n" + self.indent_str + "}"

    @staticmethod
    def _key_to_str(key):
        """Return the string used as JSON object key for *key*.

        ``None`` and booleans map to the JSON spellings ``null``, ``true`` and
        ``false`` (as the standard ``json`` module does); everything else is
        passed through ``str``.
        """
        if key is None:
            return "null"
        if isinstance(key, bool):
            return "true" if key else "false"
        return str(key)

    def _encode_key(self, key):
        """Return the quoted JSON string for the (already stringified) *key*."""
        # Pass ensure_ascii so keys are escaped the same way as values.
        return json.dumps(key, ensure_ascii=self.ensure_ascii)

    def iterencode(self, o, **kwargs):
        """Required to also work with `json.dump`.

        Returns the complete encoded document as one ``str``; extra keyword
        arguments are accepted and ignored.
        """
        return self.encode(o)

    def _put_on_single_line(self, o):
        """Return whether the container *o* is small enough for a single line."""
        return (
            self._primitives_only(o)
            and len(o) <= self.MAX_ITEMS
            # width is estimated from the Python repr, minus the two brackets
            and len(str(o)) - 2 <= self.MAX_WIDTH
        )

    def _primitives_only(self, o: list | tuple | dict) -> bool:
        """Return whether *o* holds no nested container (see CONTAINER_TYPES)."""
        if isinstance(o, (list, tuple)):
            return not any(isinstance(el, self.CONTAINER_TYPES) for el in o)
        elif isinstance(o, dict):
            return not any(isinstance(el, self.CONTAINER_TYPES) for el in o.values())
        return False

    @property
    def indent_str(self) -> str:
        """Whitespace prefix for the current ``indentation_level``.

        Raises:
            ValueError: If ``indent`` is neither an ``int`` nor a ``str``.
        """
        if isinstance(self.indent, int):
            return " " * (self.indentation_level * self.indent)
        elif isinstance(self.indent, str):
            return self.indentation_level * self.indent
        else:
            raise ValueError(
                f"indent must either be of type int or str (is: {type(self.indent)})"
            )
if __name__ == "__main__":
    # Command-line use:
    #   --example  print a built-in sample document in compact form
    #   (default)  read JSON from stdin and re-emit it in compact form
    import sys

    if "--example" in sys.argv:
        data = {
            "compact_object": {"first": "element", "second": 2},
            "compact_list": ["first", "second"],
            "long_list": [
                "this",
                "is",
                "a",
                "rather",
                "long\nlist",
                "and should be broken up because of its width",
            ],
            "non_ascii": "汉语",
            1: 2,
        }
        json.dump(data, sys.stdout, cls=CompactJSONEncoder, ensure_ascii=False)
    else:
        # Branching instead of calling the interactive-only `exit()` helper,
        # which is not available when the `site` module is not loaded.
        json.dump(json.load(sys.stdin), sys.stdout, cls=CompactJSONEncoder)
@oesteban
Copy link

oesteban commented Dec 11, 2023

@jannismain, what is the license of this code? (just making sure I can reuse it)

@oesteban
Copy link

Note: sort_keys=True when calling dumps(..., sort_keys=True, indent=2, cls=CompactJSONEncoder) doesn't seem to work. Indent does work though.

@jannismain
Copy link
Author

@jannismain, what is the license of this code? (just making sure I can reuse it)

Thanks for asking! Feel free to reuse it in any way you want. Consider its license as MIT.

@jannismain
Copy link
Author

Note: sort_keys=True when calling dumps(..., sort_keys=True, indent=2, cls=CompactJSONEncoder) doesn't seem to work. Indent does work though.

I'll have a look at sorting when I get a chance. If somebody knows how to preserve the sorting feature with this way of custom encoding in the meantime, let me know 🙂

@PenutChen
Copy link

@jannismain maybe it could be done this way in _encode_object?

items = [(k, el) for k, el in o.items()]
if self.sort_keys:
    items = sorted(items)

# (... for k, el in items)

@oesteban
Copy link

For me, inserting the following branch in L31 does the trick:

            if self.sort_keys:
                o = dict(sorted(o.items()))

@Royal724
Copy link

Royal724 commented Jan 10, 2024

@jannismain
Excuse me, can you tell me how to use this? I have a small script that writes data from a binary file to json. How should I use it with your compact version?

My script:

def readInt(file, size):
    """Read *size* bytes from *file* and return them as a little-endian unsigned integer."""
    raw_bytes = file.read(size)
    return int.from_bytes(raw_bytes, byteorder="little")

# Parse the binary file "club.dat" into a list of club dicts.
with open("club.dat", 'rb') as datFile:
    # Read and discard the first 8 bytes (presumably a file header -- confirm).
    datFile.read(8)
    # 4-byte little-endian record count.
    size = readInt(datFile, 4)
    clubs = []
    for i in range(size):
        # Progress output: record number and current file offset in hex.
        print(f"#{i + 1}/{size} - {hex(datFile.tell())}")
        club = {}
        club['uid'] = readInt(datFile, 4)
        # A 2-byte element count followed by that many 4-byte integers.
        club['player'] = [(readInt(datFile, 4)) for i in range(readInt(datFile, 2))]
        club['mainClubId'] = readInt(datFile, 4)
        club['isNational'] = readInt(datFile, 2)
        clubs.append(club)

# Write the parsed records as JSON with the standard encoder (2-space indent).
with open("clubs.json", 'wt') as jsonFile:
    json.dump(clubs, jsonFile, ensure_ascii=False, indent=2)

@jannismain
Copy link
Author

jannismain commented Jan 10, 2024

@Royal724 Line 118 is what you are looking for:
json.dump(data, stream, cls=CompactJSONEncoder)

@Royal724
Copy link

Royal724 commented Jan 11, 2024

@jannismain
Thank you for your response. I downloaded your module and crossed it to the same folder where my script is, then made the changes I wrote below, but it didn't work. My json file stays as it is, no compact format. I'm sorry, I'm making a mistake somewhere, but I can't figure out where, as I don't have much experience.
Also, a new folder `__pycache__` was created in my folder, and in it is the file CompactJSONEncoder.cpython-311.pyc

from CompactJSONEncoder import CompactJSONEncoder

json.dump(clubs, jsonFile, cls=CompactJSONEncoder)

@PenutChen
Copy link

@Royal724 maybe the containers in your JSON data are too wide or have too many items; try setting MAX_ITEMS or MAX_WIDTH to a larger number.

@Royal724
Copy link

@PenutChen
Yes, that was the point, thank you.

@jannismain
Excellent work! Thank you so much!

@olin256
Copy link

olin256 commented Jan 25, 2024

There's a small bug: If the keys of a dict are integers or floats, they don't get converted to strings. Perhaps adding inner str() might be a solution…

@jannismain
Copy link
Author

There's a small bug: If the keys of a dict are integers or floats, they don't get converted to strings. Perhaps adding inner str() might be a solution…

You are right, I didn’t even think to treat the keys in any way.. will push a revision soon to address this! 👍

@jannismain
Copy link
Author

jannismain commented Jan 30, 2024

@olin256 I'm now converting keys to string to ensure the output produced is valid JSON.

@jannismain
Copy link
Author

@oesteban @PenutChen sort_keys=True is now supported 👍

@Xonxt
Copy link

Xonxt commented Sep 25, 2024

Little suggestion to also correctly process Numpy types:

    def encode(self, o):
        """Encode JSON object *o* with respect to single line lists."""
        # NOTE(review): `np` is not defined in this snippet -- assumes
        # `import numpy as np` at module level; confirm before use.
        if isinstance(o, (list, tuple)):
            return self._encode_list(o)
        elif isinstance(o, dict):
            return self._encode_object(o)
        if isinstance(o, float):  # Use scientific notation for floats
            return format(o, "g") 
        elif isinstance(o, np.integer):  # process numpy integers
            # convert to a built-in int and re-dispatch
            return self.encode(int(o))
        elif isinstance(o, np.floating): # process numpy floats
            # convert to a built-in float and re-dispatch
            return self.encode(float(o))
        elif isinstance(o, np.ndarray): # flatten numpy arrays as lists
            # tolist() yields (possibly nested) Python lists, encoded like any list
            return self._encode_list(o.tolist())

        # Everything else is delegated to json.dumps with this encoder's settings.
        return json.dumps(
            o,
            skipkeys=self.skipkeys,
            ensure_ascii=self.ensure_ascii,
            check_circular=self.check_circular,
            allow_nan=self.allow_nan,
            sort_keys=self.sort_keys,
            indent=self.indent,
            separators=(self.item_separator, self.key_separator),
            default=self.default if hasattr(self, "default") else None,
        )

Also, personally, I would remove the format(o, "g") part, as it sometimes results in converting floats into exponential notation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment