-
-
Save jannismain/e96666ca4f059c3e5bc28abb711b5c92 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python3 | |
| from __future__ import annotations | |
| import json | |
class CompactJSONEncoder(json.JSONEncoder):
    """A JSON encoder that puts small containers on single lines.

    A list, tuple or dict is written on one line when it holds no nested
    container, has at most ``MAX_ITEMS`` items and is at most ``MAX_WIDTH``
    characters wide. Every other container is broken up with one item per
    line, indented according to ``indent``.
    """

    # Container datatypes; a container holding any of these is never collapsed.
    CONTAINER_TYPES = (list, tuple, dict)
    # Maximum width of a container that might be put on a single line.
    MAX_WIDTH = 70
    # Maximum number of items in a container that might be put on a single line.
    MAX_ITEMS = 10

    def __init__(self, *args, **kwargs):
        """Create the encoder; ``indent`` defaults to 4 when not given."""
        # using this class without indentation is pointless
        if kwargs.get("indent") is None:
            kwargs["indent"] = 4
        super().__init__(*args, **kwargs)
        # Current nesting depth while encoding multi-line containers.
        self.indentation_level = 0

    def encode(self, o):
        """Encode JSON object *o* with respect to single line lists.

        Lists, tuples and dicts are formatted by this class; every other
        value is delegated to ``json.dumps`` with this encoder's settings.
        """
        if isinstance(o, (list, tuple)):
            return self._encode_list(o)
        if isinstance(o, dict):
            return self._encode_object(o)
        return json.dumps(
            o,
            skipkeys=self.skipkeys,
            ensure_ascii=self.ensure_ascii,
            check_circular=self.check_circular,
            allow_nan=self.allow_nan,
            sort_keys=self.sort_keys,
            indent=self.indent,
            separators=(self.item_separator, self.key_separator),
            default=self.default,
        )

    def _encode_list(self, o):
        """Return the JSON text for the list or tuple *o*."""
        if self._put_on_single_line(o):
            return "[" + ", ".join(self.encode(el) for el in o) + "]"
        self.indentation_level += 1
        try:
            output = [self.indent_str + self.encode(el) for el in o]
        finally:
            # Restore the depth even when an element cannot be encoded, so a
            # reused encoder instance does not keep a stale indentation.
            self.indentation_level -= 1
        return "[\n" + ",\n".join(output) + "\n" + self.indent_str + "]"

    def _encode_object(self, o):
        """Return the JSON text for the dict *o*."""
        if not o:
            return "{}"

        # ensure keys are converted to strings
        o = {self._key_to_str(k): v for k, v in o.items()}

        if self.sort_keys:
            o = dict(sorted(o.items(), key=lambda x: x[0]))

        if self._put_on_single_line(o):
            return (
                "{ "
                + ", ".join(
                    f"{self.encode(k)}: {self.encode(el)}" for k, el in o.items()
                )
                + " }"
            )

        self.indentation_level += 1
        try:
            output = [
                f"{self.indent_str}{self.encode(k)}: {self.encode(v)}"
                for k, v in o.items()
            ]
        finally:
            # See _encode_list: never leave the depth incremented on error.
            self.indentation_level -= 1
        return "{\n" + ",\n".join(output) + "\n" + self.indent_str + "}"

    @staticmethod
    def _key_to_str(key):
        """Convert a dict key to the string used as the JSON member name.

        ``True``, ``False`` and ``None`` map to ``"true"``, ``"false"`` and
        ``"null"`` like the standard encoder does; any other key is passed
        through ``str()``.
        """
        if key is True:
            return "true"
        if key is False:
            return "false"
        if key is None:
            return "null"
        return str(key)

    def iterencode(self, o, **kwargs):
        """Required to also work with `json.dump`.

        ``json.dump`` writes every chunk produced by this method, so the
        encoded document is handed over as one chunk instead of a bare
        string (which would be iterated character by character).
        """
        return iter((self.encode(o),))

    def _put_on_single_line(self, o):
        """Return whether container *o* is small enough for a single line.

        The width is estimated from ``str(o)`` minus its two enclosing
        brackets.
        """
        return (
            self._primitives_only(o)
            and len(o) <= self.MAX_ITEMS
            and len(str(o)) - 2 <= self.MAX_WIDTH
        )

    def _primitives_only(self, o):
        """Return whether *o* (list, tuple or dict) holds no nested container.

        For a dict only the values are inspected. Any other type yields
        ``False``.
        """
        if isinstance(o, (list, tuple)):
            return not any(isinstance(el, self.CONTAINER_TYPES) for el in o)
        if isinstance(o, dict):
            return not any(isinstance(el, self.CONTAINER_TYPES) for el in o.values())
        return False

    @property
    def indent_str(self) -> str:
        """Whitespace prefix for the current ``indentation_level``.

        Raises:
            ValueError: if ``indent`` is neither an int nor a str.
        """
        if isinstance(self.indent, int):
            return " " * (self.indentation_level * self.indent)
        elif isinstance(self.indent, str):
            return self.indentation_level * self.indent
        else:
            raise ValueError(
                f"indent must either be of type int or str (is: {type(self.indent)})"
            )
if __name__ == "__main__":
    # Command-line use: with ``--example`` print a built-in sample document,
    # otherwise re-format the JSON read from stdin onto stdout.
    import sys

    if "--example" in sys.argv:
        data = {
            "compact_object": {"first": "element", "second": 2},
            "compact_list": ["first", "second"],
            "long_list": [
                "this",
                "is",
                "a",
                "rather",
                "long\nlist",
                "and should be broken up because of its width",
            ],
            "non_ascii_ๆฑ": "ๆฑ่ฏญ",
            1: 2,
        }
        json.dump(data, sys.stdout, cls=CompactJSONEncoder, ensure_ascii=False)
        # sys.exit() instead of the builtin exit(): the latter is only
        # injected by the `site` module and is meant for interactive use.
        sys.exit()

    json.dump(json.load(sys.stdin), sys.stdout, cls=CompactJSONEncoder)
Awesome. I was going to implement something like this myself, but thought I'd check for an existing implementation first. This saved me so much time, and it's quite a thorough job. Well done. ๐
@jannismain This saved me so much time, thanks a lot for the excellent work! ๐ And all you other guys for optimizing it ^^
Another minor bugfix:
f"{self.encode(k)}: {self.encode(el)}" for k, el in o.items() — otherwise the key encoding is not correct (I had a non-ASCII character in there).
Most json.dumps calls are missing at least ensure_ascii=self.ensure_ascii
This JSON data won't be formatted consistently:
{
"data1": [
27.135222728033753,
114.02096846633076,
26.074562556253937,
24.57196064623251
],
"data2": [
27.135222,
114.02094,
26.074565,
24.571964
]
}

@senyai hi, to keep the number of digits after the decimal point, remove lines 32-33:
if isinstance(o, float): # Use scientific notation for floats
return format(o, "g")

And to have both lists printed on just one line, increase the value of MAX_WIDTH.
In this case, you will have the output:
{
"data1": [27.135222728033753, 114.02096846633076, 26.074562556253937, 24.57196064623251],
"data2": [27.135222, 114.02094, 26.074565, 24.571964]
}

Thanks @senyai, @nikita-k0v and @chrismaes87, I have incorporated your suggestions.
Little suggestion to also correctly process Numpy types:
Also, personally, I would remove the `format(o, "g")` part, as it sometimes results in converting floats into exponential notation.