Last active
December 21, 2015 04:39
-
-
Save gustavofonseca/6251218 to your computer and use it in GitHub Desktop.
codificar objetos geradores em Json, sem expandi-los em memória
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Copyright (c) 2013 Python Software Foundation; All Rights Reserved

Adds support for encoding generator objects as JSON arrays without first
expanding them into a list in memory.
""" | |
import json | |
import types | |
# Local aliases for json.encoder internals. GeneratorJSONEncoder.iterencode
# uses them to build its string-encoding and float-formatting helpers.
INFINITY = json.encoder.INFINITY
FLOAT_REPR = json.encoder.FLOAT_REPR
encode_basestring_ascii = json.encoder.encode_basestring_ascii
encode_basestring = json.encoder.encode_basestring
class GeneratorJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes generator objects as JSON arrays.

    Generators are consumed one item at a time while the output is being
    produced, so the sequence is never materialized as a list.  All other
    values are encoded exactly as ``json.JSONEncoder`` encodes them.
    """

    def iterencode(self, o, _one_shot=False):
        """Encode ``o`` and return an iterable of JSON text chunks.

        o         -- the object to serialize; may be, or contain, generators.
        _one_shot -- internal flag passed by ``JSONEncoder.encode``; when true
                     the base class is allowed to encode everything eagerly.

        Raises ``TypeError`` (while the result is iterated) for objects that
        neither this encoder nor ``self.default`` can serialize, and
        ``ValueError`` for circular references or, when ``allow_nan`` is
        false, out-of-range floats.
        """
        if _one_shot:
            try:
                chunks = super(GeneratorJSONEncoder, self).iterencode(
                    o, _one_shot=True)
            except TypeError:
                # The eager encoder rejected something, possibly a
                # generator; retry below with the generator-aware encoder.
                pass
            else:
                # Only a fully materialized result proves the input was
                # encodable.  A lazy iterator has done no work yet and would
                # raise TypeError mid-iteration on the first generator, so it
                # is discarded in favour of the generator-aware encoder.
                if isinstance(chunks, (list, tuple)):
                    return chunks

        # What follows mirrors the setup the base class performs before it
        # builds its own encoder; it is repeated here because the base class
        # offers no hook for swapping the encoder factory.
        markers = {} if self.check_circular else None
        _encoder = encode_basestring_ascii if self.ensure_ascii else encode_basestring
        if self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                # Byte strings in a non-UTF-8 encoding must be decoded first.
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)
            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))
            return text

        _iterencode = _make_geniterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, _one_shot)
        return _iterencode(o, 0)
def _make_geniterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build a chunk-yielding JSON encoder that also accepts generators.

    markers         -- dict used to detect circular references, or None to
                       disable the check.
    _default        -- callable invoked for objects of unsupported types; its
                       return value is encoded in place of the object.
    _encoder        -- callable turning a string into its JSON representation.
    _indent         -- spaces per nesting level, or None for compact output.
    _floatstr       -- callable turning a float into its JSON representation.
    _key_separator  -- text emitted between a key and its value.
    _item_separator -- text emitted between consecutive items.
    _sort_keys      -- when true, dict items are emitted sorted by key.
    _skipkeys       -- when true, unsupported dict keys are skipped instead
                       of raising TypeError.
    _one_shot       -- accepted for signature compatibility; not used here.

    Returns ``_iterencode(o, _current_indent_level)``, a generator function
    yielding the JSON text of ``o`` piece by piece.  Generator objects are
    written as JSON arrays and consumed lazily while chunks are produced.
    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array for a list, tuple or generator."""
        # Empty lists/tuples are falsy; generators are always truthy, so an
        # empty generator is detected after the loop instead.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # The opening bracket is held back and emitted together with the
        # first element.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    # Nested generators are routed through _iterencode.
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if first:
            # The loop never ran: ``lst`` was a generator that produced no
            # items, so the pending '[' was never emitted.  Write a complete
            # empty array rather than a lone closing bracket.
            yield '[]'
        else:
            if newline_indent is not None:
                _current_indent_level -= 1
                yield '\n' + (' ' * (_indent * _current_indent_level))
            yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object for a dict."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    # Generator values are routed through _iterencode.
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text for any supported object ``o``."""
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple, types.GeneratorType)):
            # Generators are encoded as arrays, consumed item by item.
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unknown type: let the caller-supplied hook convert it, guarding
            # against a hook that keeps returning the same object.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode
import unittest | |
class GeneratorJSONEncoderTests(unittest.TestCase):
    """Checks that GeneratorJSONEncoder writes generators as JSON arrays."""

    def test_list_of_ints(self):
        # A bare generator of ints becomes a flat JSON array.
        numbers = (n for n in range(5))
        encoded = json.dumps(numbers, cls=GeneratorJSONEncoder)
        self.assertEqual(encoded, '[0, 1, 2, 3, 4]')

    def test_normal_list_of_ints(self):
        # Ordinary lists must still encode as they do with the stock encoder.
        encoded = json.dumps([0, 1, 2, 3, 4], cls=GeneratorJSONEncoder)
        self.assertEqual(encoded, '[0, 1, 2, 3, 4]')

    def test_list_of_generators_of_ints(self):
        # A generator yielding generators becomes an array of arrays.
        nested = ((n for n in range(5)) for _ in range(3))
        encoded = json.dumps(nested, cls=GeneratorJSONEncoder)
        self.assertEqual(
            encoded,
            '[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment