Created
February 21, 2015 21:18
-
-
Save hirokai/2fc4353cf6b28e5d2aac to your computer and use it in GitHub Desktop.
microjson by Patrick Hensley
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# microjson - Minimal JSON parser/emitter for use in standalone scripts.
# No warranty. Free to use/modify as you see fit. Trades speed for compactness.
# Send ideas, bugs, simplifications to http://github.com/phensley
# Copyright (c) 2010 Patrick Hensley <[email protected]>
# std | |
import math | |
import StringIO | |
import types | |
# the '_from_json_number' function returns either float or long. | |
__pychecker__ = 'no-returnvalues' | |
# character classes | |
WS = set([' ','\t','\r','\n','\b','\f']) | |
DIGITS = set([str(i) for i in range(0, 10)]) | |
NUMSTART = DIGITS.union(['.','-','+']) | |
NUMCHARS = NUMSTART.union(['e','E']) | |
ESC_MAP = {'n':'\n','t':'\t','r':'\r','b':'\b','f':'\f'} | |
REV_ESC_MAP = dict([(_v,_k) for _k,_v in ESC_MAP.items()] + [('"','"')]) | |
# Error messages, grouped by the stage that reports them.

# -- input validation
E_BYTES = 'input string must be type str containing ASCII or UTF-8 bytes'
E_EMPTY = 'found empty string, not valid JSON data'

# -- parsing
E_MALF = 'malformed JSON data'
E_TRUNC = 'truncated JSON data'
E_BADESC = 'bad escape character found'
E_BOOL = 'expected boolean'
E_NULL = 'expected null'
E_LITEM = 'expected list item'
E_DKEY = 'expected key'
E_COLON = 'missing colon after key'

# -- emitting (each takes one '%s' argument)
E_UNSUPP = 'unsupported type "%s" cannot be JSON-encoded'
E_BADFLOAT = 'cannot emit floating point value "%s"'
class JSONError(Exception):
    """Error raised by the parser and the emitter.

    When a stream object is supplied, the message is extended with the
    given position and the repr() of up to 32 characters of input taken
    from that position via ``stm.substr``.
    """

    def __init__(self, msg, stm=None, pos=0):
        if stm:
            snippet = repr(stm.substr(pos, 32))
            msg = msg + ' at position %d, "%s"' % (pos, snippet)
        Exception.__init__(self, msg)
class JSONStream(object):
    """Read-only cursor over the JSON input string.

    The input is kept as a plain string together with a read offset
    instead of being wrapped in ``StringIO.StringIO``: the ``pos`` and
    ``len`` attributes of that class are not part of the documented file
    API, and only the small set of methods below is meant to be exposed.

    ``data`` must be a string (``from_json`` checks this before building
    the stream).
    """

    def __init__(self, data):
        self._data = data
        self._pos = 0

    @property
    def pos(self):
        """Offset of the next character to be read."""
        return self._pos

    @property
    def len(self):
        """Total length of the input."""
        return len(self._data)

    def getvalue(self):
        """Return the complete input string."""
        return self._data

    def skipspaces(self):
        "post-cond: read pointer will be over first non-WS char"
        self._skip(lambda c: c not in WS)

    def _skip(self, stopcond):
        """Advance until ``stopcond(char)`` is true or the input ends."""
        while True:
            c = self.peek()
            if stopcond(c) or c == '':
                break
            self.next()

    def next(self, size=1):
        """Consume and return up to ``size`` characters.

        Returns a shorter (possibly empty) string at end of input.  A
        negative or None ``size`` consumes everything that is left.
        """
        start = self._pos
        total = len(self._data)
        if size is None or size < 0:
            end = total
        else:
            end = min(start + size, total)
        self._pos = end
        return self._data[start:end]

    def next_ord(self):
        """Consume one character and return its ordinal.

        Raises JSONError at end of input; previously ord('') produced an
        unrelated TypeError when a multi-byte sequence was cut short.
        """
        c = self.next()
        if c == '':
            raise JSONError(E_TRUNC, self, self._pos)
        return ord(c)

    def peek(self):
        """Return the next character without consuming it, '' at the end."""
        if self._pos == len(self._data):
            return ''
        return self._data[self._pos]

    def substr(self, pos, length):
        """Return ``length`` characters of the input starting at ``pos``."""
        return self._data[pos:pos + length]
def _decode_utf8(c0, stm):
    """Decode one multi-byte UTF-8 sequence into a unicode string.

    c0  -- the lead byte, already consumed from the stream.
    stm -- stream whose next_ord() yields the following bytes.

    Returns the decoded character.  A lead byte that matches none of the
    2-, 3- or 4-byte patterns, or a decoded value above U+10FFFF, yields
    U+FFFD (the replacement character).  Continuation bytes are masked,
    not validated.
    """
    c0 = ord(c0)
    nc = stm.next_ord
    # 110yyyyy 10zzzzzz
    if (c0 & 0xE0) == 0xC0:
        r = ((c0 & 0x1F) << 6) + (nc() & 0x3F)
    # 1110xxxx 10yyyyyy 10zzzzzz
    elif (c0 & 0xF0) == 0xE0:
        r = ((c0 & 0x0F) << 12) + ((nc() & 0x3F) << 6) + (nc() & 0x3F)
    # 11110www 10xxxxxx 10yyyyyy 10zzzzzz
    elif (c0 & 0xF8) == 0xF0:
        r = ((c0 & 0x07) << 18) + ((nc() & 0x3F) << 12) + \
            ((nc() & 0x3F) << 6) + (nc() & 0x3F)
    else:
        r = 0xFFFD  # unicode replacement character
    if r > 0x10FFFF:
        # lead bytes 0xF5-0xF7 can encode values beyond the Unicode range
        r = 0xFFFD
    try:
        return unichr(r)
    except ValueError:
        # unichr() rejects values above 0xFFFF on narrow builds; represent
        # the character as a UTF-16 surrogate pair instead of failing.
        r -= 0x10000
        return unichr(0xD800 + (r >> 10)) + unichr(0xDC00 + (r & 0x3FF))
def decode_escape(c, stm):
    """Decode the escape sequence introduced by a backslash.

    c   -- the character that followed the backslash (already consumed).
    stm -- stream positioned just after ``c``.

    Letters found in ESC_MAP return the mapped control character, 'u'
    reads four hex digits and returns that code point, and any other
    character is returned unchanged.

    Raises JSONError if fewer than four characters follow '\\u' or if any
    of them is not a hexadecimal digit (previously a bare ValueError
    escaped from int()).
    """
    # whitespace / control escapes
    v = ESC_MAP.get(c, None)
    if v is not None:
        return v
    # plain character
    if c != 'u':
        return c
    # decode unicode escape \u1234
    pos = stm.pos
    digits = stm.next(4)
    if len(digits) < 4:
        raise JSONError(E_TRUNC, stm, pos)
    # check each digit explicitly: int() alone would also accept a sign
    # or surrounding whitespace.
    for d in digits:
        if d not in '0123456789abcdefABCDEF':
            raise JSONError(E_BADESC, stm, pos)
    return unichr(int(digits, 16))
def _from_json_string(stm):
    """Parse a double-quoted JSON string; the stream is on the opening quote.

    Returns the decoded text.  Raises JSONError(E_TRUNC) if the input ends
    before the closing quote.
    """
    stm.next()  # discard the opening '"'
    pieces = []
    while True:
        ch = stm.next()
        if ch == '':
            raise JSONError(E_TRUNC, stm, stm.pos - 1)
        if ch == '"':
            return ''.join(pieces)
        if ch == '\\':
            # escape sequence: the following character selects its meaning
            piece = decode_escape(stm.next(), stm)
        elif ch > '\x7f':
            # lead byte of a multi-byte UTF-8 sequence
            piece = _decode_utf8(ch, stm)
        else:
            piece = ch
        pieces.append(piece)
def _from_json_fixed(stm, expected, value, errmsg): | |
off = len(expected) | |
pos = stm.pos | |
if stm.substr(pos, off) == expected: | |
stm.next(off) | |
return value | |
raise JSONError(errmsg, stm, pos) | |
def _from_json_number(stm):
    """Parse a number starting at the current stream position.

    Consumes the longest run of characters from NUMCHARS and converts it:
    to float if it contains '.', 'e' or 'E', otherwise to long.

    Raises JSONError(E_MALF) when the run is not a valid number (for
    example '-', '1-2' or '1e'); previously the ValueError from
    float()/long() escaped to the caller.
    """
    # Per rfc 4627 section 2.4 '0' and '0.1' are valid, but '01' and
    # '01.1' are not, presumably since this would be confused with an
    # octal number.  This rule is not enforced.
    pos = stm.pos
    is_float = False
    while True:
        c = stm.peek()
        # peek() returns '' at end of input, which is not in NUMCHARS
        if c not in NUMCHARS:
            break
        if c in ('.', 'e', 'E'):
            is_float = True
        stm.next()
    s = stm.substr(pos, stm.pos - pos)
    try:
        if is_float:
            return float(s)
        return long(s)
    except ValueError:
        raise JSONError(E_MALF, stm, pos)
def _from_json_list(stm):
    """Parse a JSON array; the stream is positioned on the opening '['.

    Returns a Python list.  Raises JSONError(E_TRUNC) if the input ends
    before ']' and JSONError(E_MALF) if two items are not separated by a
    comma.
    """
    stm.next()  # discard '['
    items = []
    start = stm.pos
    while True:
        stm.skipspaces()
        ch = stm.peek()
        if ch == '':
            raise JSONError(E_TRUNC, stm, start)
        if ch == ']':
            stm.next()
            return items
        if ch == ',':
            # separator: consume it, the next value follows
            stm.next()
        elif items:
            # a value directly after another value, with no comma between
            raise JSONError(E_MALF, stm, stm.pos)
        items.append(_from_json_raw(stm))
def _from_json_dict(stm):
    """Parse a JSON object; the stream is positioned on the opening '{'.

    Returns a Python dict.  Raises JSONError with E_TRUNC (input ends
    before '}'), E_DKEY (',' or '}' directly after a ','), E_COLON (no
    ':' after a key) or E_MALF (any other unexpected character).
    """
    stm.next()  # discard '{'
    mapping = {}
    need_key = False  # True right after a ',' until the next pair is read
    start = stm.pos
    while True:
        stm.skipspaces()
        ch = stm.peek()
        if ch == '':
            raise JSONError(E_TRUNC, stm, start)
        if ch == '"':
            # parse out a key/value pair
            key = _from_json_string(stm)
            stm.skipspaces()
            if stm.next() != ':':
                raise JSONError(E_COLON, stm, stm.pos)
            stm.skipspaces()
            mapping[key] = _from_json_raw(stm)
            need_key = False
        elif ch == '}' or ch == ',':
            # end of dictionary, or next item
            stm.next()
            if need_key:
                raise JSONError(E_DKEY, stm, stm.pos)
            if ch == '}':
                return mapping
            need_key = True
        else:
            # unexpected character in middle of dict
            raise JSONError(E_MALF, stm, stm.pos)
def _from_json_raw(stm):
    """Parse one JSON value of any kind from the stream.

    Skips leading whitespace, then dispatches on the first character.
    Raises JSONError(E_MALF) if that character cannot start a value
    (this includes reaching the end of the input).
    """
    stm.skipspaces()
    ch = stm.peek()
    # containers and strings
    if ch == '"':
        return _from_json_string(stm)
    if ch == '{':
        return _from_json_dict(stm)
    if ch == '[':
        return _from_json_list(stm)
    # fixed literals
    if ch == 't':
        return _from_json_fixed(stm, 'true', True, E_BOOL)
    if ch == 'f':
        return _from_json_fixed(stm, 'false', False, E_BOOL)
    if ch == 'n':
        return _from_json_fixed(stm, 'null', None, E_NULL)
    # numbers
    if ch in NUMSTART:
        return _from_json_number(stm)
    raise JSONError(E_MALF, stm, stm.pos)
def from_json(data):
    """
    Parse 'data', a str holding UTF-8 bytes (or the 7-bit pure ASCII
    subset), and return the equivalent Python value. Bytes in a str are
    required; a unicode object is rejected with JSONError.

    An empty string yields None.
    """
    if not isinstance(data, str):
        raise JSONError(E_BYTES)
    if data:
        return _from_json_raw(JSONStream(data))
    return None
# JSON emitter | |
def _to_json_list(stm, lst): | |
seen = 0 | |
stm.write('[') | |
for elem in lst: | |
if seen: | |
stm.write(',') | |
seen = 1 | |
_to_json_object(stm, elem) | |
stm.write(']') | |
def _to_json_string(stm, buf):
    """Write 'buf' to 'stm' as a double-quoted, ASCII-only JSON string.

    Characters listed in REV_ESC_MAP are written as a backslash followed
    by the mapped letter.  Remaining control characters (below 0x20) and
    all characters above 0x7F are written as \\uXXXX escapes; code points
    above 0xFFFF are written as a UTF-16 surrogate pair, because a JSON
    \\u escape carries exactly four hex digits.
    """
    stm.write('"')
    for c in buf:
        nc = REV_ESC_MAP.get(c, None)
        code = ord(c)
        if nc:
            stm.write('\\' + nc)
        elif code < 0x20:
            # control characters must not appear raw inside a JSON string
            stm.write('\\u%04x' % code)
        elif code <= 0x7F:
            # force ascii
            stm.write(str(c))
        elif code > 0xFFFF:
            code -= 0x10000
            stm.write('\\u%04x\\u%04x' % (0xD800 + (code >> 10),
                                          0xDC00 + (code & 0x3FF)))
        else:
            stm.write('\\u%04x' % code)
    stm.write('"')
def _to_json_dict(stm, dct): | |
seen = 0 | |
stm.write('{') | |
for key in dct.keys(): | |
if seen: | |
stm.write(',') | |
seen = 1 | |
val = dct[key] | |
if not type(key) in (types.StringType, types.UnicodeType): | |
key = str(key) | |
_to_json_string(stm, key) | |
stm.write(':') | |
_to_json_object(stm, val) | |
stm.write('}') | |
def _to_json_object(stm, obj): | |
if isinstance(obj, (types.ListType, types.TupleType)): | |
_to_json_list(stm, obj) | |
elif isinstance(obj, types.BooleanType): | |
if obj: | |
stm.write('true') | |
else: | |
stm.write('false') | |
elif isinstance(obj, types.FloatType): | |
if math.isnan(obj) or math.isinf(obj): | |
raise JSONError(E_BADFLOAT % obj) | |
stm.write("%s" % obj) | |
elif isinstance(obj, (types.IntType, types.LongType)): | |
stm.write("%d" % obj) | |
elif isinstance(obj, types.NoneType): | |
stm.write('null') | |
elif isinstance(obj, (types.StringType, types.UnicodeType)): | |
_to_json_string(stm, obj) | |
elif hasattr(obj, 'keys') and hasattr(obj, '__getitem__'): | |
_to_json_dict(stm, obj) | |
# fall back to implicit string conversion. | |
elif hasattr(obj, '__unicode__'): | |
_to_json_string(stm, obj.__unicode__()) | |
elif hasattr(obj, '__str__'): | |
_to_json_string(stm, obj.__str__()) | |
else: | |
raise JSONError(E_UNSUPP % type(obj)) | |
def to_json(obj):
    """
    Encode 'obj' and return the result as an ASCII JSON string.
    """
    # collect the emitter's writes in an in-memory buffer
    out = StringIO.StringIO('')
    _to_json_object(out, obj)
    return out.getvalue()
# Alternative names for the two public entry points.
decode = from_json
encode = to_json
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment