Created
October 2, 2019 18:25
-
-
Save blzzua/6f80a05ac9dd01923ea522af353e78a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# rison for python (parser only so far) | |
# see http://mjtemplate.org/examples/rison.html for more info | |
# | |
###################################################################### | |
# | |
# the rison parser is based on javascript openlaszlo-json: | |
# Author: Oliver Steele | |
# Copyright: Copyright 2006 Oliver Steele. All rights reserved. | |
# Homepage: http://osteele.com/sources/openlaszlo/json
# License: MIT License. | |
# Version: 1.0 | |
# | |
# hacked by nix for use in uris | |
# ported to python by nix | |
# | |
# TODO | |
# | |
# switch to unicode | |
# fall through to simplejson if first char is not in '!(' - | |
# this allows code to use just one parser | |
# | |
# https://searchcode.com/codesearch/view/8429695/ | |
import os, sys, re | |
#import simplejson | |
simplejson = None | |
class ParserException(Exception):
    """Error raised by the rison parser when its input cannot be parsed."""
class Parser(object):
    """Recursive-descent parser that turns a rison string into Python values.

    Mapping of rison syntax to results:

    * ``(k:v,...)``  -> dict
    * ``!(a,b,...)`` -> list
    * ``!t`` / ``!f`` / ``!n`` -> True / False / None
    * ``'...'`` (with ``!!`` and ``!'`` escapes) -> string
    * text starting with a digit or ``-`` -> int or float
    * anything else matching ``idrx`` -> string (a bare id)

    Malformed rison input is reported by raising ParserException.
    Parsing state lives on the instance (``self.string`` / ``self.index``),
    so a single instance handles one parse at a time.
    """

    # Characters skipped between tokens.  Empty: no whitespace is tolerated.
    WHITESPACE = ''
    #WHITESPACE = " \t\n\r\f"

    # Punctuation that (together with alphanumerics) may appear in a bare id.
    # Every other character below code point 127 is illegal in ids.
    idchar_punctuation = '_-./~'

    # Built with filter() and a default-bound lambda rather than a
    # comprehension: a comprehension in a class body runs in its own scope on
    # Python 3 and cannot see the class-level name ``idchar_punctuation``.
    not_idchar = ''.join(filter(
        lambda c, _punct=idchar_punctuation: not (c.isalnum() or c in _punct),
        map(chr, range(127))))

    # Ids and numbers are told apart by their first character.
    not_idstart = "-0123456789"

    # Regexp source matching a valid id.  The excluded characters are escaped
    # so that '\\', ']' and '^' are taken literally inside the character
    # classes instead of changing the meaning of the pattern.
    idrx = ('[^' + re.escape(not_idstart + not_idchar) +
            '][^' + re.escape(not_idchar) + ']*')

    # Checks that a whole string is a valid rison id.
    id_ok_re = re.compile('^' + idrx + '$', re.M)

    # Finds the end of an id while parsing.
    next_id_re = re.compile(idrx, re.M)

    def parse_json(self, str):
        """Parse *str* as rison if it starts with ``!`` or ``(``, else as JSON.

        An empty string is handed to the rison parser (which rejects it).
        JSON decoding uses the standard-library ``json`` module.
        """
        if len(str) > 0 and str[0] not in '!(':
            import json  # local import: keeps the module's import list as is
            return json.loads(str)
        return self.parse(str)

    def parse(self, str):
        """Parse the complete rison string *str* and return its value.

        Raises ParserException if the text is malformed or if anything is
        left over after the first value.
        """
        self.string = str
        self.index = 0
        value = self.readValue()
        if self.next() is not None:
            raise ParserException("unable to parse rison string %r" % (str,))
        return value

    def readValue(self):
        """Read one value starting at the current position and return it."""
        c = self.next()
        if c is None:
            # Checked first: ``None in '...'`` below would be a TypeError.
            raise ParserException("empty expression")
        if c == '!':
            return self.parse_bang()
        if c == '(':
            return self.parse_open_paren()
        if c == "'":
            return self.parse_single_quote()
        if c in '-0123456789':
            return self.parse_number()
        # Not a special leading character: parse as an id.  next() already
        # consumed the first character, so matching starts one position back.
        m = self.next_id_re.match(self.string, self.index - 1)
        if m:
            self.index = m.end()
            return m.group(0)  # a string
        raise ParserException("invalid character: '" + c + "'")

    def parse_array(self):
        """Read array elements up to the closing ``)`` and return a list.

        Called after ``!(`` has been consumed.
        """
        ar = []
        while True:
            c = self.next()
            if c == ')':
                return ar
            if c is None:
                raise ParserException("unmatched '!('")
            if ar:
                if c != ',':
                    raise ParserException("missing ','")
            elif c == ',':
                raise ParserException("extra ','")
            else:
                # First element: c belongs to the value, so put it back.
                self.index -= 1
            ar.append(self.readValue())

    def parse_bang(self):
        """Read the construct that follows a ``!`` (see ``bangs``)."""
        s = self.string
        if self.index >= len(s):
            raise ParserException('"!" at end of input')
        c = s[self.index]
        self.index += 1
        if c not in self.bangs:
            raise ParserException('unknown literal: "!' + c + '"')
        x = self.bangs[c]
        if callable(x):
            # Table entries that are functions are sub-parsers (e.g. arrays).
            return x(self)
        return x

    def parse_open_paren(self):
        """Read ``key:value`` pairs up to the closing ``)`` and return a dict.

        Called after ``(`` has been consumed.
        """
        count = 0
        o = {}
        while True:
            c = self.next()
            if c == ')':
                return o
            if c is None:
                # Without this check an unterminated '(' would rewind onto
                # itself below and recurse without end.
                raise ParserException("unmatched '('")
            if count:
                if c != ',':
                    raise ParserException("missing ','")
            elif c == ',':
                raise ParserException("extra ','")
            else:
                # First pair: c belongs to the key, so put it back.
                self.index -= 1
            k = self.readValue()
            if self.next() != ':':
                raise ParserException("missing ':'")
            v = self.readValue()
            try:
                o[k] = v
            except TypeError:
                # Arrays and objects parse to lists/dicts, which cannot be
                # used as dict keys.
                raise ParserException("invalid object key: %r" % (k,))
            count += 1

    def parse_single_quote(self):
        """Read a quoted string; the opening quote is already consumed.

        Inside the quotes ``!!`` stands for ``!`` and ``!'`` for ``'``;
        any other character after ``!`` is an error.
        """
        s = self.string
        i = self.index
        start = i  # beginning of the pending run of unescaped characters
        segments = []
        while True:
            if i >= len(s):
                raise ParserException('unmatched "\'"')
            c = s[i]
            i += 1
            if c == "'":
                break
            if c == '!':
                if start < i - 1:
                    segments.append(s[start:i - 1])
                if i >= len(s):
                    # Input stops right after the escape character.
                    raise ParserException('unmatched "\'"')
                c = s[i]
                i += 1
                if c in "!'":
                    segments.append(c)
                else:
                    raise ParserException(
                        'invalid string escape: "!' + c + '"')
                start = i
        # i is one past the closing quote here.
        if start < i - 1:
            segments.append(s[start:i - 1])
        self.index = i
        return ''.join(segments)

    def parse_number(self):
        """Read a number whose first character (digit or ``-``) is consumed.

        Returns a float when the text contains ``.`` or an exponent marker
        (``e`` or ``E``), otherwise an int.  Raises ParserException when the
        consumed text is not a valid numeral.
        """
        s = self.string
        i = self.index
        start = i - 1  # the first character was consumed by next()
        state = 'int'
        permittedSigns = '-'
        transitions = {
            'int+.': 'frac',
            'int+e': 'exp',
            'frac+e': 'exp',
        }
        while True:
            if i >= len(s):
                i += 1  # compensates for the "i - 1" below
                break
            c = s[i]
            i += 1
            if '0' <= c <= '9':
                continue
            if permittedSigns.find(c) >= 0:
                permittedSigns = ''
                continue
            state = transitions.get(state + '+' + c.lower(), None)
            if state is None:
                break
            if state == 'exp':
                permittedSigns = '-'
        self.index = i - 1
        text = s[start:self.index]
        if text == '-':
            raise ParserException("invalid number")
        try:
            # The scanner accepts 'E' as well as 'e', so both mark a float.
            if any(mark in text for mark in '.eE'):
                return float(text)
            return int(text)
        except ValueError:
            raise ParserException("invalid number")

    def next(self):
        """Return the next non-whitespace character, or None at end of input.

        The position is advanced past the returned character; it is left
        unchanged when None is returned.
        """
        s = self.string
        i = self.index
        while True:
            if i >= len(s):
                return None
            c = s[i]
            i += 1
            if c not in self.WHITESPACE:
                break
        self.index = i
        return c

    # What may follow '!': literal values, or a sub-parser to call.
    bangs = {
        't': True,
        'f': False,
        'n': None,
        '(': parse_array,
    }
def loads(s):
    """Parse the rison string *s* with a fresh Parser and return the result."""
    parser = Parser()
    return parser.parse(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment