blzzua · October 2, 2019 18:25
diff --git a/rison.py b/rison.py

 #
 #  rison for python (parser only so far)
 #    see http://mjtemplate.org/examples/rison.html for more info
 #

 ######################################################################
 #
 # the rison parser is based on javascript openlaszlo-json:
 #    Author: Oliver Steele
 #    Copyright: Copyright 2006 Oliver Steele.  All rights reserved.
 #    Homepage: http://osteele.com/sources/openlaszlo/json
 #    License: MIT License.
 #    Version: 1.0
 #

 # hacked by nix for use in uris
 # ported to python by nix
 #
 #  TODO
 #
 #  switch to unicode
 #  fall through to simplejson if first char is not in '!(' -
 #   this allows code to use just one parser
 #

 # https://searchcode.com/codesearch/view/8429695/


 import os, sys, re
 #import simplejson
 simplejson = None

 class ParserException(Exception):
     """Raised by the rison parser when its input cannot be parsed."""

 class Parser(object):
     """Recursive-descent parser that decodes a rison string.

     ``parse`` returns plain Python data: a dict for ``(k:v,...)``, a list
     for ``!(...)``, ``True``/``False``/``None`` for ``!t``/``!f``/``!n``,
     an int or float for numbers and a string for ids and quoted strings.
     Malformed input is reported by raising ParserException.

     The read position lives on the instance (``self.string`` and
     ``self.index``), so an instance handles one ``parse`` call at a time.
     """

     # Characters next() skips over.  Empty, so no whitespace is tolerated;
     # " \t\n\r\f" is the alternative the original author left disabled.
     WHITESPACE = ''

     # Punctuation that may appear in an id next to letters and digits.
     idchar_punctuation = '_-./~'

     # Every character below chr(127) that may NOT appear in an id.
     # Built with a plain loop: a comprehension in a class body cannot see
     # sibling class attributes on Python 3, so the comprehension form
     # raised NameError for idchar_punctuation while the class was created.
     not_idchar = ''
     for _code in range(127):
         if not (chr(_code).isalnum() or chr(_code) in idchar_punctuation):
             not_idchar += chr(_code)
     del _code

     # ids and numbers are told apart by their first character
     not_idstart = "-0123456789"

     # Regexp source for a valid id: one character that can start an id,
     # then any number of id characters.
     # NOTE(review): not_idchar is spliced in unescaped, so its adjacent
     # backslash and ']' read as an escaped ']' and a backslash is
     # therefore accepted inside ids.  Left unchanged for compatibility.
     idrx = ('[^' + not_idstart + not_idchar +
             '][^' + not_idchar + ']*')

     # matches a string that is a valid rison id from start to end
     id_ok_re = re.compile('^' + idrx + '$', re.M)

     # finds the end of an id while parsing
     next_id_re = re.compile(idrx, re.M)

     def parse_json(self, str):
         """Parse ``str`` as JSON or as rison, chosen by its first character.

         A non-empty string that does not start with '!' or '(' is handed
         to a JSON decoder; everything else goes through ``parse``.  The
         module-level ``simplejson`` is used when it is set, otherwise the
         standard-library ``json`` module (previously a ``None``
         ``simplejson`` made this path fail with AttributeError).
         """
         if len(str) > 0 and str[0] not in '!(':
             decoder = simplejson
             if decoder is None:
                 import json as decoder
             return decoder.loads(str)
         return self.parse(str)

     def parse(self, str):
         """Parse a complete rison string and return the decoded value.

         Raises ParserException when the input is malformed or when any
         character is left over after the first value.
         """
         self.string = str
         self.index = 0

         value = self.readValue()
         if self.next():
             raise ParserException("unable to parse rison string %r" % (str,))
         return value

     def readValue(self):
         """Read one value at the current position and return it.

         Dispatches on the next character: '!' literals and arrays, '('
         objects, quoted strings, numbers, and otherwise an id (returned
         as a string).  Raises ParserException at end of input or when the
         character cannot start a value.
         """
         c = self.next()
         if c is None:
             # Must be tested first: ``None in '...'`` raises TypeError.
             raise ParserException("empty expression")

         if c == '!':
             return self.parse_bang()
         if c == '(':
             return self.parse_open_paren()
         if c == "'":
             return self.parse_single_quote()
         if c in '-0123456789':
             return self.parse_number()

         # Anything else has to be an id; next() already stepped past its
         # first character, so match from one position back.
         start = self.index - 1
         m = self.next_id_re.match(self.string, start)
         if m:
             self.index = m.end()
             return m.group(0)

         raise ParserException("invalid character: '" + c + "'")

     def parse_array(self):
         """Read the items of a ``!(...)`` array; '!(' is already consumed.

         Returns the list of items.  Raises ParserException on an
         unterminated array or a missing or extra ','.
         """
         items = []
         while True:
             c = self.next()
             if c == ')':
                 return items

             if c is None:
                 raise ParserException("unmatched '!('")

             if items:
                 if c != ',':
                     raise ParserException("missing ','")
             elif c == ',':
                 raise ParserException("extra ','")
             else:
                 # first item: un-read the character so readValue sees it
                 self.index -= 1
             items.append(self.readValue())

     def parse_bang(self):
         """Read the literal that follows a '!' and return its value.

         The character after '!' is looked up in ``bangs``; callable
         entries are invoked with the parser, other entries are returned
         as they are.  Raises ParserException when '!' is the last
         character or the literal is unknown.
         """
         s = self.string
         if self.index >= len(s):
             # Indexing past the end raised IndexError before this check.
             raise ParserException('"!" at end of input')
         c = s[self.index]
         self.index += 1
         if c not in self.bangs:
             raise ParserException('unknown literal: "!' + c + '"')
         x = self.bangs[c]
         if callable(x):
             return x(self)

         return x

     def parse_open_paren(self):
         """Read the members of a ``(...)`` object; '(' is already consumed.

         Returns a dict mapping each key to its value.  Raises
         ParserException on an unterminated object, a missing or extra
         ',' or a missing ':'.
         """
         members = {}
         while True:
             c = self.next()
             if c == ')':
                 return members

             if c is None:
                 # Without this check the un-read below stepped back onto
                 # the '(' and this method was re-entered without end.
                 raise ParserException("unmatched '('")

             if members:
                 if c != ',':
                     raise ParserException("missing ','")
             elif c == ',':
                 raise ParserException("extra ','")
             else:
                 # first member: un-read the character so readValue sees it
                 self.index -= 1
             key = self.readValue()

             if self.next() != ':':
                 raise ParserException("missing ':'")
             members[key] = self.readValue()

     def parse_single_quote(self):
         """Read a quoted string; the opening quote is already consumed.

         ``!!`` decodes to '!' and ``!'`` to a single quote.  Returns the
         decoded text.  Raises ParserException when the closing quote is
         missing or a '!' is followed by any other character.
         """
         s = self.string
         i = self.index
         start = i  # start of the pending run of unescaped characters
         segments = []

         while True:
             if i >= len(s):
                 raise ParserException('unmatched "\'"')

             c = s[i]
             i += 1
             if c == "'":
                 break

             if c == '!':
                 if start < i - 1:
                     segments.append(s[start:i - 1])
                 if i >= len(s):
                     # a '!' as the last character leaves the string open
                     raise ParserException('unmatched "\'"')
                 c = s[i]
                 i += 1
                 if c not in "!'":
                     raise ParserException('invalid string escape: "!' + c + '"')
                 segments.append(c)
                 start = i

         if start < i - 1:
             segments.append(s[start:i - 1])
         self.index = i
         return ''.join(segments)

     # Also any number start (digit or '-')
     def parse_number(self):
         """Read a number whose first character is already consumed.

         Returns a float when the text contains '.', 'e' or 'E', otherwise
         an int.  Raises ParserException when the scanned text is not a
         valid number.
         """
         s = self.string
         i = self.index
         start = i - 1
         state = 'int'
         permitted_signs = '-'
         transitions = {
             'int+.': 'frac',
             'int+e': 'exp',
             'frac+e': 'exp',
         }

         while True:
             if i >= len(s):
                 # step past the end so the ``i - 1`` below lands on len(s)
                 i += 1
                 break

             c = s[i]
             i += 1

             if '0' <= c <= '9':
                 continue

             if c in permitted_signs:
                 permitted_signs = ''
                 continue

             state = transitions.get(state + '+' + c.lower())
             if state is None:
                 break
             if state == 'exp':
                 permitted_signs = '-'

         self.index = i - 1
         text = s[start:self.index]
         if text == '-':
             raise ParserException("invalid number")
         try:
             # 'E' is accepted by the scan above, so test for both cases.
             if re.search('[.eE]', text):
                 return float(text)
             return int(text)
         except ValueError:
             raise ParserException("invalid number")

     def next(self):
         """Return the next character not in WHITESPACE and step past it.

         Returns None at end of input, leaving ``self.index`` unchanged.
         """
         s = self.string
         i = self.index

         while True:
             if i == len(s):
                 return None
             c = s[i]
             i += 1
             if c not in self.WHITESPACE:
                 break

         self.index = i
         return c

     # Literals introduced by '!'.  Plain values are returned as they are;
     # callables are invoked with the parser to read a compound value.
     bangs = {
         't': True,
         'f': False,
         'n': None,
         '(': parse_array,
     }


 def loads(s):
     """Decode the rison string ``s`` with a fresh Parser and return the result."""
     parser = Parser()
     return parser.parse(s)

	#
	# rison for python (parser only so far)
	# see http://mjtemplate.org/examples/rison.html for more info
	#

	######################################################################
	#
	# the rison parser is based on javascript openlaszlo-json:
	# Author: Oliver Steele
	# Copyright: Copyright 2006 Oliver Steele. All rights reserved.
	# Homepage: http://osteele.com/sources/openlaszlo/json
	# License: MIT License.
	# Version: 1.0
	#

	# hacked by nix for use in uris
	# ported to python by nix
	#
	# TODO
	#
	# switch to unicode
	# fall through to simplejson if first char is not in '!(' -
	# this allows code to use just one parser
	#

	# https://searchcode.com/codesearch/view/8429695/


	import os, sys, re
	#import simplejson
	simplejson = None

	class ParserException(Exception):
	    """Raised by the rison parser when its input cannot be parsed."""

	class Parser(object):
	    """Recursive-descent parser that decodes a rison string.

	    ``parse`` returns plain Python data: a dict for ``(k:v,...)``, a list
	    for ``!(...)``, ``True``/``False``/``None`` for ``!t``/``!f``/``!n``,
	    an int or float for numbers and a string for ids and quoted strings.
	    Malformed input is reported by raising ParserException.

	    The read position lives on the instance (``self.string`` and
	    ``self.index``), so an instance handles one ``parse`` call at a time.
	    """

	    # Characters next() skips over.  Empty, so no whitespace is tolerated;
	    # " \t\n\r\f" is the alternative the original author left disabled.
	    WHITESPACE = ''

	    # Punctuation that may appear in an id next to letters and digits.
	    idchar_punctuation = '_-./~'

	    # Every character below chr(127) that may NOT appear in an id.
	    # Built with a plain loop: a comprehension in a class body cannot see
	    # sibling class attributes on Python 3, so the comprehension form
	    # raised NameError for idchar_punctuation while the class was created.
	    not_idchar = ''
	    for _code in range(127):
	        if not (chr(_code).isalnum() or chr(_code) in idchar_punctuation):
	            not_idchar += chr(_code)
	    del _code

	    # ids and numbers are told apart by their first character
	    not_idstart = "-0123456789"

	    # Regexp source for a valid id: one character that can start an id,
	    # then any number of id characters.
	    # NOTE(review): not_idchar is spliced in unescaped, so its adjacent
	    # backslash and ']' read as an escaped ']' and a backslash is
	    # therefore accepted inside ids.  Left unchanged for compatibility.
	    idrx = ('[^' + not_idstart + not_idchar +
	            '][^' + not_idchar + ']*')

	    # matches a string that is a valid rison id from start to end
	    id_ok_re = re.compile('^' + idrx + '$', re.M)

	    # finds the end of an id while parsing
	    next_id_re = re.compile(idrx, re.M)

	    def parse_json(self, str):
	        """Parse ``str`` as JSON or as rison, chosen by its first character.

	        A non-empty string that does not start with '!' or '(' is handed
	        to a JSON decoder; everything else goes through ``parse``.  The
	        module-level ``simplejson`` is used when it is set, otherwise the
	        standard-library ``json`` module (previously a ``None``
	        ``simplejson`` made this path fail with AttributeError).
	        """
	        if len(str) > 0 and str[0] not in '!(':
	            decoder = simplejson
	            if decoder is None:
	                import json as decoder
	            return decoder.loads(str)
	        return self.parse(str)

	    def parse(self, str):
	        """Parse a complete rison string and return the decoded value.

	        Raises ParserException when the input is malformed or when any
	        character is left over after the first value.
	        """
	        self.string = str
	        self.index = 0

	        value = self.readValue()
	        if self.next():
	            raise ParserException("unable to parse rison string %r" % (str,))
	        return value

	    def readValue(self):
	        """Read one value at the current position and return it.

	        Dispatches on the next character: '!' literals and arrays, '('
	        objects, quoted strings, numbers, and otherwise an id (returned
	        as a string).  Raises ParserException at end of input or when the
	        character cannot start a value.
	        """
	        c = self.next()
	        if c is None:
	            # Must be tested first: ``None in '...'`` raises TypeError.
	            raise ParserException("empty expression")

	        if c == '!':
	            return self.parse_bang()
	        if c == '(':
	            return self.parse_open_paren()
	        if c == "'":
	            return self.parse_single_quote()
	        if c in '-0123456789':
	            return self.parse_number()

	        # Anything else has to be an id; next() already stepped past its
	        # first character, so match from one position back.
	        start = self.index - 1
	        m = self.next_id_re.match(self.string, start)
	        if m:
	            self.index = m.end()
	            return m.group(0)

	        raise ParserException("invalid character: '" + c + "'")

	    def parse_array(self):
	        """Read the items of a ``!(...)`` array; '!(' is already consumed.

	        Returns the list of items.  Raises ParserException on an
	        unterminated array or a missing or extra ','.
	        """
	        items = []
	        while True:
	            c = self.next()
	            if c == ')':
	                return items

	            if c is None:
	                raise ParserException("unmatched '!('")

	            if items:
	                if c != ',':
	                    raise ParserException("missing ','")
	            elif c == ',':
	                raise ParserException("extra ','")
	            else:
	                # first item: un-read the character so readValue sees it
	                self.index -= 1
	            items.append(self.readValue())

	    def parse_bang(self):
	        """Read the literal that follows a '!' and return its value.

	        The character after '!' is looked up in ``bangs``; callable
	        entries are invoked with the parser, other entries are returned
	        as they are.  Raises ParserException when '!' is the last
	        character or the literal is unknown.
	        """
	        s = self.string
	        if self.index >= len(s):
	            # Indexing past the end raised IndexError before this check.
	            raise ParserException('"!" at end of input')
	        c = s[self.index]
	        self.index += 1
	        if c not in self.bangs:
	            raise ParserException('unknown literal: "!' + c + '"')
	        x = self.bangs[c]
	        if callable(x):
	            return x(self)

	        return x

	    def parse_open_paren(self):
	        """Read the members of a ``(...)`` object; '(' is already consumed.

	        Returns a dict mapping each key to its value.  Raises
	        ParserException on an unterminated object, a missing or extra
	        ',' or a missing ':'.
	        """
	        members = {}
	        while True:
	            c = self.next()
	            if c == ')':
	                return members

	            if c is None:
	                # Without this check the un-read below stepped back onto
	                # the '(' and this method was re-entered without end.
	                raise ParserException("unmatched '('")

	            if members:
	                if c != ',':
	                    raise ParserException("missing ','")
	            elif c == ',':
	                raise ParserException("extra ','")
	            else:
	                # first member: un-read the character so readValue sees it
	                self.index -= 1
	            key = self.readValue()

	            if self.next() != ':':
	                raise ParserException("missing ':'")
	            members[key] = self.readValue()

	    def parse_single_quote(self):
	        """Read a quoted string; the opening quote is already consumed.

	        ``!!`` decodes to '!' and ``!'`` to a single quote.  Returns the
	        decoded text.  Raises ParserException when the closing quote is
	        missing or a '!' is followed by any other character.
	        """
	        s = self.string
	        i = self.index
	        start = i  # start of the pending run of unescaped characters
	        segments = []

	        while True:
	            if i >= len(s):
	                raise ParserException('unmatched "\'"')

	            c = s[i]
	            i += 1
	            if c == "'":
	                break

	            if c == '!':
	                if start < i - 1:
	                    segments.append(s[start:i - 1])
	                if i >= len(s):
	                    # a '!' as the last character leaves the string open
	                    raise ParserException('unmatched "\'"')
	                c = s[i]
	                i += 1
	                if c not in "!'":
	                    raise ParserException('invalid string escape: "!' + c + '"')
	                segments.append(c)
	                start = i

	        if start < i - 1:
	            segments.append(s[start:i - 1])
	        self.index = i
	        return ''.join(segments)

	    # Also any number start (digit or '-')
	    def parse_number(self):
	        """Read a number whose first character is already consumed.

	        Returns a float when the text contains '.', 'e' or 'E', otherwise
	        an int.  Raises ParserException when the scanned text is not a
	        valid number.
	        """
	        s = self.string
	        i = self.index
	        start = i - 1
	        state = 'int'
	        permitted_signs = '-'
	        transitions = {
	            'int+.': 'frac',
	            'int+e': 'exp',
	            'frac+e': 'exp',
	        }

	        while True:
	            if i >= len(s):
	                # step past the end so the ``i - 1`` below lands on len(s)
	                i += 1
	                break

	            c = s[i]
	            i += 1

	            if '0' <= c <= '9':
	                continue

	            if c in permitted_signs:
	                permitted_signs = ''
	                continue

	            state = transitions.get(state + '+' + c.lower())
	            if state is None:
	                break
	            if state == 'exp':
	                permitted_signs = '-'

	        self.index = i - 1
	        text = s[start:self.index]
	        if text == '-':
	            raise ParserException("invalid number")
	        try:
	            # 'E' is accepted by the scan above, so test for both cases.
	            if re.search('[.eE]', text):
	                return float(text)
	            return int(text)
	        except ValueError:
	            raise ParserException("invalid number")

	    def next(self):
	        """Return the next character not in WHITESPACE and step past it.

	        Returns None at end of input, leaving ``self.index`` unchanged.
	        """
	        s = self.string
	        i = self.index

	        while True:
	            if i == len(s):
	                return None
	            c = s[i]
	            i += 1
	            if c not in self.WHITESPACE:
	                break

	        self.index = i
	        return c

	    # Literals introduced by '!'.  Plain values are returned as they are;
	    # callables are invoked with the parser to read a compound value.
	    bangs = {
	        't': True,
	        'f': False,
	        'n': None,
	        '(': parse_array,
	    }


	def loads(s):
	    """Decode the rison string ``s`` with a fresh Parser and return the result."""
	    return Parser().parse(s)