dcluna · July 18, 2012 04:07
diff --git a/regexparser.py b/regexparser.py
 "Recursive-descent parser for Regexes, based on: http://matt.might.net/articles/parsing-regex-with-recursive-descent/ \
 Test project for Emacs + Python"

 class Parser():
    "Recursive descent regex parser"
    
    def __init__(self, regex):
        "Initializes the parser with the regex string"
        self.input = regex
        self.current = 0

    # Primitives

    def __peek__(self):
        "Peeks at next input"
        return self.input[self.current]

    def __consume__(self, input):
        "Consumes token if input is accepted"
        if self.__peek__() == input:
            self.current += 1
        else:
            raise ParserException('Expected \'' + self.__peek__() + '\', got \'' + input + '\'')

    def __next__(self):
        "Consumes token and returns it if successful"
        token = self.__peek__()
        self.__consume__(token)
        return token

    # Non-default primitive

    def __more__(self):
        "Checks if there is more input"
        return self.current != self.input.__len__()

    # Grammar:
    # <regex> ::= <term> '|' <regex>
    #          |  <term>
    
    # <term> ::= { <factor> }
    
    # <factor> ::= <base> { '*' }
    
    # <base> ::= <char>
    #         |  '\' <char>
    #         |  '(' <regex> ')'

    # The implementation is straightforward:
    
    def regex(self):
        "<regex> ::= <term> '|' <regex> \
                 |  <term>"
        term = self.term() # parses a term

        if self.__more__() and self.__peek__() == '|': # if term is followed by '|'
            self.__consume__('|')
            regex = self.regex()
            choice = Choice(term,regex)
            return choice
        else:
            return term

    def term(self):
        "<term> ::= { <factor> }"
        factor = [] # begins empty

        while self.__more__() and self.__peek__() != '|' and self.__peek__() != ')': # while there's still input and it's not a regex delimiter
            factor += [self.factor()]
            
        return factor

    def factor(self):
        "<factor> ::= <base> { '*' }"
        base = self.base()
        
        if self.__more__() and self.__peek__() == '*':
            self.__consume__('*')
            return Repetition(base)
        else:
            return base

    def base(self):
        "<base> ::= <char>              \
        |  '\' <char>                   \
        |  '(' <regex> ')'"

        if self.__peek__() == '\\':
            self.__consume__('\\')
            return '\\' + self.consumechar()
        elif self.__peek__() == '(':
            self.__consume__('(')
            regex = self.regex()
            self.__consume__(')')
            return regex
        else:
            return self.consumechar()

    def consumechar(self):
        "Expects an alphanumeric character"
        c = self.__next__()
        if not c.isalnum():
            raise ParserException('Expected alphanumeric, got ' + c + ' at ' + self.current)
        else:
            return c
        
    # def parse(self):
    #     acc = []
    #     while self.__more__():
    #         acc += [self.regex()]

    #     return acc
    
 class Regex():
    "Abstract class to represent a regex"
    
 class Choice(Regex):
    "Choice between a term and another regex"
    
    def __init__(self, regex1, regex2):
        "Holds 2 regexes"
        self.regex1 = regex1
        self.regex2 = regex2

 class Repetition(Regex):
    "Repetition between regex"
    
    def __init__(self, repetition):
        "Holds repetition"
        self.repetition = repetition

 class ParserException(Exception):
    "Thrown when a parser error occurs"
    
    def __init__(self, message):
        "Prints error message"
        print message
	"Recursive-descent parser for Regexes, based on: http://matt.might.net/articles/parsing-regex-with-recursive-descent/ \
	Test project for Emacs + Python"

	class Parser():
	"Recursive descent regex parser"

	def __init__(self, regex):
	"Initializes the parser with the regex string"
	self.input = regex
	self.current = 0

	# Primitives

	def __peek__(self):
	"Peeks at next input"
	return self.input[self.current]

	def __consume__(self, input):
	"Consumes token if input is accepted"
	if self.__peek__() == input:
	self.current += 1
	else:
	raise ParserException('Expected \'' + self.__peek__() + '\', got \'' + input + '\'')

	def __next__(self):
	"Consumes token and returns it if successful"
	token = self.__peek__()
	self.__consume__(token)
	return token

	# Non-default primitive

	def __more__(self):
	"Checks if there is more input"
	return self.current != self.input.__len__()

	# Grammar:
	# <regex> ::= <term> '\|' <regex>
	# \| <term>

	# <term> ::= { <factor> }

	# <factor> ::= <base> { '*' }

	# <base> ::= <char>
	# \| '\' <char>
	# \| '(' <regex> ')'

	# The implementation is straightforward:

	def regex(self):
	"<regex> ::= <term> '\|' <regex> \
	\| <term>"
	term = self.term() # parses a term

	if self.__more__() and self.__peek__() == '\|': # if term is followed by '\|'
	self.__consume__('\|')
	regex = self.regex()
	choice = Choice(term,regex)
	return choice
	else:
	return term

	def term(self):
	"<term> ::= { <factor> }"
	factor = [] # begins empty

	while self.__more__() and self.__peek__() != '\|' and self.__peek__() != ')': # while there's still input and it's not a regex delimiter
	factor += [self.factor()]

	return factor

	def factor(self):
	"<factor> ::= <base> { '*' }"
	base = self.base()

	if self.__more__() and self.__peek__() == '*':
	self.__consume__('*')
	return Repetition(base)
	else:
	return base

	def base(self):
	"<base> ::= <char> \
	\| '\' <char> \
	\| '(' <regex> ')'"

	if self.__peek__() == '\\':
	self.__consume__('\\')
	return '\\' + self.consumechar()
	elif self.__peek__() == '(':
	self.__consume__('(')
	regex = self.regex()
	self.__consume__(')')
	return regex
	else:
	return self.consumechar()

	def consumechar(self):
	"Expects an alphanumeric character"
	c = self.__next__()
	if not c.isalnum():
	raise ParserException('Expected alphanumeric, got ' + c + ' at ' + self.current)
	else:
	return c

	# def parse(self):
	# acc = []
	# while self.__more__():
	# acc += [self.regex()]

	# return acc

	class Regex():
	"Abstract class to represent a regex"

	class Choice(Regex):
	"Choice between a term and another regex"

	def __init__(self, regex1, regex2):
	"Holds 2 regexes"
	self.regex1 = regex1
	self.regex2 = regex2

	class Repetition(Regex):
	"Repetition between regex"

	def __init__(self, repetition):
	"Holds repetition"
	self.repetition = repetition

	class ParserException(Exception):
	"Thrown when a parser error occurs"

	def __init__(self, message):
	"Prints error message"
	print message
No results found