jelmervdl · August 23, 2017 11:49
diff --git a/parser.py b/parser.py
 from pprint import pprint

 class l(object):
    __slots__ = ('word',)
    def __init__(self, word):
        self.word = word

    def __repr__(self):
        return 'l({})'.format(self.word)

    def test(self, word):
        return self.word == word


 def is_literal(obj):
    return isinstance(obj, l)


 rules = [
    ('extended_claims', ['extended_claim']),
    ('extended_claims', ['extended_claim', l('and'), 'extended_claims']),
    ('extended_claim', ['claim', 'supports', 'attacks']),
    ('claim', [l('birds'), l('can'), l('fly')]),
    ('claim', [l('Tweety'), l('can'), l('fly')]),
    ('claim', [l('Tweety'), l('has'), l('wings')]),
    ('claim', [l('Tweety'), l('is'), l('a'), l('bird')]),
    ('claim', [l('Tweety'), l('is'), l('a'), l('penguin')]),
    ('supports', []),
    ('supports', ['support']),
    ('supports', ['support', l('and'), 'supports']),
    ('support', [l('because'), 'extended_claims']),
    ('attacks', []),
    ('attacks', ['attack']),
    ('attacks', ['attack', l('and'), 'attacks']),
    ('attack', [l('but'), 'extended_claims'])
 ]

 sentence = 'Tweety can fly because Tweety is a bird and birds can fly but Tweety is a penguin'

 words = sentence.split(' ')


 def find_rules(name):
    found = False
    for rule in rules:
        if rule[0] == name:
            found = True
            yield rule[1]
    if not found:
        raise Error('No rules for {}' .format(name))


 def parse(rule, words):
    for acc, rem_words in _parse(rule[1], words):
        if len(rem_words) == 0:
            yield (rule[0], acc)


 def _parse(rule, words):
    if len(rule) == 0:
        yield [], words

    elif is_literal(rule[0]):
        if len(words) == 0 or not rule[0].test(words[0]):
            return
        else:
            for subacc, words_rem in _parse(rule[1:], words[1:]):
                yield [l(words[0])] + subacc, words_rem

    else:
        for subrule in find_rules(rule[0]):
            for subacc, words_rem in _parse(subrule, words):
                for recacc, rec_words_rem in _parse(rule[1:], words_rem):
                    yield [(rule[0], subacc)] + recacc, rec_words_rem



 for n, parsed in enumerate(parse(rules[0], words)):
    print("Parse {}:".format(n))
    pprint(parsed)
	from pprint import pprint

	class l(object):
	__slots__ = ('word',)
	def __init__(self, word):
	self.word = word

	def __repr__(self):
	return 'l({})'.format(self.word)

	def test(self, word):
	return self.word == word


	def is_literal(obj):
	return isinstance(obj, l)


	rules = [
	('extended_claims', ['extended_claim']),
	('extended_claims', ['extended_claim', l('and'), 'extended_claims']),
	('extended_claim', ['claim', 'supports', 'attacks']),
	('claim', [l('birds'), l('can'), l('fly')]),
	('claim', [l('Tweety'), l('can'), l('fly')]),
	('claim', [l('Tweety'), l('has'), l('wings')]),
	('claim', [l('Tweety'), l('is'), l('a'), l('bird')]),
	('claim', [l('Tweety'), l('is'), l('a'), l('penguin')]),
	('supports', []),
	('supports', ['support']),
	('supports', ['support', l('and'), 'supports']),
	('support', [l('because'), 'extended_claims']),
	('attacks', []),
	('attacks', ['attack']),
	('attacks', ['attack', l('and'), 'attacks']),
	('attack', [l('but'), 'extended_claims'])
	]

	sentence = 'Tweety can fly because Tweety is a bird and birds can fly but Tweety is a penguin'

	words = sentence.split(' ')


	def find_rules(name):
	found = False
	for rule in rules:
	if rule[0] == name:
	found = True
	yield rule[1]
	if not found:
	raise Error('No rules for {}' .format(name))


	def parse(rule, words):
	for acc, rem_words in _parse(rule[1], words):
	if len(rem_words) == 0:
	yield (rule[0], acc)


	def _parse(rule, words):
	if len(rule) == 0:
	yield [], words

	elif is_literal(rule[0]):
	if len(words) == 0 or not rule[0].test(words[0]):
	return
	else:
	for subacc, words_rem in _parse(rule[1:], words[1:]):
	yield [l(words[0])] + subacc, words_rem

	else:
	for subrule in find_rules(rule[0]):
	for subacc, words_rem in _parse(subrule, words):
	for recacc, rec_words_rem in _parse(rule[1:], words_rem):
	yield [(rule[0], subacc)] + recacc, rec_words_rem



	for n, parsed in enumerate(parse(rules[0], words)):
	print("Parse {}:".format(n))
	pprint(parsed)