LPTHW Exercise 49
The lexicon module (imported in the tests as ex49.lexicon):
class LexiconC(object):

    def __init__(self):
        # constants
        # TODO: Can these be class-level constants so they can be accessed from
        # outside the class with just the class name? (See the sketch after this file.)
        self.C_DIRECTION_WORDS = "direction"
        self.C_VERBS = "verb"
        self.C_STOP_WORDS = "stop"
        self.C_NOUNS = "noun"
        self.C_NUMBER = "number"
        self.C_ERROR = "error"

        # lexicon
        self.direction_words = ['north', 'south', 'east', 'west', 'down', 'up', 'left', 'right', 'back']
        self.verbs = ['go', 'stop', 'kill', 'eat']
        self.stop_words = ['the', 'in', 'of', 'from', 'at', 'it']
        self.nouns = ['door', 'bear', 'princess', 'cabinet']

    def get_tuple(self, word):
        """Classify a single word and return a (code, word) tuple."""
        ret_val = None
        lword = word.lower()
        if lword in self.direction_words:
            ret_val = (self.C_DIRECTION_WORDS, word)
        elif lword in self.verbs:
            ret_val = (self.C_VERBS, word)
        elif lword in self.stop_words:
            ret_val = (self.C_STOP_WORDS, word)
        elif lword in self.nouns:
            ret_val = (self.C_NOUNS, word)
        elif self.convert_number(lword) is not None:
            ret_val = (self.C_NUMBER, self.convert_number(lword))
        else:
            ret_val = (self.C_ERROR, word)
        return ret_val

    def convert_number(self, text):
        """Return text as an int, or None if it is not a number."""
        try:
            return int(text)
        except ValueError:
            return None


def scan(line):
    """Split a line into words and return a list of (code, word) tuples."""
    the_lexicon = LexiconC()
    ret_val = []
    words = line.split()
    for word in words:
        the_tuple = the_lexicon.get_tuple(word)
        ret_val.append(the_tuple)
    return ret_val
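One possible answer to the TODO in __init__: the type codes can be defined as class attributes instead of instance attributes, which makes them readable through the class name without creating an instance. A minimal sketch of the idea (not part of the original exercise):

class LexiconC(object):
    # class attributes are shared by all instances and can be read with
    # just the class name, e.g. LexiconC.C_NOUNS
    C_DIRECTION_WORDS = "direction"
    C_VERBS = "verb"
    C_STOP_WORDS = "stop"
    C_NOUNS = "noun"
    C_NUMBER = "number"
    C_ERROR = "error"

print LexiconC.C_NOUNS    # prints: noun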
Tests for the lexicon module:
from nose.tools import *

from ex49 import lexicon


def setup():
    print "SETUP!"

def teardown():
    print "TEAR DOWN!"

def test_directions():
    assert_equal(lexicon.scan("north"), [('direction', 'north')])
    result = lexicon.scan("north south east")
    assert_equal(result, [('direction', 'north'),
                          ('direction', 'south'),
                          ('direction', 'east')])

def test_verbs():
    assert_equal(lexicon.scan("go"), [('verb', 'go')])
    result = lexicon.scan("go kill eat")
    assert_equal(result, [('verb', 'go'),
                          ('verb', 'kill'),
                          ('verb', 'eat')])

def test_stops():
    assert_equal(lexicon.scan("the"), [('stop', 'the')])
    result = lexicon.scan("the in of")
    assert_equal(result, [('stop', 'the'),
                          ('stop', 'in'),
                          ('stop', 'of')])

def test_nouns():
    assert_equal(lexicon.scan("bear"), [('noun', 'bear')])
    result = lexicon.scan("bear princess")
    assert_equal(result, [('noun', 'bear'),
                          ('noun', 'princess')])

def test_numbers():
    assert_equal(lexicon.scan("1234"), [('number', 1234)])
    result = lexicon.scan("3 91234")
    assert_equal(result, [('number', 3),
                          ('number', 91234)])

def test_errors():
    assert_equal(lexicon.scan("ASDFADFA"), [('error', "ASDFADFA")])
    result = lexicon.scan("bear IAS princess")
    assert_equal(result, [('noun', 'bear'),
                          ('error', 'IAS'),
                          ('noun', 'princess')])
The sentence parser module (imported in the tests as ex49.sentence):
class ParseError(Exception):
    pass


class Sentence(object):

    def __init__(self, subject, verb, object):
        """Each argument 'subject', 'verb', 'object' is a tuple containing the code and word."""
        self.subject = subject
        self.verb = verb
        self.object = object


def peek(word_list):
    """
    word_list is a list of tuples in the form (code, word).
    This function returns the code of the first tuple in the list,
    or None if the list is empty.
    """
    if word_list:
        word = word_list[0]
        return word[0]
    else:
        return None

def match(word_list, expecting):
    """
    Returns the first word if it matches the expecting type, else returns None.
    In either case, the first word is removed from word_list.
    """
    ret_val = None
    if word_list:
        word_tuple = word_list.pop(0)
        if word_tuple[0] == expecting:
            ret_val = word_tuple
    return ret_val

def skip(word_list, word_type):
    """Remove leading tuples of the given word_type from the front of word_list."""
    if word_list:
        while peek(word_list) == word_type:
            match(word_list, word_type)

def parse_subject(word_list, subject):
    """Parse the verb and object that follow an already-determined subject."""
    skip(word_list, 'stop')
    verb = match(word_list, 'verb')
    if verb is None:
        raise ParseError("Expecting a verb")
    skip(word_list, 'stop')
    object = match(word_list, 'noun')
    if object is None:
        raise ParseError("Expecting a noun")
    return Sentence(subject, verb, object)

def parse_sentence(word_list):
    """Accepts a list of tuples, where each tuple contains (code, word)."""
    skip(word_list, 'stop')
    start = peek(word_list)
    if start == 'noun':
        subject = match(word_list, 'noun')
        return parse_subject(word_list, subject)
    elif start == 'verb':
        # no explicit subject, so the player is implied
        subject = ('noun', 'player')
        return parse_subject(word_list, subject)
    else:
        raise ParseError("Must start with either a noun or a verb ... not '%s'" % start)
Tests for the sentence module:
from nose.tools import *

from ex49 import sentence
from ex49 import lexicon


def setup():
    print "SETUP!"

def teardown():
    print "TEAR DOWN!"

def test_peek():
    # test a line with one word
    line1 = "princess"
    word_tuples1 = lexicon.scan(line1)
    word_type1 = sentence.peek(word_tuples1)
    assert_equal("noun", word_type1)

    # test a line with multiple words
    line2 = "princess ate the bear"
    word_tuples2 = lexicon.scan(line2)
    assert_equal("noun", sentence.peek(word_tuples2))

def test_match():
    # test a line with one word
    line1 = "princess"
    word_tuples1 = lexicon.scan(line1)
    assert_equal("princess", sentence.match(word_tuples1, 'noun')[1])
    # here we should get None for any type, since there is nothing left in the word list
    assert_equal(None, sentence.match(word_tuples1, 'noun'))

    # test a line with multiple words
    line2 = "princess eat the bear"
    word_tuples2 = lexicon.scan(line2)
    assert_equal(4, len(word_tuples2))
    assert_equal("princess", sentence.match(word_tuples2, 'noun')[1])
    assert_equal("eat", sentence.match(word_tuples2, 'verb')[1])
    assert_equal("the", sentence.match(word_tuples2, 'stop')[1])
    assert_equal("bear", sentence.match(word_tuples2, 'noun')[1])

def test_skip():
    # test a line with one word
    line1 = "princess"
    word_tuples1 = lexicon.scan(line1)
    sentence.skip(word_tuples1, 'verb')
    assert_equal(1, len(word_tuples1))
    sentence.skip(word_tuples1, 'noun')
    assert_equal(0, len(word_tuples1))

    # test a line with multiple words
    line2 = "princess eat the bear"
    word_tuples2 = lexicon.scan(line2)
    sentence.skip(word_tuples2, 'verb')
    assert_equal(4, len(word_tuples2))
    sentence.skip(word_tuples2, 'noun')
    assert_equal(3, len(word_tuples2))
    sentence.skip(word_tuples2, 'verb')
    assert_equal(2, len(word_tuples2))

def test_parse_sentence():
    line = "princess eat the bear"
    word_list = lexicon.scan(line)
    parsed_sentence = sentence.parse_sentence(word_list)
    assert_equal("princess", parsed_sentence.subject[1])
    assert_equal("eat", parsed_sentence.verb[1])
    assert_equal("bear", parsed_sentence.object[1])