Created
September 4, 2015 08:33
-
-
Save jweinst1/7501747912973f80c190 to your computer and use it in GitHub Desktop.
SentenceObjects.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from strmodfunctions import * | |
# Linked list that links together the words of a sentence.
class Word(object):
    """One node of a singly linked list of words.

    ``first`` holds the value stored in this node and ``rest`` references the
    following node, or ``None`` when this node is the last one.
    """

    def __init__(self, first=None, rest=None):
        self.first = first
        self.rest = rest

    def get_data(self):
        """Return the value stored in this node."""
        return self.first

    def get_next(self):
        """Return the node that follows this one (``None`` at the end)."""
        return self.rest

    def __getitem__(self, i):
        """Return the value stored ``i`` nodes after this one.

        Args:
            i: non-negative offset from this node (0 is this node itself).

        Raises:
            IndexError: if ``i`` is negative or runs past the end of the chain.
        """
        if i < 0:
            # Negative offsets are not supported; fail loudly rather than
            # quietly handing back the head value.
            raise IndexError('The Sentence Index is Out of Range.')
        node = self
        for _ in range(i):
            node = node.rest
            if node is None:
                raise IndexError('The Sentence Index is Out of Range.')
        return node.first
class Fuse_Link(object):
    """Join two links so that ``link2`` directly follows ``link1``.

    The joined chain is exposed as ``chain``; it is the ``link1`` object
    itself, whose ``rest`` attribute now points at ``link2``.
    """

    def __init__(self, link1, link2):
        head = link1
        self.chain = head
        head.rest = link2
class Sentence(object):
    # Holds data for an individual sentence as a linked list.
    """A sentence stored as a linked list of lower-cased words.

    Attributes set by the constructor:
        words: head ``Word`` node of the list, or ``None`` for an empty sentence.
        question: True when the source text ended with '?'.
        subject, predicate, object: empty lists for taggers to fill in.
        wordset: set of the distinct words in the sentence.
    """

    # Characters treated as the sentence terminator by remove_punc.
    _TERMINATORS = ('.', '?', '!')

    def __init__(self, list):
        """Build the sentence.

        Args:
            list: the sentence text as a string, or a list/tuple of word
                tokens. (The parameter name shadows the builtin; it is kept
                so existing keyword callers continue to work.)
        """
        tokens = self.remove_punc(list)
        # Build back to front so every new node can point at the finished tail.
        self.words = None
        for token in reversed(tokens):
            self.words = Word(token, self.words)
        self.subject, self.predicate = [], []
        self.object = []
        self.word_set()

    def _nodes(self):
        """Yield every node of the linked list from head to tail."""
        node = self.words
        while node is not None:
            yield node
            node = node.rest

    def _node_at(self, i):
        """Return the node at position ``i`` or raise IndexError."""
        if i >= 0:
            for position, node in enumerate(self._nodes()):
                if position == i:
                    return node
        raise IndexError('The Sentence Index is Out of Range.')

    def __repr__(self):
        """Return the words joined by single spaces ('' when empty)."""
        return ' '.join(node.first for node in self._nodes())

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()

    def __getitem__(self, i):
        """Return the word at position ``i``; IndexError when out of range."""
        return self._node_at(i).first

    def __setitem__(self, i, tag):
        """Store ``tag`` as the ``POS_tag`` of the word at position ``i``.

        Raises:
            IndexError: if ``i`` is out of range.
        """
        # The tag belongs on the node that was located, not on the head node.
        self._node_at(i).POS_tag = tag

    def __len__(self):
        """Return the number of words in the sentence."""
        return sum(1 for _ in self._nodes())

    def __contains__(self, other):
        """Return True when some word of the sentence equals ``other``."""
        return any(node.first == other for node in self._nodes())

    def remove_punc(self, text):
        """Normalise raw sentence text into a list of lower-cased words.

        Sets ``self.question`` to whether the text ends with '?'. One trailing
        terminator ('.', '?' or '!') is removed; text without a terminator is
        left intact. Punctuation attached to inner words is kept.

        Args:
            text: a string, or a list/tuple of word tokens which is first
                joined with single spaces.

        Returns:
            The list of lower-cased, whitespace-separated words.
        """
        if isinstance(text, (list, tuple)):
            text = ' '.join(text)
        # Trailing whitespace would otherwise hide the terminator.
        text = text.rstrip()
        self.question = text.endswith('?')
        if text.endswith(self._TERMINATORS):
            text = text[:-1]
        return text.lower().split()

    def word_set(self):
        """Recompute ``self.wordset`` from the current linked list."""
        self.wordset = {node.first for node in self._nodes()}

    def checktagnum(self, i):
        """Return the ``POS_tag`` of the word at position ``i``.

        Raises:
            IndexError: if ``i`` is out of range.
            AttributeError: if no tag has been assigned to that word yet.
        """
        return self._node_at(i).POS_tag
class Sentence_Bank(object):
    # A container object that takes a text object and breaks it into a bin of sentences.
    """Split a block of text into ``Sentence`` objects.

    Attributes:
        text: the whitespace-separated tokens of the input text.
        bank: the ``Sentence`` objects, in reading order.
        template: working buffer for the sentence being assembled; empty
            once construction has finished.
    """

    def __init__(self, text):
        """Build the bank from ``text``.

        A sentence ends at any token whose last character is '.', '?' or '!'.
        Trailing words without a terminator form a final sentence of their
        own. Empty or whitespace-only text yields an empty bank.
        """
        self.text = text.split()
        self.bank, self.template = [], []
        for token in self.text:
            self.template.append(token)
            if token.endswith(('.', '?', '!')):
                self._flush()
        # Whatever is left had no terminator; keep it as a last sentence.
        self._flush()

    def _flush(self):
        """Turn the buffered tokens into a Sentence and clear the buffer."""
        if self.template:
            # Sentence works on the sentence text, so hand it one string
            # rather than the raw token list.
            self.bank.append(Sentence(' '.join(self.template)))
            self.template = []

    def __getitem__(self, i):
        """Return the sentence at position ``i`` of the bank."""
        return self.bank[i]

    def __len__(self):
        """Return the number of sentences in the bank."""
        return len(self.bank)
class Word_Ref(object):
    # Used for part of speech tagging, and word look up.
    """A word list loaded from one of the reference text files.

    ``reference`` holds the whitespace-separated words read from
    ``<selection>.txt`` in the current working directory.
    """

    # Names accepted by the constructor; each maps to '<name>.txt'.
    _LIBRARIES = ('Verbs', 'Nouns', 'Adjectives', 'Adverbs')

    def __init__(self, selection):
        """Load the word list named by ``selection``.

        Args:
            selection: one of 'Verbs', 'Nouns', 'Adjectives' or 'Adverbs'.

        Raises:
            ReferenceError: if ``selection`` is not one of the names above.
        """
        if selection not in self._LIBRARIES:
            raise ReferenceError('Must choose a valid reference library.')
        # The context manager closes the file even if reading fails.
        with open(selection + '.txt', 'r') as wordfile:
            self.reference = wordfile.read().split()

    def __contains__(self, other):
        """Return True when ``other`` is in the list, ignoring one trailing comma."""
        if other.endswith(','):
            other = other[:-1]
        return other in self.reference
def wordref_tester(text):
    """Return the first word of ``text`` that no reference list contains.

    The text is lower-cased and split on whitespace after one trailing
    terminator ('.', '?' or '!') is removed. Every word, including the last
    one, is looked up in the noun, verb, adjective and adverb lists.

    Returns:
        The first unrecognised word, or 'No Leaks' when every word is known
        (which includes empty text).
    """
    references = [
        Word_Ref(name) for name in ('Nouns', 'Verbs', 'Adjectives', 'Adverbs')
    ]
    if text.endswith(('.', '?', '!')):
        text = text[:-1]
    for word in text.lower().split():
        # A word "leaks" when none of the reference lists recognises it.
        if not any(word in reference for reference in references):
            return word
    return 'No Leaks'
class SP_Tagger(object):
    # Tags the subject and predicate of a sentence.
    """Split the words of a ``Sentence`` into a subject and a predicate.

    The results are written to the ``subject`` and ``predicate`` lists of the
    target sentence.
    """

    def __init__(self, target):
        """Remember ``target``, which must be a ``Sentence``, for tagging."""
        assert isinstance(target, Sentence), 'Must take a Sentence Object.'
        self.target = target
        self.subject_words = ['the', 'in', 'and', 'or', 'with', 'as', 'at', 'he', 'his', 'she', 'her', 'i', 'a']
        # Not consulted by tag(); kept as a public attribute.
        self.prepredicate_words = ['if', 'in', 'either', 'when', 'whether', 'however']
        self.predicate_words = ['do', 'did', 'was', 'is', 'are', 'were', 'can', 'cannot', 'wont', 'come', 'like', 'came', 'will']

    def __repr__(self):
        """Return a string showing the tagged subject and predicate.

        ``__repr__`` has to return a string, so the joined subject and
        predicate are embedded in the text instead of being returned as a
        tuple. A target without both parts is reported as untagged.
        """
        if self.target.subject and self.target.predicate:
            # join_with_spaces is presumably supplied by the strmodfunctions
            # star import at the top of the file.
            return 'SP_Tagger(subject=%r, predicate=%r)' % (
                join_with_spaces(self.target.subject),
                join_with_spaces(self.target.predicate),
            )
        return 'SP_Tagger(untagged)'

    def makelink(self, oldlink, newlink):
        """Attach ``newlink`` after the last node of the chain at ``oldlink``.

        Returns:
            The node that was the tail of the chain, whose ``rest`` now
            points at ``newlink``.
        """
        tail = oldlink
        while tail.rest is not None:
            tail = tail.rest
        tail.rest = newlink
        return tail

    def append_subject(self, string):
        """Append ``string`` to the subject of the target sentence."""
        self.target.subject.append(string)

    def append_predicate(self, string):
        """Append ``string`` to the predicate of the target sentence."""
        self.target.predicate.append(string)

    def tag(self):
        """Tag every word of the target as subject or predicate.

        The words are read in order through a small state machine:

        * the opening word always starts the subject;
        * while in the subject, nouns, adjectives and ``subject_words`` extend
          it, whereas ``predicate_words``, verbs and adverbs start the
          predicate;
        * once the predicate has started, every remaining word joins it.

        Any tagging already stored on the target is discarded first, so the
        method can be called more than once.

        Returns:
            A ``(subject, predicate)`` pair, each produced by
            ``join_with_spaces``.

        Raises:
            ReferenceError: if a word met while in the subject belongs to
                none of the known word groups (the word is printed first).
        """
        nouns = Word_Ref('Nouns')
        verbs = Word_Ref('Verbs')
        adjectives = Word_Ref('Adjectives')
        adverbs = Word_Ref('Adverbs')
        del self.target.subject[:]
        del self.target.predicate[:]
        indicator = None
        # Walk the linked list directly; indexing the sentence would rescan
        # it from the head for every word.
        node = self.target.words
        while node is not None:
            word = node.first
            if indicator is None:
                self.append_subject(word)
                indicator = 'subject'
            elif indicator == 'subject':
                if word in nouns or word in adjectives or word in self.subject_words:
                    self.append_subject(word)
                elif word in self.predicate_words or word in verbs or word in adverbs:
                    self.append_predicate(word)
                    indicator = 'predicate'
                else:
                    print(word)
                    raise ReferenceError('Word not identifiable')
            else:
                self.append_predicate(word)
            node = node.rest
        return join_with_spaces(self.target.subject), join_with_spaces(self.target.predicate)
# Tester function for subject/predicate tagging.
def SP_tagtester(text):
    """Tag ``text`` and return whatever ``SP_Tagger.tag`` returns.

    ``text`` must end with a full stop; this is checked with an assertion.
    """
    last_char = text[-1]
    assert last_char == '.'  # valid sentence text
    return SP_Tagger(Sentence(text)).tag()
class Compound_tagger(object):
    # Used for tagging if a sentence is compounded.
    """Callable that flags sentences containing compound punctuation.

    Calling an instance with a ``Sentence`` sets its ``compound`` attribute to
    True when any word ends with ',', ':' or ';', and to False otherwise.
    """

    def __init__(self):
        self.compound_chars = {',', ':', ';'}

    def __call__(self, words):
        """Set ``words.compound``; ``words`` must be a ``Sentence``.

        A sentence with no words is tagged as not compound.
        """
        assert isinstance(words, Sentence), 'Must be a Valid Sentence Object.'
        compound = False
        node = words.words
        while node is not None:
            # Slicing instead of indexing keeps an empty word from raising.
            if node.first[-1:] in self.compound_chars:
                compound = True
                break
            node = node.rest
        words.compound = compound
# Callable tagger that collects words it takes to be nouns and stores them on
# the statement as `statement.nouns`.
# NOTE(review): this listing has lost its indentation, so the nesting of the
# conditionals inside __call__ cannot be determined from the text alone.
# Confirm against the original file before changing any of the logic.
| class Noun_tagger (object): | |
| def __init__(self): | |
# Accumulator for the nouns found during one call.
| self.nouns = [] | |
| self.articles = {'the', 'a', 'an'} | |
| self.conditionals = {'if', 'either'} | |
# `statement` must support len(), integer indexing and attribute assignment.
| def __call__(self, statement): | |
| current_word = 0 | |
# The loop bound stops one position short of the final index.
| while current_word < len(statement)-1: | |
# The word directly after a conditional is recorded as a noun.
| if statement[current_word] in self.conditionals: | |
| self.nouns.append(statement[current_word+1]) | |
| current_word += 1 | |
| if statement[current_word] in self.articles: | |
# NOTE(review): current_word+2 can be one past the last index when
# current_word is len(statement)-2; presumably that raises IndexError. Verify.
| if statement[current_word+2] not in self.articles: | |
| current_word += 1 | |
# NOTE(review): which `if` this `else` belongs to depends on the lost
# indentation. TODO confirm.
| else: | |
| self.nouns.append(statement[current_word+1]) | |
| current_word += 1 | |
# NOTE(review): if this runs with current_word == 0 the index is -1. Confirm
# that is intended for the statement type in use.
| if statement[current_word-1] in self.articles: | |
| pass | |
| else: | |
| current_word += 1 | |
# Publish the collected list on the statement, then give the tagger a fresh
# list and return the list that was published.
| statement.nouns = self.nouns | |
| self.nouns = [] | |
| return statement.nouns | |
class Inspector_tagger(object):
    """Callable that records which key words occur in a statement.

    Calling an instance stores a dict on ``statement.inspectors`` that maps
    every word of ``keys`` to whether ``word in statement`` holds.
    """

    def __init__(self):
        self.inspectors = {}
        self.keys = ['is', 'was', 'are', 'were', 'if', 'can', 'do', 'does', 'did', 'have', 'has', 'not', 'could', 'would', 'should', 'whether', 'either', 'whenever', 'why', 'will', 'since', 'because', 'that', 'these', 'those', 'i', 'me', 'you', 'we', 'and', 'or', 'when']

    def __call__(self, statement):
        """Attach the presence map for ``keys`` to ``statement``."""
        presence = {}
        for key in self.keys:
            presence[key] = key in statement
        statement.inspectors = presence
class Time_tagger(object):
    # Tagging object meant to classify sentence data types.
    """Callable that classifies the tense of a statement.

    Calling an instance sets ``statement.time`` to 'future', 'current',
    'past' or ``None`` depending on which marker words occur in
    ``statement.wordset``.
    """

    def __init__(self):
        self.past = {'was', 'did', 'used', 'had', 'were', 'came'}
        self.future = {'will', 'should', 'could', 'whenever', 'would'}
        self.current = {'is', 'do', 'have', 'has', 'does', 'are', 'now'}

    def __call__(self, statement):
        """Set ``statement.time`` from the words in ``statement.wordset``.

        When markers of several tenses are present, future takes precedence
        over current, which takes precedence over past. ``None`` is stored
        when no marker word is found.
        """
        words = statement.wordset
        # One exclusive chain: a fallback `else` hanging off only the future
        # test would wipe out a past or current result.
        if not words.isdisjoint(self.future):
            statement.time = 'future'
        elif not words.isdisjoint(self.current):
            statement.time = 'current'
        elif not words.isdisjoint(self.past):
            statement.time = 'past'
        else:
            statement.time = None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment