jss367 · September 23, 2017 07:39
diff --git a/sorting_parts_of_speech.py b/sorting_parts_of_speech.py
 # Let's make a single function to determine the parts of speech

 import re
 import nltk
 import os
 #from collections import Counter # Is this used?

 # First we break the text into tokens
 def tokinze_text(raw_text):
    '''Break raw text into tokens using NLTK's word tokenizer.

    Args:
        raw_text: The text to split into tokens.

    Returns:
        The result of nltk.word_tokenize(raw_text).
    '''
    return nltk.word_tokenize(raw_text)
 # Tokenize the input text. NOTE(review): `text` is not assigned anywhere in the
 # visible code -- presumably it is defined before this point; confirm.
 tokens = tokinze_text(text)

 def mytagger(tokens):
    '''Tag each token with its part of speech via nltk.pos_tag.

    Args:
        tokens: The tokens to tag (e.g. the output of tokinze_text).

    Returns:
        The result of nltk.pos_tag(tokens).
    '''
    return nltk.pos_tag(tokens)

 # Tag every token. Each entry of `tagged` is indexed below as token[1],
 # which is compared against the part-of-speech tag groups.
 tagged = mytagger(tokens)

 # Each `*_pos` list names the tags of one group; the list that follows it
 # collects the tagged tokens that belong to that group.
 # Note that IN can be either a preposition or a conjunction, for now we're going to list it with the prepositions
 common_noun_pos = ['NN', 'NNS']
 common_nouns = []
 verb_pos = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
 verbs = []
 adjective_pos = ['JJ', 'JJR', 'JJS']
 adjectives = []
 pronoun_pos = ['PRP', 'PRP$', 'WP', 'WP$']
 pronouns = []
 adverb_pos = ['RB', 'RBR', 'RBS', 'WRB']
 adverbs = []
 proper_noun_pos = ['NNP', 'NNPS']
 proper_nouns = []
 conjunction_pos = ['CC']
 conjunctions = []
 preposition_pos = ['IN', 'TO']
 prepositions = []
 interjection_pos = ['UH']
 interjections = []
 modal_pos = ['MD'] # But these are also verbs, so let's make sure they show up as such
 modals = []
 tagged_other_pos = ['CD', 'DT', 'EX', 'FW', 'LS', 'PDT', 'POS', 'RP', 'SYM', 'WDT']
 tagged_others = []
 other = []

 # Build one tag -> destination-list lookup so every token is routed with a
 # single dict lookup instead of walking a chain of list membership tests.
 # setdefault keeps the first group listed for a tag, i.e. first match wins.
 _bucket_by_tag = {}
 for _tag_group, _bucket in (
        (common_noun_pos, common_nouns),
        (verb_pos, verbs),
        (adjective_pos, adjectives),
        (pronoun_pos, pronouns),
        (adverb_pos, adverbs),
        (proper_noun_pos, proper_nouns),
        (conjunction_pos, conjunctions),
        (preposition_pos, prepositions),
        (interjection_pos, interjections),
        (modal_pos, modals),
        (tagged_other_pos, tagged_others),
 ):
    for _tag in _tag_group:
        _bucket_by_tag.setdefault(_tag, _bucket)

 # Tokens whose tag belongs to none of the groups above end up in `other`.
 for token in tagged:
    _bucket_by_tag.get(token[1], other).append(token)

 # Collected groups, in a fixed order; `tagged_others` and `other` are not
 # included in this list.
 parts_of_speech = [common_nouns, verbs, adjectives, pronouns, adverbs, proper_nouns, conjunctions, prepositions, interjections, modals]
   
 # TODO: Append modals to verbs
 # TODO: Create a nouns list that combines both proper nouns and common nouns
	# Let's make a single function to determine the parts of speech

	import re
	import nltk
	import os
	#from collections import Counter # Is this used?

	# First we break the text into tokens
	def tokinze_text(raw_text):
	    '''Break raw text into tokens using NLTK's word tokenizer.

	    Args:
	        raw_text: The text to split into tokens.

	    Returns:
	        The result of nltk.word_tokenize(raw_text).
	    '''
	    tokens = nltk.word_tokenize(raw_text)
	    return tokens
	# Tokenize the input text. NOTE(review): `text` is not assigned anywhere in the
	# visible code -- presumably it is defined before this point; confirm.
	tokens = tokinze_text(text)

	def mytagger(tokens):
	    '''Tag each token with its part of speech via nltk.pos_tag.

	    Args:
	        tokens: The tokens to tag (e.g. the output of tokinze_text).

	    Returns:
	        The result of nltk.pos_tag(tokens).
	    '''
	    tags = nltk.pos_tag(tokens)
	    return tags

	# Tag every token. Each entry of `tagged` is indexed below as token[1],
	# which is compared against the part-of-speech tag groups.
	tagged = mytagger(tokens)

	# Each `*_pos` list names the tags of one group; the list that follows it
	# collects the tagged tokens that belong to that group.
	# Note that IN can be either a preposition or a conjunction, for now we're going to list it with the prepositions
	common_noun_pos = ['NN', 'NNS']
	common_nouns = []
	verb_pos = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
	verbs = []
	adjective_pos = ['JJ', 'JJR', 'JJS']
	adjectives = []
	pronoun_pos = ['PRP', 'PRP$', 'WP', 'WP$']
	pronouns = []
	adverb_pos = ['RB', 'RBR', 'RBS', 'WRB']
	adverbs = []
	proper_noun_pos = ['NNP', 'NNPS']
	proper_nouns = []
	conjunction_pos = ['CC']
	conjunctions = []
	preposition_pos = ['IN', 'TO']
	prepositions = []
	interjection_pos = ['UH']
	interjections = []
	modal_pos = ['MD'] # But these are also verbs, so let's make sure they show up as such
	modals = []
	tagged_other_pos = ['CD', 'DT', 'EX', 'FW', 'LS', 'PDT', 'POS', 'RP', 'SYM', 'WDT']
	tagged_others = []
	other = []

	# Build one tag -> destination-list lookup so every token is routed with a
	# single dict lookup instead of walking a chain of list membership tests.
	# setdefault keeps the first group listed for a tag, i.e. first match wins.
	_bucket_by_tag = {}
	for _tag_group, _bucket in (
	        (common_noun_pos, common_nouns),
	        (verb_pos, verbs),
	        (adjective_pos, adjectives),
	        (pronoun_pos, pronouns),
	        (adverb_pos, adverbs),
	        (proper_noun_pos, proper_nouns),
	        (conjunction_pos, conjunctions),
	        (preposition_pos, prepositions),
	        (interjection_pos, interjections),
	        (modal_pos, modals),
	        (tagged_other_pos, tagged_others),
	):
	    for _tag in _tag_group:
	        _bucket_by_tag.setdefault(_tag, _bucket)

	# Tokens whose tag belongs to none of the groups above end up in `other`.
	for token in tagged:
	    _bucket_by_tag.get(token[1], other).append(token)

	# Collected groups, in a fixed order; `tagged_others` and `other` are not
	# included in this list.
	parts_of_speech = [common_nouns, verbs, adjectives, pronouns, adverbs, proper_nouns, conjunctions, prepositions, interjections, modals]

	# TODO: Append modals to verbs
	# TODO: Create a nouns list that combines both proper nouns and common nouns