mertyildiran · April 19, 2016 23:48
diff --git a/word-class-parser.py b/word-class-parser.py
 #!/usr/bin/python

 import sys
 from subprocess import call
 import nltk
 from nltk import word_tokenize,sent_tokenize
 from nltk.tag.hunpos import HunposTagger

 def command(dict):
     """Sort dictionary lines read from *dict* into per-word-class files.

     Lines are pulled with ``dict.readline()`` until end of input.  A line
     with fewer than two whitespace-separated fields is skipped.  For every
     other line the first field is tokenized with ``nltk.word_tokenize`` and
     tagged with HunPos, and the cleaned-up line is appended to
     ``dictionaries/<class>`` for the tag of each token (``dictionaries/missing``
     when the tag is not in the table below).  If the first field contains
     ``(`` or ``'`` only its first token is classified.

     Args:
         dict: any object with a ``readline()`` method that returns ``""`` at
             end of input (the script passes ``sys.stdin``).  The name
             shadows the builtin; it is kept so existing callers still work.

     Returns:
         None.  Results are appended to files under ``dictionaries/`` and
         progress is echoed on standard output.
     """
     # POS tag -> name of the file under dictionaries/ that collects it.
     class_files = {
         'CC': "conjunction",
         'CD': "numeral",
         'DT': "determiner",
         'EX': "existential",
         'FW': "foreign-word",
         'IN': "preposition",
         'JJ': "adjective",
         'JJR': "comparative",
         'JJS': "superlative",
         'LS': "list-item-marker",
         'MD': "modal-auxiliary",
         'NN': "noun-common-singular-or-mass",
         'NNP': "noun-proper-singular",
         'NNPS': "noun-proper-plural",
         'NNS': "noun-common-plural",
         'PDT': "pre-determiner",
         'PRP': "pronoun",
         'PRP$': "possessive",
         'RB': "adverb",
         'RBR': "adverb-comparative",
         'RBS': "adverb-superlative",
         'RP': "particle",
         'TO': "to",
         'UH': "interjection",
         'VB': "verb-base-form",
         'VBD': "verb-past-tense",
         'VBG': "verb-present-participle",
         'VBN': "verb-past-participle",
         'VBP': "verb-present-tense-not-3rd-person-singular",
         'VBZ': "verb-present-tense-3rd-person-singular",
         'WDT': "wh-determiner",
         'WP': "wh-pronoun",
         'WP$': "wh-pronoun-possessive",
         'WRB': "wh-adverb",
     }

     # The tagger wraps an external process, so it is created once, on the
     # first line that needs it, and reused for every following line.
     hpt = None
     try:
         while True:
             # readline() rather than iterating the stream, so each line is
             # handled as soon as it arrives.
             line = dict.readline()
             if not line:
                 # "" means end of input (a blank line is "\n"); without
                 # this check the loop would spin forever at EOF.
                 break

             parts = line.split()  # split line into parts
             if len(parts) < 2:  # need at least 2 parts/columns
                 continue

             word = parts[0]
             print(word)

             # Re-join the fields with single spaces and drop the final
             # three characters.
             # NOTE(review): why exactly three characters are dropped is not
             # visible here -- presumably a fixed trailing marker in the
             # input format; confirm.
             outputstring = " ".join(parts)[:-3] + "\n"

             tokens = nltk.word_tokenize(word)

             if hpt is None:
                 # NOTE(review): the value passed as `encoding` looks like a
                 # path to the hunpos-tag binary rather than a charset name;
                 # confirm against HunposTagger's signature.
                 hpt = HunposTagger(path_to_model='hunpos-1.0-linux/en_wsj.model',path_to_bin='hunpos-read-only/tagger.native',encoding='hunpos-1.0-linux/hunpos-tag')

             # `"(" or "'" in word` is always true because "(" is truthy;
             # each character has to be tested on its own.
             first_token_only = "(" in word or "'" in word

             # Tag once per word instead of once per comparison.
             for _token, tag in hpt.tag(tokens):
                 target = class_files.get(tag, "missing")
                 with open("dictionaries/" + target, "a") as text_file:
                     text_file.write(outputstring)
                 print(outputstring + " >> " + target)
                 if first_token_only:
                     break
     finally:
         if hpt is not None:
             hpt.close()  # shut down the hunpos subprocess


 def _run_on_stdin():
     """Feed standard input to command(); exit with status 1 on Ctrl-C."""
     try:
         command(sys.stdin)
     except KeyboardInterrupt:
         # Same effect as sys.exit(1): terminate with a non-zero status.
         raise SystemExit(1)


 # Script entry point: only runs when the file is executed directly.
 if __name__ == '__main__':
     _run_on_stdin()
	#!/usr/bin/python

	import sys
	from subprocess import call
	import nltk
	from nltk import word_tokenize,sent_tokenize
	from nltk.tag.hunpos import HunposTagger

	def command(dict):
	    """Sort dictionary lines read from *dict* into per-word-class files.

	    Lines are pulled with ``dict.readline()`` until end of input.  A line
	    with fewer than two whitespace-separated fields is skipped.  For every
	    other line the first field is tokenized with ``nltk.word_tokenize`` and
	    tagged with HunPos, and the cleaned-up line is appended to
	    ``dictionaries/<class>`` for the tag of each token (``dictionaries/missing``
	    when the tag is not in the table below).  If the first field contains
	    ``(`` or ``'`` only its first token is classified.

	    Args:
	        dict: any object with a ``readline()`` method that returns ``""`` at
	            end of input (the script passes ``sys.stdin``).  The name
	            shadows the builtin; it is kept so existing callers still work.

	    Returns:
	        None.  Results are appended to files under ``dictionaries/`` and
	        progress is echoed on standard output.
	    """
	    # POS tag -> name of the file under dictionaries/ that collects it.
	    class_files = {
	        'CC': "conjunction",
	        'CD': "numeral",
	        'DT': "determiner",
	        'EX': "existential",
	        'FW': "foreign-word",
	        'IN': "preposition",
	        'JJ': "adjective",
	        'JJR': "comparative",
	        'JJS': "superlative",
	        'LS': "list-item-marker",
	        'MD': "modal-auxiliary",
	        'NN': "noun-common-singular-or-mass",
	        'NNP': "noun-proper-singular",
	        'NNPS': "noun-proper-plural",
	        'NNS': "noun-common-plural",
	        'PDT': "pre-determiner",
	        'PRP': "pronoun",
	        'PRP$': "possessive",
	        'RB': "adverb",
	        'RBR': "adverb-comparative",
	        'RBS': "adverb-superlative",
	        'RP': "particle",
	        'TO': "to",
	        'UH': "interjection",
	        'VB': "verb-base-form",
	        'VBD': "verb-past-tense",
	        'VBG': "verb-present-participle",
	        'VBN': "verb-past-participle",
	        'VBP': "verb-present-tense-not-3rd-person-singular",
	        'VBZ': "verb-present-tense-3rd-person-singular",
	        'WDT': "wh-determiner",
	        'WP': "wh-pronoun",
	        'WP$': "wh-pronoun-possessive",
	        'WRB': "wh-adverb",
	    }

	    # The tagger wraps an external process, so it is created once, on the
	    # first line that needs it, and reused for every following line.
	    hpt = None
	    try:
	        while True:
	            # readline() rather than iterating the stream, so each line is
	            # handled as soon as it arrives.
	            line = dict.readline()
	            if not line:
	                # "" means end of input (a blank line is "\n"); without
	                # this check the loop would spin forever at EOF.
	                break

	            parts = line.split()  # split line into parts
	            if len(parts) < 2:  # need at least 2 parts/columns
	                continue

	            word = parts[0]
	            print(word)

	            # Re-join the fields with single spaces and drop the final
	            # three characters.
	            # NOTE(review): why exactly three characters are dropped is not
	            # visible here -- presumably a fixed trailing marker in the
	            # input format; confirm.
	            outputstring = " ".join(parts)[:-3] + "\n"

	            tokens = nltk.word_tokenize(word)

	            if hpt is None:
	                # NOTE(review): the value passed as `encoding` looks like a
	                # path to the hunpos-tag binary rather than a charset name;
	                # confirm against HunposTagger's signature.
	                hpt = HunposTagger(path_to_model='hunpos-1.0-linux/en_wsj.model',path_to_bin='hunpos-read-only/tagger.native',encoding='hunpos-1.0-linux/hunpos-tag')

	            # `"(" or "'" in word` is always true because "(" is truthy;
	            # each character has to be tested on its own.
	            first_token_only = "(" in word or "'" in word

	            # Tag once per word instead of once per comparison.
	            for _token, tag in hpt.tag(tokens):
	                target = class_files.get(tag, "missing")
	                with open("dictionaries/" + target, "a") as text_file:
	                    text_file.write(outputstring)
	                print(outputstring + " >> " + target)
	                if first_token_only:
	                    break
	    finally:
	        if hpt is not None:
	            hpt.close()  # shut down the hunpos subprocess


	def _run_on_stdin():
	    """Feed standard input to command(); exit with status 1 on Ctrl-C."""
	    try:
	        command(sys.stdin)
	    except KeyboardInterrupt:
	        # Same effect as sys.exit(1): terminate with a non-zero status.
	        raise SystemExit(1)


	# Script entry point: only runs when the file is executed directly.
	if __name__ == '__main__':
	    _run_on_stdin()