Skip to content

Instantly share code, notes, and snippets.

@mertyildiran
Created April 19, 2016 23:48
Show Gist options
  • Save mertyildiran/2a57aa630c1538050bc7d74ec508c730 to your computer and use it in GitHub Desktop.
Save mertyildiran/2a57aa630c1538050bc7d74ec508c730 to your computer and use it in GitHub Desktop.
This script classifies words by part of speech, reading a dictionary line by line from standard input.
#!/usr/bin/python
import sys
from subprocess import call
import nltk
from nltk import word_tokenize,sent_tokenize
from nltk.tag.hunpos import HunposTagger
# Part-of-speech tag (as returned by the hunpos tagger) -> name of the word
# list under "dictionaries/" that the entry is appended to. Any tag not listed
# here is filed under "missing".
_TAG_TO_CATEGORY = {
    'CC': "conjunction",
    'CD': "numeral",
    'DT': "determiner",
    'EX': "existential",
    'FW': "foreign-word",
    'IN': "preposition",
    'JJ': "adjective",
    'JJR': "comparative",
    'JJS': "superlative",
    'LS': "list-item-marker",
    'MD': "modal-auxiliary",
    'NN': "noun-common-singular-or-mass",
    'NNP': "noun-proper-singular",
    'NNPS': "noun-proper-plural",
    'NNS': "noun-common-plural",
    'PDT': "pre-determiner",
    'PRP': "pronoun",
    'PRP$': "possessive",
    'RB': "adverb",
    'RBR': "adverb-comparative",
    'RBS': "adverb-superlative",
    'RP': "particle",
    'TO': "to",
    'UH': "interjection",
    'VB': "verb-base-form",
    'VBD': "verb-past-tense",
    'VBG': "verb-present-participle",
    'VBN': "verb-past-participle",
    'VBP': "verb-present-tense-not-3rd-person-singular",
    'VBZ': "verb-present-tense-3rd-person-singular",
    'WDT': "wh-determiner",
    'WP': "wh-pronoun",
    'WP$': "wh-pronoun-possessive",
    'WRB': "wh-adverb",
}


def _append_entry(category, entry):
    """Append *entry* to ``dictionaries/<category>`` and echo the routing."""
    with open("dictionaries/" + category, "a") as text_file:
        text_file.write(entry)
    print(entry + " >> " + category)


def command(dict):
    """Classify dictionary entries by part of speech.

    Reads *dict* line by line until end of input. Lines with fewer than two
    whitespace-separated fields are skipped. For every other line the first
    field is treated as the word: it is tokenized with NLTK, tagged with
    hunpos, and the (normalized) line is appended to the file under
    ``dictionaries/`` that corresponds to each token's tag.

    Args:
        dict: An open file-like object providing ``readline()`` (the script
            passes ``sys.stdin``). The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        None. Results are written to files under ``dictionaries/`` and
        progress is printed to standard output.
    """
    hpt = None  # created on first use so empty input never spawns hunpos
    try:
        while True:
            line = dict.readline()
            if not line:
                # readline() returns "" only at end of input; without this
                # check the loop would spin forever once the input is drained.
                break

            parts = line.split()  # split line into whitespace-separated fields
            if len(parts) < 2:  # need at least a word plus one more column
                continue

            word = parts[0]
            print(word)

            # Re-join the fields with single spaces, then drop the last three
            # characters before terminating the entry with a newline.
            # NOTE(review): the reason for trimming three characters is not
            # visible here -- confirm against the dictionary format.
            outputstring = " ".join(parts)[:-3] + "\n"

            tokens = nltk.word_tokenize(word)

            if hpt is None:
                # One tagger for the whole run instead of one per line.
                # NOTE(review): the encoding argument looks like a file path
                # rather than a codec name -- left unchanged, please confirm.
                hpt = HunposTagger(path_to_model='hunpos-1.0-linux/en_wsj.model',path_to_bin='hunpos-read-only/tagger.native',encoding='hunpos-1.0-linux/hunpos-tag')

            # Tag once per word; the result is reused for every token rather
            # than re-running the tagger for each comparison.
            tagged = hpt.tag(tokens)

            # Words containing "(" or "'" are only classified by their first
            # token. The original test `"(" or "'" in word` was always true
            # (a non-empty string literal is truthy), so it applied to every
            # word; both characters are now actually checked.
            first_token_only = "(" in word or "'" in word

            for _token, tag in tagged:
                _append_entry(_TAG_TO_CATEGORY.get(tag, "missing"), outputstring)
                if first_token_only:
                    break
    finally:
        if hpt is not None:
            # Release the hunpos subprocess and its pipes.
            hpt.close()
if __name__ == '__main__':
    # Script entry point: classify the dictionary supplied on standard input.
    # Ctrl-C ends the run with exit status 1 instead of a traceback.
    try:
        command(sys.stdin)
    except KeyboardInterrupt:
        raise SystemExit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment