Skip to content

Instantly share code, notes, and snippets.

@noisychannel
Created June 13, 2015 03:32
Show Gist options
  • Save noisychannel/2c710ad641bd0e6ab288 to your computer and use it in GitHub Desktop.
Save noisychannel/2c710ad641bd0e6ab288 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
def load_class_map(path):
    """Read a word-to-class mapping from a whitespace-separated file.

    Each non-blank line must contain at least two fields: the word and its
    class label. Any further fields on the line are ignored. If a word
    appears more than once, the last occurrence wins.

    Args:
        path: Path of the class-map file.

    Returns:
        A dict mapping each word (exactly as written in the file) to its
        class label string.

    Raises:
        ValueError: If a non-blank line has fewer than two fields.
    """
    class_map = {}
    with open(path) as class_file:
        for line_no, line in enumerate(class_file, 1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if len(fields) < 2:
                raise ValueError(
                    "%s:%d: expected '<word> <class>', got %r"
                    % (path, line_no, line.strip())
                )
            class_map[fields[0]] = fields[1]
    return class_map


def translate_line(line, class_map, unknown="-1"):
    """Replace every word of a corpus line with its class label.

    The line is lowercased before lookup; the keys of ``class_map`` are
    used as-is, so only lowercase keys can ever match.

    Args:
        line: One line of corpus text (a trailing newline is fine).
        class_map: Mapping from word to class label.
        unknown: Label emitted for words missing from ``class_map``.

    Returns:
        The space-joined class labels, without a trailing newline. An empty
        or whitespace-only line yields an empty string.
    """
    return " ".join(class_map.get(word, unknown) for word in line.lower().split())


def main(argv):
    """Translate a corpus file into class labels.

    Args:
        argv: Command-line arguments: program name, class-map file, input
            corpus file, output file. Extra arguments are ignored.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    if len(argv) < 4:
        sys.stderr.write("usage: <script> CLASS_FILE INPUT_FILE OUTPUT_FILE\n")
        return 2

    # Load the map first so a malformed class file fails before the output
    # file is created or truncated.
    class_map = load_class_map(argv[1])

    # Context managers guarantee both handles are closed even on error.
    with open(argv[2]) as corpus, open(argv[3], "w") as out:
        for line in corpus:
            out.write(translate_line(line, class_map) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment