Skip to content

Instantly share code, notes, and snippets.

@sangheestyle
Created October 10, 2014 23:48
Show Gist options
  • Save sangheestyle/5f37730b0a5554de4a8a to your computer and use it in GitHub Desktop.
Save sangheestyle/5f37730b0a5554de4a8a to your computer and use it in GitHub Desktop.
# Count how often each word of a small hand-tagged corpus carries each
# part-of-speech tag, and write the (offset) counts to result.csv.
import csv
import sys
from collections import defaultdict

# Three training sentences and their per-token tags. The text is written with
# the typographic apostrophe (’); it is normalised to ASCII ' before the
# sentence is split on whitespace.
words1 = "pa’Daq ghah taH tera’ngan ’e".replace("’", "'").split()
tags1 = "N PRO V N PRO".split()
words2 = "ja’chuqmeH rojHom neH tera’ngan".replace("’", "'").split()
tags2 = "V N V N".split()
words3 = "tera’ngan qIp puq ’eg puq qIp tera’ngan".replace("’", "'").split()
tags3 = "N V N CONJ N V N".split()

# One list of (word, tag) pairs per sentence. list() materialises the pairs:
# on Python 3 zip() is a one-shot iterator, which would leave `train` empty
# after the first pass over it.
train = [
    list(zip(words1, tags1)),
    list(zip(words2, tags2)),
    list(zip(words3, tags3)),
]

# beta maps each word to every tag it was observed with (duplicates kept, so
# list.count() below yields the per-tag frequency).
beta = defaultdict(list)
for sent in train:
    for word, tag in sent:
        beta[word].append(tag)

# The csv module needs a binary file on Python 2, but a text file opened with
# newline="" on Python 3.
if sys.version_info[0] >= 3:
    open_args = {"mode": "w", "newline": ""}
else:
    open_args = {"mode": "wb"}

# Open the output exactly once, for writing; the with-block closes it even if
# a write fails part-way through.
with open("result.csv", **open_args) as f:
    csv_writer = csv.writer(f, delimiter=",")
    # Header columns follow the same order as the tag list used for the rows.
    csv_writer.writerow(["word", "NOUN", "VERB", "CONJ", "PRO"])
    # items() (rather than iteritems()) exists on both Python 2 and Python 3.
    for word, tags in sorted(beta.items()):
        row = [word]
        for tag in ["N", "V", "CONJ", "PRO"]:
            # 0.1 is added to every count, so no cell is ever zero.
            row.append(tags.count(tag) + 0.1)
        csv_writer.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment