kachok · May 29, 2012 15:55
diff --git a/tweets_es_pickleit.py b/tweets_es_pickleit.py
 import codecs
 import pickle


 # Build a {word: {"frequency": int, "context": [...]}} table from the
 # out-of-vocabulary word list and pickle it to disk.
 vocab_path = "/Users/dkachaev/repos/hltcoe/tweets-es/data/oov.vocab"
 pickle_path = "/Users/dkachaev/repos/hltcoe/tweets-es/data/tweets_es_vocabulary.pickle"

 vocab = {}

 # The context manager closes the input even if decoding fails part-way.
 with codecs.open(vocab_path, "r", "utf-8") as out:
     for line in out:
         # A usable line is "<frequency> <word>" separated by a single space.
         try:
             freq, word = line.strip().split(" ")
             count = int(freq)
         except ValueError:
             # Wrong number of fields, or a frequency that is not an integer.
             # Single-argument print() behaves the same on Python 2 and 3.
             print("skipping line")
             continue

         # "context" is a placeholder for now: it is meant to hold the text of
         # the original tweet where the word occurred, or up to three tweets,
         # e.g. ["tweet1", "tweet2", "tweet3"].
         vocab[word] = {"frequency": count, "context": [""]}

 # Open the output only after the table is complete, so a failure while
 # reading the input does not leave a truncated pickle behind. Binary mode
 # is what pickle expects for its output stream.
 with open(pickle_path, "wb") as f:
     pickle.dump(vocab, f)
	import codecs
	import pickle


	# Build a {word: {"frequency": int, "context": [...]}} table from the
	# out-of-vocabulary word list and pickle it to disk.
	vocab_path = "/Users/dkachaev/repos/hltcoe/tweets-es/data/oov.vocab"
	pickle_path = "/Users/dkachaev/repos/hltcoe/tweets-es/data/tweets_es_vocabulary.pickle"

	vocab = {}

	# The context manager closes the input even if decoding fails part-way.
	with codecs.open(vocab_path, "r", "utf-8") as out:
		for line in out:
			# A usable line is "<frequency> <word>" separated by a single space.
			try:
				freq, word = line.strip().split(" ")
				count = int(freq)
			except ValueError:
				# Wrong number of fields, or a frequency that is not an integer.
				# Single-argument print() behaves the same on Python 2 and 3.
				print("skipping line")
				continue

			# "context" is a placeholder for now: it is meant to hold the text
			# of the original tweet where the word occurred, or up to three
			# tweets, e.g. ["tweet1", "tweet2", "tweet3"].
			vocab[word] = {"frequency": count, "context": [""]}

	# Open the output only after the table is complete, so a failure while
	# reading the input does not leave a truncated pickle behind. Binary mode
	# is what pickle expects for its output stream.
	with open(pickle_path, "wb") as f:
		pickle.dump(vocab, f)
No results found