gartenfeld · December 30, 2014 18:24
diff --git a/process_csv.py b/process_csv.py
 import csv
 import codecs
 import re

 def load_dictionary(csv_file, idx, lbl):
 	"""Build a lookup table from two columns of a UTF-8 CSV file.

 	Args:
 		csv_file: Path of the CSV file to read.
 		idx: Zero-based column whose value becomes the dictionary key.
 		lbl: Zero-based column whose value becomes the dictionary value.

 	Returns:
 		A dict mapping each row's key column to its label column. When a
 		key occurs in several rows, the last row wins.

 	Raises:
 		IndexError: If a non-empty row has no column `idx` or `lbl`.
 	"""
 	# newline="" leaves line-ending handling to the csv module, and the
 	# context manager closes the file even if a row raises.
 	with open(csv_file, encoding="utf-8", newline="") as f:
 		# csv.reader yields [] for a blank line; skip those rather than
 		# crash on the column lookup.
 		return {r[idx]: r[lbl] for r in csv.reader(f) if r}

 def load_csv(csv_data):
 	"""Pass every non-empty row of a UTF-8 CSV file to `extract_data`.

 	Args:
 		csv_data: Path of the CSV file to read.

 	Returns:
 		None.
 	"""
 	# The context manager closes the file even when extract_data raises
 	# part-way through; newline="" is what the csv module expects.
 	with open(csv_data, encoding="utf-8", newline="") as f:
 		for row in csv.reader(f):
 			# Blank lines come back as [] and carry no columns to extract.
 			if row:
 				extract_data(row)

 def extract_data(data_row):
 	"""Print one entry of the words table with its example sentence filled in.

 	Reads fixed columns from `data_row` and replaces the topic code with
 	its label from the module-level `topics` table when one exists. If the
 	row has a citation, each run of underscores in the blank sentence is
 	replaced by the matching space-separated word of the in-situ column,
 	wrapped in braces. The result (braces removed) is compared with the
 	citation and a diagnostic dump is printed when they differ.

 	Args:
 		data_row: One CSV row as a list of strings; columns 4, 7, 8 and
 			11-16 are read.

 	Returns:
 		None. Everything is written to stdout.

 	Raises:
 		IndexError: If `data_row` has fewer than 17 columns.
 	"""
 	rank = data_row[4]
 	topic = data_row[7]
 	word_type = data_row[8]
 	article = data_row[11]
 	citation = data_row[12]
 	en_gloss = data_row[13]
 	in_situ = data_row[14]
 	blank = data_row[15]
 	focal = data_row[16]

 	# Swap codes for labels where the module-level lookup tables know them.
 	# NOTE(review): word_type is resolved here but not used in the output.
 	word_type = word_types.get(word_type, word_type)
 	topic = topics.get(topic, topic)

 	filled = blank

 	if citation != "":
 		# Collapse runs of spaces in both strings before anything is built
 		# from them, so the comparison below is made on equal terms.
 		blank = re.sub(r" +", " ", blank)
 		citation = re.sub(r" +", " ", citation)
 		filled = blank

 		gap = r"_+"
 		fills = in_situ.split(" ")

 		if len(fills) == len(re.findall(gap, blank)):
 			# Substitute all gaps in a single pass, handing each one the
 			# next word in order; an underscore inside an inserted word
 			# can then never be mistaken for a later gap.
 			words = iter(fills)
 			filled = re.sub(gap, lambda m: "{" + next(words) + "}", blank)

 		stripped = filled.replace("{", "").replace("}", "")

 		if stripped != citation:
 			# Dump everything needed to see why the sentence did not
 			# reproduce the citation.
 			print(fills)
 			print(blank)
 			print(filled)
 			print(citation)
 			print("")

 	print(rank, article, focal, "> " + en_gloss)
 	print(topic)
 	print(filled)
 	print("")

 if __name__ == "__main__":
 	# Input files, given as paths relative to the working directory.
 	wortart_file, themen_file, main_file = (
 		"Lextra-Wortart.csv",
 		"Lextra-Themen.csv",
 		"Lextra-Words.csv",
 	)

 	# Code -> label tables; extract_data reads both as module globals.
 	word_types = load_dictionary(wortart_file, idx=0, lbl=3)
 	topics = load_dictionary(themen_file, idx=0, lbl=5)

 	# Walk the main table and print every entry.
 	load_csv(main_file)

 	print("Valmis!")
	import csv
	import codecs
	import re

	def load_dictionary(csv_file, idx, lbl):
		"""Build a lookup table from two columns of a UTF-8 CSV file.

		Args:
			csv_file: Path of the CSV file to read.
			idx: Zero-based column whose value becomes the dictionary key.
			lbl: Zero-based column whose value becomes the dictionary value.

		Returns:
			A dict mapping each row's key column to its label column. When a
			key occurs in several rows, the last row wins.

		Raises:
			IndexError: If a non-empty row has no column `idx` or `lbl`.
		"""
		# newline="" leaves line-ending handling to the csv module, and the
		# context manager closes the file even if a row raises.
		with open(csv_file, encoding="utf-8", newline="") as f:
			# csv.reader yields [] for a blank line; skip those rather than
			# crash on the column lookup.
			return {r[idx]: r[lbl] for r in csv.reader(f) if r}

	def load_csv(csv_data):
		"""Pass every non-empty row of a UTF-8 CSV file to `extract_data`.

		Args:
			csv_data: Path of the CSV file to read.

		Returns:
			None.
		"""
		# The context manager closes the file even when extract_data raises
		# part-way through; newline="" is what the csv module expects.
		with open(csv_data, encoding="utf-8", newline="") as f:
			for row in csv.reader(f):
				# Blank lines come back as [] and carry no columns to extract.
				if row:
					extract_data(row)

	def extract_data(data_row):
		"""Print one entry of the words table with its example sentence filled in.

		Reads fixed columns from `data_row` and replaces the topic code with
		its label from the module-level `topics` table when one exists. If the
		row has a citation, each run of underscores in the blank sentence is
		replaced by the matching space-separated word of the in-situ column,
		wrapped in braces. The result (braces removed) is compared with the
		citation and a diagnostic dump is printed when they differ.

		Args:
			data_row: One CSV row as a list of strings; columns 4, 7, 8 and
				11-16 are read.

		Returns:
			None. Everything is written to stdout.

		Raises:
			IndexError: If `data_row` has fewer than 17 columns.
		"""
		rank = data_row[4]
		topic = data_row[7]
		word_type = data_row[8]
		article = data_row[11]
		citation = data_row[12]
		en_gloss = data_row[13]
		in_situ = data_row[14]
		blank = data_row[15]
		focal = data_row[16]

		# Swap codes for labels where the module-level lookup tables know them.
		# NOTE(review): word_type is resolved here but not used in the output.
		word_type = word_types.get(word_type, word_type)
		topic = topics.get(topic, topic)

		filled = blank

		if citation != "":
			# Collapse runs of spaces in both strings before anything is built
			# from them, so the comparison below is made on equal terms.
			blank = re.sub(r" +", " ", blank)
			citation = re.sub(r" +", " ", citation)
			filled = blank

			gap = r"_+"
			fills = in_situ.split(" ")

			if len(fills) == len(re.findall(gap, blank)):
				# Substitute all gaps in a single pass, handing each one the
				# next word in order; an underscore inside an inserted word
				# can then never be mistaken for a later gap.
				words = iter(fills)
				filled = re.sub(gap, lambda m: "{" + next(words) + "}", blank)

			stripped = filled.replace("{", "").replace("}", "")

			if stripped != citation:
				# Dump everything needed to see why the sentence did not
				# reproduce the citation.
				print(fills)
				print(blank)
				print(filled)
				print(citation)
				print("")

		print(rank, article, focal, "> " + en_gloss)
		print(topic)
		print(filled)
		print("")

	if __name__ == '__main__':
		# Input files, given as paths relative to the working directory.
		wortart_file = "Lextra-Wortart.csv"
		themen_file = "Lextra-Themen.csv"
		main_file = "Lextra-Words.csv"

		# Code -> label tables; extract_data reads both as module globals.
		# The arguments: file, index column, label column
		word_types = load_dictionary(wortart_file, 0, 3)
		topics = load_dictionary(themen_file, 0, 5)

		# Walk the main table and print every entry.
		load_csv(main_file)

		print("Valmis!")