Skip to content

Instantly share code, notes, and snippets.

@gartenfeld
Created December 30, 2014 18:24
Show Gist options
  • Save gartenfeld/be096a4e7a998b35c660 to your computer and use it in GitHub Desktop.
Save gartenfeld/be096a4e7a998b35c660 to your computer and use it in GitHub Desktop.
Processing a CSV file.
import csv
import codecs
import re
def load_dictionary(csv_file, idx, lbl):
    """Build a lookup table from two columns of a UTF-8 CSV file.

    Args:
        csv_file: Path of the CSV file to read.
        idx: Zero-based column number whose value becomes the key.
        lbl: Zero-based column number whose value becomes the label.

    Returns:
        A dict mapping each row's ``idx`` cell to its ``lbl`` cell.  When a
        key occurs more than once, the last row wins.

    Raises:
        IndexError: If a non-empty row has no column ``idx`` or ``lbl``.
    """
    lookup = {}
    # newline="" lets the csv module do its own line-ending handling, and the
    # context manager closes the file even if a row raises.
    with open(csv_file, encoding="utf-8", newline="") as handle:
        for row in csv.reader(handle):
            if not row:
                # csv.reader yields [] for blank lines; nothing to index.
                continue
            lookup[row[idx]] = row[lbl]
    return lookup
def load_csv(csv_data):
    """Read a UTF-8 CSV file and pass every row to ``extract_data``.

    Args:
        csv_data: Path of the CSV file to process.

    Returns:
        None.  Each non-empty row is handed to ``extract_data`` in file order.
    """
    # newline="" lets the csv module do its own line-ending handling, and the
    # context manager closes the file even if processing a row raises.
    with open(csv_data, encoding="utf-8", newline="") as handle:
        for row in csv.reader(handle):
            if not row:
                # csv.reader yields [] for blank lines; there is no data to extract.
                continue
            extract_data(row)
def extract_data(data_row, topic_labels=None):
    """Print a summary of one word-list row, with its blanks filled in.

    Columns read from ``data_row`` (zero-based): 4 rank, 7 topic id,
    11 article, 12 citation sentence, 13 English gloss, 14 in-situ word
    forms (space separated), 15 sentence with ``_`` runs as blanks,
    16 focal word.  Columns 8 (word type) and 9 (audio file) are not
    used in the output.

    When the citation is non-empty, runs of spaces in the citation and the
    blanked sentence are collapsed, and each run of underscores is replaced,
    in order, by the matching in-situ form wrapped in braces.  The filling
    is only done when the number of forms equals the number of blanks.  If
    the result without braces differs from the citation, the forms, the
    blanked sentence, the filled sentence and the citation are printed for
    inspection.  The regular summary is always printed afterwards.

    Args:
        data_row: Sequence of cell strings with at least 17 entries.
        topic_labels: Optional mapping from topic id to a readable label.
            Defaults to the module-level ``topics`` table.

    Returns:
        None.  All results are written to standard output.

    Raises:
        IndexError: If ``data_row`` has fewer than 17 cells.
    """
    if topic_labels is None:
        # Fall back to the table the script loads at start-up.
        topic_labels = topics

    rank = data_row[4]
    topic = data_row[7]
    article = data_row[11]
    citation = data_row[12]
    en_gloss = data_row[13]
    in_situ = data_row[14]
    blank = data_row[15]
    focal = data_row[16]

    # Unknown topic ids are shown unchanged.
    topic = topic_labels.get(topic, topic)

    filled = blank
    if citation != "":
        # Collapse redundant spaces on both sides *before* filling, so the
        # comparison below is not tripped up by spacing differences alone.
        blank = re.sub(r" +", " ", blank)
        citation = re.sub(r" +", " ", citation)
        filled = blank

        madlibs = re.findall(r"_+", blank)
        fills = in_situ.split(" ")
        if len(fills) == len(madlibs):
            # One left-to-right pass: the n-th blank receives the n-th form,
            # and underscores inside an inserted form are never re-matched.
            pending = iter(fills)
            filled = re.sub(r"_+", lambda match: "{" + next(pending) + "}", blank)

        stripped = filled.replace("{", "").replace("}", "")
        if stripped != citation:
            # The filled sentence does not reproduce the citation: dump the
            # pieces so the row can be checked by hand.
            print(fills)
            print(blank)
            print(filled)
            print(citation)
            print("")

    print(rank, article, focal, "> " + en_gloss)
    print(topic)
    print(filled)
    print("")
if __name__ == '__main__':
    # Lookup tables, built with load_dictionary(file, index column, label column).
    # They stay at module level because row processing reads them as globals.
    word_types = load_dictionary("Lextra-Wortart.csv", 0, 3)
    topics = load_dictionary("Lextra-Themen.csv", 0, 5)

    # Process the main word list row by row, then report completion.
    load_csv("Lextra-Words.csv")
    print("Valmis!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment