Skip to content

Instantly share code, notes, and snippets.

@tai271828
Created November 3, 2017 12:04
Show Gist options
  • Save tai271828/60d80025cc3ec7a45b75619410528da2 to your computer and use it in GitHub Desktop.
Save tai271828/60d80025cc3ec7a45b75619410528da2 to your computer and use it in GitHub Desktop.
Convert my libreoffice dumped csv to another csv ready to be imported by mysql
#!/usr/bin/python3
import csv
import pprint
import sys
def fill_null_value(input, filler="\\N"):
"""
Replace empty elements of input by a filler.
:param input: an iterable object, e.g. list
:param filler: to fill the empty element
:return: an interable object, e.g. list, without empty elements
"""
output = []
for element in input:
if element == '':
output.append(filler)
else:
output.append(element)
return output
def export_to_mysql_txt(input):
try:
output_filename = sys.argv[2]
except:
output_filename = 'raw_data_output.csv'
with open(output_filename, 'w') as ocsv:
writer = csv.writer(ocsv, dialect='excel-tab')
writer.writerows(input)
def convert2sql_compat(input):
output = []
idx_row = 0
for row in input:
if idx_row == 0:
pass
else:
new_row = []
idx_col = 0
for col in row:
# sample id, group id, data_value
if idx_col == 0:
pass
else:
new_row = [row[0], input[0][idx_col], col]
output.append(new_row)
idx_col += 1
idx_row += 1
return output
def sanity_check(base, converted):
row_num = len(base)
col_num = len(base[0])
base_number = (row_num-1)*(col_num-1)
converted_number = len(converted)
assert (base_number == converted_number), "Sanity Check Failed!"
# tcsv: tab separated delimiter csv
row_data = []
try:
input_filename = sys.argv[1]
except:
input_filename = 'raw_data.csv'
with open(input_filename) as tcsv:
for row in csv.reader(tcsv, dialect='excel-tab'):
row = fill_null_value(row)
row_data.append(row)
#pprint.pprint(row_data)
row_data_converted = convert2sql_compat(row_data)
pprint.pprint(row_data_converted)
sanity_check(row_data, row_data_converted)
export_to_mysql_txt(row_data_converted)
pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment