Skip to content

Instantly share code, notes, and snippets.

@dehowell
Created February 4, 2017 01:25
Show Gist options
  • Save dehowell/624b81e9be3d6696ea28395e0cdb5dcf to your computer and use it in GitHub Desktop.
Save dehowell/624b81e9be3d6696ea28395e0cdb5dcf to your computer and use it in GitHub Desktop.
Script to correct and manipulate a CBCL data file
#!/usr/bin/env python
'''
usage: %s input_file output_file
'''
import collections
import csv
import sys
def split_column(row, col, name_format):
    """Explode the packed string in ``row[col]`` into one entry per character.

    The first character of the value is discarded (the original author's
    note says the data carries a stray leading ``'`` there).  Each remaining
    character is stored under ``name_format % position``, with positions
    counted from 1.  The packed column is then deleted.  ``row`` is modified
    in place and nothing is returned.

    Args:
        row: Mutable mapping of column name to cell value.
        col: Name of the packed column to split.
        name_format: ``%``-style format taking the 1-based position,
            e.g. ``'bpitem%03d'``.

    Raises:
        KeyError: if ``row`` is a plain dict that has no ``col`` entry.
    """
    # csv.DictReader fills cells missing from a short line with None; treat
    # those like an empty string instead of failing on the slice below.
    packed = row[col] or ''
    # The enumerate index is already an int, so no cast (and no blanket
    # exception handler around it) is needed.
    for position, char in enumerate(packed[1:], 1):
        row[name_format % position] = char
    del row[col]
def expand_row(row):
    """Return a copy of ``row`` with its packed item columns split out.

    The copy is a ``defaultdict`` whose missing keys read as ``''``.  Its
    ``bpitems`` and ``compitems`` entries are handed to ``split_column``
    with the ``bpitem%03d`` and ``compitem%02d`` name formats respectively.
    """
    expanded = collections.defaultdict(str, row)
    for packed_col, item_format in (
        ('bpitems', 'bpitem%03d'),
        ('compitems', 'compitem%02d'),
    ):
        split_column(expanded, packed_col, item_format)
    return expanded
def main(rf, wf):
    """Copy CSV rows from ``rf`` to ``wf`` with packed item columns expanded.

    Every input row is passed through ``expand_row``.  The output column
    set is taken from the first expanded row: names sorted, with ``id``
    moved to the front when it is present.  The header line is written
    once, followed by one line per input row.  Keys of later rows that are
    not in the header are not written.

    Args:
        rf: File object (or iterable of lines) holding CSV with a header.
        wf: Writable file object that receives the CSV output.
    """
    reader = csv.DictReader(rf)
    writer = csv.writer(wf)
    columns = None
    for row in reader:
        expanded_row = expand_row(row)
        if columns is None:
            columns = sorted(expanded_row)
            # Move id to first column; input without an id column simply
            # keeps the plain sorted order instead of raising ValueError.
            if 'id' in columns:
                columns.remove('id')
                columns.insert(0, 'id')
            # Write a header line
            writer.writerow(columns)
        try:
            writer.writerow([expanded_row[c] for c in columns])
        except (csv.Error, UnicodeError) as exc:
            # Keep converting the remaining rows, but report which input
            # line was dropped rather than losing it silently.
            sys.stderr.write(
                'skipping line %d: %s\n' % (reader.line_num, exc))
if __name__ == '__main__':
    # Script entry point: expects an input path and an output path.
    if len(sys.argv) < 3:
        sys.stderr.write(__doc__ % sys.argv[0])
        sys.exit(1)
    if sys.version_info[0] >= 3:
        # The 'U' mode flag was removed in Python 3.11, and the csv module
        # asks for newline='' so it can handle line endings itself.
        read_mode, open_kwargs = 'r', {'newline': ''}
    else:
        # Python 2's built-in open() has no newline argument; keep the
        # original universal-newline read mode there.
        read_mode, open_kwargs = 'rU', {}
    with open(sys.argv[1], read_mode, **open_kwargs) as rf, \
            open(sys.argv[2], 'w', **open_kwargs) as wf:
        main(rf, wf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment