Created
April 11, 2014 02:30
-
-
Save johnstcn/10437873 to your computer and use it in GitHub Desktop.
Tool to merge csv files given a commonly named column header
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
import argparse | |
import sys | |
from collections import defaultdict | |
helptext = """Merge multiple CSV files using a commonly named column. | |
CSV files must have column headings, but do not need to be sorted. | |
Rows with incomplete information can be kept or discarded.""" | |
ap = argparse.ArgumentParser(helptext) | |
ap.add_argument("files", nargs="+", help="CSV files to merge", type=argparse.FileType('r')) | |
ap.add_argument("key", help="Use this column header to merge the files") | |
ap.add_argument("--keep", action='store_true', help="Keep rows with incomplete information") | |
ap.add_argument("--outfile", help="File to write to (default is stdout)", default=sys.stdout, type=argparse.FileType('w')) | |
args = ap.parse_args() | |
output_rows = defaultdict(dict) | |
# keep tab of all fields we've seen | |
allfields = set() | |
# slurp in all the files and store the info | |
for infile in args.files: | |
rdr = csv.DictReader(infile) | |
# add in any new fieldnames that may exist | |
allfields.update(rdr.fieldnames) | |
for row in rdr: | |
output_rows[row[args.key]].update(row) | |
wrtr = csv.DictWriter(args.outfile, fieldnames=sorted(allfields), dialect=csv.excel) | |
# write column headings | |
wrtr.writeheader() | |
# write the output to our file | |
for outrow in output_rows.values(): | |
# only write a row if it has all the fields we're interested in, or if we care about ones with incomplete info | |
if (set(outrow.keys()) == allfields) or args.keep: | |
wrtr.writerow(outrow) | |
args.outfile.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment