Skip to content

Instantly share code, notes, and snippets.

@graysonarts
Created February 9, 2015 21:57
Show Gist options
  • Save graysonarts/c277d21da8a83fe07931 to your computer and use it in GitHub Desktop.
Save graysonarts/c277d21da8a83fe07931 to your computer and use it in GitHub Desktop.
Merge CSV files into a single CSV file, with a column for the filename.
import sys
import glob
import os.path
import os
import argparse
import csv
def usage(args):
    """Parse the command-line arguments for the CSV merge script.

    Recognised arguments:
      --delim / -d   delimiter of the input files (default "|")
      --skip / -s    integer number of leading rows to skip (default 0)
      prefix         required positional; the filename prefix of the inputs

    Returns the populated ``argparse.Namespace``.
    """
    # (flags, keyword settings) for each argument, registered in this order.
    argument_table = (
        (("--delim", "-d"), {"default": "|"}),
        (("--skip", "-s"), {"default": 0, "type": int}),
        (("prefix",), {}),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args(args)
def fixup_row(row, filename):
    """Return a new list: *filename* without its extension, then the cells of *row*.

    The incoming *row* is copied, not modified.
    """
    cells = list(row)
    stem, _extension = os.path.splitext(filename)
    return [stem] + cells
def add_file(skip_lines, reader, writer, filename, write_header):
    """Copy the rows of one input file into *writer*, tagging each data row.

    The first *skip_lines* rows of *reader* are discarded. The next row is
    treated as this file's header: it is written (prefixed with a "run"
    column) only when *write_header* is true, otherwise it is dropped. Every
    later row is written with the extension-less *filename* prepended.

    Returns True when a header has been written by the end of the call
    (either here, or because *write_header* was already false on entry), and
    False when a header was requested but no header row was found.
    """
    rows_to_skip = skip_lines
    awaiting_header = True
    for record in reader:
        if rows_to_skip > 0:
            # Still inside the leading block of rows to discard.
            rows_to_skip -= 1
        elif awaiting_header:
            # First surviving row is the header; emit it at most once overall.
            awaiting_header = False
            if write_header:
                writer.writerow(fixup_row(record, "run"))
                write_header = False
        else:
            writer.writerow(fixup_row(record, filename))
    return not write_header
def _open_csv(path, mode):
    """Open *path* the way the running interpreter's csv module expects.

    Python 3's csv module works on text files opened with ``newline=""``;
    Python 2's csv module expects binary-mode files. *mode* is "r" or "w".
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline="")
    return open(path, mode + "b")


def main(args=None):
    """Merge every ``<prefix>*.csv`` file into ``combined-<prefix>.csv``.

    *args* is the list of command-line arguments (without the program name);
    when omitted, ``sys.argv[1:]`` is read at call time.

    Input files are parsed with the ``--delim`` delimiter; the combined file
    is written with the csv module's default dialect. The header row is taken
    from the first input that has one, and each data row is prefixed with the
    extension-less name of the file it came from.
    """
    # Resolve the default at call time rather than at import time.
    if args is None:
        args = sys.argv[1:]
    args = usage(args)

    output_filename = "combined-{0}.csv".format(args.prefix)
    output_path = os.path.abspath(output_filename)

    # Sort for a reproducible row order (glob order is arbitrary), and never
    # feed the output file back in as an input: for some prefixes the
    # "combined-..." name itself matches the input pattern.
    input_filenames = sorted(
        name
        for name in glob.glob("{0}*.csv".format(args.prefix))
        if os.path.abspath(name) != output_path
    )

    with _open_csv(output_filename, "w") as out:
        writer = csv.writer(out)
        header_written = False
        for filename in input_filenames:
            with _open_csv(filename, "r") as inp:
                reader = csv.reader(inp, delimiter=args.delim)
                header_written = add_file(
                    args.skip, reader, writer, filename, not header_written
                )


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment