-
-
Save ianliu/a5b1ab96180880b9cf35c0a80b68c9f3 to your computer and use it in GitHub Desktop.
Simple script to process CSV files in a streaming fashion, with correct escaping of commas in values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Select columns from a CSV stream.

Reads CSV from stdin and writes the chosen columns to stdout through the
``csv`` module, so values that contain commas or quotes are escaped
correctly. Rows are handled one at a time; the input is never held in
memory as a whole.
"""
import csv
import sys
import argparse


class SelectionError(ValueError):
    """The requested columns cannot be taken from the input."""


def build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(description=(
        'Process CSV files in a streaming fashion. By default, assumes there is a'
        ' header row and columns are selected by name. If the -i flag is passed,'
        ' columns are selected by indices starting from 0 and no header row is'
        ' assumed to exist. The -s argument can be used to skip the first N rows.'
    ))
    parser.add_argument('-s', type=int, default=0, help='skip first N lines')
    parser.add_argument('-i', action='store_true',
                        help='column indices instead of names, does not read header row')
    parser.add_argument('cols', nargs='+', help='columns to be selected')
    return parser


def resolve_columns(cols, header=None):
    """Translate the requested columns into positions within a row.

    Args:
        cols: Column selectors exactly as given on the command line.
        header: List of column names from the header row, or None when the
            selectors are integer indices.

    Returns:
        A list of integer positions, in the order the columns were requested.

    Raises:
        SelectionError: If an index is not an integer, or a name does not
            appear in ``header``.
    """
    if header is None:
        try:
            return [int(col) for col in cols]
        except ValueError:
            raise SelectionError(
                'column indices must be integers: {}'.format(', '.join(cols))
            ) from None

    # When a name occurs more than once in the header, the last one wins.
    positions = {name: index for index, name in enumerate(header)}
    missing = [name for name in cols if name not in positions]
    if missing:
        raise SelectionError(
            'column(s) not found in header: {}'.format(', '.join(missing))
        )
    return [positions[name] for name in cols]


def copy_columns(reader, writer, select, line_offset=0):
    """Write the selected fields of every remaining row of ``reader``.

    Args:
        reader: A ``csv.reader`` positioned at the first data row.
        writer: A ``csv.writer`` receiving the reduced rows.
        select: Positions to copy from each row, in output order.
        line_offset: Number of physical lines consumed before ``reader`` was
            created; only used to report line numbers in errors.

    Raises:
        SelectionError: If a row has too few fields for a selected position.
    """
    # The try wraps the whole loop so the per-row path stays free of
    # exception bookkeeping; `row` is still bound when the handler runs.
    try:
        for row in reader:
            writer.writerow([row[i] for i in select])
    except IndexError:
        raise SelectionError(
            'line {}: row has only {} field(s)'.format(
                line_offset + reader.line_num, len(row))
        ) from None


def _untranslated(stream):
    """Disable newline translation on ``stream`` when it supports that.

    The csv module expects its files to be opened with ``newline=''``;
    otherwise line endings inside quoted fields are rewritten on input and
    the writer's ``\\r\\n`` terminator can be translated a second time on
    output.
    """
    reconfigure = getattr(stream, 'reconfigure', None)
    if reconfigure is not None:
        reconfigure(newline='')
    return stream


def main(argv=None, stdin=None, stdout=None):
    """Run the column selection.

    Args:
        argv: Argument list; defaults to ``sys.argv[1:]``.
        stdin: Text stream to read CSV from; defaults to ``sys.stdin``.
        stdout: Text stream to write CSV to; defaults to ``sys.stdout``.

    Returns:
        0 on success, 1 when the requested columns cannot be selected.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    bufin = _untranslated(sys.stdin) if stdin is None else stdin
    bufout = _untranslated(sys.stdout) if stdout is None else stdout

    # -s skips physical lines, not CSV records, before any parsing starts.
    for _ in range(args.s):
        bufin.readline()

    rdr = csv.reader(bufin)
    wrt = csv.writer(bufout)
    try:
        if args.i:
            select = resolve_columns(args.cols)
        else:
            header = next(rdr, None)
            if header is None:
                raise SelectionError('input has no header row')
            select = resolve_columns(args.cols, header)
            # Echo the requested names as the output header.
            wrt.writerow(args.cols)
        copy_columns(rdr, wrt, select, line_offset=max(args.s, 0))
    except SelectionError as err:
        print('{}: error: {}'.format(parser.prog, err), file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment