Created
December 9, 2013 18:04
-
-
Save Faxn/7876963 to your computer and use it in GitHub Desktop.
Small script to merge and split csv files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import _thread
import argparse
import csv
import itertools
import os
import threading
import time
# TODO: break these out into command line opts
# Keyword arguments handed to open() for input and output files respectively.
# newline='' leaves line-ending handling to the csv module. Input is decoded
# strictly as CP1252; output is encoded as UTF-8, with unencodable characters
# written as backslash escapes instead of raising.
in_file_open_args = {'newline': '', 'encoding': 'CP1252', 'errors': 'strict'}
out_file_open_args = {'newline': '', 'encoding': 'UTF-8', 'errors': 'backslashreplace'}
def merge(infile_names, out_file):
    """Copy every row of the given CSV files, in order, into ``out_file``.

    Args:
        infile_names: Iterable of paths to the input CSV files. Each one is
            opened with ``in_file_open_args`` and parsed with the ``excel``
            dialect.
        out_file: An already-open, writable text file object. Rows are
            written to it with the ``excel`` dialect. It is neither flushed
            nor closed here; that is left to the caller.

    Rows are copied as-is, so if each input starts with a header row, that
    header appears once per input file in the output.
    """
    outwriter = csv.writer(out_file, dialect='excel')
    for infile_name in infile_names:
        # Use a separate name for the handle so the loop variable (the path)
        # is not rebound to the open file object.
        with open(infile_name, **in_file_open_args) as infile:
            outwriter.writerows(csv.reader(infile, dialect='excel'))
def split(in_file, out_file_name_base, out_file_rows):
    """Split the CSV rows read from ``in_file`` across numbered CSV files.

    Output files are named ``<out_file_name_base><N>.csv`` with N counting up
    from 1, and are opened for writing with ``out_file_open_args``. Every
    file holds exactly ``out_file_rows`` rows except possibly the last one,
    which holds the remainder.

    Args:
        in_file: Readable text file object (or any iterable of lines)
            containing CSV data. It is consumed lazily, one chunk at a time.
        out_file_name_base: Path prefix shared by all output files.
        out_file_rows: Maximum number of rows per output file; must be >= 1.

    Raises:
        ValueError: If ``out_file_rows`` is smaller than 1. A non-positive
            limit could never consume the input and would loop forever.

    A file is only created once there is at least one row to put in it, so
    no empty trailing file is produced when the row count is an exact
    multiple of ``out_file_rows``, and empty input produces no files.
    """
    if out_file_rows < 1:
        raise ValueError("out_file_rows must be at least 1")

    csv_reader = csv.reader(in_file)
    file_number = 1
    while True:
        # Take at most out_file_rows rows from the shared reader.
        chunk = itertools.islice(csv_reader, out_file_rows)
        # csv.reader yields lists (an empty line is []), never None, so None
        # is a safe "input exhausted" sentinel.
        first_row = next(chunk, None)
        if first_row is None:
            break
        out_file_name = out_file_name_base + str(file_number) + '.csv'
        with open(out_file_name, 'w', **out_file_open_args) as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerow(first_row)
            csv_writer.writerows(chunk)
        file_number += 1
def main(argv=None):
    """Command-line entry point: merge CSV files, optionally re-splitting them.

    Without ``--split`` (or with ``--split 0``) all rows of the input files
    are appended to ``outfile``. With ``--split N`` the merged rows are
    streamed through a pipe into ``split``, which writes them to
    ``<outfile>1.csv``, ``<outfile>2.csv``, ... with at most N rows each.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        Exception: Whatever ``merge`` or ``split`` raised. A failure inside
            the background merge thread is re-raised here once the split
            side has finished.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', nargs='+', help="input csv file or files")
    parser.add_argument('outfile', help="output csv file or name base if using split.")
    parser.add_argument('-s', '--split', help="Split output into files with this many rows.", type=int)
    args = parser.parse_args(argv)
    print(args)

    # A negative row limit can never be satisfied; reject it up front.
    if args.split is not None and args.split < 0:
        parser.error("--split must be a positive number of rows")

    if not args.split:
        # Merge only: the context manager flushes and closes the output file.
        with open(args.outfile, 'a', **out_file_open_args) as out_file:
            merge(args.infile, out_file)
        return

    read_fd, write_fd = os.pipe()
    # Open both pipe ends the same way: a fixed encoding so nothing depends
    # on the locale, and newline='' so line endings inside quoted fields are
    # passed through untouched, as the csv module expects.
    pipe_open_args = dict(newline='', encoding='UTF-8')
    merge_errors = []

    def feed_pipe():
        # Closing the write end when merge is done (or has failed) is what
        # delivers end-of-file to the reader, letting split terminate.
        try:
            with os.fdopen(write_fd, 'w', **pipe_open_args) as pipe_out:
                merge(args.infile, pipe_out)
        except Exception as exc:
            merge_errors.append(exc)

    feeder = threading.Thread(target=feed_pipe)
    feeder.start()
    try:
        with os.fdopen(read_fd, 'r', **pipe_open_args) as pipe_in:
            split(pipe_in, args.outfile, args.split)
    finally:
        # Wait for the merge thread instead of sleeping for a fixed time, so
        # the program never exits with work still in flight.
        feeder.join()
    if merge_errors:
        raise merge_errors[0]


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment