aSipiere · August 5, 2020 10:15 · aSipiere · Aug 5, 2020
diff --git a/csv_splitter.py b/csv_splitter.py
 import os
 import csv
 import argparse
 from tqdm import tqdm

 def split(filehandler, delimiter=',', row_limit=10000,
    output_name_template='output_%s.csv', output_path='.', keep_headers=True,
    total=None):
    """
    Split a CSV file into multiple numbered pieces.

    Rows are streamed from `filehandler` and written to consecutive output
    files, each holding at most `row_limit` data rows. The first piece is
    always created, even when the input has no data rows.

    Arguments:
        `filehandler`: An open text-mode file (or other iterator of lines) to
            read CSV rows from; ideally opened with newline=''.
        `delimiter`: Field delimiter used for both reading and writing.
        `row_limit`: Maximum number of data rows per output file. 10,000 by default.
        `output_name_template`: Either a %s-style template in which %s is
            replaced by the piece number (e.g. 'output_%s.csv'), or a plain
            prefix to which '<piece number>.csv' is appended (e.g. 'output_').
        `output_path`: Directory the output files are written to.
        `keep_headers`: If true, the first input row is treated as a header and
            repeated at the top of every output file.
        `total`: Optional expected number of data rows, forwarded to tqdm so it
            can show a percentage; with None only a running count is shown.
    Raises:
        ValueError: if `row_limit` is smaller than 1.
    Example usage:

        >> from toolbox import csv_splitter;
        >> with open('/home/ben/input.csv', 'r', newline='') as f:
        >>     csv_splitter.split(f);

    """
    if row_limit < 1:
        raise ValueError(f"row_limit must be at least 1, got {row_limit!r}")

    reader = csv.reader(filehandler, delimiter=delimiter)
    # next(..., None) keeps an empty input from raising StopIteration.
    headers = next(reader, None) if keep_headers else None

    def open_piece(piece):
        # Honour a real %s template; otherwise treat the value as a prefix,
        # which is how the command-line entry point passes it.
        if '%s' in output_name_template:
            file_name = output_name_template % piece
        else:
            file_name = f"{output_name_template}{piece}.csv"
        # newline='' lets the csv writer control line endings itself.
        handle = open(os.path.join(output_path, file_name), 'w', newline='')
        writer = csv.writer(handle, delimiter=delimiter)
        if headers is not None:
            writer.writerow(headers)
        return handle, writer

    current_piece = 1
    current_out_file, current_out_writer = open_piece(current_piece)
    try:
        for i, row in tqdm(enumerate(reader), total=total):
            # Every `row_limit` rows, finish the current piece and start the next.
            if i and i % row_limit == 0:
                current_out_file.close()
                current_piece += 1
                current_out_file, current_out_writer = open_piece(current_piece)
            current_out_writer.writerow(row)
    finally:
        # Always flush and release the last piece, even if reading fails.
        current_out_file.close()

 if __name__ == "__main__":
    # Command-line entry point: parse the options and split the given file.
    parser = argparse.ArgumentParser(description="Split a CSV file into numbered pieces.")
    # The input path is mandatory; without it open() would be called with None.
    parser.add_argument("--infile", "-I", required=True, help="input csv path")
    # No nargs='?' on the options below: a bare flag would store None and
    # crash later, so a missing value is reported as a usage error instead.
    parser.add_argument("--outpath", "-O", default='.', type=str, help="Where to stick the output files.")
    parser.add_argument("--delimiter", "-D", default=',', type=str, help="delimiter")
    parser.add_argument("--rows", "-R", default=10000, type=int, help="number of rows")
    # argparse %-formats help strings, so the text must not contain a raw
    # '%s'; it also describes the value as the prefix it actually is.
    parser.add_argument("--name", "-N", default='output_', type=str, help="Filename prefix for the numbered output files, e.g. 'output_' gives output_1.csv")

    args = parser.parse_args()

    # newline='' is what the csv module expects so that newlines embedded in
    # quoted fields are read correctly.
    with open(args.infile, newline='') as csvfile:
        split(
            csvfile,
            delimiter=args.delimiter,
            output_name_template=args.name,
            row_limit=args.rows,
            output_path=args.outpath,
            keep_headers=True
        )
	import os
	import csv
	import argparse
	from tqdm import tqdm

	def split(filehandler, delimiter=',', row_limit=10000,
	        output_name_template='output_%s.csv', output_path='.', keep_headers=True,
	        total=None):
	    """
	    Split a CSV file into multiple numbered pieces.

	    Rows are streamed from `filehandler` and written to consecutive output
	    files, each holding at most `row_limit` data rows. The first piece is
	    always created, even when the input has no data rows.

	    Arguments:
	        `filehandler`: An open text-mode file (or other iterator of lines) to
	            read CSV rows from; ideally opened with newline=''.
	        `delimiter`: Field delimiter used for both reading and writing.
	        `row_limit`: Maximum number of data rows per output file. 10,000 by default.
	        `output_name_template`: Either a %s-style template in which %s is
	            replaced by the piece number (e.g. 'output_%s.csv'), or a plain
	            prefix to which '<piece number>.csv' is appended (e.g. 'output_').
	        `output_path`: Directory the output files are written to.
	        `keep_headers`: If true, the first input row is treated as a header and
	            repeated at the top of every output file.
	        `total`: Optional expected number of data rows, forwarded to tqdm so it
	            can show a percentage; with None only a running count is shown.
	    Raises:
	        ValueError: if `row_limit` is smaller than 1.
	    Example usage:

	        >> from toolbox import csv_splitter;
	        >> with open('/home/ben/input.csv', 'r', newline='') as f:
	        >>     csv_splitter.split(f);

	    """
	    if row_limit < 1:
	        raise ValueError(f"row_limit must be at least 1, got {row_limit!r}")

	    reader = csv.reader(filehandler, delimiter=delimiter)
	    # next(..., None) keeps an empty input from raising StopIteration.
	    headers = next(reader, None) if keep_headers else None

	    def open_piece(piece):
	        # Honour a real %s template; otherwise treat the value as a prefix,
	        # which is how the command-line entry point passes it.
	        if '%s' in output_name_template:
	            file_name = output_name_template % piece
	        else:
	            file_name = f"{output_name_template}{piece}.csv"
	        # newline='' lets the csv writer control line endings itself.
	        handle = open(os.path.join(output_path, file_name), 'w', newline='')
	        writer = csv.writer(handle, delimiter=delimiter)
	        if headers is not None:
	            writer.writerow(headers)
	        return handle, writer

	    current_piece = 1
	    current_out_file, current_out_writer = open_piece(current_piece)
	    try:
	        for i, row in tqdm(enumerate(reader), total=total):
	            # Every `row_limit` rows, finish the current piece and start the next.
	            if i and i % row_limit == 0:
	                current_out_file.close()
	                current_piece += 1
	                current_out_file, current_out_writer = open_piece(current_piece)
	            current_out_writer.writerow(row)
	    finally:
	        # Always flush and release the last piece, even if reading fails.
	        current_out_file.close()

	if __name__ == "__main__":
	    # Command-line entry point: parse the options and split the given file.
	    parser = argparse.ArgumentParser(description="Split a CSV file into numbered pieces.")
	    # The input path is mandatory; without it open() would be called with None.
	    parser.add_argument("--infile", "-I", required=True, help="input csv path")
	    # No nargs='?' on the options below: a bare flag would store None and
	    # crash later, so a missing value is reported as a usage error instead.
	    parser.add_argument("--outpath", "-O", default='.', type=str, help="Where to stick the output files.")
	    parser.add_argument("--delimiter", "-D", default=',', type=str, help="delimiter")
	    parser.add_argument("--rows", "-R", default=10000, type=int, help="number of rows")
	    # argparse %-formats help strings, so the text must not contain a raw
	    # '%s'; it also describes the value as the prefix it actually is.
	    parser.add_argument("--name", "-N", default='output_', type=str, help="Filename prefix for the numbered output files, e.g. 'output_' gives output_1.csv")

	    args = parser.parse_args()

	    # newline='' is what the csv module expects so that newlines embedded in
	    # quoted fields are read correctly.
	    with open(args.infile, newline='') as csvfile:
	        split(
	            csvfile,
	            delimiter=args.delimiter,
	            output_name_template=args.name,
	            row_limit=args.rows,
	            output_path=args.outpath,
	            keep_headers=True
	        )