Skip to content

Instantly share code, notes, and snippets.

@catogonzalez
Last active September 13, 2024 14:01
Show Gist options
  • Save catogonzalez/5e1e62dd412400bda28175158c90b904 to your computer and use it in GitHub Desktop.
Save catogonzalez/5e1e62dd412400bda28175158c90b904 to your computer and use it in GitHub Desktop.
Split a CSV file into two files with approximately half the rows each
import argparse
import csv
def split_csv(filename):
    """
    Splits a CSV file into two files with approximately half the rows each.

    The first row is treated as a header and copied to both outputs. The
    outputs are written as ``<filename>_split1.csv`` and
    ``<filename>_split2.csv`` (existing files with those names are
    overwritten). When the number of data rows is odd, the first file
    receives the extra row.

    Args:
        filename: The path to the CSV file to split.

    Raises:
        ValueError: If the file is empty, so there is no header row to copy.
        OSError: If the input cannot be opened or an output cannot be created.
    """
    # First pass: read the header and count the data rows. The file is read
    # twice rather than loaded into memory so large inputs stay cheap.
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields containing line breaks are preserved exactly.
    with open(filename, 'r', newline='') as infile:
        reader = csv.reader(infile)
        try:
            header = next(reader)
        except StopIteration:
            raise ValueError(
                f"CSV file '{filename}' is empty; no header row to copy"
            ) from None
        data_rows = sum(1 for _ in reader)

    # Half of the data rows, rounded up: the first file takes the odd row.
    rows_in_first = (data_rows + 1) // 2

    # Second pass: stream the rows to the two outputs. The context managers
    # guarantee every file is closed even if reading or writing fails.
    with open(filename, 'r', newline='') as infile, \
            open(f"{filename}_split1.csv", 'w', newline='') as outfile1, \
            open(f"{filename}_split2.csv", 'w', newline='') as outfile2:
        reader = csv.reader(infile)
        next(reader)  # Skip the header; it was captured in the first pass.

        writer1 = csv.writer(outfile1)
        writer2 = csv.writer(outfile2)
        writer1.writerow(header)
        writer2.writerow(header)

        for index, row in enumerate(reader):
            if index < rows_in_first:
                writer1.writerow(row)
            else:
                writer2.writerow(row)
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the path of
    # the CSV file to split.
    cli = argparse.ArgumentParser(description="Split a CSV file in half")
    cli.add_argument("filename", help="Path to the CSV file to split")
    target = cli.parse_args().filename

    # Perform the split, then confirm on stdout.
    split_csv(target)
    print(f"CSV file '{target}' split successfully.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment