Created
February 4, 2021 14:19
-
-
Save asharma327/8fd52a885fedecf77f1c9f4d9bb72ce3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import math | |
# Function to write the split file | |
def create_split_file(split_rows, header, split_file_name):
    """Write one split CSV file: ``header`` first, then every row in ``split_rows``.

    Args:
        split_rows: Iterable of data rows (each row an iterable of fields).
            It is only read; the header is no longer inserted into it.
        header: The header row, written as the first line of the file.
        split_file_name: Path of the file to create. An existing file with
            the same name is overwritten.
    """
    # newline="" hands line-ending control to the csv module; without it,
    # platforms that translate "\n" on write emit a blank line after each row.
    with open(split_file_name, "w", newline="") as split_file:
        output = csv.writer(split_file, delimiter=",")
        # Write the header on its own instead of insert(0, ...) so the
        # caller's list is not mutated (and no O(n) list shift is needed).
        output.writerow(header)
        output.writerows(split_rows)
# | |
# | |
# CHANGE THESE TO MATCH YOUR SPECIFICATIONS | |
# | |
# | |
split_file_rows = 50000
file_path = "./YOUR_FILE_PATH.csv"

if split_file_rows < 1:
    raise ValueError("split_file_rows must be a positive integer")

# Read and split the rows.
# Output files are named split_file_1.csv, split_file_2.csv, ... and each
# holds the input header followed by at most `split_file_rows` data rows.
split_rows = []    # data rows buffered for the split file being built
files_written = 0  # number of split files created so far

# newline="" lets the csv module handle line endings itself, so quoted
# fields containing embedded newlines are parsed correctly.
with open(file_path, newline="") as csv_file:
    # Read the CSV file
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first row is the header; a completely empty input yields None
    # and the loop below simply never runs.
    header = next(csv_reader, None)
    for row in csv_reader:
        split_rows.append(row)
        # Flush once the buffer holds exactly split_file_rows rows. Counting
        # buffered rows (rather than testing the running row number) keeps
        # every file the same size and keeps the suffix strictly sequential,
        # so no split file is ever overwritten.
        if len(split_rows) == split_file_rows:
            files_written += 1
            file_suffix = str(files_written)
            split_file_name = "split_file_" + file_suffix + ".csv"
            create_split_file(split_rows, header, split_file_name)
            # Start a fresh list; never reuse the one handed to the writer.
            split_rows = []

# At end of input, write the leftover rows (if any) as the final split file.
# Each row was already buffered inside the loop, so nothing is re-appended
# here; an input with no data rows produces no output files.
if split_rows:
    files_written += 1
    file_suffix = str(files_written)
    split_file_name = "split_file_" + file_suffix + ".csv"
    create_split_file(split_rows, header, split_file_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment