Created
November 8, 2023 14:16
-
-
Save hitenpratap/eb2f579074bc1b043542712c1ff092ff to your computer and use it in GitHub Desktop.
A bash script to split a CSV file into equal parts; it also puts the header row automatically into each of the newly generated files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Split a CSV file into equally sized parts, repeating the header row at the
# top of every part.
#
# Usage: <script> <csv_file> [num_parts]
#   csv_file   CSV file whose first line is the header row.
#   num_parts  Number of output files to produce (default: 5).
#
# Output: part_1.csv .. part_<num_parts>.csv, written to the current
# directory (existing files with those names are overwritten).
# Data rows are distributed as evenly as possible: when the row count is not
# a multiple of num_parts, the first (rows % num_parts) parts each receive one
# extra row, so exactly num_parts files are always produced.
#
# Exit status: 0 on success, 1 on bad arguments, a missing file, or a file
# with fewer data rows than num_parts.

set -euo pipefail

readonly DEFAULT_PARTS=5

# Print a diagnostic on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

main() {
  local csv_file=${1:-}
  local num_parts=${2:-$DEFAULT_PARTS}
  local total_lines

  # Check if a file path is provided
  if [[ -z "$csv_file" ]]; then
    die "Usage: $0 <csv_file> [num_parts]"
  fi

  # Check if the file exists
  if [[ ! -f "$csv_file" ]]; then
    die "File not found: $csv_file"
  fi

  # Leading zeros are rejected so the value is never read as octal below.
  if [[ ! "$num_parts" =~ ^[1-9][0-9]*$ ]]; then
    die "Number of parts must be a positive integer: $num_parts"
  fi

  # Count data rows (everything after the header). awk is used instead of
  # `wc -l` so that a final row without a trailing newline is still counted,
  # and so the count matches the records the splitting pass will see.
  # The file is fed via redirection so an odd file name (leading '-', or
  # containing '=') cannot be misread by awk as an option or assignment.
  total_lines=$(awk 'NR > 1 { n++ } END { print n + 0 }' < "$csv_file") \
    || die "Could not read: $csv_file"

  # Every part must receive at least one data row.
  if (( total_lines < num_parts )); then
    die "The file is too small to split into $num_parts parts."
  fi

  # Single pass: remember the header, then stream rows into part_<k>.csv,
  # switching to the next part whenever the current part's quota is reached.
  awk -v parts="$num_parts" -v total="$total_lines" '
    BEGIN {
      base = int(total / parts)   # minimum rows per part
      extra = total % parts       # this many leading parts get one more row
      boundary = 0                # last row number belonging to current part
      part = 0
    }
    NR == 1 { header = $0; next }
    {
      row = NR - 1
      if (row > boundary) {
        # Close the finished part so large splits do not exhaust descriptors.
        if (part > 0) close(out)
        part++
        boundary += base + (part <= extra ? 1 : 0)
        out = "part_" part ".csv"
        print header > out
      }
      print > out
    }
  ' < "$csv_file" || die "Failed to split: $csv_file"

  printf 'CSV file split into part_1.csv .. part_%s.csv\n' "$num_parts"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment