Last active
June 13, 2024 07:00
-
-
Save megasuperlexa/ecac75728e5b79338e18679e085791a3 to your computer and use it in GitHub Desktop.
CSV splitter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# CSV splitter: split a CSV file with a header row into one file per key.
#
# Usage: <this script> <input_csv_file>
#
# The key is the value of the SECOND comma-separated column of each data row.
# Every data row is appended, unchanged, to "./<key>.csv" in the current
# directory. The header row (first line of the input) is written to the output
# file each time the key changes, so the input should be sorted by the key
# column; otherwise a key that reappears later gets the header repeated.
# Rows whose key column is empty (or that have fewer than two columns) are
# skipped.
#
# Limitations:
#   - Lines are split on every comma; quoted fields that contain commas are
#     not understood.
#   - The key is used as a file name as-is, so keys containing '/' will not
#     map to a file in the current directory.
#   - Output files are appended to; remove old outputs before re-running.
#
# Exit status: 0 on success, 1 on missing argument or unreadable input.

if [ $# -eq 0 ]; then
  echo "Usage: $0 <input_csv_file>" >&2
  exit 1
fi

INPUT_CSV="$1"

if [[ ! -f "$INPUT_CSV" || ! -r "$INPUT_CSV" ]]; then
  echo "Error: cannot read input file: $INPUT_CSV" >&2
  exit 1
fi

current_key=""
output_file=""
header=""
header_seen=0

# Read whole lines (IFS= keeps leading/trailing whitespace, -r keeps
# backslashes literal). The "|| [[ -n $line ]]" part makes sure a final line
# without a trailing newline is still processed.
while IFS= read -r line || [[ -n "$line" ]]; do
  # The first line is the header: remember it, but never treat it as data.
  if (( ! header_seen )); then
    header="$line"
    header_seen=1
    continue
  fi

  # Split on commas only to extract the key (column 2). To key on another
  # column, adjust the number of placeholders before 'key', e.g.
  # "read -r key _" for column 1 or "read -r _ _ key _" for column 3.
  IFS=, read -r _ key _ <<< "$line"

  if [[ -z "$key" ]]; then
    continue
  fi

  # Both sides quoted: compare as plain strings, not as a glob pattern.
  if [[ "$key" != "$current_key" ]]; then
    current_key="$key"
    output_file="${current_key}.csv"
    printf '%s\n' "$header" >> "./$output_file"
  fi

  # Write the original line verbatim so the row keeps exactly the columns
  # (and any trailing comma) it had in the input.
  printf '%s\n' "$line" >> "./$output_file"
done < "$INPUT_CSV"

echo "Files have been created based on the second column values."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment