Created
November 17, 2019 16:09
-
-
Save pganti/7c0382260852f07346b683fe82e0694a to your computer and use it in GitHub Desktop.
split csv files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Split a CSV file into a requested number of parts, repeating the header
# line (the first line of the input) at the top of every part.
#
# Usage:
#   <script> <file.csv> <num_files>
#
# Output:
#   <stem>-Part1.csv ... <stem>-Part<K>.csv, written next to the input file.
#   <stem> is the input base name up to its first '.', and
#   K = min(num_files, number of data rows). Rows are dealt out in their
#   original order and part sizes differ by at most one row.
#   Progress messages go to stdout, diagnostics to stderr.
#
# Exit status:
#   0 on success, 1 on I/O or input problems, 2 on bad arguments.
#
# Limitations:
#   Rows are delimited by newlines, so a quoted CSV field that contains an
#   embedded newline is not kept together.

# Print a diagnostic prefixed with the script name to stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print the command-line synopsis to stderr.
usage() {
  printf 'Usage: %s <file.csv> <num_files>\n' "${0##*/}" >&2
}

#######################################
# Split one CSV file into header-prefixed parts.
# Arguments: $1 - path to the CSV file (first line is the header)
#            $2 - desired number of output files (positive integer)
# Outputs:   writes <stem>-Part<n>.csv files; progress on stdout
# Returns:   0 on success, 1 on input/I-O failure, 2 on invalid count
#######################################
split_csv() {
  local file=${1-} parts=${2-}
  local dir='' base stem total rows

  if [[ ! -f "$file" || ! -r "$file" ]]; then
    err "cannot read input file: $file"
    return 1
  fi
  if [[ ! "$parts" =~ ^[0-9]+$ ]] || (( 10#$parts < 1 )); then
    err "number of files must be a positive integer, got: $parts"
    return 2
  fi
  # Force base 10 so a value such as "08" is not parsed as invalid octal.
  parts=$(( 10#$parts ))

  # Derive the stem from the base name only, so dots in the directory part
  # (./data.csv, ../x.csv, /home/a.b/x.csv) cannot truncate the output path.
  base=${file##*/}
  if [[ "$file" == */* ]]; then
    dir=${file%/*}/
  fi
  stem=${base%%.*}
  if [[ -z "$stem" ]]; then
    stem=$base # dotfile such as ".csv": fall back to the whole name
  fi

  # Count with awk rather than wc -l so that a final row without a trailing
  # newline is still counted. Input is redirected so odd file names (leading
  # '-', embedded '=') are never interpreted by awk.
  if ! total=$(awk 'END { print NR }' < "$file"); then
    err "failed to count lines in: $file"
    return 1
  fi
  rows=$(( total - 1 )) # first line is the header
  if (( rows < 1 )); then
    err "no data rows to split in: $file"
    return 1
  fi
  if (( parts > rows )); then
    parts=$rows # never create parts that would hold only a header
  fi

  echo 'Going to split the files now ...'
  echo 'Adding the headers to each file'

  # Single pass: the first (rows % parts) parts receive one extra row.
  # The output prefix travels through the environment because awk -v would
  # interpret backslash escapes inside the path.
  if ! OUT_PREFIX="${dir}${stem}" awk -v parts="$parts" -v rows="$rows" '
    # Finish the current part, if any, and start the next one with the header.
    function next_part() {
      if (out != "") {
        close(out)
        print "Finished Processing File:  " part
      }
      part++
      out = ENVIRON["OUT_PREFIX"] "-Part" part ".csv"
      quota = base + (part <= extra ? 1 : 0)
      written = 0
      print header > out
    }
    NR == 1 {
      header = $0
      base = int(rows / parts)
      extra = rows % parts
      written = 0
      quota = 0
      next
    }
    written == quota { next_part() }
    {
      print > out
      written++
    }
    END {
      if (out != "") {
        close(out)
        print "Finished Processing File:  " part
      }
    }
  ' < "$file"; then
    err "failed while writing parts for: $file"
    return 1
  fi
}

# Entry point: check the argument count and run the split.
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi
  split_csv "$1" "$2"
}

# Last command of the script, so its status becomes the exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment