Last active
November 8, 2018 17:36
-
-
Save sathish-kumar-subramani/f5464bd8c289099cba31150e5e8b713d to your computer and use it in GitHub Desktop.
Split csv file into multiple csv files using sed, retaining header
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Split a CSV file into several smaller CSV files, repeating the header line
# at the top of every piece.
#
# Usage: <script> <file.csv> <maxLines>
#   file.csv  input file; its first line is treated as the header
#   maxLines  maximum number of lines per output file, header included, so
#             every piece carries at most (maxLines - 1) data rows
#
# Pieces are written as <name>-1.csv, <name>-2.csv, ... where <name> is the
# input path with a trailing ".csv" removed.
#
# Exit status: 0 on success, 1 on invalid input or nothing to split,
#              2 when called with too few arguments.

# Print a diagnostic message on stderr.
err() {
  printf '%s\n' "$*" >&2
}

#######################################
# Split one CSV file into header-prefixed pieces.
# Arguments:
#   $1 - path of the CSV file to split
#   $2 - maximum lines per piece, header included (integer >= 2)
# Outputs:
#   Progress information on stdout, diagnostics on stderr.
# Returns:
#   0 on success; 1 if the file is unreadable, maxLines is invalid, or the
#   file holds fewer than two data rows.
#######################################
split_csv() {
  local fileName=$1
  local maxLines=$2
  local lineCount totalLines rowsPerFile totalFiles lastLine
  local header newFileName startLine endLine i

  echo "File name: $fileName"
  echo "Max Lines: $maxLines"

  if [[ ! -f "$fileName" || ! -r "$fileName" ]]; then
    err "Cannot read input file: $fileName"
    return 1
  fi
  if [[ ! "$maxLines" =~ ^[0-9]+$ ]]; then
    err "maxLines must be a non-negative integer, got: $maxLines"
    return 1
  fi
  # Force base 10 so a value such as "08" is not rejected as invalid octal.
  maxLines=$((10#$maxLines))

  # awk counts records, so a final row without a trailing newline is still
  # counted (wc -l counts newline characters and would miss it).
  lineCount=$(awk 'END { print NR }' < "$fileName") || return 1
  totalLines=$((lineCount - 1)) # data rows, i.e. everything but the header
  echo "Number of Rows: $totalLines"

  # The header occupies one line of every piece, so maxLines must leave room
  # for at least one data row.
  if ((totalLines < 2 || maxLines < 2)); then
    echo "Exiting"
    return 1
  fi

  rowsPerFile=$((maxLines - 1))
  totalFiles=$(((totalLines + rowsPerFile - 1) / rowsPerFile)) # ceiling
  echo "Total files: $totalFiles"

  header=$(head -n 1 < "$fileName") || return 1
  echo "Header: $header"

  # Line number of the last data row in the input (the header is line 1).
  lastLine=$((totalLines + 1))

  for ((i = 1; i <= totalFiles; i++)); do
    startLine=$((2 + (i - 1) * rowsPerFile))
    endLine=$((startLine + rowsPerFile - 1))
    if ((endLine > lastLine)); then
      endLine=$lastLine # the final piece may be shorter than the others
    fi

    newFileName="${fileName%.csv}-$i.csv"
    {
      # printf with a quoted argument keeps the header byte-for-byte
      # (no whitespace collapsing, no glob expansion).
      printf '%s\n' "$header"
      # Quit at endLine so sed does not scan the rest of the input.
      sed -n "${startLine},${endLine}p;${endLine}q" < "$fileName"
    } > "$newFileName" || {
      err "Failed to write $newFileName"
      return 1
    }

    echo "$newFileName Start: $startLine | End: $endLine | Total rows written: $((endLine - startLine + 1))"
  done
}

# Entry point: check the argument count and run the split.
main() {
  if (($# < 2)); then
    err "Usage: ${0##*/} <file.csv> <maxLines>"
    return 2
  fi
  split_csv "$1" "$2"
}

# main is the last command, so its return status is the script's exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment