Last active
December 1, 2015 00:52
-
-
Save amosr/ec51e33a52e519fc16f3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## $FILES is a whitespace-separated list of the files to be split.
## Every FILE is split in two at one shared entity boundary:
##   FILE.1 receives every line before the first line of the split entity,
##   FILE.2 receives that line and everything after it.
## The entity of a line is its first '|'-separated field. The split entity is
## taken from the middle of the largest file.

# Print the name of the largest (by byte count) file among the arguments.
# On a tie the earliest argument wins.
# Returns 1 (with a message on stderr) if any argument is not a readable file.
largest_file() {
  lf_best=
  lf_best_size=-1
  for lf_candidate in "$@"; do
    if [ ! -f "$lf_candidate" ] || [ ! -r "$lf_candidate" ]; then
      printf 'split: cannot read file: %s\n' "$lf_candidate" >&2
      return 1
    fi
    # Reading from stdin makes wc print only the number; $(( )) strips the
    # padding some wc implementations add around it.
    lf_size=$(( $(wc -c < "$lf_candidate") ))
    if [ "$lf_size" -gt "$lf_best_size" ]; then
      lf_best=$lf_candidate
      lf_best_size=$lf_size
    fi
  done
  printf '%s\n' "$lf_best"
}

# Split each file given as an argument into FILE.1 and FILE.2.
# Arguments: one or more readable files whose lines look like "entity|...".
# Outputs:   writes FILE.1 and FILE.2 next to every FILE (both always created).
# Returns:   0 on success, 1 if no files were given, a file is unreadable,
#            or the largest file contains no lines.
split_files() {
  if [ "$#" -eq 0 ]; then
    printf 'split: no files given\n' >&2
    return 1
  fi

  # find largest file
  sf_largest=$(largest_file "$@") || return 1

  # count how many lines in it (stdin redirect: wc must not echo the file name,
  # otherwise the arithmetic below would see "N name" instead of a number)
  sf_lines=$(( $(wc -l < "$sf_largest") ))
  if [ "$sf_lines" -eq 0 ]; then
    printf 'split: largest file has no lines: %s\n' "$sf_largest" >&2
    return 1
  fi

  # pick the line just past the halfway point; clamp so that a one-line file
  # still yields a real line instead of an empty entity
  sf_mid=$(( sf_lines - sf_lines / 2 + 1 ))
  if [ "$sf_mid" -gt "$sf_lines" ]; then
    sf_mid=$sf_lines
  fi

  # get that line and find its entity (first field)
  sf_entity=$(awk -F'|' -v n="$sf_mid" 'NR == n { print $1; exit }' < "$sf_largest")

  # go through all the files (including the largest one)
  for sf_file in "$@"; do
    # Line number of the first line of the split entity. The comparison is an
    # exact string match on the first field (the appended "" forces string
    # comparison), so regex metacharacters in the entity cannot match others.
    sf_at=$(awk -F'|' -v ent="$sf_entity" \
      '($1 "") == (ent "") { print NR; exit }' < "$sf_file")

    # Entity absent from this file: nothing marks a boundary, so every line
    # goes to FILE.2 and FILE.1 is left empty.
    # NOTE(review): whether .2 is the right side depends on how the inputs are
    # ordered, which this script cannot see -- confirm against the data.
    if [ -z "$sf_at" ]; then
      sf_at=1
    fi

    # all lines before the split entity (delete from the boundary to the end)
    sed "${sf_at},\$d" < "$sf_file" > "$sf_file.1"
    # the split entity's first line and everything after it
    tail -n "+${sf_at}" < "$sf_file" > "$sf_file.2"
  done
}

# Entry point: FILES is deliberately expanded unquoted because it is a
# whitespace-separated list by contract.
# shellcheck disable=SC2086
if [ -n "${FILES:-}" ]; then
  split_files $FILES || exit 1
else
  printf 'split: FILES is empty; nothing to split\n' >&2
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment