Last active
November 7, 2019 08:53
-
-
Save bokwoon95/4ef6e5b130403ea5cf458b5f12c89bd2 to your computer and use it in GitHub Desktop.
joinlines.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# joinlines.sh - join physical lines of a csv file that belong to one record.
#
# Globals set here:
#   SCRIPT_PATH  path this script was invoked/sourced as
#   SCRIPT_NAME  basename of SCRIPT_PATH (used in the help text)
#   SCRIPT_DIR   absolute directory containing the script
#   HELP         usage text printed for -h/--help

SCRIPT_PATH=${BASH_SOURCE[0]}
SCRIPT_NAME=${SCRIPT_PATH##*/}
# Resolve the directory through a subshell `cd` so the caller's cwd is untouched.
SCRIPT_DIR="$(cd "$(dirname "${SCRIPT_PATH:-$PWD}")" 2>/dev/null 1>&2 && pwd)"

# Unquoted heredoc delimiter: $SCRIPT_NAME is expanded inside the text.
HELP=$(cat <<EOF
Usage: $SCRIPT_NAME (--cr|--lf) <file.csv>

Joins lines in a csv file that are meant to be one line. If CRLF + LF
terminated lines are present, this script will join all LF terminated lines.
If CRLF + CR + LF lines are present, this script will join all CR terminated
lines. This may or may not be the desired behavior, but has been the
observed pattern so far.

Options:
  --lf        join LF terminated lines (CRLF + LF files)
  --cr        join CR terminated lines (CRLF + CR + LF files)
  -h, --help  show this help and exit

If $SCRIPT_NAME was unsuccessful in joining the lines, it will leave a copy
of the modified file behind for inspection. Else, it will overwrite the
original file with the modified copy.
EOF
)
# Parse the command line.
#
# Arguments: the script's positional parameters.
# Globals written:
#   JOIN_CR  set to 'true' when --cr is given
#   JOIN_LF  set to 'true' when --lf is given
#   FILE     any other argument; if several are given the last one wins
# Exits 0 after printing $HELP when -h/--help is seen.
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --help | -h)
        echo "$HELP"
        exit 0
        ;;
      --cr)
        JOIN_CR='true'
        ;;
      --lf)
        JOIN_LF='true'
        ;;
      *)
        FILE="$1"
        ;;
    esac
    shift
  done
}
parse_args "$@"
# Disabled: automatic detection of the line-terminator mix via file(1).
# The join mode is chosen explicitly with --cr / --lf instead.
# printf "Checking for (CRLF + LF) or (CRLF + CR + LF) line terminators in $FILE... "
# if file "$FILE" | grep 'CRLF, LF' >/dev/null 2>&1; then
# JOIN_LF='true'
# printf "CRLF + LF found"
# elif file "$FILE" | grep 'CRLF, CR, LF' >/dev/null 2>&1; then
# JOIN_CR='true'
# printf "CRLF + CR + LF found"
# else
# JOIN_NOTHING='true'
# fi
# printf "\n"
# if [ "$JOIN_NOTHING" ]; then
# echo "$FILE does not have (CRLF + LF) nor (CRLF + CR + LF) line terminators present, exiting"
# exit 0
# fi
# A join mode is mandatory: refuse to continue unless --cr or --lf was given.
# Diagnostics go to stderr so they do not mix with the progress output.
if [ -z "${JOIN_CR:-}" ] && [ -z "${JOIN_LF:-}" ]; then
  echo 'please specify either join by --cr (^M) or --lf' >&2
  exit 1
fi
# Without a file name the later copy step could only fail with a confusing
# error about an empty path, so stop here with a clear message instead.
if [ -z "${FILE:-}" ]; then
  echo 'please specify the csv file to modify' >&2
  exit 1
fi
# Print a suffix for the working copy's file name: the current epoch seconds
# followed by 10 base64 characters read from /dev/urandom.
# Every '/' is mapped to '+' (tr replaces all occurrences) because the suffix
# becomes part of a file name and must not introduce path separators.
random_suffix() {
  printf '%s%s' \
    "$(date +%s)" \
    "$(base64 < /dev/urandom | head -c10 | tr '/' '+')"
}
RANDSTRING="$(random_suffix)"
# All edits are made on a copy named "$FILE.$RANDSTRING"; the original is only
# replaced at the very end. Abort if the copy cannot be created.
echo "making a copy of $FILE to $FILE.$RANDSTRING..."
# `--` keeps a file name that starts with '-' from being parsed as an option.
cp -- "$FILE" "$FILE.$RANDSTRING" || exit 1
echo "modifying $FILE.$RANDSTRING..."
# Join the broken lines of a file in place using vim in silent Ex mode.
#
# Arguments:
#   $1 - join mode: "lf" or "cr"
#   $2 - path of the file to modify
#
# NOTE(review): the patterns were shown with the printable pair "^M"; this
# assumes they stand for a carriage return. A real CR byte is injected through
# bash's $'\r' quoting so the patterns do not depend on how the script text was
# copied or on vim's handling of backslash escapes inside [].
join_broken_lines() {
  local mode=$1 target=$2
  local cr=$'\r'

  case "$mode" in
    lf)
      # 1. from every line that does not end in CR, join through the next
      #    line that does end in CR
      # 2. print any line that still does not end in CR
      # 3. strip the trailing CR from every line
      vim -Es \
        -c "g/[^${cr}]\$/.,/${cr}\$/join" \
        -c "g/[^${cr}]\$/print" \
        -c "%s/${cr}\$//g" \
        -c 'wq' \
        "$target"
      ;;
    cr)
      # 1. from every line that ends in CR, join through the next line that
      #    does not end in CR
      # 2. print any line that still ends in CR
      vim -Es \
        -c "g/${cr}\$/.,/[^${cr}]\$/join" \
        -c "g/${cr}\$/print" \
        -c 'wq' \
        "$target"
      ;;
  esac
}

# --lf takes precedence when both modes were requested.
if [ "$JOIN_LF" ]; then
  join_broken_lines lf "$FILE.$RANDSTRING"
elif [ "$JOIN_CR" ]; then
  join_broken_lines cr "$FILE.$RANDSTRING"
fi
# In --lf mode, check the result with file(1): if it still reports a
# "CRLF, LF" terminator mix the join did not fully work, so keep the modified
# copy for inspection and leave the original untouched.
if [ "$JOIN_LF" ] && file "$FILE.$RANDSTRING" | grep -q 'CRLF, LF'; then
  echo "did not join lines successfully, leaving $FILE.$RANDSTRING behind" >&2
  exit 1
fi

# Replace the original with the modified copy.
echo "moving $FILE.$RANDSTRING back into $FILE"
mv -- "$FILE.$RANDSTRING" "$FILE" || exit 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment