Last active
September 24, 2018 03:37
-
-
Save lox/04337c6f5e98f823075ed63a23706bbc to your computer and use it in GitHub Desktop.
Quoted string parsing in bash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Reads a string from stdin and splits it into words using shell-style
# quoting rules. With COMPARE_EVAL=1 in the environment, the words produced
# by bash's own parser (via eval) are printed first for comparison.
set -euo pipefail

# Slurp all of stdin. Command substitution strips trailing newlines.
input="$(cat)"

if [[ "${COMPARE_EVAL:-}" == "1" ]]; then
  printf 'parsing of args with native bash eval:\n\n'

  # WARNING: eval runs any command substitution or expansion embedded in the
  # input. This mode is a debugging aid only; never enable it for untrusted
  # input.
  array=()
  eval "array=($input)"

  # The ${array[@]+...} guard avoids an "unbound variable" abort under
  # `set -u` on bash releases older than 4.4 when the array is empty.
  for token in ${array[@]+"${array[@]}"}; do
    printf '[%q]\n' "$token"
  done
  echo
fi

# Global output array; tokenize_shell_string appends one element per word.
result=()
#######################################
# Split a string into words using the "static" subset of shell quoting:
# single quotes, double quotes and backslash escapes. Anything that would
# need a real shell to evaluate is rejected rather than guessed at.
# Globals:
#   result (written) - parsed words are APPENDED to this array; it is not
#                      reset here, so callers clear it between calls.
# Arguments:
#   $1 - the string to tokenize
# Returns:
#   0   on success
#   100 when the input needs a real shell: an unquoted metacharacter, a `$`
#       or backtick inside double quotes, an unterminated quote, or a
#       dangling trailing backslash
#######################################
tokenize_shell_string() {
  local input="$1"
  local token=''    # word currently being accumulated
  local quote=''    # active quote character (' or "), empty when unquoted
  local escaped=''  # non-empty when the previous char was an active backslash
  local in_word=''  # non-empty once a word has started; lets '' yield an
                    # empty word while runs of whitespace yield nothing
  local c i
  local -r len=${#input}
  # Characters a backslash may escape inside double quotes (besides newline).
  local -r dq_escapable='$`"\'
  # Unquoted characters that have special meaning to the shell.
  local -r metachars='!#$&()*;<>?^`{}[]|'

  # Process the input character by character.
  for (( i = 0; i < len; i++ )); do
    c=${input:i:1}

    if [[ -n "$escaped" ]]; then
      escaped=''
      # Backslash-newline is a line continuation: both characters vanish.
      if [[ "$c" == $'\n' ]]; then
        continue
      fi
      # Inside double quotes the backslash is only removed before the
      # characters it can actually escape; otherwise it stays literal.
      if [[ "$quote" == '"' && "$dq_escapable" != *"$c"* ]]; then
        token+='\'
      fi
      token+="$c"
      in_word=1
    elif [[ "$c" == '\' && "$quote" != "'" ]]; then
      # Single quotes do not support escaping, hence the quote check.
      escaped=1
    elif [[ -z "$quote" && ( "$c" == '"' || "$c" == "'" ) ]]; then
      # Opening quote: a word starts here even if the quotes are empty.
      quote="$c"
      in_word=1
    elif [[ -n "$quote" && "$c" == "$quote" ]]; then
      # Matching closing quote.
      quote=''
    elif [[ -z "$quote" && "$c" == [[:space:]] ]]; then
      # Unquoted whitespace ends the current word, if there is one.
      if [[ -n "$in_word" ]]; then
        result+=("$token")
      fi
      token=''
      in_word=''
    elif [[ "$quote" == '"' && ( "$c" == '$' || "$c" == '`' ) ]]; then
      # Parameter expansion or command substitution inside double quotes.
      return 100
    elif [[ -z "$quote" && "$metachars" == *"$c"* ]]; then
      # Unquoted shell metacharacter.
      return 100
    else
      token+="$c"
      in_word=1
    fi
  done

  # Input ended mid-quote or right after a backslash: not a complete string.
  if [[ -n "$quote" || -n "$escaped" ]]; then
    return 100
  fi

  # Flush the final word (may legitimately be empty, e.g. a trailing '').
  if [[ -n "$in_word" ]]; then
    result+=("$token")
  fi
  return 0
}
# Tokenize the input; a non-zero status means it cannot be split without
# handing it to a real shell.
if ! tokenize_shell_string "$input"; then
  echo "NEEDS SHELL"
  exit 1
fi

# Print each word shell-quoted, one per line. The ${result[@]+...} guard
# avoids an "unbound variable" abort under `set -u` on bash releases older
# than 4.4 when no words were produced (e.g. empty input).
for str in ${result[@]+"${result[@]}"}; do
  printf "[%q]\\n" "$str"
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example run: printf emits a sample string containing a backslash-escaped
# single quote, a single-quoted section spanning two newlines, and a single
# quote wrapped in double quotes, and pipes it to ./bash_parser.sh on stdin.
# COMPARE_EVAL=1 is set in that command's environment.
# NOTE(review): ./bash_parser.sh is presumably the tokenizer script from this
# gist — confirm the filename.
| printf "alpaca \\'blah 'llamas\\n\\nalpa'\"'\"'cas'" | COMPARE_EVAL=1 ./bash_parser.sh |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment