Last active
December 14, 2015 09:09
-
-
Save hypersoft/5062877 to your computer and use it in GitHub Desktop.
Hand me your stream beginning with the first TOKEN, and I will return to you the
content of your first block: the first TOKEN, the data that lies between it and the
matching closing TOKEN, and that closing TOKEN. Otherwise, I will return detailed data
fields on What Went Wrong, along with the full content of the buffer I have searched through.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
parse.nested.block() # char start, char end[, buffer] | |
{ declare \ | |
_Usage=" | |
USAGE: $FUNCNAME [-u|-h|--help|-c|--chained] START END [BUFFER] | |
" \ | |
_Help=" | |
My name is \`$FUNCNAME' I was constructed by Triston J. Taylor (pc.wiz.tt) | |
as a generic method to parse nested blocks at my standard input on the | |
28th of February 2013. | |
TOKENS: START, END | |
I require my first character of input to be the START TOKEN. | |
I require 2 character TOKENS to pair and search for. These TOKENS may not match. | |
I support backslash escaped anything I would interpret as a character TOKEN. | |
I do not eat backslashes, I serve them. | |
CHAINED: MODE | |
I will not require the START TOKEN to be the first item of input, nor will it be | |
the first item of buffered output. | |
BUFFER | |
I accept a previously defined bash variable label as an optional output buffer. | |
I write all data to standard out (and possibly standard error as well) if you do | |
not instruct me to write to your buffer. | |
RETURN | |
I have 2 modes of error reporting. | |
Buffered, which simply prepends a single line CSV Error Key to the output buffer, | |
and Unbuffered, which generates a generic error report on standard error. | |
My CSV Error Keys consist of the following fields: | |
1.) stream index: byte position in stream at which block begins | |
2.) stream line: line number in stream at which block begins | |
3.) stream column: column number at stream line where block begins | |
ERROR STATUS | |
1. Your parameters are incorrect | |
2. Incorrect block delimiter | |
3. Unterminated block | |
An error status lower than level 2 generates no buffer modifications or output, | |
as no data is read or stored from input. | |
"; | |
[[ $1 == -u ]] && { printf %s "$_Usage"; return; } | |
[[ $1 == -h || $1 == --help ]] && { printf %s " ${_Usage%$'\n'}$_Help"; return; } | |
declare -i chained=0; | |
[[ $1 == -c || $1 == --chained ]] && { chained=1; shift; } | |
(( $# >= 2 )) || { $FUNCNAME -u; return 1; }; | |
declare _Error _From _BlockStart="${1:0:1}" _BlockEnd="${2:0:1}"; | |
[[ "$3" =~ ^_[[:upper:]] ]] && { | |
printf "$FUNCNAME: error: %s\\n" "$3 is a reserved buffer name" >&2; | |
return 1; | |
} | |
[[ "$_BlockStart" == "$_BlockEnd" ]] && { | |
printf "$FUNCNAME: error: %s\\n" "matching block delimiters not supported" >&2; | |
return 1; | |
} | |
(( ${#1} == 1 )) || { | |
printf "$FUNCNAME: error: %s\\n" "parameter 1 is not of type char" >&2; | |
return 1; | |
} | |
(( ${#2} == 1 )) || { | |
printf "$FUNCNAME: error: %s\\n" "parameter 2 is not of type char" >&2; | |
return 1; | |
} | |
(( $# == 3 )) && { | |
# check if user declared their buffer | |
declare -p $3 2>&1 >/dev/null || { | |
printf "$FUNCNAME: error: %s\\n" "unbound output buffer" >&2; | |
return 1; | |
} | |
} | |
{ read _Error _From; IFS='' read -rN0 $3; } < <( | |
declare char buffer from=0,1,1; declare -i index=-1 line=1 column=1 depth=0; | |
get () { IFS='' read -rN1 $1 && { let ++index ++column; buffer+="${!1}"; }; }; | |
next() { from="$from:$index,$line,$column"; let ++depth; }; | |
previous() { from="${from%:*}"; let --depth; } | |
status () { printf -- "$1 ${from##*:}\\n%s" "$buffer"; return $1; } | |
(( chained == 1 )) && { let ++index ++column; next; } || { | |
if get char; then | |
if [[ "$char" != "$_BlockStart" ]]; then status 2; exit; fi; | |
next; | |
fi; | |
} | |
while get char; do | |
[[ "$char" == $'\n' ]] && { let column=1 line++; continue; } | |
[[ "$char" == \\ ]] && { get char || break; continue; } | |
[[ "$char" == "${_BlockStart}" ]] && { next; continue; } | |
[[ "$char" == "${_BlockEnd}" ]] && { previous || break; continue; } | |
done; | |
(( ! depth )) || { status 3; exit; } | |
printf -- '0\n%s' "$buffer"; | |
); | |
if [[ -n $3 ]]; then | |
(( _Error )) && { | |
IFS='' read -rN0 $3 < <(echo "$_From"; printf %s "${!3}"); | |
} | |
else | |
(( _Error )) && { | |
(IFS=,; printf "$FUNCNAME: error: "'stream[index]=%i stream[line]=%i stream[column]=%i\n' $_From) >&2; | |
} | |
printf %s "$REPLY"; | |
# if the output is terminal, and the data, does not have a newline, affix one | |
[[ -t 1 ]] && { [[ "${REPLY:$((${#REPLY} - 1))}" == $'\n' ]] || echo ''; }; | |
fi; | |
return $_Error; | |
} |
Revision 4 fixes a bash word splitting bug.
Revision 5 adds missing lines to terminal output for prompt/environment compatibility.
Revision 6 adds minor improvements to the source documentation.
Revision 7 compensates for GitHub's poorly written syntax highlighter, which cannot cope with single-quoted backslash characters.
Revision 8 Adds chained mode, which assumes you have already processed the start token.
Revision 9 Sacrifices arrayed, index buffering for simplified bound checking, and error reporting has been made more reliable.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ABOUT
Typically I would be used to retrieve the extent of a text based object's contents,
such as a bash function body:
In the above example, you would start block parsing by first parsing your function
header
`function gotme ()`, and absorbing any whitespace after the football:
`()`.

Hand me your stream beginning with the first TOKEN, and I will return to you the
content of your first block: the first TOKEN, the data that lies between it and the
matching closing TOKEN, and that closing TOKEN. Otherwise, I will return detailed data
fields on What Went Wrong, along with the full content of the buffer I have searched through.