Skip to content

Instantly share code, notes, and snippets.

@sfinktah
Last active September 20, 2023 16:36
Show Gist options
  • Save sfinktah/a432630706393d7bbe51f01508805cc6 to your computer and use it in GitHub Desktop.
Save sfinktah/a432630706393d7bbe51f01508805cc6 to your computer and use it in GitHub Desktop.
string_between bash function
unset EXPLODED
declare -a EXPLODED
function explode
{
local c=$#
(( c < 2 )) &&
{
echo function "$0" is missing parameters
echo "$0 <delimiter> <string> <limit>"
echo "result is in EXPLODED"
return 1
}
local delimiter="$1"
local string="$2"
local limit=${3-99}
local tmp_delim=$'\x07'
local delin=${string//$delimiter/$tmp_delim}
local oldifs="$IFS"
IFS="$tmp_delim"
EXPLODED=($delin)
IFS="$oldifs"
}
. include upvars explode
strpos() {
haystack=$1
needle=${2//\*/\\*}
x="${haystack%%$needle*}"
[[ "$x" = "$haystack" ]] && { echo -1; return 1; } || echo "${#x}"
}
strrpos() {
haystack=$1
needle=${2//\*/\\*}
x="${haystack%$needle*}"
[[ "$x" = "$haystack" ]] && { echo -1; return 1 ;} || echo "${#x}"
}
startswith() {
haystack=$1
needle=${2//\*/\\*}
x="${haystack#$needle}"
[[ "$x" = "$haystack" ]] && return 1 || return 0
}
endswith() {
haystack=$1
needle=${2//\*/\\*}
x="${haystack%$needle}"
[[ "$x" = "$haystack" ]] && return 1 || return 0
}
_string_find_common() {
# def _string_find(haystack, needle, start=0, end=None):
local func=$1; shift || invalid_args
local haystack=$1; shift || invalid_args
local needle=$1; shift || invalid_args
local -i start=${1:-0}
local -i end=${2:-${#haystack}}
# declare -p haystack needle start end
NEEDLE_LEN=${#needle}
NEEDLE_POS=$( "$func" "${haystack:$start:$end}" "$needle" )
(( NEEDLE_POS += start ))
# return len(needle), haystack.find(needle, start, end)
}
_string_find() {
_string_find_common strpos "$@"
}
_string_rfind() {
_string_find_common strrpos "$@"
}
invalid_args() {
cat <<'END'
string_between: string_between [-v var] left right subject [start=0]
[end=None] [repl=None] [inclusive=0]
[greedy=0] [rightmost=0] [retn_all_on_fail=0]
Return the string between `left` and `right`.
Return the substring `sub` delineated by being between the
strings `left` and `right`, and optionally such that sub is
contained within subject[start:end]. `start` and `end` are
interpreted as python slice notation.
If -v is not supplied, `sub` is stored in the REPLY variable.
If -s is specified, the result is returned on the standard
output instead of in a variable.
Options:
-v var assign the result to shell variable VAR
-i file read input from file or '-' to read from
the standard input
-s display the result on the standard output
-- end -option processing
If no substring is found, and either the replace option `repl`
or `retn_all_on_fail` option are specified, `subject` is
returned.
If the substring is found, and `repl` is specified, the result
will be `subject` with `sub` replaced by `repl`, otherwise
the matching substring `sub` is returned.
Exit Status:
The return code is always zero.
Example:
$ string_between "the" "with" "thecatinthematwiththehatwithoutme" \
greedy=1 inclusive=1
$ echo $REPLY
thecatinthematwiththehatwith
END
return 1
}
string_between ()
{
cat > /dev/null <<END
string_between(left, right, subject, [,start [,end]] [,greedy=False] [,inclusive=False] [,repl=None] [,retn_all_on_fail=False] [,retn_class=False] [,rightmost=False]) -> str
Return the substring sub delineated by being between the
strings \`left\` and \`right\`, and such that sub is contained
within subject[start:end]. Optional default_arguments start and
end are interpreted as in slice notation.
Return the string between \`left\` and \`right\` or empty string on failure
@param left str|re|list: left anchor, or '' for start of subject; regex must be compiled
@param right str|re|list: right anchor, or '' for end of subject; regex must be compiled
@param subject: string to be searched
@param start: start index for search
@param end: start and end are interpreted as in slice notation.
@param greedy: match biggest span possible
@param inclusive: include anchors in result
@param repl [str|callable]: replace span with string (or callable)
@param retn_all_on_fail: return original string if match not made
@param retn_class: return result as StringBetweenResult object
@param rightmost: match rightmost span possible by greedily searching for \`left\`; implies \`greedy\`
@return matched span | modified string | original string | empty StringBetweenResult object
Note: regular expressions must be compiled
If left and right are lists, then string_between() takes a value from
each list and uses them as left and right on subject. If right has
fewer values than left, then an empty string is used for the rest of
replacement values. The converse applies. If left is a list and right
is a string, then this replacement string is used for every value of left.
The converse also applies.
Examples:
---------
>>> s = 'The *quick* brown [fox] jumps _over_ the **lazy** [dog]'
>>> string_between('[', ']', s)
'fox'
>>> string_between('[', ']', s, inclusive=True)
'[fox]'
>>> string_between('[', ']', s, rightmost=True)
'dog'
>>> string_between('[', ']', s, inclusive=True, greedy=True)
'[fox] jumps _over_ the **lazy** [dog]'
END
local -A default_arguments=([start]=0 [end]=None [repl]=None [inclusive]=0 [greedy]=0 [rightmost]=0 [retn_all_on_fail]=0 [retn_class]=0 [upvar]=REPLY [infile]=)
local option_processing=1;
while (( option_processing )); do
case "$1" in
-v)
shift;
default_arguments[upvar]=$1;
shift
;;
-s)
default_arguments[upvar]=/dev/stdout;
shift
;;
-i)
shift;
default_arguments[infile]=$1;
shift
;;
--)
option_processing=0;
shift
;;
*)
option_processing=0
;;
esac;
done;
local left=$1;
shift || invalid_args;
local right=$1;
shift || invalid_args;
local subject
if [ -n "${default_arguments[infile]}" ]; then
if [ "${default_arguments[infile]}" = "-" ]; then
default_arguments[infile]="/dev/stdin"
fi
subject=$(cat "${default_arguments[infile]}"; echo -ne '\r'); subject=${subject%\r}
else
subject=$1;
shift || invalid_args;
fi
ROFFSET=;
while :; do
arg=$1;
shift || break;
explode "=" "$arg";
local -i len=${#EXPLODED[@]};
(( len == 2 )) || {
echo "trailing or invalid arguments";
break
};
k=${EXPLODED[0]};
v=${EXPLODED[1]};
[[ ${default_arguments[$k]} == '' ]] && {
echo "invalid argument key";
break
};
default_arguments[$k]=$v;
done;
for KEY in "${!default_arguments[@]}";
do
VALUE=${default_arguments[$KEY]};
local "$KEY"="$VALUE";
done;
local -i subject_len=${#subject};
local -i llen;
local -i rlen;
local -i l=-2;
local -i r;
(( r = subject_len - start ));
[[ $end == "None" ]] && end=$subject_len;
if (( rightmost )); then
greedy=1;
if [[ right == '' ]]; then
_string_rfind "$subject" "$left" "$start" "$end";
llen=$NEEDLE_LEN;
l=$NEEDLE_POS;
fi;
fi;
if (( l == -2 )); then
_string_find "$subject" "$left" "$start" "$end";
llen=$NEEDLE_LEN;
l=$NEEDLE_POS;
fi;
if (( l == -1 )); then
if [[ $repl != "None" || $retn_all_on_fail != 0 ]]; then
result "${default_arguments[upvar]}" "${subject:$l}";
OFFSET=$l;
return;
fi;
result "${default_arguments[upvar]}" "";
OFFSET=$l;
return;
fi;
if [[ $right != "" ]]; then
if (( greedy )); then
_string_rfind "$subject" "$right" "$start" "$end";
rlen=$NEEDLE_LEN;
r=$NEEDLE_POS;
if (( rightmost && r > -1 )); then
_string_rfind "$subject" "$left" "$start" "$r";
llen=$NEEDLE_LEN;
l=$NEEDLE_POS;
fi;
else
_string_find "$subject" "$right" "$(( l + llen ))" "$end";
rlen=$NEEDLE_LEN;
r=$NEEDLE_POS;
fi;
else
rlen=0;
fi;
if (( r == -1 || r < ( l + llen ) )); then
if [[ $repl != "None" || retn_all_on_fail != 0 ]]; then
result "${default_arguments[upvar]}" "$subject";
OFFSET=$l;
ROFFSET=$r;
return;
fi;
result "${default_arguments[upvar]}" "";
OFFSET=$l;
ROFFSET=$r;
return;
fi;
if (( inclusive && r )); then
(( r += rlen ));
else
(( l += llen ));
fi;
if [[ $repl == "None" ]]; then
result "${default_arguments[upvar]}" "${subject:$l:$((r-l))}";
OFFSET=$l;
ROFFSET=$r;
return;
fi;
result "${default_arguments[upvar]}" "${subject:0:$l}${repl}${subject:$r}";
OFFSET=$l;
ROFFSET=$r;
return
}
# vim: set ts=4 sts=0 sw=0 et:
result () {
local __name=$1
local __value=$2
if startswith "$__name" /; then
echo "$__value"
else
local $__name && upvar $__name "$__value"
fi
}
string_between_pipe() {
exec 3<&0
while IFS='$\n' read -u 3 -r line || [[ -n "$line" ]]; do
string_between -s "$@" "$line"
done
}
# string_between "c" "t" "thecatinthematwiththehat" start=7 end=9 greedy=1 why=9
# string_between "the" "with" "thecatinthematwiththehatwithoutme" greedy=1 inclusive=1
# string_between "the" "" "thecatinthematwiththehatwithoutme" rightmost=1
# string_between "the" "" "thecatinthematwiththehatwithoutme"
# declare -p REPLY OFFSET ROFFSET
upvar_assoc() {
getarg array_name
get_array_by_ref
if unset -v "$array_name"; then # Unset & validate varname
if (( $# == 2 )); then
eval $array_name=\"\$e\" # Return single value
else
throw exception $FUNCNAME invalid argument count
fi
fi
}
# Assign variable one scope above the caller
# Usage: local "$1" && upvar $1 "value(s)"
# Param: $1 Variable name to assign value to
# Param: $* Value(s) to assign. If multiple values, an array is
# assigned, otherwise a single value is assigned.
# NOTE: For assigning multiple variables, use 'upvars'. Do NOT
# use multiple 'upvar' calls, since one 'upvar' call might
# reassign a variable to be used by another 'upvar' call.
# See: http://fvue.nl/wiki/Bash:_Passing_variables_by_reference
upvar() {
if unset -v "$1"; then # Unset & validate varname
if (( $# == 2 )); then
eval $1=\"\$2\" # Return single value
else
eval $1=\(\"\${@:2}\"\) # Return array
fi
fi
}
# Appendix B: upvars.sh
# Assign variables one scope above the caller
# Usage: local varname [varname ...] &&
# upvars [-v varname value] | [-aN varname [value ...]] ...
# Available OPTIONS:
# -aN Assign next N values to varname as array
# -v Assign single value to varname
# Return: 1 if error occurs
# See: http://fvue.nl/wiki/Bash:_Passing_variables_by_reference
upvars() {
if ! (( $# )); then
echo "${FUNCNAME[0]}: usage: ${FUNCNAME[0]} [-v varname value] | [-aN varname [value ...]] ..." 1>&2
return 2
fi
while (( $# )); do
case $1 in
-a*)
# Error checking
[[ ${1#-a} ]] || { echo "bash: ${FUNCNAME[0]}: \`$1': missing number specifier" 1>&2; return 1; }
printf %d "${1#-a}" &> /dev/null || { echo "bash: ${FUNCNAME[0]}: \`$1': invalid number specifier" 1>&2
return 1; }
# Assign array of -aN elements
[[ "$2" ]] && unset -v "$2" && eval $2=\(\"\${@:3:${1#-a}}\"\) &&
shift $((${1#-a} + 2)) || { echo "bash: ${FUNCNAME[0]}: \`$1${2+ }$2': missing argument(s)" 1>&2; return 1; }
;;
-v)
# Assign single value
[[ "$2" ]] && unset -v "$2" && eval $2=\"\$3\" &&
shift 3 || { echo "bash: ${FUNCNAME[0]}: $1: missing argument(s)" 1>&2; return 1; }
;;
--help) echo "\
Usage: local varname [varname ...] &&
${FUNCNAME[0]} [-v varname value] | [-aN varname [value ...]] ...
Available OPTIONS:
-aN VARNAME [value ...] assign next N values to varname as array
-v VARNAME value assign single value to varname
--help display this help and exit
--version output version information and exit"
return 0 ;;
--version) echo "\
${FUNCNAME[0]}-0.9.dev
Copyright (C) 2010 Freddy Vulto
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law."
return 0 ;;
*)
echo "bash: ${FUNCNAME[0]}: $1: invalid option" 1>&2
return 1 ;;
esac
done
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment