Skip to content

Instantly share code, notes, and snippets.

@icio
Created March 19, 2026 19:28
Show Gist options
  • Select an option

  • Save icio/7e65e2643ee24deb8f2dd6d06aa99396 to your computer and use it in GitHub Desktop.

Select an option

Save icio/7e65e2643ee24deb8f2dd6d06aa99396 to your computer and use it in GitHub Desktop.
textblock
#!/bin/sh
# textblock - extract indentation-delimited (or pattern-delimited) blocks from text
set -e
# Print the command-line help text to stdout. Callers that report an error
# redirect it to stderr themselves.
usage() {
  printf '%s\n' \
    'Usage: textblock [OPTIONS] START [file ...]' \
    'Split text into blocks. A block begins on any line matching START and, by' \
    'default, continues on subsequent lines whose indentation is greater than' \
    'the start line. With -e, blocks instead continue until an end-pattern is' \
    'matched.' \
    'Required:' \
    'START Regex to identify the start of a block' \
    'Options:' \
    '-n Number each output block' \
    '-q Output each block on a single shell-quoted line' \
    '-g Do not start a new block while already inside one' \
    '-m PATTERN Only include blocks whose full text matches PATTERN' \
    '-s PATTERN Exclude blocks if any line matches PATTERN' \
    '-e PATTERN Keep block open until a line matches PATTERN' \
    '-h, --help Show this help'
}
# --- parse options ---
# Flags default to off (0). Pattern options default to the empty string,
# which the awk program below treats as "option not given".
opt_n=0
opt_q=0
opt_g=0
opt_m=""
opt_s=""
opt_e=""
while [ $# -gt 0 ]; do
  case "$1" in
    -h|--help) usage; exit 0 ;;
    -n) opt_n=1; shift ;;
    -q) opt_q=1; shift ;;
    -g) opt_g=1; shift ;;
    -m|-s|-e)
      # These options take a value. Without this guard a trailing "-m"
      # reaches "shift 2" with only one positional parameter left; the
      # shift fails and, under "set -e", the script dies without telling
      # the user what was wrong.
      if [ $# -lt 2 ]; then
        echo "textblock: option $1 requires an argument" >&2
        usage >&2
        exit 1
      fi
      case "$1" in
        -m) opt_m="$2" ;;
        -s) opt_s="$2" ;;
        -e) opt_e="$2" ;;
      esac
      shift 2
      ;;
    --) shift; break ;;
    -*) echo "textblock: unknown option: $1" >&2; usage >&2; exit 1 ;;
    *) break ;;
  esac
done
# The first non-option argument is the mandatory START regex.
if [ $# -lt 1 ]; then
  echo "textblock: missing START pattern" >&2
  usage >&2
  exit 1
fi
start_pat="$1"
shift
# remaining args are files (or empty => stdin)
#
# awk runs string escape processing on every "-v name=value" operand, so a
# regex such as "^\." or "\\" would be altered before the program ever sees
# it. Doubling each backslash first makes awk hand the pattern through
# exactly as the user typed it.
awk_escape() {
  printf '%s\n' "$1" | sed 's/\\/\\\\/g'
}
start_pat=$(awk_escape "$start_pat")
opt_m=$(awk_escape "$opt_m")
opt_s=$(awk_escape "$opt_s")
opt_e=$(awk_escape "$opt_e")
# exec replaces the shell, so the exit status of awk becomes the exit status
# of the script.
exec awk \
-v start_pat="$start_pat" \
-v opt_n="$opt_n" \
-v opt_q="$opt_q" \
-v opt_g="$opt_g" \
-v opt_m="$opt_m" \
-v opt_s="$opt_s" \
-v opt_e="$opt_e" \
'
# Return how many space or tab characters line starts with.
# Every such character counts as one, whether it is a space or a tab.
function indent_of(line, rest) {
    rest = line
    # Strip the leading whitespace run; the length difference is its size.
    sub(/^[ \t]+/, "", rest)
    return length(line) - length(rest)
}
# Return s wrapped in ANSI-C style shell quoting (dollar sign followed by a
# single-quoted body). Backslash, single quote, newline, tab and carriage
# return are written as backslash escapes; every other character is copied
# unchanged. An empty input yields an empty quoted string.
function shell_quote(s, quoted, pos, ch, total, apos, esc, piece) {
    apos = "\047"
    # Replacement text for each character that needs escaping.
    esc["\\"] = "\\\\"
    esc[apos] = "\\" apos
    esc["\n"] = "\\n"
    esc["\t"] = "\\t"
    esc["\r"] = "\\r"
    quoted = "$" apos
    total = length(s)
    pos = 1
    while (pos <= total) {
        ch = substr(s, pos, 1)
        piece = (ch in esc) ? esc[ch] : ch
        quoted = quoted piece
        pos++
    }
    return quoted apos
}
# Return the buffered lines blk[0] .. blk[blk_len-1] joined with newlines.
function join_block(   k, sep, joined) {
    joined = ""
    for (k = 0; k < blk_len; k++) {
        sep = (k > 0) ? "\n" : ""
        joined = joined sep blk[k]
    }
    return joined
}
# Emit the buffered block, if there is one and the -s / -m filters accept
# it, then clear the buffer. Output format:
#   -q  one shell-quoted line per block (takes precedence over -n)
#   -n  block number on the first line, a tab before every line
#   otherwise the lines as they were read
# Blocks are separated by a blank line except in -q mode.
function flush_block(   k, joined) {
    if (blk_len == 0) return

    # -s: a single matching line is enough to drop the whole block.
    if (opt_s != "") {
        k = 0
        while (k < blk_len) {
            if (blk[k] ~ opt_s) {
                reset_block()
                return
            }
            k++
        }
    }

    # The joined text is needed by both the -m filter and -q output.
    if (opt_m != "" || opt_q) joined = join_block()

    # -m: keep the block only when its full text matches.
    if (opt_m != "" && joined !~ opt_m) {
        reset_block()
        return
    }

    if (block_count > 0 && !opt_q) print ""
    block_count++

    if (opt_q) {
        print shell_quote(joined)
    } else {
        for (k = 0; k < blk_len; k++) {
            if (!opt_n)
                print blk[k]
            else if (k == 0)
                printf "%6d\t%s\n", block_count, blk[k]
            else
                printf "\t%s\n", blk[k]
        }
    }
    reset_block()
}
# Mark the parser as outside any block: the buffer length drops to zero
# (old blk entries are simply overwritten later) and the reference
# indentation goes back to its -1 sentinel.
function reset_block() {
    in_block = blk_len = 0
    base_indent = -1
}
# Initial state: no block printed yet, nothing buffered, not inside a block,
# and no reference indentation (-1 sentinel).
BEGIN {
    block_count = 0
    base_indent = -1
    in_block = blk_len = 0
}
# Start buffering a new block whose first line is first, indented by depth.
function begin_block(first, depth) {
    blk[0] = first
    blk_len = 1
    base_indent = depth
    in_block = 1
}
# Per-line state machine.
#   Outside a block: a line matching start_pat opens one.
#   Inside a block:  the line either closes the block, restarts it, or is
#                    appended to it.
{
    cur = $0
    depth = indent_of(cur)
    starts = (cur ~ start_pat)

    if (in_block) {
        # What closes a block depends on the mode: with -e it is a line
        # matching the end pattern, otherwise a line that is not indented
        # deeper than the line that opened the block.
        if (opt_e != "")
            closes = (cur ~ opt_e)
        else
            closes = (depth <= base_indent)

        if (closes || (opt_g == 0 && starts)) {
            # A closing line is never added to the block it closes, but it
            # may itself open the next block. Unless -g is given, a start
            # line met inside a block also cuts the current block short.
            flush_block()
            if (starts) begin_block(cur, depth)
        } else {
            blk[blk_len++] = cur
        }
        next
    }

    if (starts) begin_block(cur, depth)
}
# End of input: emit a block that was still open on the last line.
END { flush_block() }
' "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment