Created
March 19, 2026 19:28
-
-
Save icio/7e65e2643ee24deb8f2dd6d06aa99396 to your computer and use it in GitHub Desktop.
textblock
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# textblock - extract indentation-delimited (or pattern-delimited) blocks from text
#
# Stop at the first command that exits with a non-zero status.
set -o errexit
# usage - print the command-line help text on standard output.
# Takes no arguments. Callers that report an error redirect the output to
# stderr themselves (usage >&2).
usage() {
    cat <<'EOF'
Usage: textblock [OPTIONS] [--] START [file ...]

Split text into blocks. A block begins on any line matching START and, by
default, continues on subsequent lines whose indentation is greater than
the start line. With -e, blocks instead continue until an end-pattern is
matched. Input is read from the named files, or from standard input when
no file is given.

Required:
  START         Regex to identify the start of a block

Options:
  -n            Number each output block
  -q            Output each block on a single shell-quoted line
  -g            Do not start a new block while already inside one
  -m PATTERN    Only include blocks whose full text matches PATTERN
  -s PATTERN    Exclude blocks if any line matches PATTERN
  -e PATTERN    Keep block open until a line matches PATTERN
  -h, --help    Show this help
  --            End of options; the next argument is taken as START
EOF
}
# --- parse options ---
# Flags default to off and patterns to empty; an empty pattern means the
# corresponding filter or mode is disabled.
opt_n=0
opt_q=0
opt_g=0
opt_m=""
opt_s=""
opt_e=""

# need_arg OPTION ARGC
# Abort with a usage error unless at least two arguments remain (the option
# itself plus its value). Without this check a trailing "-m" would reach
# "shift 2" with a single argument left, which fails with a cryptic shell
# error (or silently, under errexit) instead of explaining the mistake.
need_arg() {
    if [ "$2" -lt 2 ]; then
        echo "textblock: option $1 requires an argument" >&2
        usage >&2
        exit 1
    fi
}

while [ $# -gt 0 ]; do
    case "$1" in
        -h|--help) usage; exit 0 ;;
        -n) opt_n=1; shift ;;
        -q) opt_q=1; shift ;;
        -g) opt_g=1; shift ;;
        -m) need_arg "$1" "$#"; opt_m="$2"; shift 2 ;;
        -s) need_arg "$1" "$#"; opt_s="$2"; shift 2 ;;
        -e) need_arg "$1" "$#"; opt_e="$2"; shift 2 ;;
        --) shift; break ;;
        -*) echo "textblock: unknown option: $1" >&2; usage >&2; exit 1 ;;
        *) break ;;
    esac
done

# The first non-option argument is the mandatory START pattern.
if [ $# -lt 1 ]; then
    echo "textblock: missing START pattern" >&2
    usage >&2
    exit 1
fi
start_pat="$1"
shift
| # remaining args are files (or empty => stdin) | |
| exec awk \ | |
| -v start_pat="$start_pat" \ | |
| -v opt_n="$opt_n" \ | |
| -v opt_q="$opt_q" \ | |
| -v opt_g="$opt_g" \ | |
| -v opt_m="$opt_m" \ | |
| -v opt_s="$opt_s" \ | |
| -v opt_e="$opt_e" \ | |
| ' | |
# Return how many space or tab characters line starts with. Each tab counts
# as one character; no tab-stop expansion is done.
function indent_of(line) {
    # The pattern also matches the empty prefix, so RLENGTH is never -1 here.
    match(line, /^[ \t]*/)
    return RLENGTH
}
# Render s as a single ANSI-C quoted shell word: a dollar sign, a single
# quote, the escaped text, and a closing single quote. Backslash and single
# quote are backslash-escaped; newline, tab and carriage return are written
# as \n, \t and \r; every other character is copied unchanged. An empty
# string produces an empty quoted word.
# Everything after s in the parameter list is a local.
function shell_quote(s,    esc, quote, body, pos, last, ch) {
    quote = "\047"

    # Characters that need rewriting, mapped to their escaped spelling.
    esc["\\"] = "\\\\"
    esc[quote] = "\\" quote
    esc["\n"] = "\\n"
    esc["\t"] = "\\t"
    esc["\r"] = "\\r"

    body = ""
    last = length(s)
    pos = 0
    while (++pos <= last) {
        ch = substr(s, pos, 1)
        body = body ((ch in esc) ? esc[ch] : ch)
    }
    return "$" quote body quote
}
# Emit the block currently held in blk[0 .. blk_len-1], if any, then clear it.
#
# Filters run first: with -s (opt_s) the block is dropped when any one of its
# lines matches; with -m (opt_m) it is dropped unless the newline-joined text
# matches. A block that survives is counted in block_count and printed:
#   default  the lines as they are
#   -n       block number and a tab before the first line, a tab before
#            each following line
#   -q       the whole block as one shell-quoted line
#   -n -q    block number, a tab, then the shell-quoted line (previously -n
#            was silently ignored whenever -q was given)
# Without -q, successive blocks are separated by one blank line.
# i, text and need_text are locals.
function flush_block(    i, text, need_text) {
    if (blk_len == 0) return

    # -s: exclude if any line matches
    if (opt_s != "") {
        for (i = 0; i < blk_len; i++) {
            if (blk[i] ~ opt_s) {
                reset_block()
                return
            }
        }
    }

    # Both -m and -q work on the joined text; build it at most once.
    need_text = (opt_m != "" || opt_q)
    if (need_text) {
        text = blk[0]
        for (i = 1; i < blk_len; i++) text = text "\n" blk[i]
    }

    # -m: include only if the full block text matches
    if (opt_m != "" && text !~ opt_m) {
        reset_block()
        return
    }

    block_count++

    if (opt_q) {
        if (opt_n) printf "%6d\t", block_count
        print shell_quote(text)
    } else {
        # Blank separator line before every block except the first.
        if (block_count > 1) printf "\n"
        if (opt_n) {
            printf "%6d\t%s\n", block_count, blk[0]
            for (i = 1; i < blk_len; i++) printf "\t%s\n", blk[i]
        } else {
            for (i = 0; i < blk_len; i++) print blk[i]
        }
    }

    reset_block()
}
# Return the collector to its idle state: no open block, no buffered lines,
# and -1 as the base indentation. Entries left in blk are not deleted;
# blk_len alone decides how many of them count.
function reset_block() {
    in_block = blk_len = 0
    base_indent = -1
}
# Initial state: idle collector, empty buffer, no blocks printed so far.
BEGIN {
    block_count = blk_len = in_block = 0
    base_indent = -1
}
# Start collecting a new block whose first line is text; indent is the
# indentation of that line, against which later lines are compared.
function open_block(text, indent) {
    blk[0] = text
    blk_len = 1
    base_indent = indent
    in_block = 1
}

# Per-line driver.
#
# While a block is open:
#   - end-pattern mode (opt_e set): a line matching opt_e closes the block
#     and is not added to it, though it may itself open the next block;
#     any other line is appended, unless it matches start_pat and -g is
#     off, in which case it closes the block and opens a new one.
#   - indent mode: a line indented deeper than the opening line is appended
#     (or, when it matches start_pat and -g is off, closes the block and
#     opens a new one); any other line closes the block and is then
#     considered as a possible block start.
# While no block is open, a line matching start_pat opens one.
{
    line = $0
    depth = indent_of(line)
    is_start = (line ~ start_pat)

    if (in_block && opt_e != "") {
        if (line ~ opt_e) {
            flush_block()
            if (is_start) open_block(line, depth)
        } else if (is_start && opt_g == 0) {
            flush_block()
            open_block(line, depth)
        } else {
            blk[blk_len++] = line
        }
        next
    }

    if (in_block && depth > base_indent) {
        if (is_start && opt_g == 0) {
            flush_block()
            open_block(line, depth)
        } else {
            blk[blk_len++] = line
        }
        next
    }

    # Either nothing is open, or this line is not indented deeper than the
    # opening line of the open block, which ends that block.
    if (in_block) flush_block()
    if (is_start) open_block(line, depth)
}
# At end of input, emit whatever block is still being collected.
END { flush_block() }
' "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment