-
-
Save alganet/4dfd501a3377a60f7825901114d65c77 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # ISC License | |
| # Copyright (c) 2026 Alexandre Gomes Gaigalas <alganet@gmail.com> | |
| # Permission to use, copy, modify, and/or distribute this software for any | |
| # purpose with or without fee is hereby granted, provided that the above | |
| # copyright notice and this permission notice appear in all copies. | |
| # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
| # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
| # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
| # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| use ast_core | |
| use bnf_parser | |
| use str_core | |
| use str_esc | |
| use io_readall | |
| use ds_list | |
| use codegen_core | |
| # ============================================================ | |
| # Global Variables | |
| # ============================================================ | |
| # Phase 1 outputs (set by _bnf_gen_p1): | |
| # _GN grammar name _GP state code prefix | |
| # _GW whitespace mode _GR rule names (space-sep) | |
| # _G1 root rule name _GDC document root state code | |
| # _GST all state codes (space-sep) _GSN state code counter | |
| # _GNUM number rule name _GVALNUM number validation flag | |
| # _GSTRICT strict mode flag _GCMT comment start char | |
| # _GEXTERN extern rule names _GKW_RULE keyword ident rule | |
| # _GKW_LIST keyword list (uppercase) | |
| # _GPREC_RULE precedence rule _GPREC_N operator count | |
| # _GPREC_RULE_CODE prec rule state code _GPREC_BIN binary op state code | |
| # _GPOST_N postfix op count _GUNARY_N unary op count | |
| # _GSTR_N string accumulator count | |
| # _GTERNARY ternary enabled flag | |
| # _GTERNARY_OPEN/SEP/PREC/STATE/COLON/DONE ternary config | |
| # | |
| # Phase 1 dynamic arrays (eval-indexed by rule name or number): | |
| # _RCODE_<rule> state code per rule | |
| # _RNODE_<rule> AST node ID per rule | |
| # _FIRST_<rule> FIRST set chars per rule | |
| # _GPREC_O/P/A_<i> operator/prec/assoc (i=1.._GPREC_N) | |
| # _GUNARY_O/P_<i> unary op/prec (i=1.._GUNARY_N) | |
| # _GPOST_OPEN/INNER/CLOSE/STATE/CONT_<i> postfix config (i=1.._GPOST_N) | |
| # _GSTR_RULE/CLOSE/ESC/CODE_<i> string config (i=1.._GSTR_N) | |
| # _STR_GLOBVAR_<i> string glob var (i=1.._GSTR_N) | |
| # | |
| # Phase 2 outputs (set by _bnf_gen_p2): | |
| # _DA_CODES char codes for dispatch iteration | |
| # _DA_<code> "state=action" entries per char code | |
| # _DA_RAW_<code> raw char per code | |
| # _DA_CLASS "pattern=state=action" for char classes | |
| # _DA_WILD "state=action" for wildcard dispatch | |
| # _DA_INHERIT "target=source" for state inheritance | |
| # _GALT auto-close states (pipe-sep) | |
| # _GDONE done states (space-sep) | |
| # _GNOCOL no-collapse done states | |
| # _ACCUM_<code> accumulator type:value | |
| # _CLOSESEQ_<code> close sequences | |
| # _EXP_<code> expected-token strings | |
| # _GLOB_VARS glob var names | |
| # _GLOB_PAT_<code> glob patterns | |
| # _GNUM_CODE number rule state code | |
| # | |
| # Emit-phase shared (set in gen_bnf): | |
| # _rc document root state code | |
| # _tok token list for ast_tokens | |
| # | |
| # Cross-function return values: | |
| # REPLY standard return convention | |
| # _done_state set by _bnf_gen_alloc_done | |
| # _tq_* template queue (set by _bnf_gen_tq_build, read by _bnf_gen_emit_seq) | |
# Resolve a state code to the rule that owns it.
#   $1     state code to look up
#   REPLY  name of the first rule in $_GR whose _RCODE_<rule> equals $1,
#          or empty when no rule matches
# The loop variable carries a function-specific prefix and is declared local,
# so a caller that is itself iterating with `_r` is not overwritten by the
# lookup.
_bnf_gen_code_to_name () {
  local _c2n_rule
  REPLY=
  for _c2n_rule in $_GR; do
    # The comparison runs inside eval because the variable name is built
    # from the rule name; `return` leaves the function on the first hit.
    eval "case \"\$_RCODE_$_c2n_rule\" in \"\$1\") REPLY=\$_c2n_rule; return;; esac"
  done
}
# Check whether $1 is a multi-character keyword from the #!keywords list.
#   $1     candidate terminal text
#   REPLY  1 when $1 is longer than one character and appears as a whole
#          word in the space-separated $_GKW_LIST; empty otherwise
# Both sides are padded with spaces so that only complete list entries match
# (a plain substring test would report IN as a keyword when the list holds
# INSERT). An empty argument is never a keyword.
_bnf_gen_is_kw () {
  REPLY=
  case ${#1} in 0|1) return 0;; esac
  case " $_GKW_LIST " in *" $1 "*) REPLY=1;; esac
}
| # ============================================================ | |
| # Phase 1: Grammar Extraction & State Allocation | |
| # ============================================================ | |
# Allocate the next state code and bind it to a name.
#   $1  rule (or synthetic) name; the code is stored in _RCODE_<name>
# Reads _GP (prefix) and _GSN (allocation counter). Increments _GSN and
# appends the new code to _GST. The code is the prefix stem followed by a
# letter a..z for the first 26 allocations, then by the number (_GSN - 26).
_bnf_gen_code () {
  local _gc_name="$1" _gc_stem _gc_code

  # A one-character prefix is used whole; any other length drops its
  # last character.
  if [ "${#_GP}" -eq 1 ]; then
    _gc_stem="$_GP"
  else
    _gc_stem="${_GP%?}"
  fi

  _GSN=$((_GSN + 1))
  if [ "$_GSN" -gt 26 ]; then
    _gc_code="${_gc_stem}$((_GSN - 26))"
  else
    # Index the alphabet through the positional parameters ($1 is already
    # saved in _gc_name).
    set -- a b c d e f g h i j k l m n o p q r s t u v w x y z
    eval "_gc_code=\"\$_gc_stem\${$_GSN}\""
  fi

  eval "_RCODE_$_gc_name=\$_gc_code"
  _GST="$_GST $_gc_code"
}
| # (Unary state codes allocated on demand if #!unary directives used) | |
| # ============================================================ | |
| # Phase 1c: Compute FIRST sets (fixpoint) | |
| # ============================================================ | |
# Compute the FIRST-set string for one AST node without recursion.
#   $1     node ID to start from
#   REPLY  concatenation, in visiting order, of:
#            - the first character of each terminal reached (Bt)
#            - the currently stored _FIRST_<rule> of each reference (Bi)
#            - "[<value>]" for a character class (Bc), or "W" when the
#              class value starts with '^'
# A space-separated worklist of node IDs stands in for the call stack.
# Variables use the _gf_ prefix to avoid conflicts with callers on ksh93,
# where local does not scope in POSIX functions.
# NOTE(review): a terminal whose text starts with "W" or "[" contributes the
# same character that is used as the wildcard / class marker here — confirm
# that consumers of the FIRST string can tell them apart.
_bnf_gen_first_node () {
  local _gf_n=$1 _gf_kind _gf_val _gf_acc _gf_todo _gf_bq
  _gf_todo="$_gf_n" _gf_acc=

  while [ -n "$_gf_todo" ]; do
    # Pop the head of the worklist.
    _gf_n="${_gf_todo%% *}"
    case "$_gf_todo" in
      *' '*) _gf_todo="${_gf_todo#* }";;
      *)     _gf_todo=;;
    esac

    eval "_gf_kind=\"\${X$_gf_n%% *}\"; _gf_val=\"\${V$_gf_n:-}\""
    case "$_gf_kind" in
      Bt)
        # Keep only the first character of the terminal text.
        _gf_acc="$_gf_acc${_gf_val%"${_gf_val#?}"}";;
      Bi)
        eval "_gf_acc=\"\$_gf_acc\${_FIRST_$_gf_val:-}\"";;
      Bc)
        case "$_gf_val" in
          '^'*) _gf_acc="${_gf_acc}W";;
          *)    _gf_acc="${_gf_acc}[${_gf_val}]";;
        esac;;
      Ba)
        # Alternation: every child goes to the front of the worklist.
        eval "set -- \$X$_gf_n"; shift
        _gf_todo="$*${_gf_todo:+ $_gf_todo}";;
      Bq)
        # Sequence: queue children up to and including the first one that
        # is not Bo/Bm/Be (each new child is placed before the earlier ones).
        eval "set -- \$X$_gf_n"; shift
        _gf_bq=
        for _gf_ch in "$@"; do
          eval "_gf_ct=\"\${X$_gf_ch%% *}\""
          _gf_bq="$_gf_ch${_gf_bq:+ $_gf_bq}"
          case "$_gf_ct" in
            Bo|Bm|Be) ;;
            *) break;;
          esac
        done
        _gf_todo="$_gf_bq${_gf_todo:+ $_gf_todo}";;
      Bo|Bm|Bs|Bk|Bp|Be)
        # Single-child wrappers: continue with the first child.
        eval "set -- \$X$_gf_n"; shift
        _gf_todo="$1${_gf_todo:+ $_gf_todo}";;
    esac
  done

  REPLY="$_gf_acc"
}
# Phase 1: read directives and rules from the parsed BNF (root node X0),
# synthesize rules for #!unary operators, allocate state codes, and compute
# FIRST sets by fixpoint iteration.
#
# Outputs (globals):
#   _GN          grammar name (#!name)
#   _GP          state code prefix (#!prefix)
#   _GW          whitespace mode (#!whitespace)
#   _GR          space-separated rule names
#   _G1          first (root) rule name
#   _GNUM        number accumulator rule (#!number)
#   _GVALNUM     1 if #!validate was given
#   _GSTRICT     1 if #!strict was given
#   _GCMT / _GCMT_END  comment start / end (#!comment <start> [<end>])
#   _GPREC_RULE  precedence rule (#!precedence <rule> <op> <prec> <assoc> ...)
#   _GPREC_N     number of operators; _GPREC_O/P/A_<i> per operator
#   _GKW_RULE    identifier rule for keywords (#!keywords <rule> <kw> ...)
#   _GKW_LIST    space-separated keyword list
#   _GKW_CASE    value of #!keywords_case
#   _GPOST_N     number of postfix operators (#!postfix); _GPOST_*_<i>
#   _GUNARY_N    number of unary operators (#!unary); _GUNARY_O/P_<i>
#   _GTERNARY*   ternary configuration (#!ternary)
#   _GEXTERN     space-separated extern rules (#!extern)
#   _GSTR_N      number of string accumulators (#!string); _GSTR_*_<i>
#   _RNODE_<rule>, _RCODE_<rule>, _FIRST_<rule>, _GDC, _GST, _GSN
_bnf_gen_p1 () {
  local _ch _t _v _r _rn _str_rule _str_close _si _pi
  local _pop _pprec _passoc _uop _uprec _uname _post_open _post_inner
  local _nid _xt _max_nid _name _node _syn_id _prec_node _body_t _atom_rule
  local _atom_node _atom_body _atom_body_t _bt_id _bi_id _bq_id _br_id _ba_id _ref_id
  local _code _old _changed _pass _p

  # ============================================================
  # Phase 1: Extract grammar info from AST
  # ============================================================
  _GN=generated _GP=Gn _GW=token _GR= _G1= _GNUM= _GVALNUM= _GSTRICT= _GCMT= _GCMT_END= _GEXTERN=
  _GPREC_RULE= _GPREC_N=0 _GPREC_RULE_CODE= _GPREC_BIN=
  _GKW_RULE= _GKW_LIST= _GKW_CASE=
  _GPOST_N=0    # postfix operator count
  _GUNARY_N=0   # unary prefix operator count
  _GTERNARY= _GTERNARY_OPEN= _GTERNARY_SEP= _GTERNARY_PREC=
  _GSTR_N=0     # string accumulator count

  eval "set -- \$X0"; shift
  for _ch in "$@"; do
    eval "_t=\"\${X$_ch%% *}\"; _v=\"\${V$_ch:-}\""
    case "$_t" in
      Bd)
        case "$_v" in
          name*) _GN="${_v#name }";;
          prefix*) _GP="${_v#prefix }";;
          whitespace*) _GW="${_v#whitespace }";;
          string*)  # #!string <rule> [<close> [escape]]
            _v="${_v#string }"
            _GSTR_N=$((_GSTR_N + 1))
            _str_rule="${_v%% *}"; _v="${_v#"$_str_rule"}"; _v="${_v# }"
            eval "_GSTR_RULE_$_GSTR_N=\"\$_str_rule\""
            case "$_v" in
              '')  # No args: backward compat (close=", escape=\, JSON mode)
                eval "_GSTR_CLOSE_$_GSTR_N='\"'"
                eval "_GSTR_ESC_$_GSTR_N=json";;
              *)
                _str_close="${_v%% *}"; _v="${_v#"$_str_close"}"; _v="${_v# }"
                eval "_GSTR_CLOSE_$_GSTR_N=\"\$_str_close\""
                case "$_v" in
                  '') eval "_GSTR_ESC_$_GSTR_N=";;
                  *) eval "_GSTR_ESC_$_GSTR_N=simple";;
                esac;;
            esac
            ;;
          number*) _GNUM="${_v#number }";;    # number accumulator rule name
          validate*) _GVALNUM=1;;             # enable number validation
          strict*) _GSTRICT=1;;               # trailing comma + colon enforcement
          comment*)
            _v="${_v#comment }"
            _GCMT="${_v%% *}"
            case "$_v" in *" "*) _GCMT_END="${_v#* }";; *) _GCMT_END=;; esac;;
          extern*) _GEXTERN="$_GEXTERN ${_v#extern }";;   # externally-implemented rules
          keywords_case*) _GKW_CASE="${_v#keywords_case }";;
          keywords*)  # #!keywords <ident_rule> <kw1> <kw2> ...
            _v="${_v#keywords }"
            # Strip the rule name and the separating space in two steps so
            # that a directive with no keywords leaves an empty list instead
            # of the rule name.
            _GKW_RULE="${_v%% *}"; _v="${_v#"$_GKW_RULE"}"; _v="${_v# }"
            _GKW_LIST="$_v";;
          precedence*)  # #!precedence <rule> <op> <prec> <assoc> [<op> <prec> <assoc> ...]
            _v="${_v#precedence }"
            # Two-step strip, as above: no operators means an empty tail.
            _GPREC_RULE="${_v%% *}"; _v="${_v#"$_GPREC_RULE"}"; _v="${_v# }"
            # Parse operator triples: op prec assoc
            while test ${#_v} -gt 0; do
              _pop="${_v%% *}"; _v="${_v#"$_pop"}"; _v="${_v# }"
              case "$_pop" in '') break;; esac
              _pprec="${_v%% *}"; _v="${_v#"$_pprec"}"; _v="${_v# }"
              _passoc="${_v%% *}"; _v="${_v#"$_passoc"}"; _v="${_v# }"
              _GPREC_N=$((_GPREC_N + 1))
              # Values are expanded inside the eval, not spliced into its
              # text, so directive content is never re-parsed as shell code.
              eval "_GPREC_P_$_GPREC_N=\$_pprec"
              eval "_GPREC_A_$_GPREC_N=\$_passoc"
              eval "_GPREC_O_$_GPREC_N=\"\$_pop\""
            done;;
          postfix*)  # #!postfix <prec-rule> <open> [<inner-rule> [<close>]]
            _v="${_v#postfix }"
            # Skip <prec-rule> (already stored in _GPREC_RULE)
            _v="${_v#"${_v%% *}" }"
            _GPOST_N=$((_GPOST_N + 1))
            _post_open="${_v%% *}"; _v="${_v#"$_post_open"}"; _v="${_v# }"
            eval "_GPOST_OPEN_$_GPOST_N=\"\$_post_open\""
            _post_inner="${_v%% *}"; _v="${_v#"$_post_inner"}"; _v="${_v# }"
            eval "_GPOST_INNER_$_GPOST_N=\"\$_post_inner\""
            eval "_GPOST_CLOSE_$_GPOST_N=\"\$_v\"";;
          ternary*)  # #!ternary <prec-rule> <open> <sep> <prec>
            _v="${_v#ternary }"
            _v="${_v#"${_v%% *}" }"   # Skip <prec-rule>
            _GTERNARY_OPEN="${_v%% *}"; _v="${_v#"$_GTERNARY_OPEN" }"
            _GTERNARY_SEP="${_v%% *}"; _v="${_v#"$_GTERNARY_SEP" }"
            _GTERNARY_PREC="${_v%% *}"
            _GTERNARY=1;;
          unary*)  # #!unary <prec-rule> <op> <prec>
            _v="${_v#unary }"
            _v="${_v#"${_v%% *}" }"   # Skip <prec-rule>
            _GUNARY_N=$((_GUNARY_N + 1))
            _uop="${_v%% *}"; _v="${_v#"$_uop"}"; _v="${_v# }"
            _uprec="${_v%% *}"
            eval "_GUNARY_O_$_GUNARY_N=\"\$_uop\""
            eval "_GUNARY_P_$_GUNARY_N=\"\$_uprec\"";;
        esac;;
      Br)
        _GR="$_GR $_v"
        case "$_G1" in '') _G1="$_v";; esac
        eval "_RNODE_$_v=$_ch";;
    esac
  done

  # ---- Synthesize unary prefix operator rules ----
  # For each #!unary directive, create a grammar rule: _unary_N = 'OP' <atom> ;
  # and prepend it to the atom alternation (the precedence rule's body).
  case "$_GUNARY_N" in 0) ;; *)
    # Find the atom rule: the precedence rule's body is 'expr = atom ;'
    # where atom is a Bi reference. Find the atom rule's name.
    eval "_prec_node=\$_RNODE_$_GPREC_RULE"
    eval "set -- \$X$_prec_node"; shift   # Br children
    eval "_body_t=\"\${X$1%% *}\""
    _atom_rule=
    case "$_body_t" in
      Bi) eval "_atom_rule=\"\${V$1:-}\"";;   # expr = atom ;
      Bq) eval "set -- \$X$1"; shift          # Bq children
          eval "_atom_rule=\"\${V$1:-}\"";;   # first child
    esac
    case "$_atom_rule" in ?*)
      # Find max node ID for synthesizing new nodes
      _max_nid=0
      for _r in $_GR; do
        eval "_rn=\$_RNODE_$_r"
        case $((_rn > _max_nid)) in 1) _max_nid=$_rn;; esac
      done
      # Walk all X nodes to find actual max. The upper bound moves with
      # _max_nid, so the scan stops after 200 consecutive unset IDs.
      _nid=0
      while test $_nid -le $((_max_nid + 200)); do
        eval "_xt=\"\${X$_nid:-}\""
        case "$_xt" in ?*) case $((_nid > _max_nid)) in 1) _max_nid=$_nid;; esac;; esac
        _nid=$((_nid + 1))
      done
      _syn_id=$((_max_nid + 1))
      # Find the atom rule's body (should be Ba alternation or single ref)
      eval "_atom_node=\$_RNODE_$_atom_rule"
      eval "set -- \$X$_atom_node"; shift   # Br children
      _atom_body=$1
      eval "_atom_body_t=\"\${X$_atom_body%% *}\""
      # For each unary operator, synthesize a rule and add to alternation
      _pi=1
      while test $_pi -le $_GUNARY_N; do
        eval "_uop=\"\$_GUNARY_O_$_pi\""
        _uname="_unary_$_pi"
        # Create nodes: Bt(op), Bi(atom), Bq(Bt Bi), Br(Bq), Bi(ref to new rule)
        _bt_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _bi_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _bq_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _br_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _ref_id=$_syn_id; _syn_id=$((_syn_id + 1))
        eval "V$_bt_id=\"\$_uop\"; X$_bt_id=Bt"
        eval "V$_bi_id=\"\$_atom_rule\"; X$_bi_id=Bi"
        eval "X$_bq_id=\"Bq $_bt_id $_bi_id\""
        eval "V$_br_id=\"\$_uname\"; X$_br_id=\"Br $_bq_id\""
        eval "V$_ref_id=\"\$_uname\"; X$_ref_id=Bi"
        # Register the rule
        _GR="$_GR $_uname"
        eval "_RNODE_$_uname=$_br_id"
        # Add reference to atom alternation
        case "$_atom_body_t" in
          Ba)  # Prepend to existing alternation
            eval "X$_atom_body=\"Ba $_ref_id \${X$_atom_body#Ba }\"";;
          *)   # Single ref: wrap in alternation
            _ba_id=$_syn_id; _syn_id=$((_syn_id + 1))
            eval "X$_ba_id=\"Ba $_ref_id $_atom_body\""
            eval "X$_atom_node=\"Br $_ba_id\""
            _atom_body=$_ba_id; _atom_body_t=Ba;;
        esac
        _pi=$((_pi + 1))
      done;;
    esac;;
  esac

  # ============================================================
  # Phase 1b: Assign state codes
  # ============================================================
  _GST= _GSN=0
  # Assign a document root state code first
  _bnf_gen_code "_doc_"
  eval "_GDC=\$_RCODE__doc_"   # document root code
  for _r in $_GR; do _bnf_gen_code "$_r"; done

  # Allocate binary operator state for precedence climbing
  _GPREC_BIN=
  case "$_GPREC_RULE" in ?*)
    _bnf_gen_code "_binop_"
    eval "_GPREC_BIN=\$_RCODE__binop_"
    eval "_GPREC_RULE_CODE=\$_RCODE_$_GPREC_RULE"
    ;; esac

  # Allocate state codes for postfix operators
  _pi=1
  while test $_pi -le $_GPOST_N; do
    _bnf_gen_code "_post_$_pi"
    eval "_GPOST_STATE_$_pi=\$_RCODE__post_$_pi"
    eval "_post_close=\"\${_GPOST_CLOSE_$_pi:-}\""
    case "$_post_close" in ?*)
      # Bracket postfix: allocate continuation state for close token
      _bnf_gen_code "_postc_$_pi"
      eval "_GPOST_CONT_$_pi=\$_RCODE__postc_$_pi";;
    esac
    _pi=$((_pi + 1))
  done

  # Allocate state codes for ternary operator
  _GTERNARY_STATE= _GTERNARY_COLON= _GTERNARY_DONE=
  case "$_GTERNARY" in 1)
    _bnf_gen_code "_ternary_"
    eval "_GTERNARY_STATE=\$_RCODE__ternary_"
    _bnf_gen_code "_terncol_"
    eval "_GTERNARY_COLON=\$_RCODE__terncol_"
    _bnf_gen_code "_terndone_"
    eval "_GTERNARY_DONE=\$_RCODE__terndone_"
    ;; esac

  # ============================================================
  # Phase 1c: FIRST sets
  # ============================================================
  # Seed FIRST sets and build skip list for accumulator rules (fixed FIRST sets)
  _GSTR_SKIP_LIST=
  _si=1
  while test $_si -le $_GSTR_N; do
    eval "_str_rule=\"\$_GSTR_RULE_$_si\""
    eval "_str_close=\"\$_GSTR_CLOSE_$_si\""
    eval "_FIRST_$_str_rule=\"\$_str_close\""
    _GSTR_SKIP_LIST="$_GSTR_SKIP_LIST|$_str_rule"
    _si=$((_si + 1))
  done
  case "$_GNUM" in ?*) eval "_FIRST_$_GNUM='0123456789-'";; esac

  # Recompute every rule's FIRST set until a full pass changes nothing,
  # giving up after 20 passes.
  _changed=1 _pass=0
  while test $_changed -eq 1; do
    _changed=0; _pass=$((_pass + 1))
    for _r in $_GR; do
      # Skip accumulator rules (FIRST is fixed)
      case "$_GNUM" in "$_r") continue;; esac
      # A trailing '|' is appended for the comparison so that only whole
      # names match; otherwise a rule whose name is a prefix of a string
      # rule (str vs string) would be skipped as well.
      case "$_GSTR_SKIP_LIST|" in *"|$_r|"*) continue;; esac
      eval "_node=\$_RNODE_$_r"
      eval "set -- \$X$_node"; shift
      _bnf_gen_first_node "$1"
      eval "_old=\"\${_FIRST_$_r:-}\""
      case "$_old" in "$REPLY") ;; *)
        eval "_FIRST_$_r=\$REPLY"; _changed=1;; esac
    done
    case $_pass in 20) break;; esac
  done
} # end _bnf_gen_p1
| # ============================================================ | |
| # Phase 2: Dispatch Table Construction | |
| # ============================================================ | |
| # -- Dispatch entry primitives -- | |
# Add one dispatch entry.
#   $1  character, or a "[...]" class pattern
#   $2  state code
#   $3  action
# Class patterns are appended to _DA_CLASS as "pattern=state=action".
# Anything else is keyed by the character code from _cg_char_code:
#   _DA_<code>      space-separated "state=action" entries
#   _DA_CODES       list of codes seen so far (via _DA_SEEN_<code>)
#   _DA_RAW_<code>  the raw character for that code
_bnf_gen_da () {
  case "$1" in
    '['*']')  # Class pattern (has matching ]): store separately
      _DA_CLASS="$_DA_CLASS $1=$2=$3"
      return;;
  esac
  local _cc
  _cg_char_code "$1"; _cc=$REPLY
  # Deduplicate by (char, state, action) — exact match only. The stored list
  # gets a trailing space for the comparison so an entry delimits on both
  # sides; without it "Ga=PUSH=G1" would be dropped as a duplicate of an
  # existing "Ga=PUSH=G10".
  eval "case \"\${_DA_$_cc:-} \" in *\" \$2=\$3 \"*) return;; esac"
  eval "_DA_$_cc=\"\${_DA_$_cc:-} \$2=\$3\""
  # Track unique chars via code-based list (avoids glob issues with [ etc.)
  eval "case \"\${_DA_SEEN_$_cc:-}\" in '') _DA_CODES=\"\$_DA_CODES \$_cc\"; _DA_SEEN_$_cc=1; _DA_RAW_$_cc=\$1;; esac"
  # For keyword actions: also add lowercase first-char entry
  case "$3" in SKIP_KW_CONT=*|SKIP_KW_CLOSE=*|OPEN_KW=*)
    case "$1" in [A-Z])
      _lcase "$1"
      case "$REPLY" in ?*) _bnf_gen_da "$REPLY" "$2" "$3";; esac;;
    esac;;
  esac
}
# Append a state code to _GALT (pipe-separated) unless it is already listed.
#   $1  state code
# The list is wrapped in '|' for the membership test so that only a whole
# entry counts as present; a substring test would treat G1 as already listed
# once G10 is in the list. An empty argument is ignored.
_bnf_gen_galt_add () {
  case "$1" in '') return;; esac
  case "|$_GALT|" in *"|$1|"*) return;; esac
  _GALT="${_GALT:+$_GALT|}$1"
}
# Allocate a fresh "done" state.
#   $1  name under which the state code is allocated (see _bnf_gen_code)
# Sets _done_state to the new code, registers it through _bnf_gen_galt_add
# and appends it to the space-separated _GDONE list.
_bnf_gen_alloc_done () {
  _bnf_gen_code "$1"
  eval "_done_state=\"\${_RCODE_$1}\""
  _bnf_gen_galt_add "${_done_state}"
  _GDONE="${_GDONE} ${_done_state}"
}
| # Build expected-token strings per state and propagate inherited entries. | |
# Helper: add a readable token name to a state's _EXP_ string (dedup).
#   $1  state code (selects _EXP_<code>)
#   $2  token text; an empty token is ignored
# Tokens are joined with " or ". The membership test wraps both the stored
# string and the candidate in that separator so only a whole token counts as
# a duplicate; a substring test would drop "=" once "==" had been recorded.
_bnf_gen_exp_add () {
  local _ea_cur
  case "$2" in '') return;; esac
  eval "_ea_cur=\"\${_EXP_$1:-}\""
  case " or $_ea_cur or " in *" or $2 or "*) return;; esac
  case "$_ea_cur" in
    '') eval "_EXP_$1=\"\$2\"";;
    *)  eval "_EXP_$1=\"\$_ea_cur or \$2\"";;
  esac
}
# Copy all dispatch entries from one state to another.
#   $1  source state code
#   $2  destination state code
# Three tables are covered:
#   - per-character entries (_DA_<code>), re-added through _bnf_gen_da
#   - class entries in _DA_CLASS ("pattern=state=action")
#   - wildcard entries in _DA_WILD ("state=action")
# Each list is expanded once when its loop starts, so entries appended during
# the copy are not themselves revisited.
_bnf_gen_copy_dispatch () {
  local _cd_src="$1" _cd_dst="$2" _cd_cc _cd_raw _cd_list _cd_item
  local _cd_pat _cd_rest

  for _cd_cc in $_DA_CODES; do
    eval "_cd_raw=\"\$_DA_RAW_$_cd_cc\""
    eval "_cd_list=\"\${_DA_$_cd_cc:-}\""
    for _cd_item in $_cd_list; do
      case "${_cd_item%%=*}" in "$_cd_src")
        _bnf_gen_da "$_cd_raw" "$_cd_dst" "${_cd_item#*=}";;
      esac
    done
  done

  for _cd_item in $_DA_CLASS; do
    # Split at the "]=" that ends the class pattern rather than at the
    # first '=', so a class that itself contains '=' (e.g. [=<]) is still
    # parsed as pattern / state / action.
    case "$_cd_item" in
      *']='*) _cd_pat="${_cd_item%%]=*}]"; _cd_rest="${_cd_item#*]=}";;
      *)      _cd_pat="${_cd_item%%=*}";   _cd_rest="${_cd_item#*=}";;
    esac
    case "${_cd_rest%%=*}" in "$_cd_src")
      _DA_CLASS="$_DA_CLASS $_cd_pat=$_cd_dst=${_cd_rest#*=}";;
    esac
  done

  for _cd_item in $_DA_WILD; do
    case "${_cd_item%%=*}" in "$_cd_src")
      _DA_WILD="$_DA_WILD $_cd_dst=${_cd_item#*=}";;
    esac
  done
}
| # -- Rule walking -- | |
# Report the literal terminal a sequence rule opens with (for OPEN actions).
#   $1     rule name
#   REPLY  the value of the first child when the rule body is a Bq node
#          whose first child is a Bt node; empty in every other case
_bnf_gen_seq_opening () {
  local _so_rule="$1" _so_rnode _so_kind _so_body _so_head
  REPLY=

  # Rule node -> body node (first child of the rule node).
  eval "_so_rnode=\$_RNODE_$_so_rule"
  eval "set -- \$X$_so_rnode"; shift; _so_body=$1
  eval "_so_kind=\"\${X$_so_body%% *}\""
  case "$_so_kind" in
    Bq) ;;
    *) return;;
  esac

  # Body is a sequence: look at its first child.
  eval "set -- \$X$_so_body"; shift; _so_head=$1
  eval "_so_kind=\"\${X$_so_head%% *}\""
  case "$_so_kind" in
    Bt) eval "REPLY=\"\${V$_so_head:-}\"";;
  esac
}
# Add dispatch entries for every element of a FIRST-set string.
#   $1  state code the entries belong to
#   $2  FIRST-set string: single characters, "[...]" class patterns, and "W"
#       (wildcard)
#   $3  action
# When a keyword list is active and the action is OPEN=<ref> or
# OPEN_CONT=<ref>=<next>, and the referenced rule opens with a keyword
# terminal, the action is rewritten to OPEN_KWSKIP=<ref>=<kw> or
# OPEN_KWSKIP_CONT=<ref>=<kw>=<next>. The keyword test is delegated to
# _bnf_gen_is_kw so that there is a single definition of "is a keyword";
# a rule with no opening terminal is never upgraded.
_bnf_gen_add_first_entries () {
  local _rcode="$1" _fc="$2" _action="$3" _c _afe_ref _afe_suffix _afe_kw _rest _cls

  # Auto-upgrade OPEN/OPEN_CONT to OPEN_KWSKIP/OPEN_KWSKIP_CONT for
  # keyword-starting sequences.
  case "$_GKW_LIST" in ?*)
    _afe_ref=; _afe_suffix=
    case "$_action" in
      OPEN=*) _afe_ref="${_action#OPEN=}";;
      OPEN_CONT=*)
        _rest="${_action#OPEN_CONT=}"
        _afe_ref="${_rest%%=*}"; _afe_suffix="=${_rest#*=}";;
    esac
    case "$_afe_ref" in ?*)
      _bnf_gen_code_to_name "$_afe_ref"
      case "$REPLY" in ?*)
        _bnf_gen_seq_opening "$REPLY"
        # Save the terminal: _bnf_gen_is_kw reuses REPLY for its answer.
        _afe_kw="$REPLY"
        case "$_afe_kw" in ?*)
          _bnf_gen_is_kw "$_afe_kw"
          case "$REPLY" in 1)
            case "$_afe_suffix" in
              '') _action="OPEN_KWSKIP=$_afe_ref=$_afe_kw";;
              *)  _action="OPEN_KWSKIP_CONT=$_afe_ref=$_afe_kw$_afe_suffix";;
            esac;;
          esac;;
        esac;;
      esac;;
    esac;;
  esac

  # For keyword actions: also add lowercase first-char entries for
  # case-insensitive match. Applies only when the FIRST string is exactly
  # one uppercase letter.
  case "$_action" in OPEN_KWSKIP=*|OPEN_KWSKIP_CONT=*|SKIP_KW_CONT=*|SKIP_KW_CLOSE=*|OPEN_KW=*)
    case "$_fc" in
      [A-Z])
        _lcase "$_fc"
        case "$REPLY" in ?*) _fc="$_fc$REPLY";; esac;;
    esac;;
  esac

  # Parse: extract [..] as class entries, rest as individual char entries.
  while test ${#_fc} -gt 0; do
    _c="${_fc%"${_fc#?}"}"; _fc="${_fc#?}"
    case "$_c" in
      '[')
        case "$_fc" in
          *']'*)
            _cls="[${_fc%%]*}]"
            _fc="${_fc#*]}"
            _bnf_gen_da "$_cls" "$_rcode" "$_action";;
          *)  # '[' with no closing ']' is an ordinary character
            _bnf_gen_da "[" "$_rcode" "$_action";;
        esac;;
      W) _DA_WILD="$_DA_WILD $_rcode=$_action";;
      *) _bnf_gen_da "$_c" "$_rcode" "$_action";;
    esac
  done
}
# Core of the "push a referenced rule" helpers.
#   $1  state code receiving the entries
#   $2  referenced rule name
#   $3  action prefix used when the rule opens with a literal terminal
#   $4  action prefix used otherwise
#   $5  optional suffix, appended as "=<suffix>" when non-empty
# Does nothing when the rule has no state code. Otherwise adds one entry per
# element of _FIRST_<rule> with the action "<prefix>=<rule code>[=<suffix>]".
_bnf_gen_ref_push_core () {
  local _rpc_code _rpc_first _rpc_verb
  eval "_rpc_code=\"\${_RCODE_$2:-}\""
  case "$_rpc_code" in '') return;; esac

  # Pick the action prefix from what the referenced rule starts with.
  _bnf_gen_seq_opening "$2"
  case "$REPLY" in
    ?*) _rpc_verb="$3";;
    *)  _rpc_verb="$4";;
  esac

  eval "_rpc_first=\"\${_FIRST_$2:-}\""
  _bnf_gen_add_first_entries "$1" "$_rpc_first" "$_rpc_verb=$_rpc_code${5:+=$5}"
}
# Add entries to state $1 for a reference to rule $2, without continuation.
#   $1  state code receiving the entries
#   $2  referenced rule name
# Three cases, tried in order (nothing happens if the rule has no code):
#   1. the rule opens with a literal terminal -> OPEN=<code> for its FIRST set
#   2. the rule body is an alternation (Ba)   -> each alternative is expanded
#      in place: Bi alternatives recurse, Bt alternatives get
#      OPEN_SKIP_CLOSE=<code> (one char) or OPEN_KW=<code>=<text> (longer);
#      other alternative kinds add nothing
#   3. anything else                          -> PUSH=<code> for its FIRST set
# NOTE(review): case 2 recurses with no visited-set, so an alternation that
# references itself would not terminate — confirm grammars cannot do that.
_bnf_gen_add_ref_push () {
  local _rcode="$1" _refname="$2" _refcode
  eval "_refcode=\"\${_RCODE_$_refname:-}\""
  case "$_refcode" in '') return;; esac

  _bnf_gen_seq_opening "$_refname"
  case "$REPLY" in ?*)
    # Seq starting with terminal: OPEN (create + skip entry)
    eval "_fc=\"\${_FIRST_$_refname:-}\""
    _bnf_gen_add_first_entries "$_rcode" "$_fc" "OPEN=$_refcode"
    return;;
  esac

  # Check if the referenced rule is an alternation (expand inline)
  eval "_rn2=\"\${_RNODE_$_refname:-}\""
  case "$_rn2" in ?*)
    eval "set -- \$X$_rn2"; shift
    local _body2=$1
    eval "_bt2=\"\${X$_body2%% *}\""
    case "$_bt2" in Ba)
      eval "set -- \$X$_body2"; shift
      for _alt_ch in "$@"; do
        eval "_alt_t=\"\${X$_alt_ch%% *}\""
        case "$_alt_t" in
          Bi)
            eval "_alt_v=\"\${V$_alt_ch:-}\""
            _bnf_gen_add_ref_push "$_rcode" "$_alt_v";;
          Bt)
            eval "_alt_v=\"\${V$_alt_ch:-}\""
            local _alt_c="${_alt_v%"${_alt_v#?}"}"
            if [ "${#_alt_v}" -eq 1 ]; then
              _bnf_gen_da "$_alt_c" "$_rcode" "OPEN_SKIP_CLOSE=$_refcode"
            else
              _bnf_gen_da "$_alt_c" "$_rcode" "OPEN_KW=$_refcode=$_alt_v"
            fi;;
        esac
      done
      return;;
    esac;;
  esac

  eval "_fc=\"\${_FIRST_$_refname:-}\""
  _bnf_gen_add_first_entries "$_rcode" "$_fc" "PUSH=$_refcode"
}
# Reference push with a continuation state.
#   $1  state code receiving the entries
#   $2  referenced rule name
#   $3  continuation state, passed on as the action suffix
# Forwards to _bnf_gen_ref_push_core with the OPEN_CONT / PUSH_CONT prefixes.
_bnf_gen_add_ref_push_cont () {
  set -- "$1" "$2" OPEN_CONT PUSH_CONT "$3"
  _bnf_gen_ref_push_core "$@"
}
# Alternation: dispatch each alternative from this rule's OWN state.
#   $1  rule name (not used by the body)
#   $2  state code of the rule
#   $3  node ID of the Ba (alternation) node
# A "done" state is allocated first and used as the continuation for each
# Bi and Bt alternative. Its code is copied into a private local right after
# allocation: a Bq alternative is handed to _bnf_gen_walk_seq_body, which can
# itself call _bnf_gen_alloc_done and thereby overwrite _done_state, and the
# alternatives that follow must still continue into this alternation's state.
_bnf_gen_walk_alt () {
  local _rname="$1" _rcode="$2" _anode="$3" _ch _ct _cv _c _refcode _fc _done_state
  local _wa_done

  _bnf_gen_alloc_done "_adone_${_rcode}"
  _wa_done="$_done_state"

  eval "set -- \$X$_anode"; shift
  for _ch in "$@"; do
    eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
    case "$_ct" in
      Bi)  # Non-terminal reference: push child with done continuation
        _bnf_gen_add_ref_push_cont "$_rcode" "$_cv" "$_wa_done";;
      Bt)  # Terminal keyword or single-char
        _c="${_cv%"${_cv#?}"}"
        case ${#_cv} in
          1) _bnf_gen_da "$_c" "$_rcode" "OPEN_SKIP_CLOSE_CONT=$_rcode=$_wa_done";;
          *) _bnf_gen_da "$_c" "$_rcode" "OPEN_KW=$_rcode=$_cv=$_wa_done";;
        esac;;
      Bq)  # Sequence branch in alternation
        _bnf_gen_walk_seq_body "$_rcode" "$_ch";;
    esac
  done
}
# Walk into a Bo (optional) or Bm (repetition) node and add dispatch entries
# for what it contains to state $1.
#   $1  state code receiving the entries
#   $2  node ID to walk
# Handling by node type:
#   Bi        direct reference: _bnf_gen_add_ref_push
#   Bq        inner sequence; with an active keyword list each step gets its
#             own continuation state, otherwise all steps share state $1
#   Bo/Bm/Be  recurse into the first child
#   Ba        recurse into every child
#   Bt        SKIP_KW_CONT (keyword) or SKIP entry for the first character
_bnf_gen_walk_inner () {
  local _rcode="$1" _node="$2" _t _ch _ct _cv _c
  eval "_t=\"\${X$_node%% *}\""
  case "$_t" in
    Bi)  # Direct reference
      eval "_cv=\"\${V$_node:-}\""
      _bnf_gen_add_ref_push "$_rcode" "$_cv";;

    Bq)  # Inner sequence (e.g., ',' member inside Bm)
      eval "set -- \$X$_node"; shift
      local _iq_nsteps=$#
      case "$_GKW_LIST" in
        ?*)
          # Two-state approach, used only when keywords are active (prevents
          # item FIRST chars from overlapping with keyword-terminated closes).
          local _iq_i=0 _iq_cur="$_rcode" _iq_next _iq_rcode="$_rcode" _iq_kw
          for _ch in "$@"; do
            _iq_i=$((_iq_i + 1))
            eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
            case "$_ct" in
              Bt)  # Separator/keyword terminal
                _c="${_cv%"${_cv#?}"}"
                _bnf_gen_is_kw "$_cv"; _iq_kw="$REPLY"
                # Every step but the last moves to a fresh state; the last
                # one returns to the shared state.
                if [ "$_iq_i" -lt "$_iq_nsteps" ]; then
                  _bnf_gen_code "_iq${_rcode}_$_iq_i"
                  eval "_iq_next=\$_RCODE__iq${_rcode}_$_iq_i"
                else
                  _iq_next="$_rcode"
                fi
                case "$_iq_kw" in
                  1) _bnf_gen_da "$_c" "$_iq_cur" "SKIP_KW_CONT=$_cv=$_iq_next";;
                  *) _bnf_gen_da "$_c" "$_iq_cur" "SKIP_CONT=$_iq_next";;
                esac
                _iq_cur="$_iq_next";;
              Bi)  # Content reference
                eval "_cv=\"\${V$_ch:-}\""
                if [ "$_iq_i" -lt "$_iq_nsteps" ]; then
                  _bnf_gen_code "_iq${_rcode}_$_iq_i"
                  eval "_iq_next=\$_RCODE__iq${_rcode}_$_iq_i"
                  _bnf_gen_add_ref_push_cont "$_iq_cur" "$_cv" "$_iq_next"
                  _iq_cur="$_iq_next"
                else
                  _bnf_gen_add_ref_push_cont "$_iq_cur" "$_cv" "$_rcode"
                fi;;
              Bm|Bo|Be)
                # Record inheritance for this Bo/Bm step. The inherited
                # entries allow the parser to skip the optional and match
                # what follows. This is recorded BEFORE recursing because
                # ksh93 corrupts all locals during recursive
                # _bnf_gen_walk_inner calls (nested POSIX function scoping).
                _DA_INHERIT="$_DA_INHERIT $_iq_cur=$_iq_rcode"
                _bnf_gen_walk_inner "$_iq_cur" "$_ch";;
            esac
          done;;
        *)
          # No keywords: original flat approach (all steps in shared state)
          local _iq_act
          for _ch in "$@"; do
            eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
            case "$_ct" in
              Bt)
                _c="${_cv%"${_cv#?}"}"
                _bnf_gen_is_kw "$_cv"
                case "$REPLY" in
                  1) _bnf_gen_da "$_c" "$_rcode" "SKIP_KW_CONT=$_cv=$_rcode";;
                  *)
                    # In strict mode a comma gets its own action.
                    _iq_act=SKIP
                    case "$_GSTRICT" in 1)
                      case "$_c" in ',') _iq_act=SKIP_COMMA;; esac;;
                    esac
                    _bnf_gen_da "$_c" "$_rcode" "$_iq_act";;
                esac;;
              Bi)
                eval "_cv=\"\${V$_ch:-}\""
                _bnf_gen_add_ref_push "$_rcode" "$_cv";;
              Bm|Bo|Be)
                _bnf_gen_walk_inner "$_rcode" "$_ch";;
            esac
          done;;
      esac;;

    Bo|Bm|Be)  # Optional/repetition/group: recurse into inner
      eval "set -- \$X$_node"; shift
      _bnf_gen_walk_inner "$_rcode" "$1";;

    Ba)  # Alternation inside optional/repetition
      eval "set -- \$X$_node"; shift
      for _ch in "$@"; do
        _bnf_gen_walk_inner "$_rcode" "$_ch"
      done;;

    Bt)  # Terminal inside optional/repetition
      eval "_cv=\"\${V$_node:-}\""
      _c="${_cv%"${_cv#?}"}"
      _bnf_gen_is_kw "$_cv"
      case "$REPLY" in
        1) _bnf_gen_da "$_c" "$_rcode" "SKIP_KW_CONT=$_cv=$_rcode";;
        *) _bnf_gen_da "$_c" "$_rcode" "SKIP";;
      esac;;
  esac
}
| # Walk sequence body with CONTINUATION STATES. | |
| # Each step after step 1 gets its own state code. This eliminates | |
| # FIRST-set conflicts between different positions in a sequence. | |
| _bnf_gen_walk_seq_body () { | |
| local _rcode="$1" _snode="$2" _ch _ct _cv _c _fc _refcode _refname | |
| local _nsteps _i _first_is_term=0 _cont_states _shared _all_opt _j | |
| local _cur_state="$_rcode" _next_state _prev_ch _prev_t | |
| eval "set -- \$X$_snode"; shift; _nsteps=$# | |
| # Check if step 1 is a terminal (entry step handled by parent OPEN) | |
| eval "_ct=\"\${X$1%% *}\"" | |
| case "$_ct" in Bt) _first_is_term=1;; esac | |
| # Pre-allocate continuation state codes. | |
| # If step 1 is terminal (handled by parent OPEN), step 2 uses the primary state. | |
| case $_first_is_term in | |
| 1) # Step 1 is terminal: primary state = step 2, continuations from step 3 | |
| _cont_states="SKIP $_rcode" # SKIP for step 1, primary for step 2 | |
| _i=2;; | |
| *) # Step 1 is non-terminal: primary state = step 1, continuations from step 2 | |
| _cont_states="$_rcode" | |
| _i=1;; | |
| esac | |
| # Allocate continuation states for remaining steps. | |
| # BUT: if a step is Bo/Bm (nullable), share the state with its successor. | |
| eval "set -- \$X$_snode"; shift | |
| while test $_i -lt $_nsteps; do | |
| _i=$((_i + 1)) | |
| # Check if the PREVIOUS step (at position _i-1) was nullable (Bo/Bm) | |
| eval "_prev_ch=\${$((_i - 1))}" | |
| eval "_prev_t=\"\${X$_prev_ch%% *}\"" | |
| case "$_prev_t" in Bo|Bm) | |
| # Previous step was nullable: share its state with this step | |
| _shared="${_cont_states##* }" | |
| _cont_states="$_cont_states $_shared";; | |
| *) | |
| _bnf_gen_code "_c${_rcode}_$_i" | |
| eval "_next_state=\$_RCODE__c${_rcode}_$_i" | |
| _cont_states="$_cont_states $_next_state";; | |
| esac | |
| done | |
| eval "set -- \$X$_snode"; shift # restore positional params | |
| # Add continuation states to _GALT when all remaining steps are optional (Bo) | |
| _i=0 | |
| for _ch in "$@"; do | |
| _i=$((_i + 1)) | |
| eval "_ct=\"\${X$_ch%% *}\"" | |
| # Check if all steps from _i to end are optional | |
| _all_opt=1; _j=$_i | |
| while test $_j -le $_nsteps; do | |
| eval "_jch=\${$_j}"; eval "_jt=\"\${X$_jch%% *}\"" | |
| case "$_jt" in Bo|Bm) ;; *) _all_opt=0; break;; esac | |
| _j=$((_j + 1)) | |
| done | |
| case $_all_opt in 1) | |
| # This state has only optional content remaining — add to _GALT | |
| set -- $_cont_states | |
| eval "_st=\${$_i}" | |
| case "$_st" in SKIP) ;; *) | |
| _bnf_gen_galt_add "$_st";; | |
| esac;; | |
| esac | |
| eval "set -- \$X$_snode"; shift # restore | |
| done | |
| # Process each step | |
| _i=0 | |
| for _ch in "$@"; do | |
| _i=$((_i + 1)) | |
| eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\"" | |
| # Determine current and next state from pre-allocated list | |
| set -- $_cont_states | |
| eval "_cur_state=\${$_i}" | |
| # Skip step 1 if terminal (handled by parent OPEN) | |
| case "$_cur_state" in SKIP) continue;; esac | |
| if test $_i -lt $_nsteps; then | |
| eval "_next_state=\${$((_i + 1))}" | |
| else | |
| _next_state= | |
| fi | |
| case "$_ct" in | |
| Bt) _c="${_cv%"${_cv#?}"}" | |
| _bnf_gen_is_kw "$_cv" | |
| case "$REPLY" in 1) | |
| # Keyword terminal in sequence: skip whole word and transition | |
| case "$_next_state" in ?*) | |
| _bnf_gen_da "$_c" "$_cur_state" "SKIP_KW_CONT=$_cv=$_next_state";; | |
| *) _bnf_gen_da "$_c" "$_cur_state" "SKIP_KW_CLOSE=$_cv";; | |
| esac;; | |
| *) | |
| case $_i in | |
| 1) # Entry terminal: skip if handled by parent OPEN | |
| case $_first_is_term in 1) ;; *) | |
| _bnf_gen_da "$_c" "$_cur_state" "SKIP";; esac | |
| ;; | |
| $_nsteps) _bnf_gen_da "$_c" "$_cur_state" "SKIP_CLOSE";; | |
| *) case "$_next_state" in ?*) | |
| _bnf_gen_da "$_c" "$_cur_state" "SKIP_CONT=$_next_state";; | |
| *) _bnf_gen_da "$_c" "$_cur_state" "SKIP";; | |
| esac;; | |
| esac;; | |
| esac;; | |
| Bi) # Non-terminal reference | |
| eval "_refname=\"\${V$_ch:-}\"" | |
| case "$_next_state" in ?*) | |
| _bnf_gen_add_ref_push_cont "$_cur_state" "$_refname" "$_next_state";; | |
| *) # Last step is non-terminal: allocate a "done" state | |
| # that has no entries and auto-closes immediately | |
| _bnf_gen_alloc_done "_done_${_rcode}" | |
| # If sequence has leading terminal, mark done state as no-collapse | |
| # (the node must be preserved so the emitter can reconstruct the terminal) | |
| case "$_first_is_term" in 1) | |
| _GNOCOL="$_GNOCOL $_done_state";; esac | |
| _bnf_gen_add_ref_push_cont "$_cur_state" "$_refname" "$_done_state";; | |
| esac;; | |
| Bo|Bm) # Optional/Repetition: add entries for inner content | |
| eval "set -- \$X$_ch"; shift | |
| # For Bo (optional) in keyword grammars: pass "push" so the first | |
| # keyword terminal saves STATE, enabling proper scope return. | |
| # Only do this when there are MORE optional steps after this one — | |
| # if the next step is a required terminal (like 'END'), scope is harmful | |
| # because the keyword might share first chars with the terminal. | |
| _bnf_gen_walk_inner "$_cur_state" "$1" | |
| # Optional/repetition may be empty — peek at the NEXT step | |
| # and add its entries to the current state too (for the "skip" case). | |
| # This allows e.g. '}' to match in both the optional-content state | |
| # and the close-delimiter state. | |
| ;; | |
| Bc) # Bare character class (no quantifier) | |
| eval "_ccv=\"\${V$_ch:-}\"" | |
| eval "_ACCUM_$_cur_state=\"1:\$_ccv\"";; | |
| Bp|Bs|Bk) # Quantifier wrapping Bc | |
| eval "set -- \$X$_ch"; shift | |
| eval "_inner_t=\"\${X$1%% *}\"" | |
| case "$_inner_t" in Bc) | |
| eval "_ccv=\"\${V$1:-}\"" | |
| eval "_ACCUM_$_cur_state=\"$_ct:\$_ccv\"";; | |
| esac;; | |
| esac | |
| done | |
| } | |
# Walk a rule whose body is a single non-terminal reference (Bi).
#   $1 = rule name   $2 = rule state code   $3 = body node id
# Adds, to the rule's OWN state, the entries that push the referenced rule.
# Unless the rule is the precedence rule, a "done" state is allocated first
# and used as the continuation, so the rule closes once the child completes.
_bnf_gen_walk_single_ref () {
    local _rname="$1" _rcode="$2" _bnode="$3" _cv _refcode _fc

    # Referenced rule name, then that rule's state code (may be empty).
    eval "_cv=\"\${V$_bnode:-}\""
    eval "_refcode=\"\${_RCODE_$_cv:-}\""

    if test "$_GPREC_RULE_CODE" = "$_rcode"; then
        # The precedence rule has its own close mechanism: no done state.
        _bnf_gen_ref_push_core "$_rcode" "$_cv" "OPEN" "PUSH"
        return
    fi

    # The allocator is expected to publish the new state code in $_done_state
    # (it is read right below) -- TODO confirm against _bnf_gen_alloc_done.
    _bnf_gen_alloc_done "_done_${_rcode}"

    # The push-cont helper inlines alternations (avoids extra intermediate nodes).
    _bnf_gen_add_ref_push_cont "$_rcode" "$_cv" "$_done_state"
}
# --- Walk rules to populate dispatch table ---
# KEY PRINCIPLE: each rule adds entries to ITS OWN state only.
# Parent-child linking happens because:
#   - Alternation adds entries for its alternatives' first chars in its own state
#   - Sequence adds entries for its steps' first chars in its own state
#   - Single-ref adds entries for the referenced rule's first chars in its own state
#
# Reads:  _GR (rule names), _GEXTERN (space-prefixed extern rule names),
#         _RCODE_<rule>, _RNODE_<rule>, X<node>, V<node>.
# Writes: _ACCUM_<code> for char-class rule bodies; everything else is
#         delegated to the per-shape walkers.
_bnf_gen_walk () {
    local _rname _rcode _rnode _body _bt _ccnode _cctype
    for _rname in $_GR; do
        # Skip extern rules -- they are implemented externally.
        # Match the rule name as a WHOLE word: the name must be followed by a
        # space or by the end of the list. A bare *" name"* test would also
        # skip rule "foo" whenever an extern rule "foobar" exists.
        case "$_GEXTERN" in *" $_rname "*|*" $_rname") continue;; esac

        eval "_rcode=\$_RCODE_$_rname; _rnode=\$_RNODE_$_rname"
        # X<node> is "<type> <child>..."; after the shift $1 is the rule body.
        eval "set -- \$X$_rnode"; shift
        _body=$1
        eval "_bt=\"\${X$_body%% *}\""
        case "$_bt" in
            Ba) _bnf_gen_walk_alt "$_rname" "$_rcode" "$_body";;
            Bq) _bnf_gen_walk_seq_body "$_rcode" "$_body";;
            Bt) ;;  # No own-state entries needed (parent handles via OPEN)
            Bi) _bnf_gen_walk_single_ref "$_rname" "$_rcode" "$_body";;
            Bm|Bo)
                # Repetition or optional as rule body -- walk inner content
                eval "set -- \$X$_body"; shift
                _bnf_gen_walk_inner "$_rcode" "$1";;
            Bc|Bp|Bs|Bk)
                # Character class or quantifier-wrapped class as rule body:
                # an accumulator rule (e.g., word = [a-z]+ or number = [0-9]+).
                # Extract the Bc node (may be wrapped in a quantifier).
                _ccnode="$_body"
                _cctype="$_bt"
                case "$_bt" in Bp|Bs|Bk)
                    eval "set -- \$X$_body"; shift; _ccnode=$1
                    eval "_cctype=\"\${X$_ccnode%% *}\"";;
                esac
                # Record "<body type>:<class value>" for the rule's state.
                case "$_cctype" in Bc)
                    eval "_ccv=\"\${V$_ccnode:-}\""
                    eval "_ACCUM_$_rcode=\"$_bt:\$_ccv\"";;
                esac;;
        esac
    done
}
| # -- Phase 2 sub-phases -- | |
# String accumulators: for each of the _GSTR_N string rules, register the
# close-char entry (and the backslash escape entry when the escape mode is
# json or simple), record the rule's state code in _GSTR_CODE_<i>, and
# pre-compute the glob-variable assignment text in _STR_GLOBVAR_<i>.
_bnf_gen_p2_str_accum () {
    local _si=1 _str_rule _str_code _str_close _str_esc
    local _sfp_glob _sfp_embed _sgesc _sgtmp _sgc
    while test $_si -le $_GSTR_N; do
        # Per-string settings collected earlier (rule, close char, escape mode,
        # optional embed-stop chars).
        eval "_str_rule=\"\$_GSTR_RULE_$_si\""
        eval "_str_code=\$_RCODE_$_str_rule"
        eval "_str_close=\"\$_GSTR_CLOSE_$_si\"; _str_esc=\"\${_GSTR_ESC_$_si:-}\""
        eval "_sfp_embed=\"\${_GSTR_EMBED_STOP_$_si:-}\""
        eval "_GSTR_CODE_$_si=\$_str_code"

        _bnf_gen_da "$_str_close" "$_str_code" "ACCUM_STR_CLOSE"

        # Escape handling; the backslash also becomes a stop char of the glob.
        _sfp_glob=
        case "$_str_esc" in
            json)
                _bnf_gen_da '\' "$_str_code" "ACCUM_STR_ESC"
                _sfp_glob='\';;
            simple)
                _bnf_gen_da '\' "$_str_code" "ACCUM_STR_ESC_SIMPLE"
                _sfp_glob='\';;
        esac
        _sfp_glob="${_sfp_glob}${_str_close}${_sfp_embed}"

        # Escape the stop chars one at a time so they survive inside the
        # single-quoted assignment that is emitted later.
        _sgesc=
        _sgtmp="$_sfp_glob"
        while test -n "$_sgtmp"; do
            _sgc="${_sgtmp%"${_sgtmp#?}"}"
            _sgtmp="${_sgtmp#?}"
            case "$_sgc" in
                "'") _sgesc="${_sgesc}'\"'\"'";;
                '\') _sgesc="${_sgesc}\\\\";;
                *)   _sgesc="$_sgesc$_sgc";;
            esac
        done
        eval "_STR_GLOBVAR_$_si=\"_${_GN}_sg_${_str_code}='[\$_sgesc]*'\""

        _si=$((_si + 1))
    done
}
# Detect multi-char close sequences for char-class accumulators.
# For every sequence (Bq) rule: when a Bi reference to an accumulator rule is
# directly followed by terminals whose concatenated text is at least two
# characters long, that text is stored in _CLOSESEQ_<accumulator code>.
# Collection stops at the first non-terminal step or at a terminal whose text
# occurs in _GKW_LIST.
_bnf_gen_p2_closeseq () {
    local _rname _rcode _rnode _body _bt _nsteps _i _ch _ct
    local _refname _ref_rc _ref_accum _closeseq _j _nch _nt _nv
    for _rname in $_GR; do
        eval "_rcode=\$_RCODE_$_rname; _rnode=\$_RNODE_$_rname"
        eval "set -- \$X$_rnode"; shift; _body=$1
        eval "_bt=\"\${X$_body%% *}\""
        # Only sequence bodies are of interest.
        test "$_bt" = Bq || continue

        # Positional parameters now hold the sequence steps.
        eval "set -- \$X$_body"; shift; _nsteps=$#
        _i=0
        for _ch in "$@"; do
            _i=$((_i + 1))
            eval "_ct=\"\${X$_ch%% *}\""
            test "$_ct" = Bi || continue

            # Is the referenced rule an accumulator?
            eval "_refname=\"\${V$_ch:-}\""
            eval "_ref_rc=\"\${_RCODE_$_refname:-}\""
            eval "_ref_accum=\"\${_ACCUM_$_ref_rc:-}\""
            test -n "$_ref_accum" || continue

            # Gather the terminals that immediately follow the reference.
            _closeseq=
            _j=$((_i + 1))
            while test $_j -le $_nsteps; do
                eval "_nch=\${$_j}"; eval "_nt=\"\${X$_nch%% *}\""
                test "$_nt" = Bt || break
                eval "_nv=\"\${V$_nch:-}\""
                case "$_GKW_LIST" in *"$_nv"*) break;; esac
                _closeseq="$_closeseq$_nv"
                _j=$((_j + 1))
            done

            # Single-char closers need no special handling: require 2+ chars.
            case "$_closeseq" in ??*)
                eval "_CLOSESEQ_$_ref_rc=\"\$_closeseq\"";;
            esac
        done
    done
}
# Track which states need auto-close (cascade close after child completes).
# For every non-extern rule other than the document root state (_GDC):
#   - Ba/Bi/Bm/Bo bodies: the rule's own state is added, except for the
#     precedence rule (_GPREC_RULE_CODE).
#   - Bq bodies whose last step is not a terminal: the state serving the last
#     step is added (_RCODE__c<code>_<n> if allocated, else the rule state).
# NOTE: IFS is set to a single space and intentionally left that way, as in
# the original implementation.
_bnf_gen_p2_autoclose () {
    local _r _node _rcode _bt _last_ch _last_t _seq_nsteps _last_cont
    IFS=' '
    for _r in $_GR; do
        # Whole-word match on the space-prefixed extern list: the name must be
        # followed by a space or the end of the list. A bare *" name"* test
        # would also skip rule "foo" whenever an extern rule "foobar" exists.
        case "$_GEXTERN" in *" $_r "*|*" $_r") continue;; esac

        eval "_node=\$_RNODE_$_r; _rcode=\$_RCODE_$_r"
        case "$_rcode" in "$_GDC") continue;; esac

        # After the shift, $1 is the rule body node.
        eval "set -- \$X$_node"; shift
        eval "_bt=\"\${X$1%% *}\""
        case "$_bt" in
            Ba|Bi|Bm|Bo)
                case "$_GPREC_RULE_CODE" in "$_rcode") ;; *)
                    _bnf_gen_galt_add "$_rcode";;
                esac;;
            Bq) IFS=' '
                # Positional parameters become the sequence steps.
                eval "set -- \$X$1"; shift
                # Indirect fetch of the last positional parameter.
                _last_ch="$#"; eval "_last_ch=\${$_last_ch}"
                eval "_last_t=\"\${X$_last_ch%% *}\""
                _seq_nsteps=$#
                case "$_last_t" in Bt) ;;
                    *)  if test $_seq_nsteps -gt 1; then
                            eval "_last_cont=\"\${_RCODE__c${_rcode}_$_seq_nsteps:-$_rcode}\""
                        else
                            _last_cont="$_rcode"
                        fi
                        _bnf_gen_galt_add "$_last_cont";;
                esac;;
        esac
    done
}
# Root dispatch, in three steps:
#   1. Link the document state (_GDC) to the root rule (_G1).
#   2. When keywords exist, mirror every uppercase-letter dispatch entry under
#      the matching lowercase letter (case-insensitive keywords).
#   3. Copy the precedence rule's dispatch into the binary-operator state.
_bnf_gen_p2_root_dispatch () {
    local _G1C _fc _clentry _clrest _clst _clpat _g1node _g1bt
    local _cc _c _entries _lc _entry _st
    eval "_G1C=\$_RCODE_$_G1"

    if test "$_GPREC_RULE_CODE" = "$_G1C"; then
        # Root IS the precedence rule: enter it from the document state on
        # each of its first characters...
        eval "_fc=\"\${_FIRST_$_G1:-}\""
        _bnf_gen_add_first_entries "$_GDC" "$_fc" "PUSH_CONT=$_G1C=$_GDC"
        # ...and on every class pattern registered for the root state.
        # (The list is expanded once, so appended entries are not revisited.)
        for _clentry in $_DA_CLASS; do
            _clrest="${_clentry#*=}"
            _clst="${_clrest%%=*}"
            if test "$_clst" = "$_G1C"; then
                _clpat="${_clentry%%=*}"
                _DA_CLASS="$_DA_CLASS $_clpat=$_GDC=PUSH_CONT=$_G1C=$_GDC"
            fi
        done
    else
        eval "_g1node=\$_RNODE_$_G1"
        eval "set -- \$X$_g1node"; shift
        eval "_g1bt=\"\${X$1%% *}\""
        case "$_g1bt" in
            Bm|Bo) _bnf_gen_copy_dispatch "$_G1C" "$_GDC";;
            *)     _bnf_gen_add_ref_push_cont "$_GDC" "$_G1" "$_GDC";;
        esac
    fi

    # Copy ALL uppercase keyword entries as lowercase for case-insensitive
    # keywords, so keyword dispatch works at all levels (root, continuation
    # states, block bodies, etc.).
    if test -n "$_GKW_LIST"; then
        for _cc in $_DA_CODES; do
            eval "_c=\"\$_DA_RAW_$_cc\""
            case "$_c" in [A-Z]) ;; *) continue;; esac
            eval "_entries=\"\${_DA_$_cc:-}\""
            _lcase "$_c"; _lc=$REPLY
            test -n "$_lc" || continue
            for _entry in $_entries; do
                _st="${_entry%%=*}"
                _bnf_gen_da "$_lc" "$_st" "${_entry#*=}"
            done
        done
    fi

    # Copy expr dispatch to binary op state
    if test -n "$_GPREC_BIN"; then
        _bnf_gen_copy_dispatch "$_GPREC_RULE_CODE" "$_GPREC_BIN"
    fi
}
# Dispatch entries for the _GPOST_N postfix operators and, when _GTERNARY is
# 1, for the ternary operator.
_bnf_gen_p2_postfix () {
    local _pi=1 _post_state _post_inner _post_close _post_cont _pc
    while test $_pi -le $_GPOST_N; do
        eval "_post_state=\"\$_GPOST_STATE_$_pi\""
        eval "_post_inner=\"\$_GPOST_INNER_$_pi\""
        eval "_post_close=\"\${_GPOST_CLOSE_$_pi:-}\""
        eval "_post_cont=\"\${_GPOST_CONT_$_pi:-}\""
        if test -n "$_post_close"; then
            # Delimited postfix: push the inner rule, and let the first char
            # of the closer end the node from both the postfix state and its
            # continuation state.
            _bnf_gen_add_ref_push_cont "$_post_state" "$_post_inner" "$_post_cont"
            _pc="${_post_close%"${_post_close#?}"}"
            _bnf_gen_da "$_pc" "$_post_state" "SKIP_CLOSE_XC"
            _bnf_gen_da "$_pc" "$_post_cont" "SKIP_CLOSE_XC"
        else
            # No closer: continue into a dedicated done state.
            _bnf_gen_alloc_done "_postd_$_pi"
            _bnf_gen_add_ref_push_cont "$_post_state" "$_post_inner" "$_done_state"
        fi
        _pi=$((_pi + 1))
    done

    # Ternary operator dispatch
    if test "$_GTERNARY" = 1; then
        _bnf_gen_copy_dispatch "$_GPREC_RULE_CODE" "$_GTERNARY_STATE"
        _bnf_gen_da ":" "$_GTERNARY_COLON" "SKIP_CONT=$_GTERNARY_DONE"
        _bnf_gen_copy_dispatch "$_GPREC_RULE_CODE" "$_GTERNARY_DONE"
    fi
}
# Pre-compute glob vars for char classes containing ' (needed at file scope).
# For each rule with an _ACCUM_<code> entry, the class value (text after the
# first ':') is inverted -- a leading '^' is dropped, otherwise '!' is
# prepended. When the result contains a single quote, the state code is
# appended to _GLOB_VARS and the pattern is stored in _GLOB_PAT_<code>.
_bnf_gen_p2_glob_precomp () {
    local _r _rcode _accum _ccval _ccraw
    _GLOB_VARS=
    for _r in $_GR; do
        eval "_rcode=\$_RCODE_$_r"
        eval "_accum=\"\${_ACCUM_$_rcode:-}\""
        test -n "$_accum" || continue

        _ccval="${_accum#*:}"
        case "$_ccval" in
            '^'*) _ccraw="${_ccval#^}";;
            *)    _ccraw="!${_ccval}";;
        esac

        # Only classes that contain a single quote need a variable.
        case "$_ccraw" in
            *"'"*) ;;
            *)     continue;;
        esac
        _GLOB_VARS="$_GLOB_VARS $_rcode"
        eval "_GLOB_PAT_$_rcode=\"\$_ccraw\""
    done
}
# Phase 2b helper: build the expected-token data per state and propagate
# inherited dispatch entries.
#   1. Every char dispatch entry contributes its quoted char to its state.
#   2. Every class entry contributes "number" ([0-9]...), "identifier"
#      ([a-zA-Z...) or the raw pattern.
#   3. Every wildcard entry contributes "text".
#   4. Each "target=source" pair in _DA_INHERIT copies source's dispatch to
#      target; target also becomes auto-close when source is in _GALT.
_bnf_gen_p2_exp_inh () {
    local _cc _c _entries _readable _entry _clentry _clpat _clrest _st _wentry
    local _inh _inh_target _inh_source _cur
    # Collect valid characters/patterns per state from the dispatch table.
    for _cc in $_DA_CODES; do
        eval "_c=\"\$_DA_RAW_$_cc\""
        eval "_entries=\"\${_DA_$_cc:-}\""
        case "$_c" in '"') _readable='"\""';; *) _readable="'$_c'";; esac
        for _entry in $_entries; do _bnf_gen_exp_add "${_entry%%=*}" "$_readable"; done
    done
    for _clentry in $_DA_CLASS; do
        _clpat="${_clentry%%=*}"; _clrest="${_clentry#*=}"; _st="${_clrest%%=*}"
        case "$_clpat" in
            '[0-9]'*)   _readable="number";;
            '[a-zA-Z'*) _readable="identifier";;
            *)          _readable="$_clpat";;
        esac
        _bnf_gen_exp_add "$_st" "$_readable"
    done
    for _wentry in $_DA_WILD; do _bnf_gen_exp_add "${_wentry%%=*}" "text"; done
    # Propagate inherited entries (inner Bq states ending with Bo/Bm)
    for _inh in $_DA_INHERIT; do
        _inh_target="${_inh%%=*}"; _inh_source="${_inh#*=}"
        _bnf_gen_copy_dispatch "$_inh_source" "$_inh_target"
        # _GALT is pipe-delimited; anchor the match on the delimiters so that
        # a state code is not found inside a longer one (e.g. J1 in J12).
        case "|$_GALT|" in *"|$_inh_source|"*) _bnf_gen_galt_add "$_inh_target";; esac
    done
}
_bnf_gen_p2 () {
    # ============================================================
    # Phase 2: Build dispatch table
    # ============================================================
    # Tables reset here and filled by the sub-phases below:
    #   _DA_<charcode>  space-separated "state=action" entries
    #   _DA_CODES       space-separated char codes for safe iteration
    #   _DA_RAW_<code>  raw character for each code
    #   _DA_CLASS       space-separated "pattern=state=action" (class-based dispatch)
    #   _DA_WILD        space-separated "state=action" (wildcard dispatch, negated classes)
    #   _DA_INHERIT     space-separated "target=source" (state inheritance)
    _DA_CODES=
    _DA_CLASS=
    _DA_WILD=
    _DA_INHERIT=

    _bnf_gen_p2_str_accum

    # Number accumulator: accumulation is handled entirely by the fast path
    # (close is triggered by a non-numeric char), so only the state code is
    # recorded -- no dispatch entries are needed in the number state itself.
    if test -n "$_GNUM"; then
        eval "_GNUM_CODE=\$_RCODE_$_GNUM"
    fi

    _GALT=      # auto-close list (done states are added during the walk)
    _GDONE=     # done states (excluded from whitespace skip)
    _GNOCOL=    # done states that must NOT collapse (terminal-leading rules)

    _bnf_gen_walk
    _bnf_gen_p2_closeseq
    _bnf_gen_p2_autoclose
    _bnf_gen_p2_root_dispatch
    _bnf_gen_p2_postfix
    _bnf_gen_p2_glob_precomp

    # ============================================================
    # Phase 2b: Build expected-token strings per state (for error messages)
    # ============================================================
    # _EXP_<state> = readable string like "'(' or number"
    _bnf_gen_p2_exp_inh
} # end _bnf_gen_p2
| # ============================================================ | |
| # Phase 3: Parser Emission | |
| # ============================================================ | |
| # -- Action & pattern emission -- | |
# Emit the keyword-match body of a case arm: "ast_more", extraction of the
# leading word into MATCH, then a nested case on that word.
#   $1 = keyword
#   $2 = action text when the keyword is in _GKW_LIST (case-insensitive path)
#   $3 = action text on the strict (exact-case) path
#   $4 = continuation text appended to the identifier fallback
# Side effect: on the case-insensitive path the global _kw_ident_code is set
# to the state code of the keyword identifier rule (_GKW_RULE).
_bnf_gen_emit_kw_match () {
    _printr1 " ast_more; MATCH=\"\${CODE%%[!a-zA-Z0-9_]*}\""
    case "$_GKW_LIST" in
        *"$1"*)
            # Listed keyword: compare the upper-cased word; any other word is
            # consumed as an identifier node.
            eval "_kw_ident_code=\"\${_RCODE_$_GKW_RULE:-}\""
            _printr1 " _ucase \"\$MATCH\""
            _printr1 " case \"\$REPLY\" in"
            _printr1 " '$1') $2"
            _printr1 " *) ast_consume_match"
            _printr1 " ast_$_kw_ident_code; ast_close; $4"
            _printr1 " esac;;"
            ;;
        *)
            # Unlisted keyword: exact match or a KEYWORD error.
            _printr1 " case \"\$MATCH\" in"
            _printr1 " '$1') $3"
            _printr1 " *) _error KEYWORD;;"
            _printr1 " esac;;"
            ;;
    esac
}
# Emit the shell code implementing one dispatch action.
#   $1 = action string, one of:
#        PUSH=<c> | OPEN=<c> | OPEN_KWSKIP=<c>=<kw> | OPEN_KWSKIP_CONT=<c>=<kw>=<cont>
#        OPEN_SKIP_CLOSE=<c> | OPEN_SKIP_CLOSE_CONT=<c>=<cont>
#        OPEN_KW=<c>=<kw>[=<cont>] | SKIP_CONT=<cont>
#        PUSH_CONT=<c>=<cont> | OPEN_CONT=<c>=<cont>
#        SKIP_CLOSE | SKIP_CLOSE_XC | SKIP
#        SKIP_KW_CONT=<kw>=<cont> | SKIP_KW_CLOSE=<kw>
#        SKIP_COMMA | ACCUM_STR_CLOSE | ACCUM_STR_ESC | ACCUM_STR_ESC_SIMPLE
# Unknown actions emit nothing.
_bnf_gen_emit_action () {
    local _act="$1" _sp _sk _rest _cont _child _skip
    local _okrc _okkw _okpfx _osc_code _osc_cont
    local _okw_rc _okw_kw _okw_cont _okw_sfx _skw _skw_action
    case "$_act" in
        PUSH=*|OPEN=*)
            # Push child state; OPEN also skips the triggering char.
            # Strict mode resets the trailing-comma flag first.
            _sp=
            _sk=
            if test "$_GSTRICT" = 1; then _sp="_JT=0; "; fi
            case "$_act" in OPEN=*) _sk=" ast_skip;";; esac
            _printr1 " ${_sp}ast_${_act#*=};$_sk continue;;"
            ;;
        OPEN_KWSKIP=*|OPEN_KWSKIP_CONT=*)
            # Create node, skip the full keyword word (optionally set STATE).
            _okrc=; _okkw=; _okpfx=
            case "$_act" in
                OPEN_KWSKIP_CONT=*)
                    _rest="${_act#OPEN_KWSKIP_CONT=}"
                    _okrc="${_rest%%=*}"; _rest="${_rest#*=}"
                    _okkw="${_rest%%=*}"; _okpfx="STATE=${_rest#*=}; ";;
                *)
                    _rest="${_act#OPEN_KWSKIP=}"
                    _okrc="${_rest%%=*}"; _okkw="${_rest#*=}";;
            esac
            _printr1 ""
            _bnf_gen_emit_kw_match "$_okkw" \
                "${_okpfx}ast_$_okrc; ast_skip_match; continue;;" \
                "${_okpfx}ast_$_okrc; CODE=\"\${CODE#$_okkw}\"; _COL=\$((_COL+${#_okkw})); continue;;" \
                "${_okpfx}continue;;"
            ;;
        OPEN_SKIP_CLOSE=*)
            # Create node, skip, close
            _printr1 " ast_${_act#OPEN_SKIP_CLOSE=}; ast_skip; ast_close; continue;;"
            ;;
        OPEN_SKIP_CLOSE_CONT=*)
            # Create node, skip, close + set continuation
            _rest="${_act#OPEN_SKIP_CLOSE_CONT=}"
            _osc_code="${_rest%%=*}"
            _osc_cont="${_rest#*=}"
            _printr1 " ast_$_osc_code; ast_skip; ast_close; STATE=$_osc_cont; continue;;"
            ;;
        OPEN_KW=*)
            # Create node via keyword match; the continuation is optional.
            _rest="${_act#OPEN_KW=}"
            _okw_cont=
            _okw_sfx=
            _okw_rc="${_rest%%=*}"; _rest="${_rest#*=}"
            _okw_kw="${_rest%%=*}"
            case "$_rest" in *=*)
                _okw_cont="${_rest#*=}"
                _okw_sfx=" STATE=$_okw_cont;";;
            esac
            _printr1 ""
            if test -z "$_okw_cont" && test "$_GSTRICT" = 1; then
                _printr1 " _JT=0;"
            fi
            # Text stored in CONSUMED: the keyword, lower-cased on request.
            _okw_lc=$_okw_kw
            if test "$_GKW_CASE" = lower; then
                _lcase_str "$_okw_kw"; _okw_lc=$REPLY
            fi
            _bnf_gen_emit_kw_match "$_okw_kw" \
                "CONSUMED='$_okw_lc'; ast_skip_match
ast_$_okw_rc; ast_close;$_okw_sfx continue;;" \
                "CONSUMED='$_okw_lc'; CODE=\"\${CODE#$_okw_kw}\"; _COL=\$((_COL+${#_okw_kw}))
ast_$_okw_rc; ast_close;$_okw_sfx continue;;" \
                "${_okw_cont:+STATE=$_okw_cont; }continue;;"
            ;;
        SKIP_CONT=*)
            # Skip char and transition to continuation state
            _cont="${_act#SKIP_CONT=}"
            _printr1 " ast_skip; STATE=$_cont; continue;;"
            ;;
        PUSH_CONT=*|OPEN_CONT=*)
            # Push/open child with continuation state
            _rest="${_act#*_CONT=}"
            _skip=
            case "$_act" in OPEN_CONT=*) _skip=" ast_skip;";; esac
            _child="${_rest%%=*}"
            _cont="${_rest#*=}"
            _printr1 " STATE=$_cont; ast_$_child;$_skip continue;;"
            ;;
        SKIP_CLOSE)
            # Skip char, close node; strict mode first rejects a trailing comma.
            if test "$_GSTRICT" = 1; then
                _printr1 ""
                _printr1 " case \$_JT in 1) _error COMMA;; esac"
            fi
            _printr1 " ast_skip; ast_close; continue;;"
            ;;
        SKIP_CLOSE_XC)
            # Skip char, close node via the _xc variant (postfix close)
            _printr1 " ast_skip; ast_close_xc;;"
            ;;
        SKIP)
            # Skip char (middle delimiter)
            _printr1 " ast_skip; continue;;"
            ;;
        SKIP_KW_CONT=*|SKIP_KW_CLOSE=*)
            # Skip keyword, then transition or close
            case "$_act" in
                SKIP_KW_CLOSE=*)
                    _skw="${_act#SKIP_KW_CLOSE=}"
                    _skw_action="ast_close; continue;;";;
                *)
                    _rest="${_act#SKIP_KW_CONT=}"
                    _skw="${_rest%%=*}"
                    _skw_action="STATE=${_rest#*=}; continue;;";;
            esac
            _printr1 ""
            _bnf_gen_emit_kw_match "$_skw" \
                "ast_skip_match; $_skw_action" \
                "CODE=\"\${CODE#$_skw}\"; _COL=\$((_COL+${#_skw})); $_skw_action" \
                "continue;;"
            ;;
        SKIP_COMMA)
            # Comma separator (sets trailing comma flag)
            _printr1 " ast_skip; _JT=1; continue;;"
            ;;
        ACCUM_STR_CLOSE)
            # Close string on matching quote
            _printr1 " ast_close; ast_skip; continue;;"
            ;;
        ACCUM_STR_ESC)
            # Escape sequence in string (JSON mode)
            _printr1 ""
            _printr1 " case \${CODE#?} in"
            _printr1 " '\"'*|'\\'*|'/'*|'b'*|'f'*|'n'*|'r'*|'t'*)"
            _printr1 " ast_consume2;;"
            _printr1 " 'u'*) case \$CODE in"
            _printr1 " '\\u'[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]*)"
            _printr1 " REST=\"\${CODE#??????}\"; _ast_xfer; _COL=\$((_COL+6));;"
            _printr1 " *) _error UNICODE;; esac;;"
            _printr1 " *) _error ESCAPE;;"
            _printr1 " esac; continue;;"
            ;;
        ACCUM_STR_ESC_SIMPLE)
            # Escape sequence in string (simple mode: any \X = 2 chars)
            _printr1 " ast_consume2; continue;;"
            ;;
    esac
}
# Emit the case-pattern line ("<pattern>*)") for one dispatch key.
#   $1 = dispatch key: a single char, the two-char text \\ , or a bracket
#        class such as [a-z]
# Quotes are emitted inside the opposite quote style; bracket classes are
# emitted unquoted so they keep acting as globs.
_bnf_gen_emit_case_pat () {
    if test "$1" = "'"; then
        _printr1 " \"'\"*)"
    elif test "$1" = '"'; then
        _printr1 " '\"'*)"
    elif test "$1" = '\\'; then
        _printr1 " '\\\\'*)"
    else
        case "$1" in
            '['*']')
                # Character class pattern (with matching ])
                _printr1 " $1*)";;
            *)
                _printr1 " '$1'*)";;
        esac
    fi
}
# Emit one state arm: the "<state>)" label via _printn1, followed by the
# code for its action via _bnf_gen_emit_action.
#   $1 = state code
#   $2 = action string
_bnf_gen_emit_state_action () {
    _printn1 " $1)"
    _bnf_gen_emit_action "$2"
}
# Emit a state arm, merging keyword actions when the state needs it.
#   $1 = state   $2 = first action   $3 = char (context for the merge)
# Reads $_entries from the CALLER's scope: the "state=action" entries
# registered for the current char.
#   - no keyword action for the state      -> plain arm
#   - exactly one entry, a keyword action,
#     and no matching class entry          -> plain arm
#   - anything else                        -> merged keyword block
_bnf_gen_emit_state_or_kw_merge () {
    local _eskm_st="$1" _eskm_act="$2" _eskm_ch="$3"
    local _eskm_kw= _eskm_kwn=0 _eskm_nkw= _eskm_tot=0

    # Classify this state's entries: keyword actions vs. first other action.
    for _e2 in $_entries; do
        local _s2="${_e2%%=*}" _a2="${_e2#*=}"
        test "$_s2" = "$_eskm_st" || continue
        _eskm_tot=$((_eskm_tot + 1))
        case "$_a2" in
            OPEN_KW=*=*=*|OPEN_KWSKIP_CONT=*|SKIP_KW_CONT=*|SKIP_KW_CLOSE=*)
                _eskm_kw="$_eskm_kw $_a2"
                _eskm_kwn=$((_eskm_kwn + 1));;
            *)
                if test -z "$_eskm_nkw"; then _eskm_nkw="$_a2"; fi;;
        esac
    done

    if test "$_eskm_kwn" -eq 0; then
        _bnf_gen_emit_state_action "$_eskm_st" "$_eskm_act"
        return
    fi

    if test "$_eskm_tot" -eq 1 && test "$_eskm_kwn" -eq 1; then
        # Only shortcut if no class entry provides a better fallback
        local _has_cls= _clentry _clpat _clrest _clst2
        for _clentry in $_DA_CLASS; do
            _clpat="${_clentry%%=*}"
            case "$_eskm_ch" in ${_clpat}*) ;; *) continue;; esac
            _clrest="${_clentry#*=}"
            _clst2="${_clrest%%=*}"
            if test "$_clst2" = "$_eskm_st"; then _has_cls=1; break; fi
        done
        if test -z "$_has_cls"; then
            _bnf_gen_emit_state_action "$_eskm_st" "$_eskm_act"
            return
        fi
    fi

    # 2+ keyword actions, keyword/non-keyword conflict, or class fallback.
    _bnf_gen_emit_kw_merge "$_eskm_st" "$_eskm_kw" "$_eskm_nkw" "$_eskm_ch"
}
# Emit merged keyword dispatch block for a state with multiple keyword actions.
#   $1 = state
#   $2 = keyword actions (space-separated): OPEN_KW=<c>=<kw>=<cont>,
#        OPEN_KWSKIP_CONT=<c>=<kw>=<cont>, SKIP_KW_CONT=<kw>=<cont>,
#        SKIP_KW_CLOSE=<kw>
#   $3 = non-keyword action for the same state (may be empty)
#   $4 = current char (used to find a matching class entry as fallback)
# The emitted arm extracts the leading word into MATCH and dispatches on it.
# When any keyword is in _GKW_LIST the word is upper-cased with _ucase and
# the dispatch is on $REPLY; otherwise the dispatch is on $MATCH itself
# (no _ucase call is emitted in that case, so $REPLY would be stale).
_bnf_gen_emit_kw_merge () {
    local _km_st="$1" _km_kw_acts="$2" _km_non_kw="$3" _km_ch="$4"
    local _ka _kw_str _any_kw_listed= _merge_cont= _rest _mc _mkw _mcont _mkw_lc
    local _clentry _clpat _clrest _clst2 _nk_rest _nk_code _nk_cont _kw_ident_code
    _printr1 " $_km_st)"
    _printr1 " ast_more; MATCH=\"\${CODE%%[!a-zA-Z0-9_]*}\""

    # Is any of the keywords a listed (case-insensitive) keyword?
    for _ka in $_km_kw_acts; do
        _kw_str="${_ka#*=}"
        # OPEN_* actions carry the node code before the keyword.
        case "$_ka" in OPEN_*) _kw_str="${_kw_str#*=}";; esac
        _kw_str="${_kw_str%%=*}"
        case "$_GKW_LIST" in *"$_kw_str"*) _any_kw_listed=1;; esac
    done
    case "$_any_kw_listed" in
        1)  _printr1 " _ucase \"\$MATCH\""
            _printr1 " case \"\$REPLY\" in";;
        *)  # No _ucase emitted: dispatch on the word as written.
            _printr1 " case \"\$MATCH\" in";;
    esac

    # Emit each keyword branch
    for _ka in $_km_kw_acts; do
        case "$_ka" in
            OPEN_KW=*=*=*)
                _rest="${_ka#OPEN_KW=}"
                _mc="${_rest%%=*}"; _rest="${_rest#*=}"
                _mkw="${_rest%%=*}"; _mcont="${_rest#*=}"
                _merge_cont="$_mcont"
                _mkw_lc=$_mkw
                case "$_GKW_CASE" in lower) _lcase_str "$_mkw"; _mkw_lc=$REPLY;; esac
                _printr1 " '$_mkw') CONSUMED='$_mkw_lc'; CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH}))"
                _printr1 " ast_$_mc; ast_close; STATE=$_mcont; continue;;";;
            OPEN_KWSKIP_CONT=*)
                _rest="${_ka#OPEN_KWSKIP_CONT=}"
                _mc="${_rest%%=*}"; _rest="${_rest#*=}"
                _mkw="${_rest%%=*}"; _mcont="${_rest#*=}"
                _merge_cont="$_mcont"
                _printr1 " '$_mkw') STATE=$_mcont; ast_$_mc; CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH})); continue;;";;
            SKIP_KW_CONT=*)
                _rest="${_ka#SKIP_KW_CONT=}"
                _mkw="${_rest%%=*}"; _mcont="${_rest#*=}"
                # Only used as fallback continuation if no OPEN_* set one.
                case "$_merge_cont" in '') _merge_cont="$_mcont";; esac
                _printr1 " '$_mkw') CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH})); STATE=$_mcont; continue;;";;
            SKIP_KW_CLOSE=*)
                _mkw="${_ka#SKIP_KW_CLOSE=}"
                _printr1 " '$_mkw') CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH})); ast_close; continue;;";;
        esac
    done

    # Fallback: non-keyword action, class entry match, ident fallback, or error
    case "$_km_non_kw" in '')
        for _clentry in $_DA_CLASS; do
            _clpat="${_clentry%%=*}"
            case "$_km_ch" in ${_clpat}*) ;; *) continue;; esac
            _clrest="${_clentry#*=}"
            _clst2="${_clrest%%=*}"
            case "$_clst2" in "$_km_st")
                _km_non_kw="${_clrest#*=}"; break;; esac
        done;; esac
    case "$_km_non_kw" in
        PUSH_CONT=*)
            _nk_rest="${_km_non_kw#PUSH_CONT=}"
            _nk_code="${_nk_rest%%=*}"; _nk_cont="${_nk_rest#*=}"
            _printr1 " *) STATE=$_nk_cont; ast_$_nk_code; continue;;";;
        OPEN_CONT=*)
            _nk_rest="${_km_non_kw#OPEN_CONT=}"
            _nk_code="${_nk_rest%%=*}"; _nk_cont="${_nk_rest#*=}"
            _printr1 " *) STATE=$_nk_cont; ast_$_nk_code; ast_skip; continue;;";;
        *)
            # Consume the word as an identifier when the grammar has a
            # keyword identifier rule; otherwise it is a keyword error.
            eval "_kw_ident_code=\"\${_RCODE_$_GKW_RULE:-}\""
            case "$_kw_ident_code" in ?*)
                _printr1 " *) CONSUMED=\"\$MATCH\"; CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH}))"
                _printr1 " ast_$_kw_ident_code; ast_close; STATE=${_merge_cont:-}; continue;;";;
            *)  _printr1 " *) _error KEYWORD;;";;
            esac;;
    esac
    _printr1 " esac;;"
}
| # -- Dispatch structure emission -- | |
# Close a dispatch block: wildcard arms for states that have no explicit arm
# yet, then the auto-close (galt) arms, then the error arm and the "esac;;".
#   $1 = already-emitted states
#   $2 = left delimiter, $3 = right delimiter that surround a state in $1
_bnf_gen_emit_wild_fallback () {
    local _wentry _wst _wact
    for _wentry in $_DA_WILD; do
        _wst="${_wentry%%=*}"
        _wact="${_wentry#*=}"
        case "$1" in
            *"$2$_wst$3"*)
                # State already has an arm in this block.
                ;;
            *)
                _printn1 " $_wst)"
                _bnf_gen_emit_action "$_wact";;
        esac
    done
    _bnf_gen_emit_galt "$1"
    _printr1 " *) _pars_err;;"
    _printr1 " esac;;"
}
# Emit auto-close arms for the states listed in the pipe-delimited _GALT.
#   $1 = skip filter: states found in it (as "<state>=" or "|<state>|") are
#        left out; empty means no filtering.
# States present in _GNOCOL get a plain "ast_close" arm; the others get
# "ast_close_col".
_bnf_gen_emit_galt () {
    if test -z "$_GALT"; then return; fi
    local _galt_skip="$1" _filt= _galt_rest="$_GALT" _gs

    # Pass 1: drop the states that were already emitted.
    while test -n "$_galt_rest"; do
        case "$_galt_rest" in
            *'|'*) _gs="${_galt_rest%%\|*}"; _galt_rest="${_galt_rest#*\|}";;
            *)     _gs="$_galt_rest"; _galt_rest=;;
        esac
        case "$_galt_skip" in
            *"$_gs="*|*"|$_gs|"*) ;;
            *) _filt="${_filt:+$_filt|}$_gs";;
        esac
    done
    test -n "$_filt" || return 0

    # Pass 2: split no-collapse states from regular collapse states.
    local _col_filt= _nocol_filt=
    local _filt_rest="$_filt" _fs
    while test -n "$_filt_rest"; do
        case "$_filt_rest" in
            *'|'*) _fs="${_filt_rest%%\|*}"; _filt_rest="${_filt_rest#*\|}";;
            *)     _fs="$_filt_rest"; _filt_rest=;;
        esac
        case "$_GNOCOL" in
            *" $_fs "*|*" $_fs") _nocol_filt="${_nocol_filt:+$_nocol_filt|}$_fs";;
            *)                   _col_filt="${_col_filt:+$_col_filt|}$_fs";;
        esac
    done
    _bnf_gen_emit_close_case "$_col_filt" "ast_close_col"
    _bnf_gen_emit_close_case "$_nocol_filt" "ast_close"
}
# Emit one close arm for a group of states; nothing is emitted for an empty
# group.
#   $1 = state list (used verbatim as the case pattern)
#   $2 = close action (ast_close or ast_close_col)
# With a precedence rule (_GPREC_RULE non-empty) the "_xc" variant of the
# action is emitted instead of "<action>; continue".
_bnf_gen_emit_close_case () {
    if test -z "$1"; then
        :
    elif test -n "$_GPREC_RULE"; then
        _printr1 " $1) ${2}_xc;;"
    else
        _printr1 " $1) $2; continue;;"
    fi
}
# Emit an arm that un-parses every child and joins the results into REPLY.
#   $1 = state code
#   $2 = separator; when non-empty, "<sep> " is inserted between children.
#        When empty, children are joined with $_EOL if the grammar has a
#        comment char (_GCMT), and simply concatenated otherwise.
_bnf_gen_emit_child_loop () {
    _printr1 " $1) _r="
    _printr1 " for _ch in \"\$@\"; do"
    if test -n "$2"; then
        _printr1 " case \"\$_r\" in ?*) _r=\"\$_r$2 \";; esac"
    elif test -n "$_GCMT"; then
        _printr1 " case \"\$_r\" in ?*) _r=\"\$_r\$_EOL\";; esac"
    fi
    _printr1 " _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\""
    _printr1 " done; REPLY=\"\$_r\";;"
}
# Cluster overlapping class patterns for dispatch emission.
# Two patterns overlap when at least one of a fixed set of probe characters
# matches both. Each distinct pattern of _DA_CLASS joins the first group that
# holds an overlapping pattern, or starts a new group.
# Sets _cls_ngrp and _CLS_GRP_<n> (read by caller via dynamic scoping).
_bnf_gen_emit_cls_cluster () {
    local _gcc_pats= _gcc_done= _clentry _clpat _cpat _op _tc _gp
    local _gcc_found _gcc_gi _gcc_ov _m1 _m2

    # Distinct patterns, in first-seen order.
    for _clentry in $_DA_CLASS; do
        _clpat="${_clentry%%=*}"
        case "$_gcc_done" in
            *"|$_clpat|"*) continue;;
        esac
        _gcc_done="$_gcc_done|$_clpat|"
        _gcc_pats="$_gcc_pats $_clpat"
    done

    _cls_ngrp=0
    for _cpat in $_gcc_pats; do
        _gcc_found=
        _gcc_gi=0
        while test $_gcc_gi -lt $_cls_ngrp; do
            eval "_gp=\"\$_CLS_GRP_$_gcc_gi\""
            for _op in $_gp; do
                # Probe: does any sample char match both patterns?
                _gcc_ov=0
                for _tc in a m z A M Z 0 5 9 _ . : '#' '+' '-' '!' '?' '*' '/'; do
                    case "$_tc" in ${_cpat}*)
                        case "$_tc" in ${_op}*) _gcc_ov=1; break;; esac;;
                    esac
                done
                if test $_gcc_ov = 1; then
                    _gcc_found=$_gcc_gi
                    break 2
                fi
            done
            _gcc_gi=$((_gcc_gi + 1))
        done

        if test -n "$_gcc_found"; then
            eval "_CLS_GRP_$_gcc_found=\"\$_CLS_GRP_$_gcc_found \$_cpat\""
        else
            eval "_CLS_GRP_$_cls_ngrp=\"\$_cpat\""
            _cls_ngrp=$((_cls_ngrp + 1))
        fi
    done
}
| # -- Parser structure emission -- | |
# Emit the file-scope glob variables:
#   - one "_<grammar>_gp_<code>='[...]*'" line per state in _GLOB_VARS
#     (char classes containing '), preceded by an empty line;
#   - the pre-built _STR_GLOBVAR_<i> line of each string accumulator.
_bnf_gen_emit_glob_vars () {
    if test -n "$_GLOB_VARS"; then
        _printr1 ""
        for _gvrc in $_GLOB_VARS; do
            eval "_gvpat=\"\$_GLOB_PAT_$_gvrc\""
            # _esc_sq leaves the escaped text in REPLY (used right below).
            _esc_sq "$_gvpat"
            _printr1 "_${_GN}_gp_$_gvrc='[$REPLY]*'"
        done
    fi

    local _geg_i=1
    while test $_geg_i -le $_GSTR_N; do
        eval "_sgvar=\"\${_STR_GLOBVAR_$_geg_i:-}\""
        if test -n "$_sgvar"; then
            _printr1 "$_sgvar"
        fi
        _geg_i=$((_geg_i + 1))
    done
}
# Emit expected-token variables for error messages (file scope).
# Prints a blank line, then one single-quoted assignment _EXP_<code>='...'
# for every state code in $_GST that has a non-empty $_EXP_<code>; the value
# is passed through _esc_sq before being placed between the quotes.
_bnf_gen_emit_exp_tokens () {
  _printr1 ""
  for _sc in $_GST; do
    eval "_exp=\"\${_EXP_$_sc:-}\""
    # States without an expected-token text produce no output.
    test -n "$_exp" || continue
    _esc_sq "$_exp"
    _printr1 "_EXP_$_sc='$REPLY'"
  done
}
# Print one "'<op>') REPLY=<prec>;;" arm per entry of an operator table.
#   $1  table prefix; entries are read from <prefix>_O_<i> and <prefix>_P_<i>
#   $2  number of entries
# Leaves the last operator/precedence read in _po / _pp.
_bnf_gen_emit_prec_rows () {
  local _gepr_i=1
  while test $_gepr_i -le $2; do
    eval "_po=\"\$${1}_O_$_gepr_i\""
    eval "_pp=\"\$${1}_P_$_gepr_i\""
    _printr1 " '$_po') REPLY=$_pp;;"
    _gepr_i=$((_gepr_i + 1))
  done
}
# Emit precedence climbing: _steal alias and _<grammar>_parser_prec function.
# Does nothing unless $_GPREC_RULE is set. The emitted function maps an
# operator (its $1) to a precedence in REPLY: binary operators first, then the
# ternary opener when $_GTERNARY is 1, then unary operators, and 0 otherwise.
_bnf_gen_emit_prec () {
  case "$_GPREC_RULE" in '') return 0;; esac
  _printr1 ""
  _printr1 "# Steal last sibling from parent, make it first child of current NODE"
  _printr1 "alias _steal='eval \"_W=\\\"\\\${X\$PARN##*\\\" \\\"}\\\""
  _printr1 " X\$PARN=\\\"\\\${X\$PARN% *}\\\""
  _printr1 " X\$NODE=\\\"\\\$X\$NODE \\\$_W\\\"\"'"
  _printr1 ""
  _printr1 "_${_GN}_parser_prec () {"
  _printr1 " case \"\$1\" in"
  _bnf_gen_emit_prec_rows _GPREC "$_GPREC_N"
  case "$_GTERNARY" in 1)
    _printr1 " '$_GTERNARY_OPEN') REPLY=$_GTERNARY_PREC;;";;
  esac
  _bnf_gen_emit_prec_rows _GUNARY "$_GUNARY_N"
  _printr1 " *) REPLY=0;;"
  _printr1 " esac"
  _printr1 "}"
}
| # Emit char-class accumulator fast paths. | |
| # Sets _ACCUM_STATES and _GLOB_VARS (read by caller via dynamic scoping). | |
| # For each rule in $_GR whose state code has an _ACCUM_<code> value of the | |
| # form "<quant>:<class>", append the code to _ACCUM_STATES, turn the class | |
| # into a "stop" glob, and (for quantifiers Bp/Bs only) print a case arm that | |
| # bulk-accumulates input up to the first stop character. | |
| # Uses _r, _rcode, _accum and _xc without declaring them local here. | |
| _bnf_gen_emit_fp_charclass () { | |
| local _ccquant _ccval _glob _ccraw _has_dash _dtmp _drem _c _ccesc | |
| local _glob_var _numval _closeseq _cs1 _cs2 _has_bracket _cs2pat | |
| _ACCUM_STATES= | |
| _GLOB_VARS= | |
| for _r in $_GR; do | |
| eval "_rcode=\$_RCODE_$_r" | |
| eval "_accum=\"\${_ACCUM_$_rcode:-}\"" | |
| case "$_accum" in '') continue;; esac | |
| _ACCUM_STATES="$_ACCUM_STATES $_rcode" | |
| _ccquant="${_accum%%:*}"; _ccval="${_accum#*:}" | |
| # Build the stop set: a class written ^xyz stops at any of xyz, while a | |
| # plain class stops at the first character outside it (hence the "!"). | |
| case "$_ccval" in | |
| '^'*) _ccraw="${_ccval#^}";; | |
| *) _ccraw="!${_ccval}";; | |
| esac | |
| # Remove backslash-dash and a trailing dash, remembering that a literal | |
| # dash was present so it can be re-added at the very end of the set. | |
| _has_dash= | |
| _strip_bsdash "$_ccraw" | |
| case "$REPLY" in "$_ccraw") ;; *) _has_dash=1; _ccraw="$REPLY";; esac | |
| case "$_ccraw" in *-) _has_dash=1; _ccraw="${_ccraw%-}";; esac | |
| _has_bracket= | |
| # Resolve remaining backslash escapes one at a time: \] and \- are only | |
| # recorded as flags, any other escaped character is kept without the | |
| # backslash. | |
| case "$_ccraw" in *'\'*) | |
| _dtmp=; _drem="$_ccraw" | |
| while :; do | |
| case "$_drem" in | |
| *'\'*) _dtmp="$_dtmp${_drem%%\\*}" | |
| _drem="${_drem#*\\}" | |
| _c="${_drem%"${_drem#?}"}"; _drem="${_drem#?}" | |
| case "$_c" in | |
| ']') _has_bracket=1;; | |
| '-') _has_dash=1;; | |
| *) _dtmp="$_dtmp$_c";; | |
| esac;; | |
| *) _dtmp="$_dtmp$_drem"; break;; esac | |
| done | |
| _ccraw="$_dtmp";; | |
| esac | |
| # Re-insert "]" as the first member (after a leading "!") and "-" as the | |
| # last member of the set. | |
| case "$_has_bracket" in 1) | |
| case "$_ccraw" in '!'*) _ccraw="!]${_ccraw#!}";; *) _ccraw="]$_ccraw";; esac;; esac | |
| case "$_has_dash" in 1) _ccraw="$_ccraw-";; esac | |
| _esc_dq_only "$_ccraw"; _ccesc="$REPLY"; _glob_var= | |
| # A set containing a single quote is referenced through a variable named | |
| # _<grammar>_gp_<code> instead of being written inline; the entry appended | |
| # to _GLOB_VARS has the form "<variable>=[<set>]*". | |
| case "$_ccesc" in *"'"*) | |
| _glob_var="_${_GN}_gp_$_rcode" | |
| _glob="\$$_glob_var" | |
| _GLOB_VARS="$_GLOB_VARS $_glob_var=[$_ccesc]*";; | |
| *) _glob="[$_ccesc]*";; | |
| esac | |
| # Number validation (_numck) is emitted only when _GVALNUM is 1 and the | |
| # rule is the number rule ($_GNUM) or is literally named "number". | |
| _numval= | |
| case "$_GVALNUM" in 1) | |
| case "$_r" in "$_GNUM"|number) _numval=1;; esac;; esac | |
| case "$_ccquant" in Bp|Bs) | |
| eval "_closeseq=\"\${_CLOSESEQ_$_rcode:-}\"" | |
| case "$_closeseq" in | |
| # Close sequence of two or more characters: only its first two | |
| # characters (_cs1, _cs2) are used in the emitted patterns. | |
| ??*) | |
| _cs1="${_closeseq%"${_closeseq#?}"}"; _cs2="${_closeseq#?}" | |
| _cs2="${_cs2%"${_cs2#?}"}" | |
| _printr1 " # $_r: accumulate [$_ccval], close on $_closeseq" | |
| _printr1 " $_rcode) ast_more; REST=\"\${CODE%%$_glob}\"" | |
| _printr1 " case \"\$REST\" in ?*) ast_bulk_nl;; *)" | |
| case "$_cs2" in | |
| '-') _cs2pat='[-]';; ']') _cs2pat='[]]';; *) _cs2pat="['${_cs2}']";; esac | |
| _printr1 " case \$CODE in '${_cs1}'${_cs2pat}*) ast_close;$_xc continue;; '${_cs1}'*)" | |
| _printr1 " ast_consume; continue;; *) ast_close;$_xc continue;; esac;; esac;;";; | |
| *) | |
| _printr1 " # $_r: accumulate [$_ccval]" | |
| _printr1 " $_rcode) ast_more; REST=\"\${CODE%%$_glob}\"" | |
| case "$_numval" in 1) | |
| _printr1 " case \"\$REST\" in ?*) ast_bulk;; *) _numck; ast_close;$_xc continue;; esac;;";; | |
| *) | |
| _printr1 " case \"\$REST\" in ?*) ast_bulk_nl;; *) ast_close;$_xc continue;; esac;;";; | |
| esac;; | |
| esac | |
| ;; esac | |
| done | |
| } | |
# Emit fast paths: extern handlers, string/number/char-class accumulators,
# whitespace skip, and comment handling.
#
# Prints, in order, into the generated parser loop:
#   1. a "case $STATE in" block containing
#      - one arm per extern rule in $_GEXTERN that has a state code,
#      - one arm per string accumulator (1..$_GSTR_N),
#      - the number accumulator arm when $_GNUM is set,
#      - the char-class accumulator arms (_bnf_gen_emit_fp_charclass),
#      - the whitespace-skip arm when $_GW is skip, token or line;
#   2. a comment-skip "case $CODE in" block when $_GCMT is set.
#
# _xc is shared with _bnf_gen_emit_fp_charclass through dynamic scoping.
_bnf_gen_emit_fast_paths () {
  local _ext _ext_code _sfc _ch _str_rule _str_code _str_close _str_esc _str_embed_stop
  local _r _rcode _accum _has_bracket _cs2pat _ws _sc _str_skip _xc
  # When precedence climbing is active, fast-path closures must set _XC=1
  # so the postfix/binary operator dispatch fires on the next iteration.
  case "$_GPREC_RULE" in ?*) _xc=' _XC=1; _PREV=;';; *) _xc=;; esac
  _printr1 ""
  _printr1 " # --- Fast paths (bulk accumulation) ---"
  _printr1 " case \$STATE in"
  # Extern rule handlers: call external function when state is entered
  for _ext in $_GEXTERN; do
    eval "_ext_code=\"\${_RCODE_$_ext:-}\""
    case "$_ext_code" in ?*)
      _printr1 " # $_ext: externally implemented"
      _printr1 " $_ext_code) _parse_${_ext}_$_GN; continue;;";;
    esac
  done
  # String accumulator fast paths (one per #!string directive)
  local _gef_si=1
  while test $_gef_si -le $_GSTR_N; do
    eval "_str_rule=\"\$_GSTR_RULE_$_gef_si\""
    eval "_str_code=\"\$_GSTR_CODE_$_gef_si\""
    eval "_str_close=\"\$_GSTR_CLOSE_$_gef_si\""
    eval "_str_esc=\"\${_GSTR_ESC_$_gef_si:-}\""
    eval "_str_embed_stop=\"\${_GSTR_EMBED_STOP_$_gef_si:-}\""
    _printr1 " # $_str_rule: accumulate (stops at close/esc)"
    _printn1 " $_str_code) case \$CODE in "
    # The closing delimiter is quoted with whichever quote it is not.
    case "$_str_close" in
      '"') _printn1 "'\"'*";;
      "'") _printn1 "\"'\"*";;
      *) _printn1 "'$_str_close'*";;
    esac
    case "$_str_esc" in json|simple)
      _printn1 "|'\\'*";;
    esac
    # One extra stop alternative per character of the embed-stop string.
    _sfc="$_str_embed_stop"
    while test ${#_sfc} -gt 0; do
      _ch="${_sfc%"${_sfc#?}"}"; _sfc="${_sfc#?}"
      case "$_ch" in '$') _printn1 "|'\$'*";; *) _printn1 "|'$_ch'*";; esac
    done
    _printr1 "|'') ;; *)"
    _printr1 " ast_more; REST=\"\${CODE%%\$_${_GN}_sg_${_str_code}}\"; ast_bulk_nl;; esac;;"
    _gef_si=$((_gef_si + 1))
  done
  # Number accumulator fast path
  case "$_GNUM" in ?*)
    _printr1 " $_GNUM_CODE) case \$CODE in [0-9.eE+-]*)"
    case "$_GVALNUM" in 1)
      _printr1 " ast_more; REST=\"\${CODE%%[!0-9.eE+-]*}\"; ast_bulk;;"
      _printr1 " *) _numck; ast_close;$_xc continue;; esac;;"
    ;; *)
      _printr1 " ast_more; REST=\"\${CODE%%[!0-9.eE+-]*}\"; ast_bulk;;"
      _printr1 " *) ast_close;$_xc continue;; esac;;"
    ;; esac
  ;; esac
  _bnf_gen_emit_fp_charclass
  # Whitespace skip (all states EXCEPT accumulators and wildcard-entry states)
  case "$_GW" in skip|token|line)
    _ws=
    for _sc in $_GST; do
      _gef_si=1; _str_skip=
      while test $_gef_si -le $_GSTR_N; do
        eval "_str_code=\"\$_GSTR_CODE_$_gef_si\""
        case "$_sc" in "$_str_code") _str_skip=1; break;; esac
        _gef_si=$((_gef_si + 1))
      done
      case "$_str_skip" in 1) continue;; esac
      case "$_GNUM" in ?*) case "$_sc" in "$_GNUM_CODE") continue;; esac;; esac
      case "$_ACCUM_STATES" in *" $_sc "*|*" $_sc") continue;; esac
      # Exclude wildcard-entry states from whitespace skip ONLY if they
      # are NOT auto-close states. Auto-close states need whitespace
      # skipped before the closing terminal can match.
      case "$_DA_WILD" in *" $_sc="*)
        case "$_GALT" in *"|$_sc|"*|"$_sc|"*|*"|$_sc") ;; *) continue;; esac;; esac
      case "$_GDONE" in *" $_sc "*|*" $_sc") continue;; esac
      _ws="${_ws}${_sc}|"
    done
    _ws="${_ws%\|}"
    # If every state was excluded there is nothing to label the arm with;
    # printing " )" would put an empty pattern (a syntax error) into the
    # generated parser, so the whole arm is omitted in that case.
    case "$_ws" in ?*)
      _printr1 " $_ws)"
      case "$_GW" in
        line)
          # Line-oriented grammars keep newlines significant.
          _printr1 " case \$CODE in ' '*|\"\$_TAB\"*)"
          _printr1 " ast_skip_ws; continue;; esac;;"
        ;;
        *)
          _printr1 " case \$CODE in ' '*|\"\$_TAB\"*|\"\$_EOL\"*)"
          _printr1 " ast_skip; continue;; esac;;"
        ;;
      esac
    ;; esac
  ;; esac
  _printr1 " esac"
  # Comment handling
  case "$_GCMT" in ?*)
    _printr1 ""
    case "$_GCMT_END" in
      '') # Line comment (skip to end of line)
        _printr1 " # Line comment skip"
        _printr1 " case \$CODE in '$_GCMT'*)"
        _printr1 " ast_cmt_line;;"
        _printr1 " esac";;
      *) # Block comment (skip until end delimiter)
        _printr1 " # Block comment skip"
        _printr1 " case \$CODE in '$_GCMT'*)"
        _printr1 " ast_cmt_block;;"
        _printr1 " esac";;
    esac
  ;; esac
}
| # Emit expression completion block (precedence climbing). | |
| # Handles postfix operators, binary operator peek, precedence comparison, | |
| # operator consumption, ternary operators, and close cascading. | |
| # Emits nothing unless $_GPREC_RULE is set. The emitted block only runs in | |
| # the generated parser when _XC is 1 and STATE is the expression rule, the | |
| # binary-op state, or (with $_GTERNARY=1) one of the ternary states. | |
| # Reads operator tables _GPREC_O_/P_/A_<i> (1..$_GPREC_N) and postfix tables | |
| # _GPOST_OPEN_/STATE_/INNER_/CLOSE_<i> (1..$_GPOST_N). | |
| _bnf_gen_emit_prec_climb () { | |
| case "$_GPREC_RULE" in ?*) | |
| _printr1 "" | |
| _printr1 " # --- Expression completion (precedence climbing) ---" | |
| local _gepc_xc="$_GPREC_RULE_CODE|$_GPREC_BIN" | |
| case "$_GTERNARY" in 1) _gepc_xc="$_gepc_xc|$_GTERNARY_STATE|$_GTERNARY_DONE";; esac | |
| _printr1 " case \$_XC in 1) _XC=0; case \$STATE in $_gepc_xc)" | |
| # Skip whitespace | |
| case "$_GW" in skip) | |
| _printr1 " case \"\$CODE\" in ' '*|\"\$_TAB\"*|\"\$_EOL\"*) ast_skip_wse;; esac";; | |
| esac | |
| # Postfix operator dispatch (before binary op peek) | |
| # Arms are emitted longest operator first so that a longer operator is | |
| # tried before any shorter operator that is a prefix of it. | |
| case "$_GPOST_N" in 0) ;; *) | |
| _printr1 " case \"\$CODE\" in" | |
| _list_find_max_len "_GPOST_OPEN" "$_GPOST_N" | |
| local _gepc_plen=$REPLY | |
| while test $_gepc_plen -gt 0; do | |
| local _gepc_i=1 | |
| while test $_gepc_i -le $_GPOST_N; do | |
| eval "_po=\"\$_GPOST_OPEN_$_gepc_i\"" | |
| eval "_pst=\"\$_GPOST_STATE_$_gepc_i\"" | |
| case ${#_po} in "$_gepc_plen") | |
| _cg_case_pat "$_po" | |
| eval "_pi=\"\${_GPOST_INNER_$_gepc_i:-}\"" | |
| eval "_pc=\"\${_GPOST_CLOSE_$_gepc_i:-}\"" | |
| case "$_pi$_pc" in | |
| '') # Close-less postfix (e.g. ++): steal, close, re-enter prec climbing | |
| _printr1 " ${REPLY}*) CODE=\"\${CODE#\"$_po\"}\"; _COL=\$((_COL+${#_po})); ast_$_pst; _steal; ast_close_xc;;";; | |
| *) _printr1 " ${REPLY}*) CODE=\"\${CODE#\"$_po\"}\"; _COL=\$((_COL+${#_po})); ast_$_pst; _steal; continue;;";; | |
| esac;; | |
| esac | |
| _gepc_i=$((_gepc_i + 1)) | |
| done | |
| _gepc_plen=$((_gepc_plen - 1)) | |
| done | |
| _printr1 " esac";; | |
| esac | |
| # Peek at operators — symbolic ops use char patterns, keyword ops use word peek | |
| # An operator counts as a keyword operator when it starts with a letter. | |
| _printr1 " _OP=" | |
| local _has_kw_ops= _has_sym_ops= _gepc_i=1 | |
| while test $_gepc_i -le $_GPREC_N; do | |
| eval "_po=\"\$_GPREC_O_$_gepc_i\"" | |
| case "$_po" in [a-zA-Z]*) _has_kw_ops=1;; *) _has_sym_ops=1;; esac | |
| _gepc_i=$((_gepc_i + 1)) | |
| done | |
| # Keyword operators: peek at word, uppercase, match | |
| case "$_has_kw_ops" in 1) | |
| _printr1 " case \"\$CODE\" in [a-zA-Z_]*)" | |
| _printr1 " ast_more; MATCH=\"\${CODE%%[!a-zA-Z0-9_]*}\"" | |
| _printr1 " _ucase \"\$MATCH\"" | |
| _printr1 " case \"\$REPLY\" in" | |
| _gepc_i=1 | |
| while test $_gepc_i -le $_GPREC_N; do | |
| eval "_po=\"\$_GPREC_O_$_gepc_i\"" | |
| eval "_pp=\"\$_GPREC_P_$_gepc_i\"" | |
| case "$_po" in [a-zA-Z]*) | |
| _printr1 " '$_po') _OP=\"$_po\"; _np=$_pp;;";; | |
| esac | |
| _gepc_i=$((_gepc_i + 1)) | |
| done | |
| _printr1 " esac;; esac";; | |
| esac | |
| # Symbolic operators: char-pattern peek (longest match first) | |
| # NOTE(review): the ternary opener arm below is only emitted inside this | |
| # symbolic-operator block, i.e. when at least one non-keyword binary | |
| # operator exists — confirm that is intended. | |
| case "$_has_sym_ops" in 1) | |
| _printr1 " case \"\$CODE\" in" | |
| _list_find_max_len "_GPREC_O" "$_GPREC_N" '[a-zA-Z]*' | |
| local _gepc_len=$REPLY | |
| while test $_gepc_len -gt 0; do | |
| _gepc_i=1 | |
| while test $_gepc_i -le $_GPREC_N; do | |
| eval "_po=\"\$_GPREC_O_$_gepc_i\"" | |
| eval "_pp=\"\$_GPREC_P_$_gepc_i\"" | |
| case "$_po" in [a-zA-Z]*) _gepc_i=$((_gepc_i + 1)); continue;; esac | |
| case ${#_po} in "$_gepc_len") | |
| _cg_case_pat "$_po" | |
| _printr1 " ${REPLY}*) _OP=\"$_po\"; _np=$_pp;;";; | |
| esac | |
| _gepc_i=$((_gepc_i + 1)) | |
| done | |
| _gepc_len=$((_gepc_len - 1)) | |
| done | |
| case "$_GTERNARY" in 1) | |
| _cg_case_pat "$_GTERNARY_OPEN" | |
| _printr1 " ${REPLY}*) _OP=\"$_GTERNARY_OPEN\"; _np=$_GTERNARY_PREC;;";; | |
| esac | |
| _printr1 " esac";; | |
| esac | |
| # If operator found: precedence comparison + consume | |
| local _gepc_ps="$_GPREC_BIN" | |
| case "$_GTERNARY" in 1) _gepc_ps="$_gepc_ps|$_GTERNARY_STATE|$_GTERNARY_DONE";; esac | |
| _printr1 " case \"\$_OP\" in ?*)" | |
| _printr1 " case \$STATE in $_gepc_ps)" | |
| _printr1 " _W=\"\${NODES##*\" \"}\"; eval \"_W=\\\"\\\${V\$_W:-}\\\"\"" | |
| _printr1 " _${_GN}_parser_prec \"\$_W\"; _cp=\$REPLY" | |
| # Build right-assoc check (includes ternary — always right-associative) | |
| # Right-associative operators close on _np < _cp, all others on _np <= _cp. | |
| local _right_ops= _gepc_i=1 | |
| while test $_gepc_i -le $_GPREC_N; do | |
| eval "_pa=\"\$_GPREC_A_$_gepc_i\"" | |
| eval "_po=\"\$_GPREC_O_$_gepc_i\"" | |
| case "$_pa" in right) _right_ops="$_right_ops|'$_po'";; esac | |
| _gepc_i=$((_gepc_i + 1)) | |
| done | |
| case "$_GTERNARY" in 1) | |
| _right_ops="$_right_ops|'$_GTERNARY_OPEN'";; esac | |
| case "$_right_ops" in ?*) | |
| _right_ops="${_right_ops#\|}" | |
| _printr1 " case \"\$_OP\" in $_right_ops) case \$((_np < _cp)) in 1) ast_close_xc;; esac;;" | |
| _printr1 " *) case \$((_np <= _cp)) in 1) ast_close_xc;; esac;; esac";; | |
| *) _printr1 " case \$((_np <= _cp)) in 1) ast_close_xc;; esac";; | |
| esac | |
| _printr1 " ;; esac" | |
| # Consume operator, create binary op node, steal previous atom | |
| case "$_has_kw_ops" in 1) | |
| _printr1 " case \"\$_OP\" in [A-Z]*) CONSUMED=\"\$_OP\"; ast_skip_match;;" | |
| _printr1 " *) ast_consume_op;; esac";; | |
| *) _printr1 " ast_consume_op";; | |
| esac | |
| case "$_GTERNARY" in 1) | |
| _printr1 " case \"\$_OP\" in '$_GTERNARY_OPEN') ast_$_GTERNARY_STATE; _steal; continue;; esac";; | |
| esac | |
| _printr1 " ast_$_GPREC_BIN; _steal; continue" | |
| _printr1 " ;; esac" | |
| # Not an operator: close binary/ternary op, or close expr | |
| case "$_GTERNARY" in 1) | |
| _printr1 " case \$STATE in $_GTERNARY_STATE) STATE=$_GTERNARY_COLON; continue;; esac" | |
| _printr1 " case \$STATE in $_GTERNARY_DONE) ast_close_xc;; esac";; esac | |
| _printr1 " case \$STATE in $_GPREC_BIN) ast_close_xc;; esac" | |
| _printr1 " case \$STATE in $_GPREC_RULE_CODE) ast_close; _PREV=; continue;; esac" | |
| _printr1 " ;; esac;; esac" | |
| ;; esac | |
| } | |
| # Emit the first part of the generated parser: the "use" prologue, token | |
| # aliases, a state-code legend (as comments), file-scope variables, the | |
| # precedence helper, and the opening of <grammar>_parser up to and including | |
| # the fast-path section of its main loop. | |
| # The generated function and its "while :; do" loop are left open here; the | |
| # matching "done" and "}" lines are printed by _bnf_gen_emit_eof. | |
| # Reads $_tok and $_rc, which are not set in this function. | |
| _bnf_gen_emit () { | |
| # --- Prologue --- | |
| _printr1 "use ast_core" | |
| case "$_GKW_LIST$_GPREC_RULE" in ?*) | |
| _printr1 "use ast_consume";; esac | |
| case "$_GPREC_RULE" in ?*) | |
| _printr1 "use ast_prec";; esac | |
| case "$_GCMT" in ?*) | |
| _printr1 "use ast_comment";; esac | |
| case "$_GKW_LIST" in ?*) | |
| _printr1 "use str_core";; esac | |
| _printr1 "" | |
| # Emit literal token aliases (no eval/ast_tokens — ksh93 can't nest eval) | |
| for _etk in $_tok; do | |
| _printr1 "alias ast_$_etk=\"ast_new;STATE=$_etk;ast_push\"" | |
| done | |
| # Emit state-code-to-rule-name mapping as comment | |
| _printr1 "" | |
| _printr1 "# State codes:" | |
| _printn1 "# $_GDC=_doc_" | |
| for _mr in $_GR; do | |
| eval "_mc=\$_RCODE_$_mr" | |
| _printn1 " $_mc=$_mr" | |
| done | |
| _printr1 "" | |
| # Also list continuation states | |
| # A state is listed here when _bnf_gen_code_to_name leaves REPLY empty for it. | |
| local _cont_list= | |
| for _sc in $_GST; do | |
| # Skip doc root and rule codes (already listed) | |
| case "$_sc" in "$_GDC") continue;; esac | |
| _bnf_gen_code_to_name "$_sc" | |
| case "$REPLY" in ?*) continue;; esac | |
| _cont_list="$_cont_list $_sc" | |
| done | |
| case "$_cont_list" in ?*) | |
| _printn1 "# cont:" | |
| for _sc in $_cont_list; do _printn1 " $_sc"; done | |
| _printr1 "" | |
| ;; esac | |
| _bnf_gen_emit_glob_vars | |
| _bnf_gen_emit_exp_tokens | |
| _bnf_gen_emit_prec | |
| _printr1 "" | |
| _printr1 "${_GN}_parser () {" | |
| # The local declaration of the generated parser is printed as one logical | |
| # line continued with trailing backslashes; optional groups are inserted | |
| # for precedence climbing and for block comments. | |
| _printr1 " local CODE= STATE=$_rc V=0 CONSUMED= STATES= NODES=\" 0\" X0=\"$_rc\" \\" | |
| _printr1 " NODE= PARN= PARNT= SIBL= REST= MATCH= _a= _W= _ST= _D= _C= _pq= \\" | |
| _printr1 " _EOF=0 _line= _PREV= _PLEN= _PLC=0 _JT=0 \\" | |
| case "$_GPREC_RULE" in ?*) | |
| _printr1 " _XC=0 _OP= _np=0 _cp=0 \\";; | |
| esac | |
| case "$_GCMT_END" in ?*) | |
| _printr1 " _CMT_S='$_GCMT' _CMT_SL=${#_GCMT} _CMT_E='$_GCMT_END' _CMT_EL=${#_GCMT_END} \\";; | |
| esac | |
| _printr1 " _LN=1 _COL=1 _RD=0" | |
| _printr1 "" | |
| _printr1 " while :; do" | |
| _printr1 " pars_progress" | |
| _printr1 " ast_feed" | |
| _bnf_gen_emit_prec_climb | |
| _bnf_gen_emit_fast_paths | |
| } | |
| # Emit the per-character part of the dispatch: open "case $CODE in" and, | |
| # for every code in $_DA_CODES that has entries in _DA_<code>, print a | |
| # character pattern followed by a nested "case $STATE in" holding | |
| #   1. the individual state=action entries (first entry per state wins), | |
| #   2. entries from $_DA_CLASS whose pattern also matches this character, | |
| #      for states not already covered, | |
| #   3. the wildcard fallback for the remaining states. | |
| # The "case $CODE in" opened here is not closed by this function. | |
| _bnf_gen_emit_dispatch () { | |
| local _cc _ch _entries _entry _emitted_st _st _act | |
| local _ind_states _clentry _clpat _clrest _clst _clact | |
| _printr1 "" | |
| _printr1 " # --- Character dispatch ---" | |
| _printr1 " case \$CODE in" | |
| # --- Emit merged dispatch for each character --- | |
| for _cc in $_DA_CODES; do | |
| eval "_ch=\"\$_DA_RAW_$_cc\"" | |
| eval "_entries=\"\${_DA_$_cc:-}\"" | |
| case "$_entries" in '') continue;; esac | |
| _printr1 "" | |
| _bnf_gen_emit_case_pat "$_ch" | |
| _printr1 " case \$STATE in" | |
| # Emit each state:action pair. | |
| # When multiple keyword actions (OPEN_KW with cont, OPEN_KWSKIP_CONT, SKIP_KW_CONT, | |
| # SKIP_KW_CLOSE) exist for the same state, merge them into a single keyword-check | |
| # block with multiple branches. For non-keyword duplicates, first action wins. | |
| _emitted_st= | |
| for _entry in $_entries; do | |
| _st="${_entry%%=*}" | |
| _act="${_entry#*=}" | |
| # Skip if this state was already emitted | |
| case "$_emitted_st" in *"|$_st|"*) continue;; esac | |
| _emitted_st="$_emitted_st|$_st|" | |
| _bnf_gen_emit_state_or_kw_merge "$_st" "$_act" "$_ch" | |
| done | |
| # Also include class dispatch entries for states not covered above. | |
| # This handles chars like 't' that match both individual (keyword) and | |
| # class ([a-zA-Z0-9_-]) patterns — states from the class pattern need | |
| # to be checked here since the individual pattern matches first. | |
| # _ind_states holds the handled states as " <state>=" tokens. | |
| _ind_states= | |
| for _entry in $_entries; do | |
| _ind_states="$_ind_states ${_entry%%=*}=" | |
| done | |
| # Each $_DA_CLASS entry is split as <pattern>=<state>=<action>. | |
| for _clentry in $_DA_CLASS; do | |
| _clpat="${_clentry%%=*}" | |
| case "$_ch" in ${_clpat}*) ;; *) continue;; esac | |
| _clrest="${_clentry#*=}" | |
| _clst="${_clrest%%=*}" | |
| _clact="${_clrest#*=}" | |
| # Skip if this state was already handled (individual or prior class) | |
| case "$_ind_states" in *" $_clst="*) continue;; esac | |
| _ind_states="$_ind_states $_clst=" | |
| _printn1 " $_clst)" | |
| _bnf_gen_emit_action "$_clact" | |
| done | |
| # Wildcard entries (from negated class FIRST — for states not yet handled) | |
| _bnf_gen_emit_wild_fallback "$_ind_states" " " "=" | |
| done | |
| } | |
| # Emit the class-based part of the dispatch and then the EOF/fallback tail. | |
| # When $_DA_CLASS is non-empty, the class patterns are clustered with | |
| # _bnf_gen_emit_cls_cluster and one "[<union>]*)" arm is printed per | |
| # cluster, containing the state actions of every pattern in that cluster | |
| # (first action per state wins) followed by the wildcard fallback. | |
| # _bnf_gen_emit_eof is always called last. | |
| # NOTE(review): $_gp and $_DA_CLASS are expanded unquoted in "for" lists, so | |
| # bracket patterns are subject to pathname expansion unless globbing is | |
| # disabled by the surrounding script — confirm. | |
| _bnf_gen_emit_dispatch2 () { | |
| # --- Class-based dispatch (after individual chars, before EOF) --- | |
| case "$_DA_CLASS" in ?*) | |
| _bnf_gen_emit_cls_cluster | |
| # Emit one branch per cluster | |
| local _gi=0 | |
| while test $_gi -lt $_cls_ngrp; do | |
| eval "_gp=\"\$_CLS_GRP_$_gi\"" | |
| # Build union pattern for this cluster | |
| # Strip dashes from inner patterns, collect them, add a single | |
| # trailing dash at the end to avoid creating invalid ranges | |
| # (e.g. [a-z_-0-9] has the invalid range _-0). | |
| local _grp_cls= _grp_done= _grp_dash= | |
| for _cpat in $_gp; do | |
| local _cls_inner="${_cpat#\[}"; _cls_inner="${_cls_inner%\]}" | |
| case "$_grp_done" in *"|$_cpat|"*) ;; *) | |
| _grp_done="$_grp_done|$_cpat|" | |
| # Strip \- (backslash-dash) FIRST, before trailing dash | |
| _strip_bsdash "$_cls_inner" | |
| case "$REPLY" in "$_cls_inner") ;; *) | |
| _grp_dash=1; _cls_inner="$REPLY";; esac | |
| # Strip leading/trailing dashes, track if any existed | |
| case "$_cls_inner" in | |
| -*) _grp_dash=1; _cls_inner="${_cls_inner#-}";; | |
| esac | |
| case "$_cls_inner" in | |
| *-) _grp_dash=1; _cls_inner="${_cls_inner%-}";; | |
| esac | |
| _grp_cls="$_grp_cls$_cls_inner";; | |
| esac | |
| done | |
| # Append a single trailing dash if any source pattern had one | |
| case "$_grp_dash" in 1) _grp_cls="$_grp_cls-";; esac | |
| # Escape " inside bracket expressions for ksh93 compat | |
| _esc_dq_only "$_grp_cls"; _grp_cls="$REPLY" | |
| _printr1 "" | |
| _printr1 " [$_grp_cls]*)" | |
| _printr1 " case \$STATE in" | |
| # _emitted_states holds the handled states as "|<state>|" tokens. | |
| local _emitted_states= | |
| for _clentry in $_DA_CLASS; do | |
| local _clpat2="${_clentry%%=*}" | |
| # Only include entries from patterns in this cluster | |
| local _in_grp=0 | |
| for _cpat in $_gp; do | |
| case "$_clpat2" in "$_cpat") _in_grp=1; break;; esac | |
| done | |
| case $_in_grp in 0) continue;; esac | |
| local _clrest="${_clentry#*=}" | |
| local _clst="${_clrest%%=*}" | |
| local _clact="${_clrest#*=}" | |
| case "$_emitted_states" in *"|$_clst|"*) continue;; esac | |
| _emitted_states="$_emitted_states|$_clst|" | |
| _bnf_gen_emit_state_action "$_clst" "$_clact" | |
| done | |
| _bnf_gen_emit_wild_fallback "$_emitted_states" "|" "|" | |
| _gi=$((_gi + 1)) | |
| done | |
| ;; esac | |
| _bnf_gen_emit_eof | |
| } | |
# Emit the tail of the generated parser: the end-of-input arm, the catch-all
# arm, and the lines that close the dispatch, the loop and the function.
#   - '' arm: auto-close states (_bnf_gen_emit_galt), the precedence states
#     when $_GPREC_BIN is set, "break" for state $_rc, _pars_err_eof otherwise.
#   - *  arm (only when $_DA_WILD or $_GALT is non-empty): one action per
#     $_DA_WILD entry, remaining auto-close states, _pars_err otherwise.
_bnf_gen_emit_eof () {
  local _geo_emitted _wentry _wst _wact
  # End of input.
  _printr1 ""
  _printr1 " '')"
  _printr1 " case \$STATE in"
  _bnf_gen_emit_galt ""
  if test -n "$_GPREC_BIN"; then
    _printr1 " $_GPREC_BIN) ast_close; continue;;"
    _printr1 " $_GPREC_RULE_CODE) ast_close; continue;;"
  fi
  _printr1 " $_rc) break;;"
  _printr1 " *) _pars_err_eof;;"
  _printr1 " esac;;"
  # Any other character.
  if test -n "$_DA_WILD$_GALT"; then
    _printr1 ""
    _printr1 " *)"
    _printr1 " case \$STATE in"
    _geo_emitted=
    for _wentry in $_DA_WILD; do
      # Entries have the form <state>=<action>.
      _wst=${_wentry%%=*}
      _wact=${_wentry#*=}
      _geo_emitted="${_geo_emitted}|${_wst}|"
      _printn1 " $_wst)"
      _bnf_gen_emit_action "$_wact"
    done
    _bnf_gen_emit_galt "$_geo_emitted"
    _printr1 " *) _pars_err;;"
    _printr1 " esac;;"
  fi
  # Close the dispatch case, the main loop and the parser function.
  _printr1 " esac"
  _printr1 " done"
  _printr1 ""
  _printr1 " ast_out"
  _printr1 "}"
}
# -- AST emitter generation --
# Finish the slot currently being accumulated without registering a
# non-terminal (for Bm boundaries): store $_tq_cur_slot as _tq_S<n>, append
# <n> to $_tq_slots, advance $_tq_slotcnt and start a new empty slot.
_bnf_gen_tq_close_slot () {
  _tq_slots="${_tq_slots} ${_tq_slotcnt}"
  eval "_tq_S${_tq_slotcnt}=\$_tq_cur_slot"
  _tq_cur_slot=
  _tq_slotcnt=$((_tq_slotcnt + 1))
}
# Register a non-terminal and finish the slot that precedes it.
#   $1  node ID of the non-terminal; appended to $_tq_nts
# Also increments $_tq_ntcnt.
_bnf_gen_tq_close_nt () {
  _tq_ntcnt=$((_tq_ntcnt + 1))
  _tq_nts="${_tq_nts} ${1}"
  _bnf_gen_tq_close_slot
}
| # Classify AST children into terminal slots and NTs for template emission. | |
| # Walks into Bq (sequence) wrappers. Bt → accumulate, other → close+register. | |
| # Arguments: the node IDs to classify. The node type is the first word of | |
| # X<id>; a terminal's text is V<id>. | |
| # The "for" list is expanded before the loop body runs, so re-setting the | |
| # positional parameters in the Bq branch does not disturb the iteration. | |
| # NOTE(review): the keyword test is a substring match against $_GKW_LIST, so | |
| # a terminal that merely occurs inside a longer keyword is lowercased too — | |
| # confirm this is intended. | |
| _bnf_gen_tq_classify () { | |
| for _tqc in "$@"; do | |
| eval "_tqc_t=\"\${X$_tqc%% *}\"" | |
| case "$_tqc_t" in | |
| Bt) eval "_v=\"\${V$_tqc:-}\"" | |
| case "$_GKW_CASE" in lower) case "$_GKW_LIST" in *"$_v"*) | |
| _lcase_str "$_v"; _v=$REPLY;; esac;; esac | |
| _bnf_gen_tq_accum_term "$_v";; | |
| Bq) eval "set -- \$X$_tqc"; shift; _bnf_gen_tq_classify "$@";; | |
| *) _bnf_gen_tq_close_nt "$_tqc";; | |
| esac | |
| done | |
| } | |
# Append a terminal's text to the slot being accumulated ($_tq_cur_slot).
#   $1  terminal text
# An empty slot simply becomes $1. Otherwise $1 is appended, preceded by a
# space when it starts with two letters (word-like terminals) and directly
# when it does not.
_bnf_gen_tq_accum_term () {
  if test -z "$_tq_cur_slot"; then
    _tq_cur_slot=$1
    return 0
  fi
  case "$1" in
    [a-zA-Z][a-zA-Z]*) _tq_cur_slot="$_tq_cur_slot $1";;
    *) _tq_cur_slot="$_tq_cur_slot$1";;
  esac
}
# Fetch slot <$1> (variable _tq_S<$1>) into _s and pass it to _esc_dq so it
# can be embedded in a double-quoted string.
#   $1  slot index
# Result in REPLY.
_bnf_gen_prep_slot () {
  eval "_s=\$_tq_S$1"
  _esc_dq "$_s"
}
# Compute the inter-NT slot separator: _bnf_gen_slot_str followed by _esc_dq,
# in one step.
#   $1  slot index
#   $2  node ID of the non-terminal that follows the slot
# Result in REPLY.
_bnf_gen_prep_inter_slot () {
  _bnf_gen_slot_str "$1" "$2"
  _esc_dq "$REPLY"
}
# Helper: determine the text placed between two consecutive non-terminals.
#   $1  slot index (slot text is read from _tq_S<$1>)
#   $2  node ID of the non-terminal that follows the slot
# Result in REPLY:
#   - slot ":"                       -> ": "
#   - slot "=" while $_GCMT is set   -> " = "
#   - any other non-empty slot       -> the slot text unchanged
#   - empty slot                     -> "" when the next node is a Bi reference
#     to a rule whose accumulator is a negated class (B?:^...), " " otherwise
_bnf_gen_slot_str () {
  local _ss_text= _ss_kind _ss_rule _ss_code _ss_acc
  eval "_ss_text=\"\$_tq_S$1\""
  # Slots that carry terminal text are answered immediately.
  case "$_ss_text" in
    '') ;;
    ':') REPLY=": "; return;;
    '=') case "$_GCMT" in '') REPLY="=";; *) REPLY=" = ";; esac; return;;
    *) REPLY="$_ss_text"; return;;
  esac
  # Empty slot: default to a single space unless the next NT is a reference
  # to a negated-class accumulator rule.
  REPLY=" "
  eval "_ss_kind=\"\${X$2%% *}\""
  case "$_ss_kind" in Bi) ;; *) return;; esac
  eval "_ss_rule=\"\${V$2:-}\""
  eval "_ss_code=\"\${_RCODE_$_ss_rule:-}\""
  eval "_ss_acc=\"\${_ACCUM_$_ss_code:-}\""
  case "$_ss_acc" in B?:'^'*) REPLY="";; esac
}
| # Build slot structure for a sequence rule's emitter. | |
| # $1=body_node (Bq AST node), $2=rcode (state code for prefix lookup). | |
| # Sets: _tq_slots, _tq_nts, _tq_ntcnt, _tq_has_bm, _tq_bm_pos, | |
| # _tq_pre_bm, _tq_post_bm, _tq_bm_sep, _tq_opt_start, | |
| # _tq_slotcnt, _tq_S<i> (slot terminal strings). | |
| # The positional parameters are repeatedly re-set to inspect child lists; the | |
| # outer "for" list was expanded before the loop started, so the iteration | |
| # itself is unaffected. | |
| _bnf_gen_tq_build () { | |
| local _tqb_body=$1 _tqb_rcode=$2 | |
| eval "set -- \$X$_tqb_body"; shift | |
| _tq_slots= _tq_nts= _tq_ntcnt=0 _tq_has_bm=0 _tq_bm_pos=0 | |
| _tq_cur_slot= _tq_slotcnt=0 | |
| _tq_pre_bm=0 _tq_post_bm=0 | |
| _tq_bm_sep= _tq_opt_start= | |
| for _mid in "$@"; do | |
| eval "_ch_t=\"\${X$_mid%% *}\"" | |
| case "$_ch_t" in | |
| Bt) eval "_v=\"\${V$_mid:-}\"" | |
| case "$_GKW_CASE" in lower) case "$_GKW_LIST" in *"$_v"*) | |
| _lcase_str "$_v"; _v=$REPLY;; esac;; esac | |
| _bnf_gen_tq_accum_term "$_v";; | |
| Bm) ;; # handled below | |
| Bo|Ba) # Check if wraps a Bm (e.g., [ member { ',' member } ]) | |
| # Searches the direct children, and the children of any Bq child, | |
| # for the first Bm node. | |
| _tq_found_bm= | |
| eval "set -- \$X$_mid"; shift | |
| for _inner in "$@"; do | |
| eval "_it=\"\${X$_inner%% *}\"" | |
| case "$_it" in | |
| Bm) _tq_found_bm=$_inner; break;; | |
| Bq) eval "set -- \$X$_inner"; shift | |
| for _iq in "$@"; do | |
| eval "_iqt=\"\${X$_iq%% *}\"" | |
| case "$_iqt" in Bm) _tq_found_bm=$_iq; break 2;; esac | |
| done;; | |
| esac | |
| done | |
| eval "set -- \$X$_tqb_body"; shift # restore | |
| case "$_tq_found_bm" in | |
| ?*) _mid=$_tq_found_bm; _ch_t=Bm;; | |
| *) # Optional: walk children as normal slots/NTs | |
| # _tq_opt_start records the NT index at which the first group | |
| # that contributed at least one NT begins; it is set only once. | |
| local _pre_opt_ntcnt=$_tq_ntcnt | |
| eval "set -- \$X$_mid"; shift | |
| _bnf_gen_tq_classify "$@" | |
| case "$_tq_opt_start" in '') case $((_tq_ntcnt - _pre_opt_ntcnt)) in | |
| 0) ;; *) _tq_opt_start=$_pre_opt_ntcnt;; esac;; esac | |
| eval "set -- \$X$_tqb_body"; shift # restore | |
| continue;; | |
| esac;; | |
| esac | |
| # Process Bm (direct or found inside Bo/Ba) | |
| case "$_ch_t" in Bm) | |
| _bnf_gen_tq_close_slot | |
| _tq_has_bm=1; _tq_bm_pos=$_tq_ntcnt | |
| _tq_pre_bm=$_tq_ntcnt | |
| # Extract Bm separator (if inner is seq starting with terminal) | |
| eval "set -- \$X$_mid"; shift | |
| _tq_bm_inner=$1 | |
| eval "_ch_t=\"\${X$_tq_bm_inner%% *}\"" | |
| case "$_ch_t" in Bq) | |
| eval "set -- \$X$_tq_bm_inner"; shift | |
| eval "_ch_t=\"\${X$1%% *}\"" | |
| case "$_ch_t" in Bt) eval "_tq_bm_sep=\"\${V$1:-}\"";; esac;; | |
| esac | |
| eval "set -- \$X$_tqb_body"; shift # restore | |
| continue;; | |
| esac | |
| # Non-terminal (Bi, etc.) that isn't a Bm wrapper | |
| case "$_ch_t" in Bt) ;; *) _bnf_gen_tq_close_nt "$_mid";; esac | |
| done | |
| # Final slot (trailing terminals after last NT) | |
| # It is stored at index $_tq_slotcnt without incrementing the counter or | |
| # extending $_tq_slots. | |
| eval "_tq_S$_tq_slotcnt=\"\$_tq_cur_slot\"" | |
| case $_tq_has_bm in 1) _tq_post_bm=$((_tq_ntcnt - _tq_pre_bm));; esac | |
| # Normalize keyword slots: add trailing space (so "SELECT"+ident → "SELECT a") | |
| # and leading space for inter-NT slots (so ident+"FROM"+ident → "a FROM t") | |
| # NOTE(review): the loop covers indices below $_tq_slotcnt only, so the | |
| # final slot stored above is not normalized — confirm this is intended. | |
| _si=0 | |
| while test $_si -lt $_tq_slotcnt; do | |
| eval "_sv=\"\${_tq_S$_si:-}\"" | |
| case "$_sv" in *[a-zA-Z][a-zA-Z]) eval "_tq_S$_si=\"\$_sv \"";; esac | |
| case $_si in 0) ;; *) | |
| eval "_sv=\"\${_tq_S$_si:-}\"" | |
| case "$_sv" in [a-zA-Z]*) eval "_tq_S$_si=\" \$_sv\"";; esac;; | |
| esac | |
| _si=$((_si + 1)) | |
| done | |
| # Prepend inherited prefix to slot 0 | |
| eval "_pfx=\"\${_EMIT_PREFIX_$_tqb_rcode:-}\"" | |
| eval "_tq_S0=\"\$_pfx\$_tq_S0\"" | |
| } | |
| # Try to propagate terminal prefix from rule $_r to its collapsed target. | |
| # Sets _pfx_changed=1 if any prefix was updated (visible to caller). | |
| # Applies only when the rule body is a sequence (Bq) made of one or more | |
| # leading terminals followed by exactly one rule reference (Bi) and nothing | |
| # else. The rule's own inherited prefix plus the leading terminal text is | |
| # then stored as _EMIT_PREFIX_<code> of the referenced rule or, when that | |
| # rule's body is an alternation (Ba), of each Bi alternative. | |
| # Reads $_r from the caller; takes no positional arguments. | |
| _bnf_gen_propagate_prefix_for_rule () { | |
| eval "_rcode=\$_RCODE_$_r; _node=\$_RNODE_$_r" | |
| eval "set -- \$X$_node"; shift; _body_node=$1 | |
| eval "_bt=\"\${X$_body_node%% *}\"" | |
| case "$_bt" in Bq) ;; *) return;; esac | |
| eval "set -- \$X$_body_node"; shift | |
| # Collect leading terminals + single trailing NT ref (no trailing elements) | |
| local _lead= _rest_ref= _has_trail= | |
| for _ch in "$@"; do | |
| eval "_ch_t=\"\${X$_ch%% *}\"" | |
| case "$_ch_t" in | |
| Bt) case "$_rest_ref" in | |
| '') eval "_lead=\"\$_lead\${V$_ch:-}\"";; | |
| *) _has_trail=1;; | |
| esac;; | |
| Bi) case "$_rest_ref" in '') _rest_ref="$_ch";; *) _has_trail=1; break;; esac;; | |
| *) case "$_rest_ref" in '') ;; *) _has_trail=1;; esac; break;; | |
| esac | |
| done | |
| case "$_has_trail" in 1) return;; esac | |
| case "$_lead" in '') return;; esac | |
| case "$_rest_ref" in '') return;; esac | |
| # Skip rules with no-collapse done states | |
| # NOTE(review): this test matches " <code>" as a prefix of any longer | |
| # token in $_GNOCOL (no trailing delimiter is checked), unlike the | |
| # "* <code> *|* <code>" form used for other state lists — confirm. | |
| eval "_done_code=\"\${_RCODE__done_$_rcode:-}\"" | |
| case "$_done_code" in ?*) | |
| case "$_GNOCOL" in *" $_done_code"*) return;; esac;; esac | |
| eval "_ref_name=\"\${V$_rest_ref:-}\"" | |
| eval "_ref_code=\$_RCODE_$_ref_name" | |
| eval "_ref_node=\$_RNODE_$_ref_name" | |
| eval "set -- \$X$_ref_node"; shift; local _ref_body=$1 | |
| eval "_rbt=\"\${X$_ref_body%% *}\"" | |
| # Check own prefix — skip if cycling | |
| # (the rule's own prefix already contains this lead text) | |
| eval "_own_pfx=\"\${_EMIT_PREFIX_$_rcode:-}\"" | |
| case "$_own_pfx" in *"$_lead"*) return;; esac | |
| _total_pfx="$_own_pfx$_lead" | |
| # If target is alternation: propagate to each alternative | |
| case "$_rbt" in Ba) | |
| eval "set -- \$X$_ref_body"; shift | |
| for _alt in "$@"; do | |
| eval "_alt_t=\"\${X$_alt%% *}\"" | |
| case "$_alt_t" in Bi) | |
| eval "_alt_name=\"\${V$_alt:-}\"" | |
| eval "_alt_code=\$_RCODE_$_alt_name" | |
| # Never propagate a rule's prefix back onto itself. | |
| case "$_alt_code" in "$_rcode") continue;; esac | |
| eval "_cur=\"\${_EMIT_PREFIX_$_alt_code:-}\"" | |
| case "$_cur" in "$_total_pfx"*) ;; *) | |
| eval "_EMIT_PREFIX_$_alt_code=\"\$_total_pfx\"" | |
| _pfx_changed=1;; | |
| esac;; | |
| esac | |
| done;; | |
| Bi) # Single ref: propagate directly | |
| eval "_cur=\"\${_EMIT_PREFIX_$_ref_code:-}\"" | |
| case "$_cur" in "$_total_pfx"*) ;; *) | |
| eval "_EMIT_PREFIX_$_ref_code=\"\$_total_pfx\"" | |
| _pfx_changed=1;; | |
| esac;; | |
| esac | |
| } | |
# Emit the template-based emitter case arm for a rule whose body is a
# sequence (Bq).
#
# Inputs (caller scope): _rcode, _body_node, _GN, _GCMT.
# Calls _bnf_gen_tq_build first and then reads the _tq_* variables it is
# expected to populate (_tq_has_bm, _tq_ntcnt, _tq_nts, _tq_opt_start,
# _tq_slotcnt, _tq_bm_sep, _tq_pre_bm, _tq_post_bm, _tq_S0).
# Output: one generated `case` arm for $_rcode, printed via _printr1.
# Scratch variables (_s, _e0, _e1, _se, _ef, _si, _ch_t, _tq_nt1,
# _tq_is_accum, _final_slot, _bm_sfmt) are not declared here; the visible
# caller (_bnf_gen_emit_ast) declares them local.
#
# "shift" appears at two levels. A bare `shift` statement advances this
# generator's own copy of the NT list (set -- $_tq_nts). A "shift" inside a
# _printr1 string is written into the generated emitter and advances the AST
# node's child list when that emitter runs.
_bnf_gen_emit_seq () {
    _bnf_gen_tq_build "$_body_node" "$_rcode"
    case $_tq_has_bm in
    0)  # Fixed-arity sequence: positional template emit
        case $_tq_ntcnt in
        0)  # All terminals (e.g., cdata_kw = 'C' 'D' 'A' 'T' 'A')
            eval "_s=\"\$_tq_S0\""
            _esc_dq "$_s"; _printr1 " $_rcode) REPLY=\"$REPLY\";;";;
        1)  # Single NT: check if accumulator
            set -- $_tq_nts; _tq_nt1=$1
            eval "_ch_t=\"\${X$_tq_nt1%% *}\""
            _tq_is_accum=0
            case "$_ch_t" in Bc|Bp|Bs|Bk) _tq_is_accum=1;; esac
            _bnf_gen_prep_slot 0; _e0="$REPLY"
            _bnf_gen_prep_slot 1; _e1="$REPLY"
            case $_tq_is_accum in
            1)  # Accumulator child: the node's own value ($_v) is the text
                _printr1 " $_rcode) REPLY=\"${_e0}\$_v${_e1}\";;";;
            *)  case "$_tq_opt_start" in
                0)  # The single NT is optional: tolerate a childless node
                    _printr1 " $_rcode) case \$# in 0) REPLY=\"${_e0}${_e1}\";; *) _${_GN}_unast_emit \"\$1\"; REPLY=\"${_e0}\${REPLY}${_e1}\";; esac;;";;
                *)  _printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; REPLY=\"${_e0}\${REPLY}${_e1}\";;";;
                esac;;
            esac;;
        *)  # N>1 NTs: sequential positional emit
            set -- $_tq_nts
            _bnf_gen_prep_slot 0; _e0="$REPLY"
            case "$_tq_opt_start" in
            '') # All NTs required: pure positional
                _printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; _r=\"${_e0}\$REPLY\""
                _si=1; shift
                while test $# -gt 0; do
                    _bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
                    case $# in
                    1)  # Last NT: append the trailing slot and close the arm
                        _bnf_gen_prep_slot $((_si + 1)); _ef="$REPLY"
                        _printr1 " _${_GN}_unast_emit \"\$$((_si + 1))\"; REPLY=\"\${_r}${_se}\${REPLY}${_ef}\";;";;
                    *)  _printr1 " _${_GN}_unast_emit \"\$$((_si + 1))\"; _r=\"\${_r}${_se}\$REPLY\"";;
                    esac
                    _si=$((_si + 1)); shift
                done;;
            *)  # Has optional NTs from index _tq_opt_start
                case "$_tq_opt_start" in
                0)  # ALL NTs are optional — S0 already in _r
                    _printr1 " $_rcode) _r=\"$_e0\""
                    _printr1 " case \$# in 0) ;; *) _${_GN}_unast_emit \"\$1\"; _r=\"\${_r}\$REPLY\"; shift;; esac"
                    # The first NT was handled by the line just printed, so
                    # drop it from the generator's list as the sibling
                    # branches do. Without this shift the loop below paired
                    # slot 1 with NT1 (instead of NT2) and emitted one extra
                    # optional-child line.
                    _si=1; shift;;
                *)  # Some required, then optional
                    _printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; _r=\"${_e0}\$REPLY\""
                    _si=1; shift
                    while test $_si -lt $_tq_opt_start; do
                        _bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
                        _printr1 " _${_GN}_unast_emit \"\$$((_si + 1))\"; _r=\"\${_r}${_se}\$REPLY\""
                        _si=$((_si + 1)); shift
                    done
                    # Generated code: discard the required children consumed above
                    _printr1 " shift $_tq_opt_start";;
                esac
                _printr1 " _si=$_si"
                # One guarded line per remaining (optional) NT
                while test $# -gt 0; do
                    _bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
                    _printr1 " case \$# in 0) ;; *) _${_GN}_unast_emit \"\$1\"; _r=\"\${_r}${_se}\$REPLY\"; shift;; esac"
                    _si=$((_si + 1)); shift
                done
                _bnf_gen_prep_slot $_tq_slotcnt; _ef="$REPLY"
                _printr1 " REPLY=\"\${_r}${_ef}\";;";;
            esac;;
        esac;;
    1)  # Variable-arity: has embedded Bm (shift-based emit)
        _bnf_gen_prep_slot 0; _e0="$REPLY"
        _bnf_gen_prep_slot $_tq_slotcnt; _final_slot="$REPLY"
        # Separator text placed between repeated items; defaults to one space
        _bm_sfmt=" "
        case "${_tq_bm_sep:-}" in ?*)
            case "$_tq_bm_sep" in
            '=') case "$_GCMT" in ?*) _bm_sfmt=" $_tq_bm_sep ";; *) _bm_sfmt="$_tq_bm_sep";; esac;;
            ':') _bm_sfmt="$_tq_bm_sep ";;
            *)   _bm_sfmt="$_tq_bm_sep";;
            esac;;
        esac
        _esc_dq "$_bm_sfmt"; _bm_sfmt="$REPLY"
        case $_tq_post_bm in
        0)  # No NTs after the repetition: join every child
            _printr1 " $_rcode) _r=\"$_e0\""
            _printr1 " for _ch in \"\$@\"; do"
            _printr1 " case \"\$_r\" in \"$_e0\") ;; *) _r=\"\$_r$_bm_sfmt\";; esac"
            _printr1 " _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\""
            _printr1 " done; REPLY=\"\$_r$_final_slot\";;";;
        *)  set -- $_tq_nts
            case $_tq_pre_bm in
            0)  _printr1 " $_rcode) _r=\"$_e0\"";;
            *)  # Emit the NTs that precede the repetition positionally
                _printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; _r=\"${_e0}\$REPLY\"; shift"
                _si=1; shift
                while test $_si -lt $_tq_pre_bm; do
                    _bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
                    _printr1 " _${_GN}_unast_emit \"\$1\"; _r=\"\${_r}${_se}\$REPLY\"; shift"
                    _si=$((_si + 1)); shift
                done;;
            esac
            # Generated code: consume repeated items until only the
            # post-Bm children remain
            _printr1 " while test \$# -gt $_tq_post_bm; do"
            _printr1 " case \"\$_r\" in \"$_e0\") ;; *) _r=\"\$_r$_bm_sfmt\";; esac"
            _printr1 " _${_GN}_unast_emit \"\$1\"; _r=\"\$_r\$REPLY\"; shift"
            _printr1 " done"
            # Emit intermediate slot between Bm items and post-Bm NTs
            _bnf_gen_prep_slot $((_tq_pre_bm + 1)); _se="$REPLY"
            case "$_se" in ?*)
                _printr1 " _r=\"\$_r$_se\"";; esac
            _printr1 " _${_GN}_unast_emit \"\$1\"; REPLY=\"\$_r\${REPLY}$_final_slot\";;";;
        esac;;
    esac
}
# Emit the body of the "unast" module: the AST-to-source emitter.
#
# Steps:
#   1. Run _bnf_gen_propagate_prefix_for_rule over every rule in $_GR until
#      no prefix changes are reported (capped at 20 passes).
#   2. Print _<name>_unast_emit, a function that dispatches on the node type
#      with one `case` arm per non-extern rule, plus arms for the binary,
#      postfix and ternary operator states when those are configured.
#   3. Print the _<name>_unast_emit_root wrapper and the <name>_unast entry
#      point.
#
# Reads caller/global state: _GN, _GR, _GDC, _GEXTERN, _GNUM, _GSTR_N,
# _GSTR_RULE_<i>, _GSTR_CLOSE_<i>, _GPREC_BIN, _GPOST_N, _GPOST_*_<i>,
# _GTERNARY*, _RCODE_<rule>, _RNODE_<rule>, X<node>, V<node>.
# All output goes through _printr1.
_bnf_gen_emit_ast () {
    # --- Compute emitter prefix chains for collapsed intermediates ---
    # Propagate terminal prefixes from sequences (e.g., '(' value ')') down to
    # the collapsed child rule, so the emitter reconstructs leading terminals.
    local _pfx_changed=1 _pfx_pass=0
    local _r _rcode _node _body_node _bt _is_str _si _str_close _str_rule _mid _ch_t
    local _ba_tval _ba_tcnt _ba_esc _bm_sep _bm_inner _bm_inner_t _bm_first_t
    local _tq_nt1 _e0 _e1 _tq_is_accum _se _ef _final_slot _bm_sfmt _s
    # _pi2 is assigned in the postfix loop below; declare it so it does not
    # leak into the global scope.
    local _pi _pst _po _pc _pi2
    while test $_pfx_changed -eq 1; do
        _pfx_changed=0; _pfx_pass=$((_pfx_pass + 1))
        for _r in $_GR; do _bnf_gen_propagate_prefix_for_rule; done
        # Safety cap: stop after 20 passes even if changes are still reported
        case $_pfx_pass in 20) break;; esac
    done
    # --- Emitter function ---
    _printr1 ""
    _printr1 "# --- Emitter (AST to source reconstruction) ---"
    _printr1 ""
    _printr1 "_${_GN}_unast_emit () {"
    _printr1 " local _n=\$1 _t _v _r _ch"
    _printr1 " IFS=' '; eval \"set -- \\\$X\$_n\"; IFS=''"
    _printr1 " _t=\$1; shift"
    _printr1 " eval \"_v=\\\"\\\${V\$_n:-}\\\"\""
    _printr1 ""
    _printr1 " case \"\$_t\" in"
    # Document root: emit all children (newline-join for line-oriented grammars)
    _bnf_gen_emit_child_loop "$_GDC" ""
    # Generate emitter case for each rule based on its grammar structure
    for _r in $_GR; do
        # Skip extern rules — emitter provided externally
        # NOTE(review): this is a substring match, so a rule whose name is a
        # prefix of an extern rule name would also be skipped — confirm the
        # _GEXTERN list format before tightening.
        case "$_GEXTERN" in *" $_r"*) continue;; esac
        eval "_rcode=\$_RCODE_$_r; _node=\$_RNODE_$_r"
        eval "set -- \$X$_node"; shift
        _body_node=$1
        eval "_bt=\"\${X$_body_node%% *}\""
        # String accumulator: emit value wrapped in configured delimiter
        _is_str= _si=1
        while test $_si -le $_GSTR_N; do
            eval "_str_rule=\"\$_GSTR_RULE_$_si\""
            case "$_r" in "$_str_rule") _is_str=$_si; break;; esac
            _si=$((_si + 1))
        done
        case "$_is_str" in ?*)
            eval "_str_close=\"\$_GSTR_CLOSE_$_is_str\""
            case "$_str_close" in
            '"') _printr1 " $_rcode) REPLY=\"\\\"\$_v\\\"\";;";;
            "'") _printr1 " $_rcode) REPLY=\"'\$_v'\";;";;
            *)  # Any other delimiter is pasted inside a generated
                # double-quoted string; escape it first (same helper used for
                # terminal text below) so characters such as a backtick, '$'
                # or a backslash cannot alter the generated code.
                _esc_dq "$_str_close"; _str_close="$REPLY"
                _printr1 " $_rcode) REPLY=\"$_str_close\$_v$_str_close\";;";;
            esac
            continue;; esac
        # Number accumulator: emit raw value
        case "$_r" in "$_GNUM")
            _printr1 " $_rcode) REPLY=\"\$_v\";;"
            continue;; esac
        case "$_bt" in
        Bc|Bp|Bs|Bk) # Character class accumulator: emit raw value
            _printr1 " $_rcode) REPLY=\"\$_v\";;"
            continue;;
        Bi|Bo) # Single-ref, optional: delegate to child, or emit V for keywords
            _printr1 " $_rcode) case \$# in 0) REPLY=\"\$_v\";; *) _${_GN}_unast_emit \"\$1\";; esac;;";;
        Ba) # Alternation: scan for a terminal-only branch as fallback
            _ba_tval= _ba_tcnt=0
            eval "set -- \$X$_body_node"; shift
            for _mid in "$@"; do
                eval "_ch_t=\"\${X$_mid%% *}\""
                case "$_ch_t" in Bt)
                    eval "_ba_tval=\"\${V$_mid:-}\""
                    _ba_tcnt=$((_ba_tcnt + 1));;
                esac
            done
            case "$_ba_tcnt" in
            1)  # Single terminal branch: fallback to its value when $_v is empty
                _esc_dq "$_ba_tval"; _ba_esc="$REPLY"
                _printr1 " $_rcode) case \$# in 0) case \"\$_v\" in ?*) REPLY=\"\$_v\";; *) REPLY=\"$_ba_esc\";; esac;; 1) _${_GN}_unast_emit \"\$1\";; *) _r=; for _ch in \"\$@\"; do _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\"; done; REPLY=\"\$_r\";; esac;;";;
            *)  _printr1 " $_rcode) case \$# in 0) REPLY=\"\$_v\";; 1) _${_GN}_unast_emit \"\$1\";; *) _r=; for _ch in \"\$@\"; do _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\"; done; REPLY=\"\$_r\";; esac;;";;
            esac;;
        Bm) # Repetition: detect separator from inner grammar and join children
            # The Bm body contains the repetition content.
            # If it's a sequence starting with a terminal (e.g., { ',' item }),
            # that terminal is the separator for joining children.
            _bm_sep=
            eval "set -- \$X$_body_node"; shift # get Bm's children
            _bm_inner=$1
            eval "_bm_inner_t=\"\${X$_bm_inner%% *}\""
            case "$_bm_inner_t" in Bq)
                # Inner is a sequence — check if first element is a terminal
                eval "set -- \$X$_bm_inner"; shift
                eval "_bm_first_t=\"\${X$1%% *}\""
                case "$_bm_first_t" in Bt)
                    eval "_bm_sep=\"\${V$1:-}\"";;
                esac;;
            esac
            _bnf_gen_emit_child_loop "$_rcode" "$_bm_sep";;
        Bq) _bnf_gen_emit_seq;;
        Bt) _printr1 " $_rcode) REPLY=\"\$_v\";;";;
        esac
    done
    # Binary operator emitter for precedence climbing
    case "$_GPREC_BIN" in ?*)
        _printr1 " $_GPREC_BIN) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY\""
        _printr1 " _${_GN}_unast_emit \"\$2\"; REPLY=\"\$_r\$_v\$REPLY\";;"
        ;; esac
    # Postfix operator emitters
    _pi=1
    while test $_pi -le $_GPOST_N; do
        eval "_pst=\"\$_GPOST_STATE_$_pi\""
        eval "_po=\"\$_GPOST_OPEN_$_pi\""
        eval "_pc=\"\${_GPOST_CLOSE_$_pi:-}\""
        case "$_pc" in
        ?*)
            # Bracket postfix: child1 is LHS, rest are args — emit LHS<open>args<close>
            _printr1 " $_pst) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY$_po\"; shift"
            _printr1 " local _sep="
            _printr1 " for _ch in \"\$@\"; do"
            _printr1 " case \"\$_sep\" in ?*) _r=\"\$_r, \";; esac"
            _printr1 " _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\"; _sep=1"
            _printr1 " done; REPLY=\"\$_r$_pc\";;";;
        *)
            eval "_pi2=\"\${_GPOST_INNER_$_pi:-}\""
            case "$_pi2" in
            '') # Close-less postfix with no inner (e.g. ++): child1 is LHS — emit LHS<op>
                _printr1 " $_pst) _${_GN}_unast_emit \"\$1\"; REPLY=\"\$REPLY$_po\";;";;
            *)  # Simple postfix: child1 is LHS, child2 is inner — emit LHS<open>inner
                _printr1 " $_pst) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY$_po\""
                _printr1 " _${_GN}_unast_emit \"\$2\"; REPLY=\"\$_r\$REPLY\";;";;
            esac;;
        esac
        _pi=$((_pi + 1))
    done
    # Ternary: child1 is condition, child2 is true branch, child3 is false branch
    case "$_GTERNARY" in 1)
        _printr1 " $_GTERNARY_STATE) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY$_GTERNARY_OPEN\""
        _printr1 " _${_GN}_unast_emit \"\$2\"; _r=\"\$_r\$REPLY$_GTERNARY_SEP\""
        _printr1 " _${_GN}_unast_emit \"\$3\"; REPLY=\"\$_r\$REPLY\";;";;
    esac
    # Fallback arm: make unknown node types visible in the output
    _printr1 " *) REPLY=\"??\${_t}??\";;"
    _printr1 " esac"
    _printr1 "}"
    _printr1 ""
    _printr1 "_${_GN}_unast_emit_root () { _${_GN}_unast_emit \"\$@\"; }"
    _printr1 ""
    _printr1 "${_GN}_unast () {"
    _printr1 " _readall; eval \"\$REPLY\""
    _printr1 " _${_GN}_unast_emit_root 0"
    _printr1 " _printr1 \"\$REPLY\""
    _printr1 "}"
}
| # ============================================================ | |
| # Entry Point | |
| # ============================================================ | |
# Generate the parser, unast and reast modules for a grammar.
#
# Usage: gen_bnf [outdir]
#   outdir  Directory that receives parser.sh, unast.sh and reast.sh.
#           Defaults to the current directory; previously an omitted argument
#           expanded to "" and the files were targeted at the filesystem
#           root ("/parser.sh", ...).
#
# Input is read with _readall and evaluated as shell code.
# NOTE(review): because the input is passed to eval, it must come from a
# trusted producer — confirm against callers.
#
# IFS is set to a single space and is not restored by this function.
gen_bnf () {
    local _outdir="${1:-.}"
    _readall; eval "$REPLY"
    IFS=' '
    # Phase 1 scalar outputs
    local _GN _GP _GW _GR _G1 _GDC _GST _GSN
    local _GNUM _GVALNUM _GSTRICT _GCMT _GCMT_END _GEXTERN
    local _GKW_RULE _GKW_LIST _GSTR_SKIP_LIST
    local _GPREC_RULE _GPREC_N _GPREC_RULE_CODE _GPREC_BIN
    local _GPOST_N _GUNARY_N _GSTR_N
    local _GTERNARY _GTERNARY_OPEN _GTERNARY_SEP _GTERNARY_PREC
    local _GTERNARY_STATE _GTERNARY_COLON _GTERNARY_DONE
    # Phase 2 scalar outputs
    local _DA_CODES _DA_CLASS _DA_WILD _DA_INHERIT
    local _GALT _GDONE _GNOCOL _GNUM_CODE _GLOB_VARS
    # Emit-phase shared
    local _rc _tok
    _bnf_gen_p1
    _bnf_gen_p2
    # _tok is the state-code list with its single leading space removed
    _rc="$_GDC"; _tok="${_GST# }"
    # Emit parser module
    {
        _bnf_gen_emit
        _bnf_gen_emit_dispatch
        _bnf_gen_emit_dispatch2
    } > "$_outdir/parser.sh"
    # Emit unast module
    {
        _printr1 "use ${_GN}_parser"
        _printr1 "use io_readall"
        _printr1 ""
        _bnf_gen_emit_ast
    } > "$_outdir/unast.sh"
    # Emit reast module
    {
        _printr1 "use ${_GN}_parser"
        _printr1 "use ${_GN}_unast"
        _printr1 ""
        _printr1 "${_GN}_reast () { ${_GN}_parser | ${_GN}_unast; }"
    } > "$_outdir/reast.sh"
}
# Alternate name for gen_bnf: forwards every argument unchanged.
bnf_gen () {
    gen_bnf "$@"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment