# bnf.sh -- secret gist by @alganet
# Gist: alganet/4dfd501a3377a60f7825901114d65c77
# Created April 3, 2026 06:55
# ISC License
# Copyright (c) 2026 Alexandre Gomes Gaigalas <alganet@gmail.com>
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
use ast_core
use bnf_parser
use str_core
use str_esc
use io_readall
use ds_list
use codegen_core
# ============================================================
# Global Variables
# ============================================================
# Phase 1 outputs (set by _bnf_gen_p1):
# _GN grammar name _GP state code prefix
# _GW whitespace mode _GR rule names (space-sep)
# _G1 root rule name _GDC document root state code
# _GST all state codes (space-sep) _GSN state code counter
# _GNUM number rule name _GVALNUM number validation flag
# _GSTRICT strict mode flag _GCMT comment start char
# _GEXTERN extern rule names _GKW_RULE keyword ident rule
# _GKW_LIST keyword list (uppercase)
# _GPREC_RULE precedence rule _GPREC_N operator count
# _GPREC_RULE_CODE prec rule state code _GPREC_BIN binary op state code
# _GPOST_N postfix op count _GUNARY_N unary op count
# _GSTR_N string accumulator count
# _GTERNARY ternary enabled flag
# _GTERNARY_OPEN/SEP/PREC/STATE/COLON/DONE ternary config
#
# Phase 1 dynamic arrays (eval-indexed by rule name or number):
# _RCODE_<rule> state code per rule
# _RNODE_<rule> AST node ID per rule
# _FIRST_<rule> FIRST set chars per rule
# _GPREC_O/P/A_<i> operator/prec/assoc (i=1.._GPREC_N)
# _GUNARY_O/P_<i> unary op/prec (i=1.._GUNARY_N)
# _GPOST_OPEN/INNER/CLOSE/STATE/CONT_<i> postfix config (i=1.._GPOST_N)
# _GSTR_RULE/CLOSE/ESC/CODE_<i> string config (i=1.._GSTR_N)
# _STR_GLOBVAR_<i> string glob var (i=1.._GSTR_N)
#
# Phase 2 outputs (set by _bnf_gen_p2):
# _DA_CODES char codes for dispatch iteration
# _DA_<code> "state=action" entries per char code
# _DA_RAW_<code> raw char per code
# _DA_CLASS "pattern=state=action" for char classes
# _DA_WILD "state=action" for wildcard dispatch
# _DA_INHERIT "target=source" for state inheritance
# _GALT auto-close states (pipe-sep)
# _GDONE done states (space-sep)
# _GNOCOL no-collapse done states
# _ACCUM_<code> accumulator type:value
# _CLOSESEQ_<code> close sequences
# _EXP_<code> expected-token strings
# _GLOB_VARS glob var names
# _GLOB_PAT_<code> glob patterns
# _GNUM_CODE number rule state code
#
# Emit-phase shared (set in gen_bnf):
# _rc document root state code
# _tok token list for ast_tokens
#
# Cross-function return values:
# REPLY standard return convention
# _done_state set by _bnf_gen_alloc_done
# _tq_* template queue (set by _bnf_gen_tq_build, read by _bnf_gen_emit_seq)
# Map a state code back to the rule that owns it.
#   $1  state code to look up
# Walks the rule names in _GR in order and compares each rule's
# _RCODE_<rule> value with $1. Sets REPLY to the first matching rule
# name, or to the empty string when no rule carries that code.
# The loop variable _r is intentionally left unscoped, as before.
_bnf_gen_code_to_name () {
  REPLY=
  for _r in $_GR; do
    if eval "test \"\$_RCODE_$_r\" = \"\$1\""; then
      REPLY=$_r
      return
    fi
  done
}
# Check whether $1 is a multi-character keyword from the #!keywords list.
#   $1  terminal text to test
# Sets REPLY=1 when $1 is longer than one character and occurs in
# _GKW_LIST on space-delimited token boundaries; otherwise REPLY is empty.
# Matching on token boundaries (instead of a bare substring test) keeps a
# terminal that is only a fragment of a keyword (e.g. "EL" vs "SELECT"),
# or an empty string, from being reported as a keyword.
_bnf_gen_is_kw () {
  REPLY=
  # Empty and single-character terminals are never keywords.
  case ${#1} in 0|1) return 0;; esac
  case " $_GKW_LIST " in *" $1 "*) REPLY=1;; esac
}
# ============================================================
# Phase 1: Grammar Extraction & State Allocation
# ============================================================
# Allocate the next state code and bind it to a name.
#   $1  name to bind; the code is stored in _RCODE_<name>
# The code is a prefix followed by a suffix. The prefix is _GP itself when
# _GP is one character long, otherwise _GP with its last character removed.
# The suffix is a letter a..z for the first 26 allocations and the decimal
# number (_GSN - 26) afterwards. Increments _GSN and appends the new code
# to the space-separated list _GST.
_bnf_gen_code () {
  local _name _code _p
  _name=$1
  if test "${#_GP}" -eq 1; then
    _p=$_GP
  else
    _p=${_GP%?}
  fi
  _GSN=$((_GSN + 1))
  if test "$_GSN" -gt 26; then
    _code=$_p$((_GSN - 26))
  else
    # Positional parameters serve as a 1-based letter table.
    set -- a b c d e f g h i j k l m n o p q r s t u v w x y z
    eval "_code=\$_p\${$_GSN}"
  fi
  eval "_RCODE_$_name=\$_code"
  _GST="$_GST $_code"
}
# (Unary state codes allocated on demand if #!unary directives used)
# ============================================================
# Phase 1c: Compute FIRST sets (fixpoint)
# ============================================================
# Iterative FIRST-set computation (uses _gf_ prefix to avoid variable
# conflicts with callers on ksh93 where local doesn't scope in POSIX fns)
#
# Arguments:
#   $1  AST node ID to compute the FIRST entries for
# Reads:
#   X<id>          node type followed by space-separated child IDs
#   V<id>          node value (terminal text, rule name, class body)
#   _FIRST_<rule>  FIRST string currently known for a referenced rule
# Sets REPLY to the concatenated FIRST entries: single characters,
# "[class]" patterns, and the marker "W" for a negated character class.
# NOTE(review): a terminal whose first character is a literal "W" yields the
# same "W" that marks a negated class -- confirm consumers can tell them apart.
_bnf_gen_first_node () {
local _gf_n=$1 _gf_t _gf_v _gf_r _gf_wk
# _gf_wk is a space-separated worklist of node IDs; _gf_r accumulates the result.
_gf_wk="$_gf_n" _gf_r=
while :; do
case "$_gf_wk" in '') break;; esac
# Pop the head of the worklist into _gf_n.
_gf_n="${_gf_wk%% *}"; case "$_gf_wk" in *' '*) _gf_wk="${_gf_wk#* }";; *) _gf_wk=;; esac
eval "_gf_t=\"\${X$_gf_n%% *}\"; _gf_v=\"\${V$_gf_n:-}\""
case "$_gf_t" in
# Terminal: contributes its first character only.
Bt) _gf_r="$_gf_r${_gf_v%"${_gf_v#?}"}";;
# Rule reference: contributes whatever FIRST string that rule has so far.
Bi) eval "_gf_r=\"\$_gf_r\${_FIRST_$_gf_v:-}\"";;
# Character class: a leading '^' (negated) becomes W, otherwise "[...]".
Bc) case "$_gf_v" in '^'*) _gf_r="${_gf_r}W";; *) _gf_r="${_gf_r}[${_gf_v}]";; esac;;
# Alternation: every child is pushed to the front of the worklist.
Ba) eval "set -- \$X$_gf_n"; shift
case "$_gf_wk" in '') _gf_wk="$*";; *) _gf_wk="$* $_gf_wk";; esac;;
Bq) eval "set -- \$X$_gf_n"; shift
# Add children through first non-nullable to worklist
# (if first child is Bo/Bm, FIRST includes next child too)
# The collected children are prepended one by one, so they end up in
# reverse order on the worklist. _gf_ch and _gf_ct are not declared local.
local _gf_bq=
for _gf_ch in "$@"; do
eval "_gf_ct=\"\${X$_gf_ch%% *}\""
case "$_gf_bq" in '') _gf_bq="$_gf_ch";; *) _gf_bq="$_gf_ch $_gf_bq";; esac
case "$_gf_ct" in Bo|Bm|Be) ;; *) break;; esac
done
case "$_gf_wk" in '') _gf_wk="$_gf_bq";; *) _gf_wk="$_gf_bq $_gf_wk";; esac;;
# Single-child wrappers: continue with the first child.
Bo|Bm|Bs|Bk|Bp|Be) eval "set -- \$X$_gf_n"; shift
case "$_gf_wk" in '') _gf_wk="$1";; *) _gf_wk="$1 $_gf_wk";; esac;;
esac
done
REPLY="$_gf_r"
}
# Phase 1 driver. Takes no arguments; reads the parsed grammar from the
# X<id>/V<id> node variables (root node 0) and fills the _G* globals.
# Steps, in order:
#   1.  parse #! directives (Bd nodes) and collect rules (Br nodes)
#   1a. synthesize one _unary_<i> rule per #!unary directive
#   1b. allocate state codes through _bnf_gen_code
#   1c. compute _FIRST_<rule> for every rule by fixpoint iteration
#       (capped at 20 passes) through _bnf_gen_first_node
# Changes from the previous revision:
#   - accumulator rules are skipped in 1c by exact name; the old prefix
#     test also skipped any rule whose name was a prefix of a #!string rule
#   - #!precedence prec/assoc values are assigned through quoted evals, so
#     shell metacharacters in a directive are stored, not executed
_bnf_gen_p1 () {
  local _ch _t _v _r _rn _str_rule _str_close _si _pi
  local _pop _pprec _passoc _uop _uprec _uname _post_open _post_inner
  local _nid _xt _max_nid _name _node _syn_id _prec_node _body_t _atom_rule
  local _atom_node _atom_body _atom_body_t _bt_id _bi_id _bq_id _br_id _ba_id _ref_id
  local _code _old _changed _pass _p
  # ============================================================
  # Phase 1: Extract grammar info from AST
  # ============================================================
  # Extracts directive values and rule list from the parsed BNF.
  # _GN grammar name (#!name)
  # _GP state code prefix (#!prefix)
  # _GW whitespace mode (#!whitespace skip|token|line)
  # _GR space-separated rule names
  # _G1 first (root) rule name
  # _GNUM number accumulator rule (#!number)
  # _GVALNUM 1 if number validation (#!validate)
  # _GSTRICT 1 if strict mode (#!strict — trailing comma / colon)
  # _GCMT line-comment start char (#!comment)
  # _GCMT_END remainder of the #!comment directive after the first word
  # _GPREC_RULE precedence climbing rule (#!precedence <rule> <ops...>)
  # _GPREC_N number of operators
  # _GKW_RULE identifier rule for keyword checking (#!keywords <ident_rule> <kw...>)
  # _GKW_LIST space-separated keyword list (uppercase)
  # _GKW_CASE value of #!keywords_case
  # _GPOST_N number of postfix operators (#!postfix <rule> <open> [<inner> [<close>]])
  # _GEXTERN space-separated extern rules (#!extern <rule>)
  # _GSTR_N number of string accumulators (#!string <rule> [<close> [escape]])
  _GN=generated _GP=Gn _GW=token _GR= _G1= _GNUM= _GVALNUM= _GSTRICT= _GCMT= _GCMT_END= _GEXTERN=
  _GPREC_RULE= _GPREC_N=0 _GPREC_RULE_CODE= _GPREC_BIN=
  _GKW_RULE= _GKW_LIST= _GKW_CASE=
  _GPOST_N=0 # postfix operator count
  _GUNARY_N=0 # unary prefix operator count
  _GTERNARY= _GTERNARY_OPEN= _GTERNARY_SEP= _GTERNARY_PREC=
  _GSTR_N=0 # string accumulator count
  eval "set -- \$X0"; shift
  for _ch in "$@"; do
    eval "_t=\"\${X$_ch%% *}\"; _v=\"\${V$_ch:-}\""
    case "$_t" in
      Bd)
        case "$_v" in
          name*) _GN="${_v#name }";;
          prefix*) _GP="${_v#prefix }";;
          whitespace*) _GW="${_v#whitespace }";;
          string*) # #!string <rule> [<close> [escape]]
            _v="${_v#string }"
            _GSTR_N=$((_GSTR_N + 1))
            _str_rule="${_v%% *}"; _v="${_v#"$_str_rule"}"; _v="${_v# }"
            eval "_GSTR_RULE_$_GSTR_N=\"\$_str_rule\""
            case "$_v" in
              '') # No args: backward compat (close=", escape=\, JSON mode)
                eval "_GSTR_CLOSE_$_GSTR_N='\"'"
                eval "_GSTR_ESC_$_GSTR_N=json";;
              *)
                _str_close="${_v%% *}"; _v="${_v#"$_str_close"}"; _v="${_v# }"
                eval "_GSTR_CLOSE_$_GSTR_N=\"\$_str_close\""
                case "$_v" in
                  '') eval "_GSTR_ESC_$_GSTR_N=";;
                  *) eval "_GSTR_ESC_$_GSTR_N=simple";;
                esac;;
            esac
            ;;
          number*) _GNUM="${_v#number }";; # number accumulator rule name
          validate*) _GVALNUM=1;; # enable number validation
          strict*) _GSTRICT=1;; # trailing comma + colon enforcement
          comment*)
            _v="${_v#comment }"
            _GCMT="${_v%% *}"
            case "$_v" in *" "*) _GCMT_END="${_v#* }";; *) _GCMT_END=;; esac;;
          extern*) _GEXTERN="$_GEXTERN ${_v#extern }";; # externally-implemented rules
          # keywords_case must be tested before keywords (shared prefix).
          keywords_case*) _GKW_CASE="${_v#keywords_case }";;
          keywords*) # #!keywords <ident_rule> <kw1> <kw2> ...
            _v="${_v#keywords }"
            _GKW_RULE="${_v%% *}"; _v="${_v#"$_GKW_RULE" }"
            _GKW_LIST="$_v";;
          precedence*) # #!precedence <rule> <op> <prec> <assoc> [<op> <prec> <assoc> ...]
            _v="${_v#precedence }"
            _GPREC_RULE="${_v%% *}"; _v="${_v#"$_GPREC_RULE" }"
            # Parse operator triples: op prec assoc
            while test ${#_v} -gt 0; do
              _pop="${_v%% *}"; _v="${_v#"$_pop"}"; _v="${_v# }"
              case "$_pop" in '') break;; esac
              _pprec="${_v%% *}"; _v="${_v#"$_pprec"}"; _v="${_v# }"
              _passoc="${_v%% *}"; _v="${_v#"$_passoc"}"; _v="${_v# }"
              _GPREC_N=$((_GPREC_N + 1))
              # Values are expanded inside the eval, never spliced into it.
              eval "_GPREC_P_$_GPREC_N=\"\$_pprec\""
              eval "_GPREC_A_$_GPREC_N=\"\$_passoc\""
              eval "_GPREC_O_$_GPREC_N=\"\$_pop\""
            done;;
          postfix*) # #!postfix <prec-rule> <open> [<inner-rule> [<close>]]
            _v="${_v#postfix }"
            # Skip <prec-rule> (already stored in _GPREC_RULE)
            _v="${_v#"${_v%% *}" }"
            _GPOST_N=$((_GPOST_N + 1))
            _post_open="${_v%% *}"; _v="${_v#"$_post_open"}"; _v="${_v# }"
            eval "_GPOST_OPEN_$_GPOST_N=\"\$_post_open\""
            _post_inner="${_v%% *}"; _v="${_v#"$_post_inner"}"; _v="${_v# }"
            eval "_GPOST_INNER_$_GPOST_N=\"\$_post_inner\""
            eval "_GPOST_CLOSE_$_GPOST_N=\"\$_v\"";;
          ternary*) # #!ternary <prec-rule> <open> <sep> <prec>
            _v="${_v#ternary }"
            _v="${_v#"${_v%% *}" }" # Skip <prec-rule>
            _GTERNARY_OPEN="${_v%% *}"; _v="${_v#"$_GTERNARY_OPEN" }"
            _GTERNARY_SEP="${_v%% *}"; _v="${_v#"$_GTERNARY_SEP" }"
            _GTERNARY_PREC="${_v%% *}"
            _GTERNARY=1;;
          unary*) # #!unary <prec-rule> <op> <prec>
            _v="${_v#unary }"
            _v="${_v#"${_v%% *}" }" # Skip <prec-rule>
            _GUNARY_N=$((_GUNARY_N + 1))
            _uop="${_v%% *}"; _v="${_v#"$_uop"}"; _v="${_v# }"
            _uprec="${_v%% *}"
            eval "_GUNARY_O_$_GUNARY_N=\"\$_uop\""
            eval "_GUNARY_P_$_GUNARY_N=\"\$_uprec\"";;
        esac;;
      Br)
        _GR="$_GR $_v"
        case "$_G1" in '') _G1="$_v";; esac
        eval "_RNODE_$_v=$_ch";;
    esac
  done
  # ---- Synthesize unary prefix operator rules ----
  # For each #!unary directive, create a grammar rule: _unary_N = 'OP' <atom> ;
  # and prepend it to the atom alternation (the precedence rule's body).
  case "$_GUNARY_N" in 0) ;; *)
    # Find the atom rule: the precedence rule's body is 'expr = atom ;'
    # where atom is a Bi reference. Find the atom rule's name.
    eval "_prec_node=\$_RNODE_$_GPREC_RULE"
    eval "set -- \$X$_prec_node"; shift # Br children
    eval "_body_t=\"\${X$1%% *}\""
    _atom_rule=
    case "$_body_t" in
      Bi) eval "_atom_rule=\"\${V$1:-}\"";; # expr = atom ;
      Bq) eval "set -- \$X$1"; shift # Bq children
        eval "_atom_rule=\"\${V$1:-}\"";; # first child
    esac
    case "$_atom_rule" in ?*)
      # Find max node ID for synthesizing new nodes
      _max_nid=0
      for _r in $_GR; do
        eval "_rn=\$_RNODE_$_r"
        case $((_rn > _max_nid)) in 1) _max_nid=$_rn;; esac
      done
      # Walk all X nodes to find actual max. The upper bound moves with
      # _max_nid, so the scan stops after 200 consecutive unused IDs.
      _nid=0
      while test $_nid -le $((_max_nid + 200)); do
        eval "_xt=\"\${X$_nid:-}\""
        case "$_xt" in ?*) case $((_nid > _max_nid)) in 1) _max_nid=$_nid;; esac;; esac
        _nid=$((_nid + 1))
      done
      _syn_id=$((_max_nid + 1))
      # Find the atom rule's body (should be Ba alternation or single ref)
      eval "_atom_node=\$_RNODE_$_atom_rule"
      eval "set -- \$X$_atom_node"; shift # Br children
      _atom_body=$1
      eval "_atom_body_t=\"\${X$_atom_body%% *}\""
      # For each unary operator, synthesize a rule and add to alternation
      _pi=1
      while test $_pi -le $_GUNARY_N; do
        eval "_uop=\"\$_GUNARY_O_$_pi\""
        _uname="_unary_$_pi"
        # Create nodes: Bt(op), Bi(atom), Bq(Bt Bi), Br(Bq)
        _bt_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _bi_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _bq_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _br_id=$_syn_id; _syn_id=$((_syn_id + 1))
        _ref_id=$_syn_id; _syn_id=$((_syn_id + 1))
        eval "V$_bt_id=\"\$_uop\"; X$_bt_id=Bt"
        eval "V$_bi_id=\"\$_atom_rule\"; X$_bi_id=Bi"
        eval "X$_bq_id=\"Bq $_bt_id $_bi_id\""
        eval "V$_br_id=\"\$_uname\"; X$_br_id=\"Br $_bq_id\""
        eval "V$_ref_id=\"\$_uname\"; X$_ref_id=Bi"
        # Register the rule
        _GR="$_GR $_uname"
        eval "_RNODE_$_uname=$_br_id"
        # Add reference to atom alternation
        case "$_atom_body_t" in
          Ba) # Prepend to existing alternation
            eval "X$_atom_body=\"Ba $_ref_id \${X$_atom_body#Ba }\"";;
          *) # Single ref: wrap in alternation
            _ba_id=$_syn_id; _syn_id=$((_syn_id + 1))
            eval "X$_ba_id=\"Ba $_ref_id $_atom_body\""
            eval "X$_atom_node=\"Br $_ba_id\""
            _atom_body=$_ba_id; _atom_body_t=Ba;;
        esac
        _pi=$((_pi + 1))
      done;;
    esac;;
  esac
  # ============================================================
  # Phase 1b: Assign 2-char state codes
  # ============================================================
  _GST= _GSN=0
  # Assign a document root state code first
  _bnf_gen_code "_doc_"
  eval "_GDC=\$_RCODE__doc_" # document root code
  for _r in $_GR; do _bnf_gen_code "$_r"; done
  # Allocate binary operator state for precedence climbing
  _GPREC_BIN=
  case "$_GPREC_RULE" in ?*)
    _bnf_gen_code "_binop_"
    eval "_GPREC_BIN=\$_RCODE__binop_"
    eval "_GPREC_RULE_CODE=\$_RCODE_$_GPREC_RULE"
    ;;
  esac
  # Allocate state codes for postfix operators
  _pi=1
  while test $_pi -le $_GPOST_N; do
    _bnf_gen_code "_post_$_pi"
    eval "_GPOST_STATE_$_pi=\$_RCODE__post_$_pi"
    eval "_post_close=\"\${_GPOST_CLOSE_$_pi:-}\""
    case "$_post_close" in ?*)
      # Bracket postfix: allocate continuation state for close token
      _bnf_gen_code "_postc_$_pi"
      eval "_GPOST_CONT_$_pi=\$_RCODE__postc_$_pi";;
    esac
    _pi=$((_pi + 1))
  done
  # Allocate state codes for ternary operator
  _GTERNARY_STATE= _GTERNARY_COLON= _GTERNARY_DONE=
  case "$_GTERNARY" in 1)
    _bnf_gen_code "_ternary_"
    eval "_GTERNARY_STATE=\$_RCODE__ternary_"
    _bnf_gen_code "_terncol_"
    eval "_GTERNARY_COLON=\$_RCODE__terncol_"
    _bnf_gen_code "_terndone_"
    eval "_GTERNARY_DONE=\$_RCODE__terndone_"
    ;;
  esac
  # Seed FIRST sets and build skip list for accumulator rules (fixed FIRST sets)
  _GSTR_SKIP_LIST=
  _si=1
  while test $_si -le $_GSTR_N; do
    eval "_str_rule=\"\$_GSTR_RULE_$_si\""
    eval "_str_close=\"\$_GSTR_CLOSE_$_si\""
    eval "_FIRST_$_str_rule=\"\$_str_close\""
    _GSTR_SKIP_LIST="$_GSTR_SKIP_LIST|$_str_rule"
    _si=$((_si + 1))
  done
  case "$_GNUM" in ?*) eval "_FIRST_$_GNUM='0123456789-'";; esac
  _changed=1 _pass=0
  while test $_changed -eq 1; do
    _changed=0; _pass=$((_pass + 1))
    for _r in $_GR; do
      # Skip accumulator rules (FIRST is fixed)
      case "$_GNUM" in "$_r") continue;; esac
      # A trailing '|' is added for the comparison only, so that just the
      # complete rule name matches; _GSTR_SKIP_LIST keeps its stored format.
      case "$_GSTR_SKIP_LIST|" in *"|$_r|"*) continue;; esac
      eval "_node=\$_RNODE_$_r"
      eval "set -- \$X$_node"; shift
      _bnf_gen_first_node "$1"
      eval "_old=\"\${_FIRST_$_r:-}\""
      case "$_old" in "$REPLY") ;; *)
        eval "_FIRST_$_r=\$REPLY"; _changed=1;;
      esac
    done
    # Hard stop after 20 passes even if the sets are still changing.
    case $_pass in 20) break;; esac
  done
} # end _bnf_gen_p1
# ============================================================
# Phase 2: Dispatch Table Construction
# ============================================================
# -- Dispatch entry primitives --
# Add a dispatch entry.
#   $1  character, or a "[...]" class pattern
#   $2  state code the entry belongs to
#   $3  action string
# Class patterns are appended to _DA_CLASS as "pattern=state=action".
# For a plain character the code is obtained from _cg_char_code (via REPLY)
# and "state=action" is appended to _DA_<code>. On first use of a code it is
# also added to _DA_CODES and its raw character is stored in _DA_RAW_<code>.
# For keyword actions on an uppercase letter, the same entry is added for
# the lowercase letter returned by _lcase.
_bnf_gen_da () {
  case "$1" in
    '['*']') # Class pattern (has matching ]): store separately
      _DA_CLASS="$_DA_CLASS $1=$2=$3"
      return;;
  esac
  local _cc
  _cg_char_code "$1"; _cc=$REPLY
  # Deduplicate by (char, state, action) -- exact match only. Entries are
  # space-separated, so both sides are space-delimited for the comparison.
  # A bare prefix test would drop "Ga=PUSH=G1" when "Ga=PUSH=G10" exists.
  eval "case \"\${_DA_$_cc:-} \" in *\" \$2=\$3 \"*) return;; esac"
  eval "_DA_$_cc=\"\${_DA_$_cc:-} \$2=\$3\""
  # Track unique chars via code-based list (avoids glob issues with [ etc.)
  eval "case \"\${_DA_SEEN_$_cc:-}\" in '') _DA_CODES=\"\$_DA_CODES \$_cc\"; _DA_SEEN_$_cc=1; _DA_RAW_$_cc=\$1;; esac"
  # For keyword actions: also add lowercase first-char entry
  case "$3" in SKIP_KW_CONT=*|SKIP_KW_CLOSE=*|OPEN_KW=*)
    case "$1" in [A-Z])
      _lcase "$1"
      case "$REPLY" in ?*) _bnf_gen_da "$REPLY" "$2" "$3";; esac;;
    esac;;
  esac
}
# Append a state code to _GALT (pipe-separated list, no duplicates).
#   $1  state code to add; an empty value is ignored
# Membership is tested on '|' boundaries. A bare substring test would treat
# "G1" as already present whenever "G10" is in the list.
_bnf_gen_galt_add () {
  case "$1" in '') return 0;; esac
  case "|$_GALT|" in *"|$1|"*) return 0;; esac
  case "$_GALT" in
    '') _GALT="$1";;
    *) _GALT="$_GALT|$1";;
  esac
}
# Allocate a fresh "done" state.
#   $1  name under which the new state code is registered
# Gets a new code from _bnf_gen_code, stores it in _done_state (the
# cross-function return value), registers it through _bnf_gen_galt_add
# and appends it to the space-separated list _GDONE.
_bnf_gen_alloc_done () {
  _bnf_gen_code "$1"
  eval "_done_state=\${_RCODE_$1}"
  _bnf_gen_galt_add "${_done_state}"
  _GDONE="${_GDONE} ${_done_state}"
}
# Build expected-token strings per state and propagate inherited entries.
# Helper: add a readable token name to a state's _EXP_ string (dedup).
#   $1  state code; the string lives in _EXP_<code>
#   $2  token name to add; an empty value is ignored
# Names are joined with " or ". Duplicates are detected on " or "
# boundaries. A bare substring test would silently drop "=" once "==" had
# been recorded. The scratch variable _cur is left unscoped, as before.
_bnf_gen_exp_add () {
  eval "_cur=\"\${_EXP_$1:-}\""
  case "$2" in '') return 0;; esac
  case " or $_cur or " in *" or $2 or "*) return 0;; esac
  case "$_cur" in
    '') eval "_EXP_$1=\"\$2\"";;
    *) eval "_EXP_$1=\"\$_cur or \$2\"";;
  esac
}
# Duplicate every dispatch entry of one state onto another state.
#   $1  source state code
#   $2  destination state code
# Per-character entries are re-added through _bnf_gen_da. Class entries
# (_DA_CLASS) and wildcard entries (_DA_WILD) are appended directly to
# their lists. Each list is expanded once when its loop starts, so entries
# appended during the walk are not revisited.
_bnf_gen_copy_dispatch () {
  local _src="$1" _dst="$2" _cc _c _entries _entry _st _clentry _clrest _clst _wentry _wst
  # Per-character entries: "state=action" words stored in _DA_<code>.
  for _cc in $_DA_CODES; do
    eval "_c=\"\$_DA_RAW_$_cc\""
    eval "_entries=\"\${_DA_$_cc:-}\""
    for _entry in $_entries; do
      _st=${_entry%%=*}
      if test "$_st" = "$_src"; then
        _bnf_gen_da "$_c" "$_dst" "${_entry#*=}"
      fi
    done
  done
  # Class entries: "pattern=state=action".
  for _clentry in $_DA_CLASS; do
    _clrest=${_clentry#*=}
    _clst=${_clrest%%=*}
    test "$_clst" = "$_src" || continue
    _DA_CLASS="$_DA_CLASS ${_clentry%%=*}=$_dst=${_clrest#*=}"
  done
  # Wildcard entries: "state=action".
  for _wentry in $_DA_WILD; do
    _wst=${_wentry%%=*}
    test "$_wst" = "$_src" || continue
    _DA_WILD="$_DA_WILD $_dst=${_wentry#*=}"
  done
}
# -- Rule walking --
# Find the terminal that opens a sequence rule (used for OPEN actions).
#   $1  rule name
# Sets REPLY to the value of the first child when the rule body is a
# sequence (Bq) whose first child is a terminal (Bt). In every other case
# REPLY is set to the empty string.
_bnf_gen_seq_opening () {
  local _rname="$1" _rnode _bt _first _ft _fv
  REPLY=
  # Rule node -> body node (first child of the Br node).
  eval "_rnode=\$_RNODE_$_rname"
  eval "set -- \$X$_rnode"; shift; _first=$1
  eval "_bt=\"\${X$_first%% *}\""
  if test "$_bt" != Bq; then
    return 0
  fi
  # Body is a sequence: inspect its first child.
  eval "set -- \$X$_first"; shift; _first=$1
  eval "_ft=\"\${X$_first%% *}\"; _fv=\"\${V$_first:-}\""
  if test "$_ft" = Bt; then
    REPLY=$_fv
  fi
}
# Add FIRST chars from a set to dispatch. Handles both individual chars and
# [class] patterns.
#   $1  state code that receives the entries
#   $2  FIRST string: plain characters, "[...]" class patterns and the
#       wildcard marker W
#   $3  action string
# When a keyword list is active and the action is OPEN=<code> or
# OPEN_CONT=<code>=<next>, the referenced rule's opening terminal is looked
# up. If it is a multi-character keyword, the action is upgraded to
# OPEN_KWSKIP=<code>=<kw> or OPEN_KWSKIP_CONT=<code>=<kw>=<next>.
# The keyword test matches whole space-delimited tokens of _GKW_LIST and
# ignores an empty opening terminal. The previous substring test upgraded
# on any keyword fragment, and on an empty terminal produced an action with
# an empty keyword.
# NOTE(review): a literal "W" in the FIRST string is indistinguishable from
# the wildcard marker here -- confirm how terminals starting with "W" are
# expected to reach this function.
_bnf_gen_add_first_entries () {
  local _rcode="$1" _fc="$2" _action="$3" _c _afe_ref _afe_suffix _rest _cls
  # Auto-upgrade OPEN/OPEN_CONT to OPEN_KWSKIP/OPEN_KWSKIP_CONT for keyword-starting sequences
  case "$_GKW_LIST" in ?*)
    _afe_ref=; _afe_suffix=
    case "$_action" in
      OPEN=*) _afe_ref="${_action#OPEN=}";;
      OPEN_CONT=*)
        _rest="${_action#OPEN_CONT=}"
        _afe_ref="${_rest%%=*}"; _afe_suffix="=${_rest#*=}";;
    esac
    case "$_afe_ref" in ?*)
      _bnf_gen_code_to_name "$_afe_ref"
      case "$REPLY" in ?*)
        _bnf_gen_seq_opening "$REPLY"
        # Only multi-character terminals can be keywords.
        case ${#REPLY} in 0|1) ;; *)
          case " $_GKW_LIST " in *" $REPLY "*)
            case "$_afe_suffix" in
              '') _action="OPEN_KWSKIP=$_afe_ref=$REPLY";;
              *) _action="OPEN_KWSKIP_CONT=$_afe_ref=$REPLY$_afe_suffix";;
            esac;;
          esac;;
        esac;;
      esac;;
    esac;;
  esac
  # For keyword actions: also add lowercase first-char entries for case-insensitive match
  # (applies only when the FIRST string is exactly one uppercase letter)
  case "$_action" in OPEN_KWSKIP=*|OPEN_KWSKIP_CONT=*|SKIP_KW_CONT=*|SKIP_KW_CLOSE=*|OPEN_KW=*)
    case "$_fc" in
      [A-Z])
        _lcase "$_fc"
        case "$REPLY" in ?*) _fc="$_fc$REPLY";; esac;;
    esac;;
  esac
  # Parse: extract [..] as class entries, rest as individual char entries.
  while test ${#_fc} -gt 0; do
    _c="${_fc%"${_fc#?}"}"; _fc="${_fc#?}"
    case "$_c" in
      '[')
        case "$_fc" in
          *']'*)
            _cls="[${_fc%%]*}]"
            _fc="${_fc#*]}"
            _bnf_gen_da "$_cls" "$_rcode" "$_action";;
          *) # No closing bracket follows: treat '[' as a plain character.
            _bnf_gen_da "[" "$_rcode" "$_action";;
        esac;;
      W) _DA_WILD="$_DA_WILD $_rcode=$_action";;
      *) _bnf_gen_da "$_c" "$_rcode" "$_action";;
    esac
  done
}
# Shared implementation for pushing into a referenced rule.
#   $1  state code that receives the entries
#   $2  referenced rule name
#   $3  action prefix used when the rule is a sequence opening with a terminal
#   $4  action prefix used otherwise
#   $5  optional suffix, appended to the action as "=<suffix>" when non-empty
# Does nothing when the rule has no state code. Otherwise the rule's FIRST
# string is handed to _bnf_gen_add_first_entries with the chosen action.
_bnf_gen_ref_push_core () {
  local _refcode _fc
  eval "_refcode=\"\${_RCODE_$2:-}\""
  if test -z "$_refcode"; then
    return 0
  fi
  _bnf_gen_seq_opening "$2"
  eval "_fc=\"\${_FIRST_$2:-}\""
  if test -n "$REPLY"; then
    _bnf_gen_add_first_entries "$1" "$_fc" "$3=$_refcode${5:+=$5}"
  else
    _bnf_gen_add_first_entries "$1" "$_fc" "$4=$_refcode${5:+=$5}"
  fi
}
# Add PUSH entries for FIRST chars of a non-terminal reference in own state
#   $1  state code that receives the entries
#   $2  referenced rule name
# Three cases, decided from the referenced rule:
#   - sequence opening with a terminal: OPEN=<refcode> on its FIRST chars
#   - alternation body: each alternative is expanded inline (recursively
#     for Bi children, directly for Bt children) and the function returns
#   - anything else: PUSH=<refcode> on its FIRST chars
# Does nothing when the rule has no state code.
# _fc, _rn2, _bt2, _alt_ch, _alt_t and _alt_v are not declared local, so the
# recursive call overwrites them; only _rcode, _refcode, _body2 and _alt_c
# are scoped.
_bnf_gen_add_ref_push () {
local _rcode="$1" _refname="$2" _refcode
eval "_refcode=\"\${_RCODE_$_refname:-}\""
case "$_refcode" in '') return;; esac
_bnf_gen_seq_opening "$_refname"
case "$REPLY" in
?*) # Seq starting with terminal: OPEN (create + skip entry)
eval "_fc=\"\${_FIRST_$_refname:-}\""
_bnf_gen_add_first_entries "$_rcode" "$_fc" "OPEN=$_refcode";;
*) # Check if the referenced rule is an alternation (expand inline)
eval "_rn2=\"\${_RNODE_$_refname:-}\""
case "$_rn2" in ?*)
eval "set -- \$X$_rn2"; shift; local _body2=$1
eval "_bt2=\"\${X$_body2%% *}\""
case "$_bt2" in Ba)
eval "set -- \$X$_body2"; shift
# Only Bi and Bt alternatives are handled; other child types add nothing.
for _alt_ch in "$@"; do
eval "_alt_t=\"\${X$_alt_ch%% *}\""
case "$_alt_t" in
# Reference alternative: recurse, still targeting the caller's state.
Bi) eval "_alt_v=\"\${V$_alt_ch:-}\""
_bnf_gen_add_ref_push "$_rcode" "$_alt_v";;
# Terminal alternative: dispatch on its first character. A single-char
# terminal gets OPEN_SKIP_CLOSE, a longer one OPEN_KW with the full text.
Bt) eval "_alt_v=\"\${V$_alt_ch:-}\""
local _alt_c="${_alt_v%"${_alt_v#?}"}"
case ${#_alt_v} in
1) _bnf_gen_da "$_alt_c" "$_rcode" "OPEN_SKIP_CLOSE=$_refcode";;
*) _bnf_gen_da "$_alt_c" "$_rcode" "OPEN_KW=$_refcode=$_alt_v";;
esac;;
esac
done
# Alternation fully expanded: skip the generic PUSH below.
return;;
esac;;
esac
eval "_fc=\"\${_FIRST_$_refname:-}\""
_bnf_gen_add_first_entries "$_rcode" "$_fc" "PUSH=$_refcode";;
esac
}
# Push into a referenced rule and continue in a given state afterwards.
#   $1  state code that receives the entries
#   $2  referenced rule name
#   $3  continuation state, passed on as the action suffix
# Thin wrapper over _bnf_gen_ref_push_core that selects the OPEN_CONT and
# PUSH_CONT action prefixes.
_bnf_gen_add_ref_push_cont () {
  _bnf_gen_ref_push_core \
    "$1" "$2" \
    'OPEN_CONT' 'PUSH_CONT' \
    "$3"
}
# Alternation rule body: register every alternative in the rule's own state.
#   $1  rule name
#   $2  rule state code
#   $3  node ID of the Ba (alternation) node
# A "done" state named _adone_<code> is allocated first, and every
# alternative continues into it. _done_state is declared local here, so the
# value set by _bnf_gen_alloc_done does not escape this function.
_bnf_gen_walk_alt () {
  local _rname="$1" _rcode="$2" _anode="$3" _ch _ct _cv _c _refcode _fc _done_state
  _bnf_gen_alloc_done "_adone_${_rcode}"
  eval "set -- \$X$_anode"; shift
  for _ch in "$@"; do
    eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
    if test "$_ct" = Bi; then
      # Rule reference: push the child, then continue in the done state.
      _bnf_gen_add_ref_push_cont "$_rcode" "$_cv" "$_done_state"
    elif test "$_ct" = Bt; then
      # Terminal: dispatch on its first character.
      _c=${_cv%"${_cv#?}"}
      if test "${#_cv}" -eq 1; then
        _bnf_gen_da "$_c" "$_rcode" "OPEN_SKIP_CLOSE_CONT=$_rcode=$_done_state"
      else
        _bnf_gen_da "$_c" "$_rcode" "OPEN_KW=$_rcode=$_cv=$_done_state"
      fi
    elif test "$_ct" = Bq; then
      # Sequence alternative: walked as a sequence in this rule's state.
      _bnf_gen_walk_seq_body "$_rcode" "$_ch"
    fi
  done
}
# Walk into Bo (optional) or Bm (repetition) nodes and add entries
#   $1  state code that receives the entries
#   $2  node ID of the inner content
# Dispatches on the node type:
#   Bi        push the referenced rule (_bnf_gen_add_ref_push)
#   Bq        inner sequence; with a keyword list each step gets its own
#             "_iq<code>_<i>" state, otherwise all steps share $1
#   Bo|Bm|Be  recurse into the first child
#   Ba        recurse into every alternative
#   Bt        SKIP, or SKIP_KW_CONT back into $1 for a keyword
_bnf_gen_walk_inner () {
local _rcode="$1" _node="$2" _t _ch _ct _cv _c
eval "_t=\"\${X$_node%% *}\""
case "$_t" in
Bi) # Direct reference
eval "_cv=\"\${V$_node:-}\""
_bnf_gen_add_ref_push "$_rcode" "$_cv";;
Bq) # Inner sequence (e.g., ',' member inside Bm)
eval "set -- \$X$_node"; shift
local _iq_nsteps=$#
# Use two-state approach only when keywords are active (prevents
# item FIRST chars from overlapping with keyword-terminated closes)
case "$_GKW_LIST" in ?*)
# _iq_cur is the state the current step is registered in; the last step
# always leads back to _rcode.
local _iq_i=0 _iq_cur="$_rcode" _iq_next _iq_rcode="$_rcode"
for _ch in "$@"; do
_iq_i=$((_iq_i + 1))
eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
case "$_ct" in
Bt) # Separator/keyword terminal
_c="${_cv%"${_cv#?}"}"
# REPLY from _bnf_gen_is_kw is consumed after the state allocation below;
# _bnf_gen_code does not assign REPLY.
_bnf_gen_is_kw "$_cv"
if test $_iq_i -lt $_iq_nsteps; then
_bnf_gen_code "_iq${_rcode}_$_iq_i"
eval "_iq_next=\$_RCODE__iq${_rcode}_$_iq_i"
else
_iq_next="$_rcode"
fi
case "$REPLY" in 1)
_bnf_gen_da "$_c" "$_iq_cur" "SKIP_KW_CONT=$_cv=$_iq_next";;
*) _bnf_gen_da "$_c" "$_iq_cur" "SKIP_CONT=$_iq_next";;
esac
_iq_cur="$_iq_next";;
Bi) # Content reference
eval "_cv=\"\${V$_ch:-}\""
if test $_iq_i -lt $_iq_nsteps; then
_bnf_gen_code "_iq${_rcode}_$_iq_i"
eval "_iq_next=\$_RCODE__iq${_rcode}_$_iq_i"
_bnf_gen_add_ref_push_cont "$_iq_cur" "$_cv" "$_iq_next"
_iq_cur="$_iq_next"
else
_bnf_gen_add_ref_push_cont "$_iq_cur" "$_cv" "$_rcode"
fi;;
Bm|Bo|Be)
# Record inheritance for this Bo/Bm step. The inherited entries
# allow the parser to skip the optional and match what follows.
# We record BEFORE recursion because ksh93 corrupts all locals
# during recursive _bnf_gen_walk_inner calls (nested POSIX function scoping).
_DA_INHERIT="$_DA_INHERIT $_iq_cur=$_iq_rcode"
_bnf_gen_walk_inner "$_iq_cur" "$_ch";;
esac
done;;
*) # No keywords: use original flat approach (all steps in shared state)
for _ch in "$@"; do
eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
case "$_ct" in
Bt) _c="${_cv%"${_cv#?}"}"
_bnf_gen_is_kw "$_cv"
case "$REPLY" in 1)
_bnf_gen_da "$_c" "$_rcode" "SKIP_KW_CONT=$_cv=$_rcode";;
# In strict mode a ',' terminal gets SKIP_COMMA instead of SKIP.
*) case "$_GSTRICT" in 1)
case "$_c" in ',') _bnf_gen_da "$_c" "$_rcode" "SKIP_COMMA";;
*) _bnf_gen_da "$_c" "$_rcode" "SKIP";; esac;;
*) _bnf_gen_da "$_c" "$_rcode" "SKIP";; esac;;
esac;;
Bi) eval "_cv=\"\${V$_ch:-}\""
_bnf_gen_add_ref_push "$_rcode" "$_cv";;
Bm|Bo|Be)
_bnf_gen_walk_inner "$_rcode" "$_ch";;
esac
done;;
esac;;
Bo|Bm|Be) # Optional/repetition/group: recurse into inner
eval "set -- \$X$_node"; shift
_bnf_gen_walk_inner "$_rcode" "$1";;
Ba) # Alternation inside optional/repetition
eval "set -- \$X$_node"; shift
for _ch in "$@"; do
_bnf_gen_walk_inner "$_rcode" "$_ch"
done;;
Bt) # Terminal inside optional/repetition
eval "_cv=\"\${V$_node:-}\""
_c="${_cv%"${_cv#?}"}"
_bnf_gen_is_kw "$_cv"
case "$REPLY" in 1)
_bnf_gen_da "$_c" "$_rcode" "SKIP_KW_CONT=$_cv=$_rcode";;
*) _bnf_gen_da "$_c" "$_rcode" "SKIP";;
esac;;
esac
}
# Walk sequence body with CONTINUATION STATES.
# Each step after step 1 gets its own state code. This eliminates
# FIRST-set conflicts between different positions in a sequence.
#   $1  state code of the rule that owns the sequence
#   $2  node ID of the Bq (sequence) node
# Works in three passes over the children of $2:
#   1. build _cont_states, one state per step (word i = state of step i)
#   2. add to _GALT every state from which only Bo/Bm steps remain
#   3. emit the dispatch entries of each step
# Positional parameters are reused for both the child list and the state
# list, hence the repeated "set --" calls. _jch, _jt, _st, _ccv and
# _inner_t are not declared local.
_bnf_gen_walk_seq_body () {
local _rcode="$1" _snode="$2" _ch _ct _cv _c _fc _refcode _refname
local _nsteps _i _first_is_term=0 _cont_states _shared _all_opt _j
local _cur_state="$_rcode" _next_state _prev_ch _prev_t
eval "set -- \$X$_snode"; shift; _nsteps=$#
# Check if step 1 is a terminal (entry step handled by parent OPEN)
eval "_ct=\"\${X$1%% *}\""
case "$_ct" in Bt) _first_is_term=1;; esac
# Pre-allocate continuation state codes.
# If step 1 is terminal (handled by parent OPEN), step 2 uses the primary state.
case $_first_is_term in
1) # Step 1 is terminal: primary state = step 2, continuations from step 3
_cont_states="SKIP $_rcode" # SKIP for step 1, primary for step 2
_i=2;;
*) # Step 1 is non-terminal: primary state = step 1, continuations from step 2
_cont_states="$_rcode"
_i=1;;
esac
# Allocate continuation states for remaining steps.
# BUT: if a step is Bo/Bm (nullable), share the state with its successor.
eval "set -- \$X$_snode"; shift
while test $_i -lt $_nsteps; do
_i=$((_i + 1))
# Check if the PREVIOUS step (at position _i-1) was nullable (Bo/Bm)
eval "_prev_ch=\${$((_i - 1))}"
eval "_prev_t=\"\${X$_prev_ch%% *}\""
case "$_prev_t" in Bo|Bm)
# Previous step was nullable: share its state with this step
_shared="${_cont_states##* }"
_cont_states="$_cont_states $_shared";;
*)
_bnf_gen_code "_c${_rcode}_$_i"
eval "_next_state=\$_RCODE__c${_rcode}_$_i"
_cont_states="$_cont_states $_next_state";;
esac
done
eval "set -- \$X$_snode"; shift # restore positional params
# Add continuation states to _GALT when all remaining steps are Bo or Bm
_i=0
for _ch in "$@"; do
_i=$((_i + 1))
eval "_ct=\"\${X$_ch%% *}\""
# Check if all steps from _i to end are optional
_all_opt=1; _j=$_i
while test $_j -le $_nsteps; do
eval "_jch=\${$_j}"; eval "_jt=\"\${X$_jch%% *}\""
case "$_jt" in Bo|Bm) ;; *) _all_opt=0; break;; esac
_j=$((_j + 1))
done
case $_all_opt in 1)
# This state has only optional content remaining — add to _GALT
set -- $_cont_states
eval "_st=\${$_i}"
case "$_st" in SKIP) ;; *)
_bnf_gen_galt_add "$_st";;
esac;;
esac
eval "set -- \$X$_snode"; shift # restore
done
# Process each step
_i=0
for _ch in "$@"; do
_i=$((_i + 1))
eval "_ct=\"\${X$_ch%% *}\"; _cv=\"\${V$_ch:-}\""
# Determine current and next state from pre-allocated list
# (the loop's word list was expanded at loop start, so replacing the
# positional parameters here does not disturb the iteration)
set -- $_cont_states
eval "_cur_state=\${$_i}"
# Skip step 1 if terminal (handled by parent OPEN)
case "$_cur_state" in SKIP) continue;; esac
if test $_i -lt $_nsteps; then
eval "_next_state=\${$((_i + 1))}"
else
_next_state=
fi
case "$_ct" in
Bt) _c="${_cv%"${_cv#?}"}"
_bnf_gen_is_kw "$_cv"
case "$REPLY" in 1)
# Keyword terminal in sequence: skip whole word and transition
case "$_next_state" in ?*)
_bnf_gen_da "$_c" "$_cur_state" "SKIP_KW_CONT=$_cv=$_next_state";;
*) _bnf_gen_da "$_c" "$_cur_state" "SKIP_KW_CLOSE=$_cv";;
esac;;
*)
case $_i in
1) # Entry terminal: skip if handled by parent OPEN
case $_first_is_term in 1) ;; *)
_bnf_gen_da "$_c" "$_cur_state" "SKIP";; esac
;;
# Last step: the terminal closes the sequence.
$_nsteps) _bnf_gen_da "$_c" "$_cur_state" "SKIP_CLOSE";;
*) case "$_next_state" in ?*)
_bnf_gen_da "$_c" "$_cur_state" "SKIP_CONT=$_next_state";;
*) _bnf_gen_da "$_c" "$_cur_state" "SKIP";;
esac;;
esac;;
esac;;
Bi) # Non-terminal reference
eval "_refname=\"\${V$_ch:-}\""
case "$_next_state" in ?*)
_bnf_gen_add_ref_push_cont "$_cur_state" "$_refname" "$_next_state";;
*) # Last step is non-terminal: allocate a "done" state
# that has no entries and auto-closes immediately
_bnf_gen_alloc_done "_done_${_rcode}"
# If sequence has leading terminal, mark done state as no-collapse
# (the node must be preserved so the emitter can reconstruct the terminal)
case "$_first_is_term" in 1)
_GNOCOL="$_GNOCOL $_done_state";; esac
_bnf_gen_add_ref_push_cont "$_cur_state" "$_refname" "$_done_state";;
esac;;
Bo|Bm) # Optional/Repetition: add entries for inner content
eval "set -- \$X$_ch"; shift
# The inner content is registered in the current state; no extra
# argument is passed to _bnf_gen_walk_inner.
# NOTE(review): earlier comments here described passing "push" for Bo in
# keyword grammars, and copying the NEXT step's entries into this state
# for the empty case. Neither is implemented in this block; the empty
# case relies on the state sharing set up in the allocation pass above.
_bnf_gen_walk_inner "$_cur_state" "$1"
;;
Bc) # Bare character class (no quantifier)
eval "_ccv=\"\${V$_ch:-}\""
eval "_ACCUM_$_cur_state=\"1:\$_ccv\"";;
Bp|Bs|Bk) # Quantifier wrapping Bc
eval "set -- \$X$_ch"; shift
eval "_inner_t=\"\${X$1%% *}\""
# Only a quantifier whose child is a character class becomes an
# accumulator; the quantifier's node type is stored as the accumulator type.
case "$_inner_t" in Bc)
eval "_ccv=\"\${V$1:-}\""
eval "_ACCUM_$_cur_state=\"$_ct:\$_ccv\"";;
esac;;
esac
done
}
# Rule whose body is a single reference to another rule.
#   $1  rule name
#   $2  rule state code
#   $3  node ID of the Bi (reference) node
# The precedence rule has its own close mechanism, so it pushes the child
# with plain OPEN/PUSH actions and no continuation. Every other rule gets a
# "_done_<code>" state and pushes the child with that state as continuation.
# _done_state is not local here, so it stays set after the call.
_bnf_gen_walk_single_ref () {
  local _rname="$1" _rcode="$2" _bnode="$3" _cv _refcode _fc
  eval "_cv=\"\${V$_bnode:-}\""
  eval "_refcode=\"\${_RCODE_$_cv:-}\""
  if test "$_GPREC_RULE_CODE" = "$_rcode"; then
    _bnf_gen_ref_push_core "$_rcode" "$_cv" "OPEN" "PUSH"
    return
  fi
  _bnf_gen_alloc_done "_done_${_rcode}"
  _bnf_gen_add_ref_push_cont "$_rcode" "$_cv" "$_done_state"
}
# --- Walk rules to populate dispatch table ---
# KEY PRINCIPLE: each rule adds entries to ITS OWN state only.
# Parent-child linking happens because:
# - Alternation adds entries for its alternatives' first chars in its own state
# - Sequence adds entries for its steps' first chars in its own state
# - Single-ref adds entries for the referenced rule's first chars in its own state
#
# Takes no arguments. Iterates over the rule names in _GR, skips rules
# listed in _GEXTERN, and dispatches on the type of each rule's body node.
# Extern rules are matched by their complete name. The previous prefix test
# also skipped any rule whose name was a prefix of an extern rule (e.g.
# "foo" when "foo_bar" is extern).
_bnf_gen_walk () {
  local _rname _rcode _rnode _body _bt
  for _rname in $_GR; do
    # Skip extern rules — they are implemented externally.
    # _GEXTERN is a space-prefixed list; a trailing space is added for the
    # comparison so names are matched on both boundaries.
    case "$_GEXTERN " in *" $_rname "*) continue;; esac
    eval "_rcode=\$_RCODE_$_rname; _rnode=\$_RNODE_$_rname"
    eval "set -- \$X$_rnode"; shift
    _body=$1
    eval "_bt=\"\${X$_body%% *}\""
    case "$_bt" in
      Ba) _bnf_gen_walk_alt "$_rname" "$_rcode" "$_body";;
      Bq) _bnf_gen_walk_seq_body "$_rcode" "$_body";;
      Bt) ;; # No own-state entries needed (parent handles via OPEN)
      Bi) _bnf_gen_walk_single_ref "$_rname" "$_rcode" "$_body";;
      Bm|Bo) # Repetition or optional as rule body — walk inner content
        eval "set -- \$X$_body"; shift
        _bnf_gen_walk_inner "$_rcode" "$1";;
      Bc|Bp|Bs|Bk) # Character class or quantifier-wrapped class as rule body
        # This is an accumulator rule (e.g., word = [a-z]+ or number = [0-9]+)
        # Extract the Bc node (may be wrapped in quantifier)
        local _ccnode="$_body" _cctype="$_bt"
        case "$_bt" in Bp|Bs|Bk)
          eval "set -- \$X$_body"; shift; _ccnode=$1
          eval "_cctype=\"\${X$_ccnode%% *}\"";;
        esac
        # The accumulator type is the body node type (Bc or the quantifier),
        # the value is the character class text.
        case "$_cctype" in Bc)
          eval "_ccv=\"\${V$_ccnode:-}\""
          eval "_ACCUM_$_rcode=\"$_bt:\$_ccv\"";;
        esac;;
    esac
  done
}
# -- Phase 2 sub-phases --
# Handle string accumulators: add close-char and escape entries, pre-compute glob vars.
# Iterates string accumulator indices 1.._GSTR_N.
# Reads:  _GSTR_RULE_<i>, _GSTR_CLOSE_<i>, _GSTR_ESC_<i>, _GSTR_EMBED_STOP_<i>,
#         _RCODE_<rule>, _GN
# Writes: _GSTR_CODE_<i>    state code of the string rule
#         _STR_GLOBVAR_<i>  text of an assignment "_<GN>_sg_<code>='[<stop chars>]*'"
# Dispatch entries are registered through _bnf_gen_da.
_bnf_gen_p2_str_accum () {
local _si=1 _str_rule _str_code _str_close _str_esc
local _sfp_glob _sfp_embed _sgesc _sgtmp _sgc
while test $_si -le $_GSTR_N; do
eval "_str_rule=\"\$_GSTR_RULE_$_si\""
eval "_str_code=\$_RCODE_$_str_rule"
eval "_str_close=\"\$_GSTR_CLOSE_$_si\""
eval "_str_esc=\"\${_GSTR_ESC_$_si:-}\""
eval "_GSTR_CODE_$_si=\$_str_code"
# The close string ends the accumulator; a backslash starts an escape
# when the escape mode is json or simple.
_bnf_gen_da "$_str_close" "$_str_code" "ACCUM_STR_CLOSE"
case "$_str_esc" in
json) _bnf_gen_da '\' "$_str_code" "ACCUM_STR_ESC";;
simple) _bnf_gen_da '\' "$_str_code" "ACCUM_STR_ESC_SIMPLE";;
esac
# Build the stop-character set: backslash (if escapes are enabled),
# then the close string, then any embed-stop characters.
_sfp_glob=
case "$_str_esc" in json|simple) _sfp_glob="\\";; esac
_sfp_glob="${_sfp_glob}$_str_close"
eval "_sfp_embed=\"\${_GSTR_EMBED_STOP_$_si:-}\""
_sfp_glob="${_sfp_glob}$_sfp_embed"
# Re-encode the set one character at a time so it can sit inside the
# single-quoted assignment below: ' becomes '"'"' and each backslash is
# doubled; every other character is copied unchanged.
_sgesc= _sgtmp="$_sfp_glob"
while test ${#_sgtmp} -gt 0; do
# Peel off the first character of _sgtmp.
_sgc="${_sgtmp%"${_sgtmp#?}"}"; _sgtmp="${_sgtmp#?}"
case "$_sgc" in
"'") _sgesc="${_sgesc}'\"'\"'";;
'\') _sgesc="${_sgesc}\\\\";;
*) _sgesc="$_sgesc$_sgc";;
esac
done
# \$_sgesc is expanded by the eval itself, so its content is stored
# verbatim in _STR_GLOBVAR_<i>.
eval "_STR_GLOBVAR_$_si=\"_${_GN}_sg_${_str_code}='[\$_sgesc]*'\""
_si=$((_si + 1))
done
}
# Detect multi-char close sequences for char-class accumulators.
# For each sequence rule (body type Bq), if a Bi ref to an accumulator rule
# (one with a non-empty _ACCUM_<code>) is followed by consecutive terminals
# (Bt), their values are concatenated; when the concatenation is at least two
# characters long it is stored in _CLOSESEQ_<code> of the referenced rule.
# Collection stops at the first non-terminal step, or at a terminal whose
# value occurs in _GKW_LIST (substring match).
_bnf_gen_p2_closeseq () {
local _rname _rcode _rnode _body _bt _nsteps _i _ch _ct
local _refname _ref_rc _ref_accum _closeseq _j _nch _nt _nv
for _rname in $_GR; do
eval "_rcode=\$_RCODE_$_rname; _rnode=\$_RNODE_$_rname"
eval "set -- \$X$_rnode"; shift; _body=$1
eval "_bt=\"\${X$_body%% *}\""
# Only sequence bodies are inspected.
case "$_bt" in Bq) ;; *) continue;; esac
# Positional parameters now hold the sequence's step nodes.
eval "set -- \$X$_body"; shift; _nsteps=$#
_i=0
for _ch in "$@"; do
_i=$((_i + 1))
eval "_ct=\"\${X$_ch%% *}\""
case "$_ct" in Bi)
eval "_refname=\"\${V$_ch:-}\""
eval "_ref_rc=\"\${_RCODE_$_refname:-}\""
eval "_ref_accum=\"\${_ACCUM_$_ref_rc:-}\""
case "$_ref_accum" in ?*)
# Scan the steps after this reference (positions _i+1.._nsteps).
_closeseq= _j=$((_i + 1))
while test $_j -le $_nsteps; do
# ${N} indirection: fetch the _j-th step node, then its type.
eval "_nch=\${$_j}"; eval "_nt=\"\${X$_nch%% *}\""
case "$_nt" in Bt)
eval "_nv=\"\${V$_nch:-}\""
case "$_GKW_LIST" in *"$_nv"*) break;; esac
_closeseq="$_closeseq$_nv"; _j=$((_j + 1));;
*) break;; esac
done
# Keep only sequences of two or more characters.
case "$_closeseq" in ??*)
eval "_CLOSESEQ_$_ref_rc=\"\$_closeseq\"";;
esac
;; esac
;; esac
done
done
}
# Track which states need auto-close (cascade close after child completes).
# For every non-extern rule other than the document root state (_GDC):
#   - alternation / single-ref / repetition / optional bodies (Ba|Bi|Bm|Bo)
#     register the rule's own state, except for the precedence rule;
#   - sequence bodies (Bq) whose LAST step is not a terminal register the
#     continuation state of that last step (_RCODE__c<code>_<nsteps>, falling
#     back to the rule's own state), or the rule's own state for one-step
#     sequences.
# States are registered through _bnf_gen_galt_add.
# Side effect: IFS is set to a single space and is not restored.
_bnf_gen_p2_autoclose () {
  local _r _node _rcode _bt _last_ch _last_t _seq_nsteps _last_cont
  IFS=' '
  for _r in $_GR; do
    # Skip extern rules. Match the whole space-delimited name so a rule
    # whose name is a prefix of an extern rule is not skipped by accident.
    case " $_GEXTERN " in *" $_r "*) continue;; esac
    eval "_node=\$_RNODE_$_r; _rcode=\$_RCODE_$_r"
    case "$_rcode" in "$_GDC") continue;; esac
    # $1 becomes the rule body node.
    eval "set -- \$X$_node"; shift
    eval "_bt=\"\${X$1%% *}\""
    case "$_bt" in
      Ba|Bi|Bm|Bo)
        case "$_GPREC_RULE_CODE" in "$_rcode") ;; *)
          _bnf_gen_galt_add "$_rcode";;
        esac;;
      Bq) IFS=' '
        # Positional parameters become the sequence steps; fetch the last one.
        eval "set -- \$X$1"; shift
        _last_ch="$#"; eval "_last_ch=\${$_last_ch}"
        eval "_last_t=\"\${X$_last_ch%% *}\""
        _seq_nsteps=$#
        case "$_last_t" in Bt) ;;
          *) if test $_seq_nsteps -gt 1; then
               eval "_last_cont=\"\${_RCODE__c${_rcode}_$_seq_nsteps:-$_rcode}\""
             else
               _last_cont="$_rcode"
             fi
             _bnf_gen_galt_add "$_last_cont";;
        esac;;
    esac
  done
}
# Generate root dispatch + keyword lowercase copy + binary op dispatch copy.
# Reads:  _G1 (root rule name), _GDC (document root state), _GPREC_RULE_CODE,
#         _GPREC_BIN, _GKW_LIST, _DA_CODES, _DA_RAW_<code>, _DA_<code>,
#         _DA_CLASS, _FIRST_<rule>, _RCODE_<rule>, _RNODE_<rule>, X<node>
# Writes: _DA_CLASS (appended) and dispatch entries through the helper calls.
_bnf_gen_p2_root_dispatch () {
local _G1C _fc _clentry _clrest _clst _clpat _g1node _g1bt
local _cc _c _entries _lc _entry _st
eval "_G1C=\$_RCODE_$_G1"
case "$_GPREC_RULE_CODE" in "$_G1C")
# Root rule IS the precedence rule: the document state pushes the root rule
# with itself as continuation, for each of the root's first-set entries...
eval "_fc=\"\${_FIRST_$_G1:-}\""
_bnf_gen_add_first_entries "$_GDC" "$_fc" "PUSH_CONT=$_G1C=$_GDC"
# ...and for every class entry ("pattern=state=action") registered on the
# root state. The loop list is expanded once up front, so entries appended
# here are not revisited.
for _clentry in $_DA_CLASS; do
_clrest="${_clentry#*=}"
_clst="${_clrest%%=*}"
case "$_clst" in "$_G1C")
_clpat="${_clentry%%=*}"
_DA_CLASS="$_DA_CLASS $_clpat=$_GDC=PUSH_CONT=$_G1C=$_GDC";;
esac
done
;; *)
# Otherwise look at the root rule's body type.
eval "_g1node=\$_RNODE_$_G1"
eval "set -- \$X$_g1node"; shift
eval "_g1bt=\"\${X$1%% *}\""
case "$_g1bt" in Bm|Bo)
# Repetition/optional root: copy the root state's dispatch (source)
# to the document state (target).
_bnf_gen_copy_dispatch "$_G1C" "$_GDC";;
*)
_bnf_gen_add_ref_push_cont "$_GDC" "$_G1" "$_GDC";;
esac
;; esac
# Copy ALL uppercase keyword entries as lowercase for case-insensitive keywords.
# For every state that has an uppercase character dispatch entry, also add
# the corresponding lowercase entry. This ensures keyword dispatch works
# at all levels (root, continuation states, block bodies, etc.).
# Only done when _GKW_LIST is non-empty.
case "$_GKW_LIST" in ?*)
for _cc in $_DA_CODES; do
eval "_c=\"\$_DA_RAW_$_cc\""
case "$_c" in [A-Z])
eval "_entries=\"\${_DA_$_cc:-}\""
# _lcase leaves its result in REPLY; an empty result skips this code.
_lcase "$_c"; _lc=$REPLY
case "$_lc" in '') continue;; esac
# Each entry is "state=action".
for _entry in $_entries; do
_st="${_entry%%=*}"
_bnf_gen_da "$_lc" "$_st" "${_entry#*=}"
done;;
esac
done;;
esac
# Copy expr dispatch to binary op state
case "$_GPREC_BIN" in ?*)
_bnf_gen_copy_dispatch "$_GPREC_RULE_CODE" "$_GPREC_BIN"
;; esac
}
# Register dispatch entries for the postfix operators (indices 1.._GPOST_N)
# and, when enabled, for the ternary operator.
#
# Postfix operator WITH a close delimiter: the inner rule is pushed from the
# operator state with _GPOST_CONT_<i> as continuation, and the first character
# of the close delimiter gets a SKIP_CLOSE_XC entry in both the operator state
# and the continuation state.
# Postfix operator WITHOUT a close delimiter: a "done" state is allocated and
# used as the continuation.
_bnf_gen_p2_postfix () {
  local _pi=1 _post_state _post_inner _post_close _post_cont _pc
  while test $_pi -le $_GPOST_N; do
    eval "_post_state=\"\$_GPOST_STATE_$_pi\"; _post_inner=\"\$_GPOST_INNER_$_pi\""
    eval "_post_close=\"\${_GPOST_CLOSE_$_pi:-}\"; _post_cont=\"\${_GPOST_CONT_$_pi:-}\""
    if test -n "$_post_close"; then
      _bnf_gen_add_ref_push_cont "$_post_state" "$_post_inner" "$_post_cont"
      # Only the first character of the close delimiter is dispatched on.
      _pc="${_post_close%"${_post_close#?}"}"
      _bnf_gen_da "$_pc" "$_post_state" "SKIP_CLOSE_XC"
      _bnf_gen_da "$_pc" "$_post_cont" "SKIP_CLOSE_XC"
    else
      _bnf_gen_alloc_done "_postd_$_pi"
      _bnf_gen_add_ref_push_cont "$_post_state" "$_post_inner" "$_done_state"
    fi
    _pi=$((_pi + 1))
  done
  # Ternary operator: the ternary and ternary-done states both receive a copy
  # of the precedence rule's dispatch; ':' in the colon state skips and
  # continues in the done state.
  if test "$_GTERNARY" = 1; then
    _bnf_gen_copy_dispatch "$_GPREC_RULE_CODE" "$_GTERNARY_STATE"
    _bnf_gen_da ":" "$_GTERNARY_COLON" "SKIP_CONT=$_GTERNARY_DONE"
    _bnf_gen_copy_dispatch "$_GPREC_RULE_CODE" "$_GTERNARY_DONE"
  fi
}
# Collect the char-class accumulators whose glob contains a single quote;
# those globs are needed at file scope.
# For every rule with an _ACCUM_<code> entry ("<quantifier>:<class>") the raw
# glob body is the class with a leading '^' removed, or the class prefixed
# with '!' when there is no leading '^'.
# Writes: _GLOB_VARS (reset, then " <code>" appended per hit) and
#         _GLOB_PAT_<code> (the raw glob body).
_bnf_gen_p2_glob_precomp () {
  local _r _rcode _accum _ccval _ccraw
  _GLOB_VARS=
  for _r in $_GR; do
    eval "_rcode=\$_RCODE_$_r"
    eval "_accum=\"\${_ACCUM_$_rcode:-}\""
    if test -z "$_accum"; then continue; fi
    # Drop the "<quantifier>:" prefix.
    _ccval="${_accum#*:}"
    _ccraw="!${_ccval}"
    case "$_ccval" in '^'*) _ccraw="${_ccval#^}";; esac
    # Only classes containing ' are recorded.
    case "$_ccraw" in
      *"'"*) ;;
      *) continue;;
    esac
    _GLOB_VARS="$_GLOB_VARS $_rcode"
    eval "_GLOB_PAT_$_rcode=\"\$_ccraw\""
  done
}
# Phase 2b: build the expected-token data per state (for error messages) and
# propagate inherited dispatch.
# Reads:  _DA_CODES, _DA_RAW_<code>, _DA_<code>, _DA_CLASS, _DA_WILD,
#         _DA_INHERIT ("target=source" pairs), _GALT (pipe-delimited states)
# Expected tokens are recorded through _bnf_gen_exp_add <state> <readable>.
_bnf_gen_p2_exp_inh () {
  local _cc _c _entries _readable _entry _clentry _clpat _clrest _st _wentry
  local _inh _inh_target _inh_source _cur
  # Collect valid characters/patterns per state from the dispatch table.
  # Plain characters are shown quoted: 'c', or "\"" for the double quote.
  for _cc in $_DA_CODES; do
    eval "_c=\"\$_DA_RAW_$_cc\""
    eval "_entries=\"\${_DA_$_cc:-}\""
    case "$_c" in '"') _readable='"\""';; *) _readable="'$_c'";; esac
    for _entry in $_entries; do _bnf_gen_exp_add "${_entry%%=*}" "$_readable"; done
  done
  # Class entries are "pattern=state=action"; digit and letter classes get a
  # friendly name, anything else is shown as the raw pattern.
  for _clentry in $_DA_CLASS; do
    _clpat="${_clentry%%=*}"; _clrest="${_clentry#*=}"; _st="${_clrest%%=*}"
    case "$_clpat" in '[0-9]'*) _readable="number";; '[a-zA-Z'*) _readable="identifier";; *) _readable="$_clpat";; esac
    _bnf_gen_exp_add "$_st" "$_readable"
  done
  # Wildcard entries are "state=action".
  for _wentry in $_DA_WILD; do _bnf_gen_exp_add "${_wentry%%=*}" "text"; done
  # Propagate inherited entries (inner Bq states ending with Bo/Bm)
  for _inh in $_DA_INHERIT; do
    _inh_target="${_inh%%=*}"; _inh_source="${_inh#*=}"
    _bnf_gen_copy_dispatch "$_inh_source" "$_inh_target"
    # The target inherits auto-close only when the source is an EXACT member
    # of the pipe-delimited _GALT list. An unanchored substring test would
    # let a state code match any longer code that contains it.
    case "|$_GALT|" in *"|$_inh_source|"*) _bnf_gen_galt_add "$_inh_target";; esac
  done
}
# ============================================================
# Phase 2: Build dispatch table
# ============================================================
# Dispatch table layout:
#   _DA_<charcode>  space-separated "state=action" entries
#   _DA_CODES       space-separated char codes for safe iteration
#   _DA_RAW_<code>  raw character for each code
#   _DA_CLASS       space-separated "pattern=state=action" (class-based dispatch)
#   _DA_WILD        space-separated "state=action" (wildcard dispatch, negated classes)
#   _DA_INHERIT     space-separated "target=source" (state inheritance)
# Bookkeeping lists reset before the rule walk:
#   _GALT   auto-close list (done states are added during the walk)
#   _GDONE  done states (excluded from whitespace skip)
#   _GNOCOL done states that must NOT collapse
_bnf_gen_p2 () {
  _DA_CODES=
  _DA_CLASS=
  _DA_WILD=
  _DA_INHERIT=

  _bnf_gen_p2_str_accum

  # Number accumulator: only its state code is resolved here. Accumulation
  # is handled by the fast path, so the number state needs no dispatch
  # entries of its own.
  if test -n "$_GNUM"; then
    eval "_GNUM_CODE=\$_RCODE_$_GNUM"
  fi

  _GALT=
  _GDONE=
  _GNOCOL=

  _bnf_gen_walk
  _bnf_gen_p2_closeseq
  _bnf_gen_p2_autoclose
  _bnf_gen_p2_root_dispatch
  _bnf_gen_p2_postfix
  _bnf_gen_p2_glob_precomp

  # Phase 2b: expected-token strings per state (for error messages),
  # plus dispatch inheritance.
  _bnf_gen_p2_exp_inh
}
# ============================================================
# Phase 3: Parser Emission
# ============================================================
# -- Action & pattern emission --
# Emit keyword match: ast_more + MATCH extraction + case-insensitive or strict dispatch.
# $1=keyword $2=CI match action $3=strict match action $4=ident fallback continuation
#
# When $1 occurs in _GKW_LIST the emitted code uppercases MATCH (_ucase) and
# compares REPLY; a non-matching word falls back to the keyword ident rule
# (_GKW_RULE) followed by $4. When no ident rule code is available, the
# fallback is a KEYWORD error — the same fallback _bnf_gen_emit_kw_merge
# uses — instead of a call to a nameless "ast_" command.
# When $1 is not listed, MATCH is compared as-is and any other word is a
# KEYWORD error.
# Note: _kw_ident_code is assigned without `local`, as before.
_bnf_gen_emit_kw_match () {
  _printr1 " ast_more; MATCH=\"\${CODE%%[!a-zA-Z0-9_]*}\""
  case "$_GKW_LIST" in *"$1"*)
    _printr1 " _ucase \"\$MATCH\""
    _printr1 " case \"\$REPLY\" in"
    _printr1 " '$1') $2"
    eval "_kw_ident_code=\"\${_RCODE_$_GKW_RULE:-}\""
    case "$_kw_ident_code" in
      ?*) _printr1 " *) ast_consume_match"
          _printr1 " ast_$_kw_ident_code; ast_close; $4";;
      *)  _printr1 " *) _error KEYWORD;;";;
    esac
    _printr1 " esac;;";;
  *) _printr1 " case \"\$MATCH\" in"
    _printr1 " '$1') $3"
    _printr1 " *) _error KEYWORD;;"
    _printr1 " esac;;";;
  esac
}
# Emit the generated-parser shell code for one dispatch action.
# $1 = action string: an action name, optionally followed by '='-separated
#      operands (state codes, keywords, continuation states).
# Output goes through _printr1; keyword actions delegate to
# _bnf_gen_emit_kw_match. Unknown actions emit nothing.
# Reads _GSTRICT (adds _JT trailing-comma bookkeeping) and _GKW_CASE.
# Note: _okw_lc is assigned without `local`.
_bnf_gen_emit_action () {
# Emit shell code for an action. $1=action string
local _act="$1" _sp _sk _rest _cont _child _skip
local _okrc _okkw _okpfx _osc_code _osc_cont
local _okw_rc _okw_kw _okw_cont _okw_sfx _skw _skw_action
case "$_act" in
PUSH=*|OPEN=*) # Push child state / create node + skip char
_sp= _sk=
# Strict mode resets the trailing-comma flag first.
case "$_GSTRICT" in 1) _sp="_JT=0; ";; esac
case "$_act" in OPEN=*) _sk=" ast_skip;";; esac
_printr1 " ${_sp}ast_${_act#*=};$_sk continue;;";;
OPEN_KWSKIP=*|OPEN_KWSKIP_CONT=*) # Create node, skip full keyword word
# Operands: <code>=<kw> or, for the _CONT form, <code>=<kw>=<cont>.
_okrc=; _okkw=; _okpfx=
case "$_act" in
OPEN_KWSKIP=*) _rest="${_act#OPEN_KWSKIP=}"
_okrc="${_rest%%=*}"; _okkw="${_rest#*=}";;
*) _rest="${_act#OPEN_KWSKIP_CONT=}"
_okrc="${_rest%%=*}"; _rest="${_rest#*=}"
_okkw="${_rest%%=*}"; _okpfx="STATE=${_rest#*=}; ";;
esac
_printr1 ""
_bnf_gen_emit_kw_match "$_okkw" \
"${_okpfx}ast_$_okrc; ast_skip_match; continue;;" \
"${_okpfx}ast_$_okrc; CODE=\"\${CODE#$_okkw}\"; _COL=\$((_COL+${#_okkw})); continue;;" \
"${_okpfx}continue;;"
;;
OPEN_SKIP_CLOSE=*) # Create node, skip, close
_printr1 " ast_${_act#OPEN_SKIP_CLOSE=}; ast_skip; ast_close; continue;;";;
OPEN_SKIP_CLOSE_CONT=*) # Create node, skip, close + set continuation
_rest="${_act#OPEN_SKIP_CLOSE_CONT=}"
_osc_code="${_rest%%=*}"; _osc_cont="${_rest#*=}"
_printr1 " ast_$_osc_code; ast_skip; ast_close; STATE=$_osc_cont; continue;;";;
OPEN_KW=*) # Create node via keyword match (optional continuation: OPEN_KW=<code>=<kw>[=<cont>])
_rest="${_act#OPEN_KW=}"; _okw_cont=; _okw_sfx=
_okw_rc="${_rest%%=*}"; _rest="${_rest#*=}"
_okw_kw="${_rest%%=*}"
case "$_rest" in *=*) _okw_cont="${_rest#*=}"; _okw_sfx=" STATE=$_okw_cont;";; esac
_printr1 ""
# Without a continuation, strict mode resets the trailing-comma flag.
case "$_okw_cont" in '') case "$_GSTRICT" in 1)
_printr1 " _JT=0;";; esac;; esac
# CONSUMED records the keyword text; lowercased when _GKW_CASE=lower.
_okw_lc=$_okw_kw
case "$_GKW_CASE" in lower) _lcase_str "$_okw_kw"; _okw_lc=$REPLY;; esac
_bnf_gen_emit_kw_match "$_okw_kw" \
"CONSUMED='$_okw_lc'; ast_skip_match
ast_$_okw_rc; ast_close;$_okw_sfx continue;;" \
"CONSUMED='$_okw_lc'; CODE=\"\${CODE#$_okw_kw}\"; _COL=\$((_COL+${#_okw_kw}))
ast_$_okw_rc; ast_close;$_okw_sfx continue;;" \
"${_okw_cont:+STATE=$_okw_cont; }continue;;"
;;
SKIP_CONT=*) # Skip char and transition to continuation state
_cont="${_act#SKIP_CONT=}"
_printr1 " ast_skip; STATE=$_cont; continue;;";;
PUSH_CONT=*|OPEN_CONT=*) # Push/open child with continuation state
# Operands: <child>=<cont>; the OPEN form additionally skips the char.
_rest="${_act#*_CONT=}"; _skip=
case "$_act" in OPEN_CONT=*) _skip=" ast_skip;";; esac
_child="${_rest%%=*}"; _cont="${_rest#*=}"
_printr1 " STATE=$_cont; ast_$_child;$_skip continue;;";;
SKIP_CLOSE) # Skip char, close node
# Strict mode rejects a close right after a comma (_JT=1).
case "$_GSTRICT" in 1)
_printr1 ""
_printr1 " case \$_JT in 1) _error COMMA;; esac"
_printr1 " ast_skip; ast_close; continue;;";;
*) _printr1 " ast_skip; ast_close; continue;;";;
esac;;
SKIP_CLOSE_XC) # Skip char, close node, set _XC=1 (postfix close)
_printr1 " ast_skip; ast_close_xc;;";;
SKIP) # Skip char (middle delimiter)
_printr1 " ast_skip; continue;;";;
SKIP_KW_CONT=*|SKIP_KW_CLOSE=*) # Skip keyword and transition or close
# Operands: <kw>=<cont> for _CONT, <kw> for _CLOSE.
case "$_act" in
SKIP_KW_CONT=*) _rest="${_act#SKIP_KW_CONT=}"
_skw="${_rest%%=*}"; _skw_action="STATE=${_rest#*=}; continue;;";;
*) _skw="${_act#SKIP_KW_CLOSE=}"; _skw_action="ast_close; continue;;";;
esac
_printr1 ""
_bnf_gen_emit_kw_match "$_skw" \
"ast_skip_match; $_skw_action" \
"CODE=\"\${CODE#$_skw}\"; _COL=\$((_COL+${#_skw})); $_skw_action" \
"continue;;"
;;
SKIP_COMMA) # Comma separator (sets trailing comma flag)
_printr1 " ast_skip; _JT=1; continue;;";;
ACCUM_STR_CLOSE) # Close string on matching quote
_printr1 " ast_close; ast_skip; continue;;";;
ACCUM_STR_ESC) # Escape sequence in string (JSON mode)
# Emitted code accepts \" \\ \/ \b \f \n \r \t and \u + 4 hex digits;
# anything else is an ESCAPE (or UNICODE) error.
_printr1 ""
_printr1 " case \${CODE#?} in"
_printr1 " '\"'*|'\\'*|'/'*|'b'*|'f'*|'n'*|'r'*|'t'*)"
_printr1 " ast_consume2;;"
_printr1 " 'u'*) case \$CODE in"
_printr1 " '\\u'[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]*)"
_printr1 " REST=\"\${CODE#??????}\"; _ast_xfer; _COL=\$((_COL+6));;"
_printr1 " *) _error UNICODE;; esac;;"
_printr1 " *) _error ESCAPE;;"
_printr1 " esac; continue;;"
;;
ACCUM_STR_ESC_SIMPLE) # Escape sequence in string (simple mode: any \X = 2 chars)
_printr1 " ast_consume2; continue;;";;
esac
}
# Emit the opening of a generated `case` arm that matches input starting
# with $1.
#   single quote        -> "'"*)
#   double quote        -> '"'*)
#   two backslashes     -> '\\'*)
#   [..] (bracketed)    -> emitted unquoted so it acts as a character class
#   anything else       -> '<text>'*)
_bnf_gen_emit_case_pat () {
  local _ecp_pat
  case "$1" in
    "'") _ecp_pat="\"'\"";;
    '"') _ecp_pat="'\"'";;
    '\\') _ecp_pat="'\\\\'";;
    '['*']') _ecp_pat="$1";;
    *) _ecp_pat="'$1'";;
  esac
  _printr1 " $_ecp_pat*)"
}
# Emit one "<state>)" case label followed by the code for its action.
# $1=state code, $2=action string
# The label is written with _printn1; the action body comes from
# _bnf_gen_emit_action.
_bnf_gen_emit_state_action () {
  local _esa_state="$1" _esa_action="$2"
  _printn1 " ${_esa_state})"
  _bnf_gen_emit_action "$_esa_action"
}
# Emit a state:action pair, merging keyword actions if needed.
# $1=state $2=first action $3=char (for kw merge context)
# Checks if the state has multiple keyword entries in $_entries (from caller scope)
# and merges them into a single keyword-check block if so.
# Decision:
#   - no keyword action for the state      -> plain state action
#   - exactly one entry and it is a keyword action, and no class entry in
#     _DA_CLASS matches $3 for this state   -> plain state action
#   - otherwise                             -> _bnf_gen_emit_kw_merge
# Note: _e2 is assigned without `local`.
_bnf_gen_emit_state_or_kw_merge () {
local _eskm_st="$1" _eskm_act="$2" _eskm_ch="$3"
local _eskm_kw= _eskm_kwn=0 _eskm_nkw= _eskm_tot=0
# Scan the caller's "state=action" entries for this state: count them all
# (_eskm_tot), collect the keyword actions (_eskm_kw / _eskm_kwn) and
# remember the first non-keyword action (_eskm_nkw).
for _e2 in $_entries; do
local _s2="${_e2%%=*}" _a2="${_e2#*=}"
case "$_s2" in "$_eskm_st")
_eskm_tot=$((_eskm_tot + 1))
case "$_a2" in OPEN_KW=*=*=*|OPEN_KWSKIP_CONT=*|SKIP_KW_CONT=*|SKIP_KW_CLOSE=*)
_eskm_kw="$_eskm_kw $_a2"; _eskm_kwn=$((_eskm_kwn + 1));;
*) case "$_eskm_nkw" in '') _eskm_nkw="$_a2";; esac;;
esac;;
esac
done
# Merge when: 2+ keyword actions, OR keyword + non-keyword conflict
case "$_eskm_kwn" in 0) _bnf_gen_emit_state_action "$_eskm_st" "$_eskm_act"; return;; esac
# The concatenated strings are equal exactly when total == keyword count,
# i.e. every entry of this state is a keyword action.
case "$_eskm_tot$_eskm_kwn" in "$_eskm_kwn$_eskm_kwn")
case "$_eskm_kwn" in 1)
# Only shortcut if no class entry provides a better fallback
local _has_cls= _clentry _clpat _clrest _clst2
for _clentry in $_DA_CLASS; do
_clpat="${_clentry%%=*}"
# Unquoted on purpose: the class pattern is used as a glob against the char.
case "$_eskm_ch" in ${_clpat}*) ;; *) continue;; esac
_clrest="${_clentry#*=}"
_clst2="${_clrest%%=*}"
case "$_clst2" in "$_eskm_st") _has_cls=1; break;; esac
done
case "$_has_cls" in '')
_bnf_gen_emit_state_action "$_eskm_st" "$_eskm_act"; return;; esac;;
esac;;
esac
_bnf_gen_emit_kw_merge "$_eskm_st" "$_eskm_kw" "$_eskm_nkw" "$_eskm_ch"
}
# Emit merged keyword dispatch block for a state with multiple keyword actions.
# $1=state, $2=keyword actions (space-sep), $3=non-keyword action, $4=current char.
#
# Emitted shape:
#   <state>)
#     ast_more; MATCH=<leading word of CODE>
#     [_ucase "$MATCH"]              only when some keyword is in _GKW_LIST
#     case "<subject>" in
#       '<kw>') ...                  one arm per keyword action
#       *) <fallback>
#     esac;;
# <subject> is $REPLY (the _ucase result) when _ucase was emitted, and $MATCH
# otherwise. Without the _ucase call REPLY would hold a stale value, so the
# word itself has to be compared — the same choice _bnf_gen_emit_kw_match
# makes for unlisted keywords.
# Fallback priority: the given non-keyword action, else a class entry from
# _DA_CLASS matching $4 for this state, else the keyword ident rule, else a
# KEYWORD error.
# Note: _mkw_lc is assigned without `local`.
_bnf_gen_emit_kw_merge () {
  local _km_st="$1" _km_kw_acts="$2" _km_non_kw="$3" _km_ch="$4"
  local _ka _kw_str _any_kw_listed= _merge_cont= _rest _mc _mkw _mcont
  local _clentry _clpat _clrest _clst2 _nk_rest _nk_code _nk_cont _kw_ident_code
  _printr1 " $_km_st)"
  _printr1 " ast_more; MATCH=\"\${CODE%%[!a-zA-Z0-9_]*}\""
  # Extract each action's keyword operand (second operand for OPEN_* actions,
  # first otherwise) and check whether any of them is listed in _GKW_LIST.
  for _ka in $_km_kw_acts; do
    _kw_str="${_ka#*=}"
    case "$_ka" in OPEN_*) _kw_str="${_kw_str#*=}";; esac
    _kw_str="${_kw_str%%=*}"
    case "$_GKW_LIST" in *"$_kw_str"*) _any_kw_listed=1;; esac
  done
  case "$_any_kw_listed" in
    1) _printr1 " _ucase \"\$MATCH\""
       _printr1 " case \"\$REPLY\" in";;
    *) _printr1 " case \"\$MATCH\" in";;
  esac
  # Emit each keyword branch
  for _ka in $_km_kw_acts; do
    case "$_ka" in
      OPEN_KW=*=*=*)
        _rest="${_ka#OPEN_KW=}"
        _mc="${_rest%%=*}"; _rest="${_rest#*=}"
        _mkw="${_rest%%=*}"; _mcont="${_rest#*=}"
        _merge_cont="$_mcont"
        _mkw_lc=$_mkw
        case "$_GKW_CASE" in lower) _lcase_str "$_mkw"; _mkw_lc=$REPLY;; esac
        _printr1 " '$_mkw') CONSUMED='$_mkw_lc'; CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH}))"
        _printr1 " ast_$_mc; ast_close; STATE=$_mcont; continue;;";;
      OPEN_KWSKIP_CONT=*)
        _rest="${_ka#OPEN_KWSKIP_CONT=}"
        _mc="${_rest%%=*}"; _rest="${_rest#*=}"
        _mkw="${_rest%%=*}"; _mcont="${_rest#*=}"
        _merge_cont="$_mcont"
        _printr1 " '$_mkw') STATE=$_mcont; ast_$_mc; CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH})); continue;;";;
      SKIP_KW_CONT=*)
        _rest="${_ka#SKIP_KW_CONT=}"
        _mkw="${_rest%%=*}"; _mcont="${_rest#*=}"
        # Only supplies the merge continuation when none was set yet.
        case "$_merge_cont" in '') _merge_cont="$_mcont";; esac
        _printr1 " '$_mkw') CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH})); STATE=$_mcont; continue;;";;
      SKIP_KW_CLOSE=*)
        _mkw="${_ka#SKIP_KW_CLOSE=}"
        _printr1 " '$_mkw') CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH})); ast_close; continue;;";;
    esac
  done
  # Fallback: non-keyword action, class entry match, ident fallback, or error
  case "$_km_non_kw" in '')
    for _clentry in $_DA_CLASS; do
      _clpat="${_clentry%%=*}"
      # Unquoted on purpose: the class pattern is used as a glob.
      case "$_km_ch" in ${_clpat}*) ;; *) continue;; esac
      _clrest="${_clentry#*=}"
      _clst2="${_clrest%%=*}"
      case "$_clst2" in "$_km_st")
        _km_non_kw="${_clrest#*=}"; break;; esac
    done;; esac
  case "$_km_non_kw" in
    PUSH_CONT=*)
      _nk_rest="${_km_non_kw#PUSH_CONT=}"
      _nk_code="${_nk_rest%%=*}"; _nk_cont="${_nk_rest#*=}"
      _printr1 " *) STATE=$_nk_cont; ast_$_nk_code; continue;;";;
    OPEN_CONT=*)
      _nk_rest="${_km_non_kw#OPEN_CONT=}"
      _nk_code="${_nk_rest%%=*}"; _nk_cont="${_nk_rest#*=}"
      _printr1 " *) STATE=$_nk_cont; ast_$_nk_code; ast_skip; continue;;";;
    *)
      eval "_kw_ident_code=\"\${_RCODE_$_GKW_RULE:-}\""
      case "$_kw_ident_code" in ?*)
        _printr1 " *) CONSUMED=\"\$MATCH\"; CODE=\"\${CODE#\"\$MATCH\"}\"; _COL=\$((_COL+\${#MATCH}))"
        _printr1 " ast_$_kw_ident_code; ast_close; STATE=${_merge_cont:-}; continue;;";;
      *) _printr1 " *) _error KEYWORD;;";;
      esac;;
  esac
  _printr1 " esac;;"
}
# -- Dispatch structure emission --
# Close a dispatch block: emit the wildcard entries that were not already
# emitted, then the auto-close cascade, then the error fallback and "esac;;".
# $1=emitted states $2=left delimiter $3=right delimiter
# A wildcard state counts as already emitted when "$2<state>$3" occurs in $1.
_bnf_gen_emit_wild_fallback () {
  local _wentry _wst _wact
  for _wentry in $_DA_WILD; do
    # Entries are "state=action".
    _wst="${_wentry%%=*}"
    _wact="${_wentry#*=}"
    case "$1" in
      *"$2$_wst$3"*) ;;
      *) _printn1 " $_wst)"
         _bnf_gen_emit_action "$_wact";;
    esac
  done
  _bnf_gen_emit_galt "$1"
  _printr1 " *) _pars_err;;"
  _printr1 " esac;;"
}
# Emit auto-close cases for alternation/transparent states.
# $1=skip filter (pipe-delimited states to exclude, or empty for none)
# A state from _GALT is excluded when the filter contains "<state>=" or
# "|<state>|". The remaining states are split in two groups: states listed
# in _GNOCOL close with ast_close, all others with ast_close_col. Each
# non-empty group is emitted through _bnf_gen_emit_close_case.
_bnf_gen_emit_galt () {
case "$_GALT" in '') return;; esac
local _galt_skip="$1" _filt= _galt_rest="$_GALT" _gs
# Walk pipe-delimited _GALT, filtering out already-emitted states
while :; do
# Pop the next element: text before the first '|', or the whole remainder.
case "$_galt_rest" in
*'|'*) _gs="${_galt_rest%%\|*}"; _galt_rest="${_galt_rest#*\|}";;
?*) _gs="$_galt_rest"; _galt_rest=;;
*) break;;
esac
# Keep the state (pipe-joined into _filt) unless the skip filter mentions it.
case "$_galt_skip" in *"$_gs="*|*"|$_gs|"*) ;;
*) case "$_filt" in '') _filt="$_gs";; *) _filt="$_filt|$_gs";; esac;; esac
done
case "$_filt" in ?*)
# Split no-collapse states from regular collapse states
local _col_filt= _nocol_filt=
local _filt_rest="$_filt" _fs
while :; do
case "$_filt_rest" in
*'|'*) _fs="${_filt_rest%%\|*}"; _filt_rest="${_filt_rest#*\|}";;
?*) _fs="$_filt_rest"; _filt_rest=;;
*) break;; esac
# _GNOCOL is matched as a space-separated list whose entries are
# preceded by a space.
case "$_GNOCOL" in *" $_fs "*|*" $_fs")
case "$_nocol_filt" in '') _nocol_filt="$_fs";; *) _nocol_filt="$_nocol_filt|$_fs";; esac;;
*) case "$_col_filt" in '') _col_filt="$_fs";; *) _col_filt="$_col_filt|$_fs";; esac;;
esac
done
_bnf_gen_emit_close_case "$_col_filt" "ast_close_col"
_bnf_gen_emit_close_case "$_nocol_filt" "ast_close"
;; esac
}
# Emit one case arm that closes the given states.
# $1=state list (used verbatim as the case pattern; nothing is emitted when
#    it is empty)
# $2=close action (ast_close or ast_close_col)
# With a precedence rule configured (_GPREC_RULE non-empty) the "_xc" variant
# of the action is emitted and no "continue" follows; otherwise the plain
# action followed by "continue".
_bnf_gen_emit_close_case () {
  if test -n "$1"; then
    if test -n "$_GPREC_RULE"; then
      _printr1 " $1) ${2}_xc;;"
    else
      _printr1 " $1) $2; continue;;"
    fi
  fi
}
# Emit a case arm that un-parses every child and joins the results.
# $1=state code
# $2=separator. Non-empty: children are joined with "<separator> ".
#    Empty: children are joined with $_EOL when a comment char (_GCMT) is
#    configured, and concatenated directly otherwise.
# The emitted loop calls _<GN>_unast_emit per child and leaves the joined
# text in REPLY.
_bnf_gen_emit_child_loop () {
  local _ecl_join=
  if test -n "$2"; then
    _ecl_join=" case \"\$_r\" in ?*) _r=\"\$_r$2 \";; esac"
  elif test -n "$_GCMT"; then
    _ecl_join=" case \"\$_r\" in ?*) _r=\"\$_r\$_EOL\";; esac"
  fi
  _printr1 " $1) _r="
  _printr1 " for _ch in \"\$@\"; do"
  # The joiner line is only present when one of the two modes applies.
  if test -n "$_ecl_join"; then
    _printr1 "$_ecl_join"
  fi
  _printr1 " _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\""
  _printr1 " done; REPLY=\"\$_r\";;"
}
# Cluster overlapping class patterns for dispatch emission.
# Sets _cls_ngrp and _CLS_GRP_<n> (read by caller via dynamic scoping).
# Groups are numbered from 0; _CLS_GRP_<n> is a space-separated pattern list.
# Overlap is decided by SAMPLING: two patterns overlap when both match at
# least one of a fixed set of probe characters. A pattern joins the first
# group containing a pattern it overlaps with; otherwise it starts a new group.
# NOTE(review): patterns are expanded unquoted in the `for` lists; this looks
# like it relies on pathname expansion being disabled — confirm.
_bnf_gen_emit_cls_cluster () {
local _gcc_pats= _gcc_done= _clentry _clpat _cpat _op _tc _gp
local _gcc_found _gcc_gi _gcc_ov _m1 _m2
# Collect the distinct patterns of _DA_CLASS ("pattern=state=action"),
# keeping first-seen order.
for _clentry in $_DA_CLASS; do
_clpat="${_clentry%%=*}"
case "$_gcc_done" in *"|$_clpat|"*) ;; *)
_gcc_done="$_gcc_done|$_clpat|"
_gcc_pats="$_gcc_pats $_clpat";;
esac
done
_cls_ngrp=0
for _cpat in $_gcc_pats; do
_gcc_found=
_gcc_gi=0
# Look through the existing groups for a member that overlaps _cpat.
while test $_gcc_gi -lt $_cls_ngrp; do
eval "_gp=\"\$_CLS_GRP_$_gcc_gi\""
for _op in $_gp; do
_gcc_ov=0
# Probe characters: both patterns must match the same probe.
for _tc in a m z A M Z 0 5 9 _ . : '#' '+' '-' '!' '?' '*' '/'; do
_m1=0; _m2=0
case "$_tc" in ${_cpat}*) _m1=1;; esac
case "$_tc" in ${_op}*) _m2=1;; esac
case "$_m1$_m2" in 11) _gcc_ov=1; break;; esac
done
# Overlap found: remember the group and leave both the member loop
# and the group loop.
case $_gcc_ov in 1) _gcc_found=$_gcc_gi; break 2;; esac
done
_gcc_gi=$((_gcc_gi + 1))
done
# Append to the overlapping group, or open a new one.
case "$_gcc_found" in
?*) eval "_CLS_GRP_$_gcc_found=\"\$_CLS_GRP_$_gcc_found \$_cpat\"";;
*) eval "_CLS_GRP_$_cls_ngrp=\"\$_cpat\""
_cls_ngrp=$((_cls_ngrp + 1));;
esac
done
}
# -- Parser structure emission --
# Emit the file-scope glob variables of the generated parser:
#   1. for every state code in _GLOB_VARS, "_<GN>_gp_<code>='[<pattern>]*'",
#      where <pattern> is _GLOB_PAT_<code> passed through _esc_sq (result
#      taken from REPLY); a blank line precedes this group when it is
#      non-empty;
#   2. for every string accumulator 1.._GSTR_N, the pre-built assignment
#      text in _STR_GLOBVAR_<i>, when set.
# Note: _gvrc, _gvpat and _sgvar are assigned without `local`.
_bnf_gen_emit_glob_vars () {
  if test -n "$_GLOB_VARS"; then
    _printr1 ""
    for _gvrc in $_GLOB_VARS; do
      eval "_gvpat=\"\$_GLOB_PAT_$_gvrc\""
      _esc_sq "$_gvpat"
      _printr1 "_${_GN}_gp_$_gvrc='[$REPLY]*'"
    done
  fi
  local _geg_i=1
  while test $_geg_i -le $_GSTR_N; do
    eval "_sgvar=\"\${_STR_GLOBVAR_$_geg_i:-}\""
    if test -n "$_sgvar"; then
      _printr1 "$_sgvar"
    fi
    _geg_i=$((_geg_i + 1))
  done
}
# Emit the expected-token variables used by error messages (file scope).
# After a blank line, every state code in _GST that has a non-empty
# _EXP_<code> yields "_EXP_<code>='<text>'", where <text> is the value passed
# through _esc_sq (result taken from REPLY).
# Note: _sc and _exp are assigned without `local`.
_bnf_gen_emit_exp_tokens () {
  _printr1 ""
  for _sc in $_GST; do
    eval "_exp=\"\${_EXP_$_sc:-}\""
    if test -z "$_exp"; then
      continue
    fi
    _esc_sq "$_exp"
    _printr1 "_EXP_$_sc='$REPLY'"
  done
}
# Emit the precedence-climbing support code; nothing is emitted without a
# precedence rule (_GPREC_RULE empty).
#   - the _steal alias
#   - _<GN>_parser_prec: maps an operator ($1) to its precedence in REPLY,
#     0 for unknown operators. Arms are emitted in this order: binary
#     operators (_GPREC_O_<i>/_GPREC_P_<i>), the ternary opener when
#     _GTERNARY=1, then unary operators (_GUNARY_O_<i>/_GUNARY_P_<i>).
# Note: _po and _pp are assigned without `local`.
_bnf_gen_emit_prec () {
  local _gep_i
  if test -n "$_GPREC_RULE"; then
    _printr1 ""
    _printr1 "# Steal last sibling from parent, make it first child of current NODE"
    _printr1 "alias _steal='eval \"_W=\\\"\\\${X\$PARN##*\\\" \\\"}\\\""
    _printr1 " X\$PARN=\\\"\\\${X\$PARN% *}\\\""
    _printr1 " X\$NODE=\\\"\\\$X\$NODE \\\$_W\\\"\"'"
    _printr1 ""
    _printr1 "_${_GN}_parser_prec () {"
    _printr1 " case \"\$1\" in"
    # Binary operators.
    _gep_i=1
    while test $_gep_i -le $_GPREC_N; do
      eval "_po=\"\$_GPREC_O_$_gep_i\"; _pp=\"\$_GPREC_P_$_gep_i\""
      _printr1 " '$_po') REPLY=$_pp;;"
      _gep_i=$((_gep_i + 1))
    done
    # Ternary opener.
    if test "$_GTERNARY" = 1; then
      _printr1 " '$_GTERNARY_OPEN') REPLY=$_GTERNARY_PREC;;"
    fi
    # Unary operators.
    _gep_i=1
    while test $_gep_i -le $_GUNARY_N; do
      eval "_po=\"\$_GUNARY_O_$_gep_i\"; _pp=\"\$_GUNARY_P_$_gep_i\""
      _printr1 " '$_po') REPLY=$_pp;;"
      _gep_i=$((_gep_i + 1))
    done
    _printr1 " *) REPLY=0;;"
    _printr1 " esac"
    _printr1 "}"
  fi
}
# Emit char-class accumulator fast paths.
# Sets _ACCUM_STATES and _GLOB_VARS (read by caller via dynamic scoping).
# For every rule with an _ACCUM_<code> entry ("<quantifier>:<class>"):
#   - the state code is appended to _ACCUM_STATES;
#   - the class is turned into the bracket body of a glob that matches from
#     the first character OUTSIDE the class (a leading '^' is dropped, a
#     class without '^' gets a leading '!');
#   - for quantifiers Bp and Bs a fast-path case arm is emitted; other
#     quantifiers emit nothing.
# _r, _rcode, _accum and _xc are not declared here; they come from the
# caller's scope.
_bnf_gen_emit_fp_charclass () {
local _ccquant _ccval _glob _ccraw _has_dash _dtmp _drem _c _ccesc
local _glob_var _numval _closeseq _cs1 _cs2 _has_bracket _cs2pat
_ACCUM_STATES=
_GLOB_VARS=
for _r in $_GR; do
eval "_rcode=\$_RCODE_$_r"
eval "_accum=\"\${_ACCUM_$_rcode:-}\""
case "$_accum" in '') continue;; esac
_ACCUM_STATES="$_ACCUM_STATES $_rcode"
_ccquant="${_accum%%:*}"; _ccval="${_accum#*:}"
# Invert the class: the glob must match the first char NOT accumulated.
case "$_ccval" in
'^'*) _ccraw="${_ccval#^}";;
*) _ccraw="!${_ccval}";;
esac
# Dash handling: if _strip_bsdash changed the text, or the class ends in
# '-', remember that a literal dash is needed and re-append it at the end.
# (_strip_bsdash is defined elsewhere; presumably it removes "\-" — confirm.)
_has_dash=
_strip_bsdash "$_ccraw"
case "$REPLY" in "$_ccraw") ;; *) _has_dash=1; _ccraw="$REPLY";; esac
case "$_ccraw" in *-) _has_dash=1; _ccraw="${_ccraw%-}";; esac
# Remove backslash escapes: "\]" and "\-" are dropped and flagged, any
# other escaped character is kept without its backslash.
_has_bracket=
case "$_ccraw" in *'\'*)
_dtmp=; _drem="$_ccraw"
while :; do
case "$_drem" in
*'\'*) _dtmp="$_dtmp${_drem%%\\*}"
_drem="${_drem#*\\}"
# _c = the character following the backslash.
_c="${_drem%"${_drem#?}"}"; _drem="${_drem#?}"
case "$_c" in
']') _has_bracket=1;;
'-') _has_dash=1;;
*) _dtmp="$_dtmp$_c";;
esac;;
*) _dtmp="$_dtmp$_drem"; break;; esac
done
_ccraw="$_dtmp";;
esac
# A literal ']' goes first in the bracket body (after '!' if present),
# a literal '-' goes last.
case "$_has_bracket" in 1)
case "$_ccraw" in '!'*) _ccraw="!]${_ccraw#!}";; *) _ccraw="]$_ccraw";; esac;; esac
case "$_has_dash" in 1) _ccraw="$_ccraw-";; esac
# Bodies containing a single quote are referenced through the variable
# _<GN>_gp_<code>; all others are inlined as a literal glob.
_esc_dq_only "$_ccraw"; _ccesc="$REPLY"; _glob_var=
case "$_ccesc" in *"'"*)
_glob_var="_${_GN}_gp_$_rcode"
_glob="\$$_glob_var"
_GLOB_VARS="$_GLOB_VARS $_glob_var=[$_ccesc]*";;
*) _glob="[$_ccesc]*";;
esac
# Number validation applies when _GVALNUM=1 and the rule is _GNUM or is
# named "number".
_numval=
case "$_GVALNUM" in 1)
case "$_r" in "$_GNUM"|number) _numval=1;; esac;; esac
case "$_ccquant" in Bp|Bs)
eval "_closeseq=\"\${_CLOSESEQ_$_rcode:-}\""
case "$_closeseq" in
??*)
# Multi-char close sequence: only its first two characters are used.
# On the first char followed by the second the node closes; on the
# first char alone it is consumed as content.
_cs1="${_closeseq%"${_closeseq#?}"}"; _cs2="${_closeseq#?}"
_cs2="${_cs2%"${_cs2#?}"}"
_printr1 " # $_r: accumulate [$_ccval], close on $_closeseq"
_printr1 " $_rcode) ast_more; REST=\"\${CODE%%$_glob}\""
_printr1 " case \"\$REST\" in ?*) ast_bulk_nl;; *)"
case "$_cs2" in
'-') _cs2pat='[-]';; ']') _cs2pat='[]]';; *) _cs2pat="['${_cs2}']";; esac
_printr1 " case \$CODE in '${_cs1}'${_cs2pat}*) ast_close;$_xc continue;; '${_cs1}'*)"
_printr1 " ast_consume; continue;; *) ast_close;$_xc continue;; esac;; esac;;";;
*)
# No close sequence: bulk-accumulate, close when nothing matched.
_printr1 " # $_r: accumulate [$_ccval]"
_printr1 " $_rcode) ast_more; REST=\"\${CODE%%$_glob}\""
case "$_numval" in 1)
_printr1 " case \"\$REST\" in ?*) ast_bulk;; *) _numck; ast_close;$_xc continue;; esac;;";;
*)
_printr1 " case \"\$REST\" in ?*) ast_bulk_nl;; *) ast_close;$_xc continue;; esac;;";;
esac;;
esac
;; esac
done
}
# Emit fast paths: extern handlers, string/number/char-class accumulators,
# whitespace skip, and comment handling.
# Everything is emitted inside one generated "case $STATE in ... esac" block,
# followed by an optional comment-skip block.
# Reads:  _GPREC_RULE, _GEXTERN, _GN, _GSTR_N and _GSTR_*_<i>, _GNUM,
#         _GNUM_CODE, _GVALNUM, _GW, _GST, _ACCUM_STATES (set by
#         _bnf_gen_emit_fp_charclass), _DA_WILD, _GALT, _GDONE, _GCMT,
#         _GCMT_END
_bnf_gen_emit_fast_paths () {
  local _ext _ext_code _sfc _ch _str_rule _str_code _str_close _str_esc _str_embed_stop
  local _r _rcode _accum _has_bracket _cs2pat _ws _sc _str_skip _xc
  # When precedence climbing is active, fast-path closures must set _XC=1
  # so the postfix/binary operator dispatch fires on the next iteration.
  case "$_GPREC_RULE" in ?*) _xc=' _XC=1; _PREV=;';; *) _xc=;; esac
  _printr1 ""
  _printr1 " # --- Fast paths (bulk accumulation) ---"
  _printr1 " case \$STATE in"
  # Extern rule handlers: call external function when state is entered
  for _ext in $_GEXTERN; do
    eval "_ext_code=\"\${_RCODE_$_ext:-}\""
    case "$_ext_code" in ?*)
      _printr1 " # $_ext: externally implemented"
      _printr1 " $_ext_code) _parse_${_ext}_$_GN; continue;;";;
    esac
  done
  # String accumulator fast paths (one per #!string directive)
  local _gef_si=1
  while test $_gef_si -le $_GSTR_N; do
    eval "_str_rule=\"\$_GSTR_RULE_$_gef_si\""
    eval "_str_code=\"\$_GSTR_CODE_$_gef_si\""
    eval "_str_close=\"\$_GSTR_CLOSE_$_gef_si\""
    eval "_str_esc=\"\${_GSTR_ESC_$_gef_si:-}\""
    eval "_str_embed_stop=\"\${_GSTR_EMBED_STOP_$_gef_si:-}\""
    _printr1 " # $_str_rule: accumulate (stops at close/esc)"
    # Stop patterns: close string, backslash (when escapes are enabled),
    # each embed-stop character, and the empty string. Anything else is
    # bulk-accumulated up to the pre-computed stop glob.
    _printn1 " $_str_code) case \$CODE in "
    case "$_str_close" in
      '"') _printn1 "'\"'*";;
      "'") _printn1 "\"'\"*";;
      *) _printn1 "'$_str_close'*";;
    esac
    case "$_str_esc" in json|simple)
      _printn1 "|'\\'*";;
    esac
    _sfc="$_str_embed_stop"
    while test ${#_sfc} -gt 0; do
      _ch="${_sfc%"${_sfc#?}"}"; _sfc="${_sfc#?}"
      case "$_ch" in '$') _printn1 "|'\$'*";; *) _printn1 "|'$_ch'*";; esac
    done
    _printr1 "|'') ;; *)"
    _printr1 " ast_more; REST=\"\${CODE%%\$_${_GN}_sg_${_str_code}}\"; ast_bulk_nl;; esac;;"
    _gef_si=$((_gef_si + 1))
  done
  # Number accumulator fast path
  case "$_GNUM" in ?*)
    _printr1 " $_GNUM_CODE) case \$CODE in [0-9.eE+-]*)"
    case "$_GVALNUM" in 1)
      _printr1 " ast_more; REST=\"\${CODE%%[!0-9.eE+-]*}\"; ast_bulk;;"
      _printr1 " *) _numck; ast_close;$_xc continue;; esac;;"
    ;; *)
      _printr1 " ast_more; REST=\"\${CODE%%[!0-9.eE+-]*}\"; ast_bulk;;"
      _printr1 " *) ast_close;$_xc continue;; esac;;"
    ;; esac
  ;; esac
  _bnf_gen_emit_fp_charclass
  # Whitespace skip (all states EXCEPT accumulators and wildcard-entry states)
  case "$_GW" in skip|token|line)
    _ws=
    for _sc in $_GST; do
      # Exclude string accumulator states.
      _gef_si=1; _str_skip=
      while test $_gef_si -le $_GSTR_N; do
        eval "_str_code=\"\$_GSTR_CODE_$_gef_si\""
        case "$_sc" in "$_str_code") _str_skip=1; break;; esac
        _gef_si=$((_gef_si + 1))
      done
      case "$_str_skip" in 1) continue;; esac
      # Exclude the number state and the char-class accumulator states.
      case "$_GNUM" in ?*) case "$_sc" in "$_GNUM_CODE") continue;; esac;; esac
      case "$_ACCUM_STATES" in *" $_sc "*|*" $_sc") continue;; esac
      # Exclude wildcard-entry states from whitespace skip ONLY if they
      # are NOT auto-close states. Auto-close states need whitespace
      # skipped before the closing terminal can match.
      # _GALT is wrapped in '|' so membership is exact in every position,
      # including when the list holds exactly one state.
      case "$_DA_WILD" in *" $_sc="*)
        case "|$_GALT|" in *"|$_sc|"*) ;; *) continue;; esac;; esac
      # Exclude done states.
      case "$_GDONE" in *" $_sc "*|*" $_sc") continue;; esac
      _ws="${_ws}${_sc}|"
    done
    _ws="${_ws%\|}"
    _printr1 " $_ws)"
    case "$_GW" in
      line)
        # Line mode: only spaces and tabs are skipped.
        _printr1 " case \$CODE in ' '*|\"\$_TAB\"*)"
        _printr1 " ast_skip_ws; continue;; esac;;"
      ;;
      *)
        _printr1 " case \$CODE in ' '*|\"\$_TAB\"*|\"\$_EOL\"*)"
        _printr1 " ast_skip; continue;; esac;;"
      ;;
    esac
  ;; esac
  _printr1 " esac"
  # Comment handling
  case "$_GCMT" in ?*)
    _printr1 ""
    case "$_GCMT_END" in
      '') # Line comment (skip to end of line)
        _printr1 " # Line comment skip"
        _printr1 " case \$CODE in '$_GCMT'*)"
        _printr1 " ast_cmt_line;;"
        _printr1 " esac";;
      *) # Block comment (skip until end delimiter)
        _printr1 " # Block comment skip"
        _printr1 " case \$CODE in '$_GCMT'*)"
        _printr1 " ast_cmt_block;;"
        _printr1 " esac";;
    esac
  ;; esac
}
# Emit expression completion block (precedence climbing).
# Handles postfix operators, binary operator peek, precedence comparison,
# operator consumption, ternary operators, and close cascading.
#
# Prints nothing unless $_GPREC_RULE is non-empty. Every line is written
# with _printr1 and becomes part of the generated parser's main loop.
# Reads: _GPREC_RULE, _GPREC_RULE_CODE, _GPREC_BIN, _GPREC_N and the
#   per-operator _GPREC_O_<i> (operator text), _GPREC_P_<i> (precedence) and
#   _GPREC_A_<i> (associativity); _GPOST_N with _GPOST_OPEN_<i>,
#   _GPOST_STATE_<i>, _GPOST_INNER_<i>, _GPOST_CLOSE_<i>; _GTERNARY with
#   _GTERNARY_OPEN, _GTERNARY_PREC, _GTERNARY_STATE, _GTERNARY_DONE and
#   _GTERNARY_COLON; _GW; _GN.
# Side effects: _po, _pst, _pi, _pc, _pp and _pa are assigned without being
#   declared local in this function.
_bnf_gen_emit_prec_climb () {
case "$_GPREC_RULE" in ?*)
_printr1 ""
_printr1 " # --- Expression completion (precedence climbing) ---"
# States in which the generated block applies: the precedence rule and the
# binary-op state, plus both ternary states when ternary is enabled.
local _gepc_xc="$_GPREC_RULE_CODE|$_GPREC_BIN"
case "$_GTERNARY" in 1) _gepc_xc="$_gepc_xc|$_GTERNARY_STATE|$_GTERNARY_DONE";; esac
# The generated guard only fires when _XC is 1, and resets it to 0 first.
_printr1 " case \$_XC in 1) _XC=0; case \$STATE in $_gepc_xc)"
# Skip whitespace (only emitted for the "skip" whitespace mode)
case "$_GW" in skip)
_printr1 " case \"\$CODE\" in ' '*|\"\$_TAB\"*|\"\$_EOL\"*) ast_skip_wse;; esac";;
esac
# Postfix operator dispatch (before binary op peek)
case "$_GPOST_N" in 0) ;; *)
_printr1 " case \"\$CODE\" in"
# REPLY from _list_find_max_len seeds the length countdown (presumably the
# longest opener length; the helper is not visible here).
_list_find_max_len "_GPOST_OPEN" "$_GPOST_N"
local _gepc_plen=$REPLY
# Walk lengths from longest to shortest so longer openers are emitted, and
# therefore tested, before shorter ones.
while test $_gepc_plen -gt 0; do
local _gepc_i=1
while test $_gepc_i -le $_GPOST_N; do
eval "_po=\"\$_GPOST_OPEN_$_gepc_i\""
eval "_pst=\"\$_GPOST_STATE_$_gepc_i\""
case ${#_po} in "$_gepc_plen")
# _cg_case_pat leaves the case pattern for the opener in REPLY.
_cg_case_pat "$_po"
eval "_pi=\"\${_GPOST_INNER_$_gepc_i:-}\""
eval "_pc=\"\${_GPOST_CLOSE_$_gepc_i:-}\""
case "$_pi$_pc" in
'') # Close-less postfix (e.g. ++): steal, close, re-enter prec climbing
_printr1 " ${REPLY}*) CODE=\"\${CODE#\"$_po\"}\"; _COL=\$((_COL+${#_po})); ast_$_pst; _steal; ast_close_xc;;";;
*) _printr1 " ${REPLY}*) CODE=\"\${CODE#\"$_po\"}\"; _COL=\$((_COL+${#_po})); ast_$_pst; _steal; continue;;";;
esac;;
esac
_gepc_i=$((_gepc_i + 1))
done
_gepc_plen=$((_gepc_plen - 1))
done
_printr1 " esac";;
esac
# Peek at operators — symbolic ops use char patterns, keyword ops use word peek
_printr1 " _OP="
# Classify the operator table: an operator starting with a letter counts as
# a keyword operator, anything else as symbolic.
local _has_kw_ops= _has_sym_ops= _gepc_i=1
while test $_gepc_i -le $_GPREC_N; do
eval "_po=\"\$_GPREC_O_$_gepc_i\""
case "$_po" in [a-zA-Z]*) _has_kw_ops=1;; *) _has_sym_ops=1;; esac
_gepc_i=$((_gepc_i + 1))
done
# Keyword operators: peek at word, uppercase, match
case "$_has_kw_ops" in 1)
_printr1 " case \"\$CODE\" in [a-zA-Z_]*)"
_printr1 " ast_more; MATCH=\"\${CODE%%[!a-zA-Z0-9_]*}\""
_printr1 " _ucase \"\$MATCH\""
_printr1 " case \"\$REPLY\" in"
_gepc_i=1
while test $_gepc_i -le $_GPREC_N; do
eval "_po=\"\$_GPREC_O_$_gepc_i\""
eval "_pp=\"\$_GPREC_P_$_gepc_i\""
case "$_po" in [a-zA-Z]*)
_printr1 " '$_po') _OP=\"$_po\"; _np=$_pp;;";;
esac
_gepc_i=$((_gepc_i + 1))
done
_printr1 " esac;; esac";;
esac
# Symbolic operators: char-pattern peek (longest match first)
case "$_has_sym_ops" in 1)
_printr1 " case \"\$CODE\" in"
# The third argument is handed to _list_find_max_len; presumably it excludes
# letter-initial (keyword) operators from the measurement — TODO confirm.
_list_find_max_len "_GPREC_O" "$_GPREC_N" '[a-zA-Z]*'
local _gepc_len=$REPLY
while test $_gepc_len -gt 0; do
_gepc_i=1
while test $_gepc_i -le $_GPREC_N; do
eval "_po=\"\$_GPREC_O_$_gepc_i\""
eval "_pp=\"\$_GPREC_P_$_gepc_i\""
case "$_po" in [a-zA-Z]*) _gepc_i=$((_gepc_i + 1)); continue;; esac
case ${#_po} in "$_gepc_len")
_cg_case_pat "$_po"
_printr1 " ${REPLY}*) _OP=\"$_po\"; _np=$_pp;;";;
esac
_gepc_i=$((_gepc_i + 1))
done
_gepc_len=$((_gepc_len - 1))
done
# The ternary opener is appended as the last pattern of this symbolic case.
# NOTE(review): it is therefore only emitted when at least one symbolic
# operator exists — confirm that keyword-only grammars never enable ternary.
case "$_GTERNARY" in 1)
_cg_case_pat "$_GTERNARY_OPEN"
_printr1 " ${REPLY}*) _OP=\"$_GTERNARY_OPEN\"; _np=$_GTERNARY_PREC;;";;
esac
_printr1 " esac";;
esac
# If operator found: precedence comparison + consume
local _gepc_ps="$_GPREC_BIN"
case "$_GTERNARY" in 1) _gepc_ps="$_gepc_ps|$_GTERNARY_STATE|$_GTERNARY_DONE";; esac
_printr1 " case \"\$_OP\" in ?*)"
_printr1 " case \$STATE in $_gepc_ps)"
# Generated code: take the last word of NODES, load that node's V<id> value
# into _W and ask _<grammar>_parser_prec for the current precedence (_cp).
_printr1 " _W=\"\${NODES##*\" \"}\"; eval \"_W=\\\"\\\${V\$_W:-}\\\"\""
_printr1 " _${_GN}_parser_prec \"\$_W\"; _cp=\$REPLY"
# Build right-assoc check (includes ternary — always right-associative)
local _right_ops= _gepc_i=1
while test $_gepc_i -le $_GPREC_N; do
eval "_pa=\"\$_GPREC_A_$_gepc_i\""
eval "_po=\"\$_GPREC_O_$_gepc_i\""
case "$_pa" in right) _right_ops="$_right_ops|'$_po'";; esac
_gepc_i=$((_gepc_i + 1))
done
case "$_GTERNARY" in 1)
_right_ops="$_right_ops|'$_GTERNARY_OPEN'";; esac
# Right-associative operators close only when the new precedence is strictly
# lower (_np < _cp); all other operators also close on equal precedence.
case "$_right_ops" in ?*)
_right_ops="${_right_ops#\|}"
_printr1 " case \"\$_OP\" in $_right_ops) case \$((_np < _cp)) in 1) ast_close_xc;; esac;;"
_printr1 " *) case \$((_np <= _cp)) in 1) ast_close_xc;; esac;; esac";;
*) _printr1 " case \$((_np <= _cp)) in 1) ast_close_xc;; esac";;
esac
_printr1 " ;; esac"
# Consume operator, create binary op node, steal previous atom
# When keyword operators exist, the generated code routes an _OP starting
# with an uppercase letter through CONSUMED/ast_skip_match and everything
# else through ast_consume_op.
case "$_has_kw_ops" in 1)
_printr1 " case \"\$_OP\" in [A-Z]*) CONSUMED=\"\$_OP\"; ast_skip_match;;"
_printr1 " *) ast_consume_op;; esac";;
*) _printr1 " ast_consume_op";;
esac
case "$_GTERNARY" in 1)
_printr1 " case \"\$_OP\" in '$_GTERNARY_OPEN') ast_$_GTERNARY_STATE; _steal; continue;; esac";;
esac
_printr1 " ast_$_GPREC_BIN; _steal; continue"
_printr1 " ;; esac"
# Not an operator: close binary/ternary op, or close expr
case "$_GTERNARY" in 1)
_printr1 " case \$STATE in $_GTERNARY_STATE) STATE=$_GTERNARY_COLON; continue;; esac"
_printr1 " case \$STATE in $_GTERNARY_DONE) ast_close_xc;; esac";; esac
_printr1 " case \$STATE in $_GPREC_BIN) ast_close_xc;; esac"
_printr1 " case \$STATE in $_GPREC_RULE_CODE) ast_close; _PREV=; continue;; esac"
# Closes the two generated case statements opened by the _XC/STATE guard.
_printr1 " ;; esac;; esac"
;; esac
}
# Emit the head of the generated parser module (everything up to the start
# of the main loop body):
#   - `use` lines selected by the enabled features,
#   - one `alias ast_<code>` line per state code in $_tok,
#   - a "# State codes:" comment mapping codes to rule names,
#   - the sections produced by _bnf_gen_emit_glob_vars,
#     _bnf_gen_emit_exp_tokens and _bnf_gen_emit_prec,
#   - the opening of <grammar>_parser with its `local` declarations and the
#     start of its `while :; do` loop,
#   - the precedence-climbing and fast-path sections.
# The generated function and loop are left open here; the matching " done"
# and "}" lines are printed by _bnf_gen_emit_eof.
# Reads _tok and _rc from the caller's scope in addition to the _G* globals.
# Loop variables _etk, _mr, _mc and _sc are not declared local.
_bnf_gen_emit () {
# --- Prologue ---
_printr1 "use ast_core"
case "$_GKW_LIST$_GPREC_RULE" in ?*)
_printr1 "use ast_consume";; esac
case "$_GPREC_RULE" in ?*)
_printr1 "use ast_prec";; esac
case "$_GCMT" in ?*)
_printr1 "use ast_comment";; esac
case "$_GKW_LIST" in ?*)
_printr1 "use str_core";; esac
_printr1 ""
# Emit literal token aliases (no eval/ast_tokens — ksh93 can't nest eval)
for _etk in $_tok; do
_printr1 "alias ast_$_etk=\"ast_new;STATE=$_etk;ast_push\""
done
# Emit state-code-to-rule-name mapping as comment
_printr1 ""
_printr1 "# State codes:"
# _printn1 is used for the pieces of the mapping line and _printr1 "" ends it.
_printn1 "# $_GDC=_doc_"
for _mr in $_GR; do
eval "_mc=\$_RCODE_$_mr"
_printn1 " $_mc=$_mr"
done
_printr1 ""
# Also list continuation states
local _cont_list=
for _sc in $_GST; do
# Skip doc root and rule codes (already listed)
case "$_sc" in "$_GDC") continue;; esac
# A non-empty REPLY from _bnf_gen_code_to_name means the code was already
# listed above as a rule code.
_bnf_gen_code_to_name "$_sc"
case "$REPLY" in ?*) continue;; esac
_cont_list="$_cont_list $_sc"
done
case "$_cont_list" in ?*)
_printn1 "# cont:"
for _sc in $_cont_list; do _printn1 " $_sc"; done
_printr1 ""
;; esac
_bnf_gen_emit_glob_vars
_bnf_gen_emit_exp_tokens
_bnf_gen_emit_prec
_printr1 ""
_printr1 "${_GN}_parser () {"
# The generated `local` statement spans several lines joined by trailing
# backslashes; the _XC/_OP line is added only for precedence grammars and the
# _CMT_* line only when a block-comment end delimiter is configured.
_printr1 " local CODE= STATE=$_rc V=0 CONSUMED= STATES= NODES=\" 0\" X0=\"$_rc\" \\"
_printr1 " NODE= PARN= PARNT= SIBL= REST= MATCH= _a= _W= _ST= _D= _C= _pq= \\"
_printr1 " _EOF=0 _line= _PREV= _PLEN= _PLC=0 _JT=0 \\"
case "$_GPREC_RULE" in ?*)
_printr1 " _XC=0 _OP= _np=0 _cp=0 \\";;
esac
case "$_GCMT_END" in ?*)
_printr1 " _CMT_S='$_GCMT' _CMT_SL=${#_GCMT} _CMT_E='$_GCMT_END' _CMT_EL=${#_GCMT_END} \\";;
esac
_printr1 " _LN=1 _COL=1 _RD=0"
_printr1 ""
_printr1 " while :; do"
_printr1 " pars_progress"
_printr1 " ast_feed"
_bnf_gen_emit_prec_climb
_bnf_gen_emit_fast_paths
}
# Emit the per-character part of the generated parser's dispatch.
# Opens `case $CODE in` (closed later by the " esac" line printed in
# _bnf_gen_emit_eof) and, for every code in $_DA_CODES that has entries,
# emits one branch for that character containing a `case $STATE in`.
# Inputs:
#   _DA_CODES       list of character codes
#   _DA_RAW_<code>  the character itself (handed to _bnf_gen_emit_case_pat)
#   _DA_<code>      space-separated "state=action" entries for the character
#   _DA_CLASS       space-separated "pattern=state=action" class entries
# For each character the order is: individual entries (first entry per state
# wins), then class entries whose pattern matches the character, then the
# wildcard fallback emitted by _bnf_gen_emit_wild_fallback.
_bnf_gen_emit_dispatch () {
local _cc _ch _entries _entry _emitted_st _st _act
local _ind_states _clentry _clpat _clrest _clst _clact
_printr1 ""
_printr1 " # --- Character dispatch ---"
_printr1 " case \$CODE in"
# --- Emit merged dispatch for each character ---
for _cc in $_DA_CODES; do
eval "_ch=\"\$_DA_RAW_$_cc\""
eval "_entries=\"\${_DA_$_cc:-}\""
case "$_entries" in '') continue;; esac
_printr1 ""
_bnf_gen_emit_case_pat "$_ch"
_printr1 " case \$STATE in"
# Emit each state:action pair.
# When multiple keyword actions (OPEN_KW with cont, OPEN_KWSKIP_CONT, SKIP_KW_CONT,
# SKIP_KW_CLOSE) exist for the same state, merge them into a single keyword-check
# block with multiple branches. For non-keyword duplicates, first action wins.
_emitted_st=
for _entry in $_entries; do
# Entry layout is "state=action": state is everything before the first "=".
_st="${_entry%%=*}"
_act="${_entry#*=}"
# Skip if this state was already emitted
case "$_emitted_st" in *"|$_st|"*) continue;; esac
_emitted_st="$_emitted_st|$_st|"
_bnf_gen_emit_state_or_kw_merge "$_st" "$_act" "$_ch"
done
# Also include class dispatch entries for states not covered above.
# This handles chars like 't' that match both individual (keyword) and
# class ([a-zA-Z0-9_-]) patterns — states from the class pattern need
# to be checked here since the individual pattern matches first.
# _ind_states collects handled states as " <state>=" tokens.
_ind_states=
for _entry in $_entries; do
_ind_states="$_ind_states ${_entry%%=*}="
done
for _clentry in $_DA_CLASS; do
_clpat="${_clentry%%=*}"
# The class pattern is deliberately unquoted so it acts as a glob against
# the current character.
case "$_ch" in ${_clpat}*) ;; *) continue;; esac
_clrest="${_clentry#*=}"
_clst="${_clrest%%=*}"
_clact="${_clrest#*=}"
# Skip if this state was already handled (individual or prior class)
case "$_ind_states" in *" $_clst="*) continue;; esac
_ind_states="$_ind_states $_clst="
_printn1 " $_clst)"
_bnf_gen_emit_action "$_clact"
done
# Wildcard entries (from negated class FIRST — for states not yet handled)
# The second and third arguments are the delimiters that surround each state
# in the handled-state list passed as the first argument.
_bnf_gen_emit_wild_fallback "$_ind_states" " " "="
done
}
# Emit the class-based branches of the generated dispatch, then the EOF and
# closing section (via _bnf_gen_emit_eof, which is always called).
# When $_DA_CLASS is non-empty, _bnf_gen_emit_cls_cluster runs first; the
# loop below then reads $_cls_ngrp and $_CLS_GRP_<i> (presumably set by that
# helper, which is not visible here) and emits one `[...]*)` branch per
# group, containing the state actions of every class entry in that group.
# _gp and _cpat are not declared local.
_bnf_gen_emit_dispatch2 () {
# --- Class-based dispatch (after individual chars, before EOF) ---
case "$_DA_CLASS" in ?*)
_bnf_gen_emit_cls_cluster
# Emit one branch per cluster
local _gi=0
while test $_gi -lt $_cls_ngrp; do
eval "_gp=\"\$_CLS_GRP_$_gi\""
# Build union pattern for this cluster
# Strip dashes from inner patterns, collect them, add a single
# trailing dash at the end to avoid creating invalid ranges
# (e.g. [a-z_-0-9] has the invalid range _-0).
local _grp_cls= _grp_done= _grp_dash=
for _cpat in $_gp; do
# Drop the surrounding [ and ] to get the bracket expression's contents.
local _cls_inner="${_cpat#\[}"; _cls_inner="${_cls_inner%\]}"
# Each distinct pattern contributes to the union only once.
case "$_grp_done" in *"|$_cpat|"*) ;; *)
_grp_done="$_grp_done|$_cpat|"
# Strip \- (backslash-dash) FIRST, before trailing dash
_strip_bsdash "$_cls_inner"
# A REPLY that differs from the input means something was stripped.
case "$REPLY" in "$_cls_inner") ;; *)
_grp_dash=1; _cls_inner="$REPLY";; esac
# Strip leading/trailing dashes, track if any existed
case "$_cls_inner" in
-*) _grp_dash=1; _cls_inner="${_cls_inner#-}";;
esac
case "$_cls_inner" in
*-) _grp_dash=1; _cls_inner="${_cls_inner%-}";;
esac
_grp_cls="$_grp_cls$_cls_inner";;
esac
done
# Append a single trailing dash if any source pattern had one
case "$_grp_dash" in 1) _grp_cls="$_grp_cls-";; esac
# Escape " inside bracket expressions for ksh93 compat
_esc_dq_only "$_grp_cls"; _grp_cls="$REPLY"
_printr1 ""
_printr1 " [$_grp_cls]*)"
_printr1 " case \$STATE in"
# _emitted_states collects handled states as "|<state>|" tokens; the first
# entry seen for a state wins.
local _emitted_states=
for _clentry in $_DA_CLASS; do
local _clpat2="${_clentry%%=*}"
# Only include entries from patterns in this cluster
local _in_grp=0
for _cpat in $_gp; do
case "$_clpat2" in "$_cpat") _in_grp=1; break;; esac
done
case $_in_grp in 0) continue;; esac
# Remaining entry layout is "state=action".
local _clrest="${_clentry#*=}"
local _clst="${_clrest%%=*}"
local _clact="${_clrest#*=}"
case "$_emitted_states" in *"|$_clst|"*) continue;; esac
_emitted_states="$_emitted_states|$_clst|"
_bnf_gen_emit_state_action "$_clst" "$_clact"
done
# The second and third arguments are the delimiters that surround each state
# in the handled-state list passed as the first argument.
_bnf_gen_emit_wild_fallback "$_emitted_states" "|" "|"
_gi=$((_gi + 1))
done
;; esac
_bnf_gen_emit_eof
}
# Emit the tail of the generated parser's dispatch, in three parts:
#   1. the '' branch (empty CODE), i.e. the EOF handler;
#   2. a catch-all *) branch holding the wildcard-state actions, printed
#      only when $_DA_WILD or $_GALT is non-empty;
#   3. the lines that close the dispatch case, the main loop and the parser
#      function, with the final ast_out call.
# $_DA_WILD entries have the form "state=action".
_bnf_gen_emit_eof () {
  local _geo_emitted _wentry _wst _wact

  # -- 1. EOF branch --
  _printr1 ""
  _printr1 " '')"
  _printr1 " case \$STATE in"
  _bnf_gen_emit_galt ""
  if test -n "$_GPREC_BIN"; then
    # Precedence grammars: both expression states simply close at EOF.
    _printr1 " $_GPREC_BIN) ast_close; continue;;"
    _printr1 " $_GPREC_RULE_CODE) ast_close; continue;;"
  fi
  _printr1 " $_rc) break;;"
  _printr1 " *) _pars_err_eof;;"
  _printr1 " esac;;"

  # -- 2. Wildcard / catch-all branch --
  if test -n "$_DA_WILD$_GALT"; then
    _printr1 ""
    _printr1 " *)"
    _printr1 " case \$STATE in"
    _geo_emitted=
    for _wentry in $_DA_WILD; do
      _wact="${_wentry#*=}"
      _wst="${_wentry%%=*}"
      # Remember the state so _bnf_gen_emit_galt receives the handled list.
      _geo_emitted="${_geo_emitted}|${_wst}|"
      _printn1 " ${_wst})"
      _bnf_gen_emit_action "$_wact"
    done
    _bnf_gen_emit_galt "$_geo_emitted"
    _printr1 " *) _pars_err;;"
    _printr1 " esac;;"
  fi

  # -- 3. Close dispatch, loop and function --
  _printr1 " esac"
  _printr1 " done"
  _printr1 ""
  _printr1 " ast_out"
  _printr1 "}"
}
# -- AST emitter generation --
# Finish the slot being accumulated without registering a non-terminal
# (used at Bm boundaries): store its text in _tq_S<n>, append <n> to
# _tq_slots, start a fresh empty slot and advance the slot counter.
_bnf_gen_tq_close_slot () {
  eval "_tq_S${_tq_slotcnt}=\"\$_tq_cur_slot\""
  _tq_slots="${_tq_slots} ${_tq_slotcnt}"
  _tq_cur_slot=''
  _tq_slotcnt=$((_tq_slotcnt + 1))
}
# Finish the current slot and record a non-terminal after it.
# $1 = node ID of the non-terminal; it is appended to _tq_nts and the
# non-terminal counter _tq_ntcnt is incremented.
_bnf_gen_tq_close_nt () {
  _bnf_gen_tq_close_slot
  _tq_ntcnt=$((_tq_ntcnt + 1))
  _tq_nts="${_tq_nts} $1"
}
# Sort the given AST child nodes into terminal slots and non-terminals for
# template emission. Arguments are node IDs; the node type is the first
# word of X<id>.
#   Bq    -> sequence wrapper: recurse into its children
#   Bt    -> terminal: add V<id> to the current slot (lowercased first when
#            $_GKW_CASE is "lower" and the text occurs in $_GKW_LIST)
#   other -> close the current slot and register the node as a non-terminal
_bnf_gen_tq_classify () {
  for _tqc do
    eval "_tqc_t=\"\${X$_tqc%% *}\""
    case "$_tqc_t" in
      Bq)
        # Replace the positional parameters with the wrapper's children
        # (after dropping the type word); the loop list is already fixed.
        eval "set -- \$X$_tqc"
        shift
        _bnf_gen_tq_classify "$@"
        ;;
      Bt)
        eval "_v=\"\${V$_tqc:-}\""
        case "$_GKW_CASE" in
          lower)
            case "$_GKW_LIST" in
              *"$_v"*) _lcase_str "$_v"; _v=$REPLY ;;
            esac ;;
        esac
        _bnf_gen_tq_accum_term "$_v"
        ;;
      *)
        _bnf_gen_tq_close_nt "$_tqc"
        ;;
    esac
  done
}
# Append a terminal to the slot being accumulated. $1 = terminal text.
# An empty slot simply takes the text. Otherwise a terminal that starts with
# two letters is joined with a single space; anything else is joined
# directly.
_bnf_gen_tq_accum_term () {
  case "$_tq_cur_slot" in
    '')
      _tq_cur_slot="$1"
      return ;;
  esac
  case "$1" in
    [a-zA-Z][a-zA-Z]*) _tq_cur_slot="${_tq_cur_slot} $1" ;;
    *) _tq_cur_slot="${_tq_cur_slot}$1" ;;
  esac
}
# Fetch slot $1 (_tq_S<$1>) into _s and pass it through _esc_dq so it can be
# embedded in a double-quoted string of the generated code.
# $1 = slot index. Result in REPLY (as left by _esc_dq).
_bnf_gen_prep_slot () {
  eval "_s=\"\$_tq_S${1}\""
  _esc_dq "$_s"
}
# Compute the separator between two non-terminals in one step:
# _bnf_gen_slot_str followed by _esc_dq on its result.
# $1 = slot index, $2 = node ID of the following NT. Result in REPLY.
_bnf_gen_prep_inter_slot () {
  _bnf_gen_slot_str "$1" "$2"
  _esc_dq "$REPLY"
}
# Work out the text that goes between two consecutive non-terminals.
# $1 = slot index, $2 = node ID of the NT that follows the slot.
# Result in REPLY:
#   slot "="            -> " = " when $_GCMT is set, otherwise "="
#   slot ":"            -> ": "
#   other non-empty     -> the slot text unchanged
#   empty slot          -> "" when the next NT is a Bi reference to a rule
#                          whose _ACCUM_<code> value matches B?:^...
#                          (negated-class accumulator), otherwise one space
_bnf_gen_slot_str () {
  local _slot _kind _rule _code _accum
  eval "_slot=\"\$_tq_S$1\""

  # Slots that carry terminal text are resolved immediately.
  case "$_slot" in
    '=')
      case "$_GCMT" in
        ?*) REPLY=" = " ;;
        *) REPLY="$_slot" ;;
      esac
      return ;;
    ':')
      REPLY=": "
      return ;;
    ?*)
      REPLY="$_slot"
      return ;;
  esac

  # Empty slot: a single space unless the next NT accumulates a negated class.
  REPLY=" "
  eval "_kind=\"\${X$2%% *}\""
  case "$_kind" in
    Bi) ;;
    *) return ;;
  esac
  eval "_rule=\"\${V$2:-}\""
  eval "_code=\"\${_RCODE_$_rule:-}\""
  eval "_accum=\"\${_ACCUM_$_code:-}\""
  case "$_accum" in
    B?:'^'*) REPLY="" ;;
  esac
}
# Build slot structure for a sequence rule's emitter.
# $1=body_node (Bq AST node), $2=rcode (state code for prefix lookup).
# Sets: _tq_slots, _tq_nts, _tq_ntcnt, _tq_has_bm, _tq_bm_pos,
# _tq_pre_bm, _tq_post_bm, _tq_bm_sep, _tq_opt_start,
# _tq_slotcnt, _tq_S<i> (slot terminal strings).
#
# The positional parameters are used as a scratch child list throughout:
# every nested `set --` that inspects another node is followed by a restore
# from X<body_node>. The outer `for` list is expanded before any of that.
# Scratch variables such as _mid, _ch_t, _v, _inner, _it, _iq, _iqt,
# _tq_found_bm, _tq_bm_inner, _si, _sv and _pfx are not declared local here.
_bnf_gen_tq_build () {
local _tqb_body=$1 _tqb_rcode=$2
# Children of the body node (the leading type word is shifted away).
eval "set -- \$X$_tqb_body"; shift
_tq_slots= _tq_nts= _tq_ntcnt=0 _tq_has_bm=0 _tq_bm_pos=0
_tq_cur_slot= _tq_slotcnt=0
_tq_pre_bm=0 _tq_post_bm=0
_tq_bm_sep= _tq_opt_start=
for _mid in "$@"; do
eval "_ch_t=\"\${X$_mid%% *}\""
case "$_ch_t" in
Bt) eval "_v=\"\${V$_mid:-}\""
case "$_GKW_CASE" in lower) case "$_GKW_LIST" in *"$_v"*)
_lcase_str "$_v"; _v=$REPLY;; esac;; esac
_bnf_gen_tq_accum_term "$_v";;
Bm) ;; # handled below
Bo|Ba) # Check if wraps a Bm (e.g., [ member { ',' member } ])
_tq_found_bm=
eval "set -- \$X$_mid"; shift
for _inner in "$@"; do
eval "_it=\"\${X$_inner%% *}\""
case "$_it" in
Bm) _tq_found_bm=$_inner; break;;
Bq) eval "set -- \$X$_inner"; shift
for _iq in "$@"; do
eval "_iqt=\"\${X$_iq%% *}\""
case "$_iqt" in Bm) _tq_found_bm=$_iq; break 2;; esac
done;;
esac
done
eval "set -- \$X$_tqb_body"; shift # restore
case "$_tq_found_bm" in
?*) _mid=$_tq_found_bm; _ch_t=Bm;;
*) # Optional: walk children as normal slots/NTs
local _pre_opt_ntcnt=$_tq_ntcnt
eval "set -- \$X$_mid"; shift
_bnf_gen_tq_classify "$@"
# Record the NT index at which optional NTs begin; only the first wrapper
# that actually contributed NTs sets it.
case "$_tq_opt_start" in '') case $((_tq_ntcnt - _pre_opt_ntcnt)) in
0) ;; *) _tq_opt_start=$_pre_opt_ntcnt;; esac;; esac
eval "set -- \$X$_tqb_body"; shift # restore
continue;;
esac;;
esac
# Process Bm (direct or found inside Bo/Ba)
case "$_ch_t" in Bm)
_bnf_gen_tq_close_slot
_tq_has_bm=1; _tq_bm_pos=$_tq_ntcnt
_tq_pre_bm=$_tq_ntcnt
# Extract Bm separator (if inner is seq starting with terminal)
eval "set -- \$X$_mid"; shift
_tq_bm_inner=$1
eval "_ch_t=\"\${X$_tq_bm_inner%% *}\""
case "$_ch_t" in Bq)
eval "set -- \$X$_tq_bm_inner"; shift
eval "_ch_t=\"\${X$1%% *}\""
case "$_ch_t" in Bt) eval "_tq_bm_sep=\"\${V$1:-}\"";; esac;;
esac
eval "set -- \$X$_tqb_body"; shift # restore
continue;;
esac
# Non-terminal (Bi, etc.) that isn't a Bm wrapper
case "$_ch_t" in Bt) ;; *) _bnf_gen_tq_close_nt "$_mid";; esac
done
# Final slot (trailing terminals after last NT)
# It is stored at index $_tq_slotcnt; the counter itself is not advanced.
eval "_tq_S$_tq_slotcnt=\"\$_tq_cur_slot\""
case $_tq_has_bm in 1) _tq_post_bm=$((_tq_ntcnt - _tq_pre_bm));; esac
# Normalize keyword slots: add trailing space (so "SELECT"+ident → "SELECT a")
# and leading space for inter-NT slots (so ident+"FROM"+ident → "a FROM t")
# The loop covers indexes below $_tq_slotcnt only, so the final slot stored
# above is left as is.
_si=0
while test $_si -lt $_tq_slotcnt; do
eval "_sv=\"\${_tq_S$_si:-}\""
case "$_sv" in *[a-zA-Z][a-zA-Z]) eval "_tq_S$_si=\"\$_sv \"";; esac
case $_si in 0) ;; *)
eval "_sv=\"\${_tq_S$_si:-}\""
case "$_sv" in [a-zA-Z]*) eval "_tq_S$_si=\" \$_sv\"";; esac;;
esac
_si=$((_si + 1))
done
# Prepend inherited prefix to slot 0
eval "_pfx=\"\${_EMIT_PREFIX_$_tqb_rcode:-}\""
eval "_tq_S0=\"\$_pfx\$_tq_S0\""
}
# Try to propagate terminal prefix from rule $_r to its collapsed target.
# Sets _pfx_changed=1 if any prefix was updated (visible to caller).
#
# Input: $_r (rule name, read from the caller's scope) plus the rule tables
#   _RCODE_<rule>, _RNODE_<rule>, the AST nodes X<id>/V<id>, $_GNOCOL and the
#   current _EMIT_PREFIX_<code> values.
# Only applies to rules whose body is a sequence (Bq) made of one or more
# leading terminals followed by exactly one rule reference (Bi) and nothing
# else. The concatenated leading terminals are appended to the rule's own
# prefix and stored as _EMIT_PREFIX_<code> of the referenced rule — or, when
# the referenced rule's body is an alternation (Ba), of every Bi alternative
# other than the rule itself.
# Assigns _rcode, _node and _body_node in the caller's scope; several other
# scratch variables are not declared local.
_bnf_gen_propagate_prefix_for_rule () {
eval "_rcode=\$_RCODE_$_r; _node=\$_RNODE_$_r"
eval "set -- \$X$_node"; shift; _body_node=$1
eval "_bt=\"\${X$_body_node%% *}\""
case "$_bt" in Bq) ;; *) return;; esac
eval "set -- \$X$_body_node"; shift
# Collect leading terminals + single trailing NT ref (no trailing elements)
local _lead= _rest_ref= _has_trail=
for _ch in "$@"; do
eval "_ch_t=\"\${X$_ch%% *}\""
case "$_ch_t" in
Bt) case "$_rest_ref" in
'') eval "_lead=\"\$_lead\${V$_ch:-}\"";;
*) _has_trail=1;;
esac;;
Bi) case "$_rest_ref" in '') _rest_ref="$_ch";; *) _has_trail=1; break;; esac;;
*) case "$_rest_ref" in '') ;; *) _has_trail=1;; esac; break;;
esac
done
case "$_has_trail" in 1) return;; esac
case "$_lead" in '') return;; esac
case "$_rest_ref" in '') return;; esac
# Skip rules with no-collapse done states
eval "_done_code=\"\${_RCODE__done_$_rcode:-}\""
case "$_done_code" in ?*)
# Whole-word membership test: a trailing space is added to the list so that
# a code such as D1 is not mistaken for a listed D10.
case "$_GNOCOL " in *" $_done_code "*) return;; esac;; esac
eval "_ref_name=\"\${V$_rest_ref:-}\""
eval "_ref_code=\$_RCODE_$_ref_name"
eval "_ref_node=\$_RNODE_$_ref_name"
# A reference with no rule node has nothing to receive the prefix; returning
# here also avoids running `shift` on an empty parameter list.
case "$_ref_node" in '') return;; esac
eval "set -- \$X$_ref_node"; shift; local _ref_body=$1
eval "_rbt=\"\${X$_ref_body%% *}\""
# Check own prefix — skip if cycling
eval "_own_pfx=\"\${_EMIT_PREFIX_$_rcode:-}\""
case "$_own_pfx" in *"$_lead"*) return;; esac
_total_pfx="$_own_pfx$_lead"
# If target is alternation: propagate to each alternative
case "$_rbt" in Ba)
eval "set -- \$X$_ref_body"; shift
for _alt in "$@"; do
eval "_alt_t=\"\${X$_alt%% *}\""
case "$_alt_t" in Bi)
eval "_alt_name=\"\${V$_alt:-}\""
eval "_alt_code=\$_RCODE_$_alt_name"
# Never push a prefix back onto the rule it came from.
case "$_alt_code" in "$_rcode") continue;; esac
eval "_cur=\"\${_EMIT_PREFIX_$_alt_code:-}\""
# Leave the target alone when it already starts with this prefix.
case "$_cur" in "$_total_pfx"*) ;; *)
eval "_EMIT_PREFIX_$_alt_code=\"\$_total_pfx\""
_pfx_changed=1;;
esac;;
esac
done;;
Bi) # Single ref: propagate directly
eval "_cur=\"\${_EMIT_PREFIX_$_ref_code:-}\""
case "$_cur" in "$_total_pfx"*) ;; *)
eval "_EMIT_PREFIX_$_ref_code=\"\$_total_pfx\""
_pfx_changed=1;;
esac;;
esac
}
# Emit sequence template-based emitter case for a Bq rule body.
# Reads _rcode, _body_node from caller scope. Uses _tq_* from _bnf_gen_tq_build.
#
# Prints one `<rcode>) ...;;` branch of the generated _<grammar>_unast_emit
# case, in one of these shapes:
#   - no Bm, 0 NTs : a constant string (slot 0);
#   - no Bm, 1 NT  : slot 0 + child (or $_v for accumulator nodes) + slot 1;
#   - no Bm, N NTs : children emitted positionally; children from index
#                    $_tq_opt_start on are guarded by `case $# in 0)` checks;
#   - with Bm      : a loop over the children that joins them with the Bm
#                    separator, optionally followed by post-Bm children.
# Scratch variables (_s, _e0, _e1, _se, _ef, _si, _tq_nt1, _tq_is_accum,
# _final_slot, _bm_sfmt, _ch_t) are not declared local in this function.
_bnf_gen_emit_seq () {
_bnf_gen_tq_build "$_body_node" "$_rcode"
case $_tq_has_bm in
0) # Fixed-arity sequence: positional template emit
case $_tq_ntcnt in
0) # All terminals (e.g., cdata_kw = 'C' 'D' 'A' 'T' 'A')
eval "_s=\"\$_tq_S0\""
_esc_dq "$_s"; _printr1 " $_rcode) REPLY=\"$REPLY\";;";;
1) # Single NT: check if accumulator
set -- $_tq_nts; _tq_nt1=$1
eval "_ch_t=\"\${X$_tq_nt1%% *}\""
_tq_is_accum=0
case "$_ch_t" in Bc|Bp|Bs|Bk) _tq_is_accum=1;; esac
_bnf_gen_prep_slot 0; _e0="$REPLY"
_bnf_gen_prep_slot 1; _e1="$REPLY"
case $_tq_is_accum in
1) _printr1 " $_rcode) REPLY=\"${_e0}\$_v${_e1}\";;";;
# Otherwise delegate to the child; when the single NT is optional
# (_tq_opt_start is 0) the generated code also handles a missing child.
*) case "$_tq_opt_start" in 0)
_printr1 " $_rcode) case \$# in 0) REPLY=\"${_e0}${_e1}\";; *) _${_GN}_unast_emit \"\$1\"; REPLY=\"${_e0}\${REPLY}${_e1}\";; esac;;";;
*) _printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; REPLY=\"${_e0}\${REPLY}${_e1}\";;";;
esac;;
esac;;
*) # N>1 NTs: sequential positional emit
# The positional parameters now hold the NT node IDs; they are shifted in
# step with the slot index _si.
set -- $_tq_nts
_bnf_gen_prep_slot 0; _e0="$REPLY"
case "$_tq_opt_start" in
'') # All NTs required: pure positional
_printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; _r=\"${_e0}\$REPLY\""
_si=1; shift
while test $# -gt 0; do
_bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
case $# in
# Last NT: also append the final slot and end the generated branch.
1) _bnf_gen_prep_slot $((_si + 1)); _ef="$REPLY"
_printr1 " _${_GN}_unast_emit \"\$$((_si + 1))\"; REPLY=\"\${_r}${_se}\${REPLY}${_ef}\";;";;
*) _printr1 " _${_GN}_unast_emit \"\$$((_si + 1))\"; _r=\"\${_r}${_se}\$REPLY\"";;
esac
_si=$((_si + 1)); shift
done;;
*) # Has optional NTs from index _tq_opt_start
case "$_tq_opt_start" in
0) # ALL NTs are optional — S0 already in _r
_printr1 " $_rcode) _r=\"$_e0\""
_printr1 " case \$# in 0) ;; *) _${_GN}_unast_emit \"\$1\"; _r=\"\${_r}\$REPLY\"; shift;; esac"
_si=1;;
*) # Some required, then optional
_printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; _r=\"${_e0}\$REPLY\""
_si=1; shift
while test $_si -lt $_tq_opt_start; do
_bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
_printr1 " _${_GN}_unast_emit \"\$$((_si + 1))\"; _r=\"\${_r}${_se}\$REPLY\""
_si=$((_si + 1)); shift
done
# Generated code: drop the required children so the optional ones start at $1.
_printr1 " shift $_tq_opt_start";;
esac
# Emits a `_si=<n>` assignment into the generated branch.
_printr1 " _si=$_si"
# One guarded emit per remaining NT; each generated guard shifts on success.
while test $# -gt 0; do
_bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
_printr1 " case \$# in 0) ;; *) _${_GN}_unast_emit \"\$1\"; _r=\"\${_r}${_se}\$REPLY\"; shift;; esac"
_si=$((_si + 1)); shift
done
# The final slot lives at index $_tq_slotcnt.
_bnf_gen_prep_slot $_tq_slotcnt; _ef="$REPLY"
_printr1 " REPLY=\"\${_r}${_ef}\";;";;
esac;;
esac;;
1) # Variable-arity: has embedded Bm (shift-based emit)
_bnf_gen_prep_slot 0; _e0="$REPLY"
_bnf_gen_prep_slot $_tq_slotcnt; _final_slot="$REPLY"
# Separator between repeated items: a single space by default; a Bm
# separator replaces it, with "=" padded on both sides when $_GCMT is set
# and ":" followed by a space.
_bm_sfmt=" "
case "${_tq_bm_sep:-}" in ?*)
case "$_tq_bm_sep" in
'=') case "$_GCMT" in ?*) _bm_sfmt=" $_tq_bm_sep ";; *) _bm_sfmt="$_tq_bm_sep";; esac;;
':') _bm_sfmt="$_tq_bm_sep ";;
*) _bm_sfmt="$_tq_bm_sep";;
esac;;
esac
_esc_dq "$_bm_sfmt"; _bm_sfmt="$REPLY"
case $_tq_post_bm in
# No NTs after the Bm: loop over every child. The generated code skips the
# separator while _r still equals the initial slot-0 text.
0) _printr1 " $_rcode) _r=\"$_e0\""
_printr1 " for _ch in \"\$@\"; do"
_printr1 " case \"\$_r\" in \"$_e0\") ;; *) _r=\"\$_r$_bm_sfmt\";; esac"
_printr1 " _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\""
_printr1 " done; REPLY=\"\$_r$_final_slot\";;";;
*) set -- $_tq_nts
case $_tq_pre_bm in
0) _printr1 " $_rcode) _r=\"$_e0\"";;
*) _printr1 " $_rcode) _${_GN}_unast_emit \"\$1\"; _r=\"${_e0}\$REPLY\"; shift"
_si=1; shift
while test $_si -lt $_tq_pre_bm; do
_bnf_gen_prep_inter_slot $_si "$1"; _se="$REPLY"
_printr1 " _${_GN}_unast_emit \"\$1\"; _r=\"\${_r}${_se}\$REPLY\"; shift"
_si=$((_si + 1)); shift
done;;
esac
# Generated loop: consume repeated items until only the post-Bm children remain.
_printr1 " while test \$# -gt $_tq_post_bm; do"
_printr1 " case \"\$_r\" in \"$_e0\") ;; *) _r=\"\$_r$_bm_sfmt\";; esac"
_printr1 " _${_GN}_unast_emit \"\$1\"; _r=\"\$_r\$REPLY\"; shift"
_printr1 " done"
# Emit intermediate slot between Bm items and post-Bm NTs
_bnf_gen_prep_slot $((_tq_pre_bm + 1)); _se="$REPLY"
case "$_se" in ?*)
_printr1 " _r=\"\$_r$_se\"";; esac
_printr1 " _${_GN}_unast_emit \"\$1\"; REPLY=\"\$_r\${REPLY}$_final_slot\";;";;
esac;;
esac
}
# Emit the "unast" module body: the _<grammar>_unast_emit function (AST to
# source reconstruction), its _root wrapper and the <grammar>_unast entry
# point. All output is written with _printr1.
#
# Steps:
#   1. Run _bnf_gen_propagate_prefix_for_rule over all rules until no prefix
#      changes any more (at most 20 passes).
#   2. Print one case branch per rule in $_GR, chosen from the rule's body
#      node type (string/number accumulators, character classes, references,
#      alternations, repetitions, sequences, terminals). Rules listed in
#      $_GEXTERN are skipped.
#   3. Print branches for the binary-operator state, each postfix operator
#      and the ternary operator when those features are enabled.
_bnf_gen_emit_ast () {
# --- Compute emitter prefix chains for collapsed intermediates ---
# Propagate terminal prefixes from sequences (e.g., '(' value ')') down to
# the collapsed child rule, so the emitter reconstructs leading terminals.
local _pfx_changed=1 _pfx_pass=0
local _r _rcode _node _body_node _bt _is_str _si _str_close _str_rule _mid _ch_t
local _ba_tval _ba_tcnt _ba_esc _bm_sep _bm_inner _bm_inner_t _bm_first_t
local _tq_nt1 _e0 _e1 _tq_is_accum _se _ef _final_slot _bm_sfmt _s
local _pi _pst _po _pc _pi2 _str_esc
while test $_pfx_changed -eq 1; do
_pfx_changed=0; _pfx_pass=$((_pfx_pass + 1))
for _r in $_GR; do _bnf_gen_propagate_prefix_for_rule; done
# Hard stop after 20 passes even if prefixes are still changing.
case $_pfx_pass in 20) break;; esac
done
# --- Emitter function ---
_printr1 ""
_printr1 "# --- Emitter (AST to source reconstruction) ---"
_printr1 ""
_printr1 "_${_GN}_unast_emit () {"
_printr1 " local _n=\$1 _t _v _r _ch"
_printr1 " IFS=' '; eval \"set -- \\\$X\$_n\"; IFS=''"
_printr1 " _t=\$1; shift"
_printr1 " eval \"_v=\\\"\\\${V\$_n:-}\\\"\""
_printr1 ""
_printr1 " case \"\$_t\" in"
# Document root: emit all children (newline-join for line-oriented grammars)
_bnf_gen_emit_child_loop "$_GDC" ""
# Generate emitter case for each rule based on its grammar structure
for _r in $_GR; do
# Skip extern rules — emitter provided externally
# Whole-word membership test: a trailing space is added to the list so that
# a rule such as "string" is not skipped merely because "string_lit" is
# extern.
case "$_GEXTERN " in *" $_r "*) continue;; esac
eval "_rcode=\$_RCODE_$_r; _node=\$_RNODE_$_r"
eval "set -- \$X$_node"; shift
_body_node=$1
eval "_bt=\"\${X$_body_node%% *}\""
# String accumulator: emit value wrapped in configured delimiter
_is_str= _si=1
while test $_si -le $_GSTR_N; do
eval "_str_rule=\"\$_GSTR_RULE_$_si\""
case "$_r" in "$_str_rule") _is_str=$_si; break;; esac
_si=$((_si + 1))
done
case "$_is_str" in ?*)
eval "_str_close=\"\$_GSTR_CLOSE_$_is_str\""
case "$_str_close" in
'"') _printr1 " $_rcode) REPLY=\"\\\"\$_v\\\"\";;";;
"'") _printr1 " $_rcode) REPLY=\"'\$_v'\";;";;
# Any other delimiter is escaped before being placed inside the generated
# double-quoted string, so that characters special there (a backtick, for
# instance) are not interpreted by the generated code.
*) _esc_dq "$_str_close"; _str_esc="$REPLY"
_printr1 " $_rcode) REPLY=\"$_str_esc\$_v$_str_esc\";;";;
esac
continue;; esac
# Number accumulator: emit raw value
case "$_r" in "$_GNUM")
_printr1 " $_rcode) REPLY=\"\$_v\";;"
continue;; esac
case "$_bt" in
Bc|Bp|Bs|Bk) # Character class accumulator: emit raw value
_printr1 " $_rcode) REPLY=\"\$_v\";;"
continue;;
Bi|Bo) # Single-ref, optional: delegate to child, or emit V for keywords
_printr1 " $_rcode) case \$# in 0) REPLY=\"\$_v\";; *) _${_GN}_unast_emit \"\$1\";; esac;;";;
Ba) # Alternation: scan for a terminal-only branch as fallback
_ba_tval= _ba_tcnt=0
eval "set -- \$X$_body_node"; shift
for _mid in "$@"; do
eval "_ch_t=\"\${X$_mid%% *}\""
case "$_ch_t" in Bt)
eval "_ba_tval=\"\${V$_mid:-}\""
_ba_tcnt=$((_ba_tcnt + 1));;
esac
done
case "$_ba_tcnt" in 1)
# Single terminal branch: fallback to its value when $_v is empty
_esc_dq "$_ba_tval"; _ba_esc="$REPLY"
_printr1 " $_rcode) case \$# in 0) case \"\$_v\" in ?*) REPLY=\"\$_v\";; *) REPLY=\"$_ba_esc\";; esac;; 1) _${_GN}_unast_emit \"\$1\";; *) _r=; for _ch in \"\$@\"; do _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\"; done; REPLY=\"\$_r\";; esac;;";;
*) _printr1 " $_rcode) case \$# in 0) REPLY=\"\$_v\";; 1) _${_GN}_unast_emit \"\$1\";; *) _r=; for _ch in \"\$@\"; do _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\"; done; REPLY=\"\$_r\";; esac;;";;
esac;;
Bm) # Repetition: detect separator from inner grammar and join children
# The Bm body contains the repetition content.
# If it's a sequence starting with a terminal (e.g., { ',' item }),
# that terminal is the separator for joining children.
_bm_sep=
eval "set -- \$X$_body_node"; shift # get Bm's children
_bm_inner=$1
eval "_bm_inner_t=\"\${X$_bm_inner%% *}\""
case "$_bm_inner_t" in Bq)
# Inner is a sequence — check if first element is a terminal
eval "set -- \$X$_bm_inner"; shift
eval "_bm_first_t=\"\${X$1%% *}\""
case "$_bm_first_t" in Bt)
eval "_bm_sep=\"\${V$1:-}\"";;
esac;;
esac
_bnf_gen_emit_child_loop "$_rcode" "$_bm_sep";;
Bq) _bnf_gen_emit_seq;;
Bt) _printr1 " $_rcode) REPLY=\"\$_v\";;";;
esac
done
# Binary operator emitter for precedence climbing
case "$_GPREC_BIN" in ?*)
_printr1 " $_GPREC_BIN) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY\""
_printr1 " _${_GN}_unast_emit \"\$2\"; REPLY=\"\$_r\$_v\$REPLY\";;"
;; esac
# Postfix operator emitters
_pi=1
while test $_pi -le $_GPOST_N; do
eval "_pst=\"\$_GPOST_STATE_$_pi\""
eval "_po=\"\$_GPOST_OPEN_$_pi\""
eval "_pc=\"\${_GPOST_CLOSE_$_pi:-}\""
case "$_pc" in ?*)
# Bracket postfix: child1 is LHS, rest are args — emit LHS<open>args<close>
_printr1 " $_pst) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY$_po\"; shift"
_printr1 " local _sep="
_printr1 " for _ch in \"\$@\"; do"
_printr1 " case \"\$_sep\" in ?*) _r=\"\$_r, \";; esac"
_printr1 " _${_GN}_unast_emit \"\$_ch\"; _r=\"\$_r\$REPLY\"; _sep=1"
_printr1 " done; REPLY=\"\$_r$_pc\";;";;
*)
eval "_pi2=\"\${_GPOST_INNER_$_pi:-}\""
case "$_pi2" in
'') # Close-less postfix with no inner (e.g. ++): child1 is LHS — emit LHS<op>
_printr1 " $_pst) _${_GN}_unast_emit \"\$1\"; REPLY=\"\$REPLY$_po\";;";;
*) # Simple postfix: child1 is LHS, child2 is inner — emit LHS<open>inner
_printr1 " $_pst) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY$_po\""
_printr1 " _${_GN}_unast_emit \"\$2\"; REPLY=\"\$_r\$REPLY\";;";;
esac;;
esac
_pi=$((_pi + 1))
done
# Ternary: child1 is condition, child2 is true branch, child3 is false branch
case "$_GTERNARY" in 1)
_printr1 " $_GTERNARY_STATE) _${_GN}_unast_emit \"\$1\"; _r=\"\$REPLY$_GTERNARY_OPEN\""
_printr1 " _${_GN}_unast_emit \"\$2\"; _r=\"\$_r\$REPLY$_GTERNARY_SEP\""
_printr1 " _${_GN}_unast_emit \"\$3\"; REPLY=\"\$_r\$REPLY\";;";;
esac
# Unknown node types are rendered as ??<type>?? by the generated emitter.
_printr1 " *) REPLY=\"??\${_t}??\";;"
_printr1 " esac"
_printr1 "}"
_printr1 ""
_printr1 "_${_GN}_unast_emit_root () { _${_GN}_unast_emit \"\$@\"; }"
_printr1 ""
_printr1 "${_GN}_unast () {"
_printr1 " _readall; eval \"\$REPLY\""
_printr1 " _${_GN}_unast_emit_root 0"
_printr1 " _printr1 \"\$REPLY\""
_printr1 "}"
}
# ============================================================
# Entry Point
# ============================================================
# Generate the parser, unast and reast modules for a grammar.
# $1 = output directory. Defaults to the current directory when omitted or
#      empty, so the output paths can never resolve to /parser.sh,
#      /unast.sh and /reast.sh in the filesystem root.
# Input: _readall leaves its result in REPLY, which is eval'ed before the
#   two analysis phases run (presumably the serialized grammar AST — the
#   helper is not visible here).
# Output files (overwritten):
#   <dir>/parser.sh  generated <grammar>_parser
#   <dir>/unast.sh   generated <grammar>_unast (AST back to source)
#   <dir>/reast.sh   <grammar>_reast, a parser | unast pipeline
# Side effect: IFS is set to a single space and is not restored.
gen_bnf () {
  local _outdir="${1:-.}"
  _readall; eval "$REPLY"
  IFS=' '
  # Phase 1 scalar outputs
  local _GN _GP _GW _GR _G1 _GDC _GST _GSN
  local _GNUM _GVALNUM _GSTRICT _GCMT _GCMT_END _GEXTERN
  local _GKW_RULE _GKW_LIST _GSTR_SKIP_LIST
  local _GPREC_RULE _GPREC_N _GPREC_RULE_CODE _GPREC_BIN
  local _GPOST_N _GUNARY_N _GSTR_N
  local _GTERNARY _GTERNARY_OPEN _GTERNARY_SEP _GTERNARY_PREC
  local _GTERNARY_STATE _GTERNARY_COLON _GTERNARY_DONE
  # Phase 2 scalar outputs
  local _DA_CODES _DA_CLASS _DA_WILD _DA_INHERIT
  local _GALT _GDONE _GNOCOL _GNUM_CODE _GLOB_VARS
  # Emit-phase shared
  local _rc _tok
  _bnf_gen_p1
  _bnf_gen_p2
  # _rc is the document root state code; _tok is the state-code list with
  # its leading space removed.
  _rc="$_GDC"; _tok="${_GST# }"
  # Emit parser module
  { _bnf_gen_emit; _bnf_gen_emit_dispatch; _bnf_gen_emit_dispatch2; } > "$_outdir/parser.sh"
  # Emit unast module
  { _printr1 "use ${_GN}_parser"
    _printr1 "use io_readall"
    _printr1 ""
    _bnf_gen_emit_ast
  } > "$_outdir/unast.sh"
  # Emit reast module
  { _printr1 "use ${_GN}_parser"
    _printr1 "use ${_GN}_unast"
    _printr1 ""
    _printr1 "${_GN}_reast () { ${_GN}_parser | ${_GN}_unast; }"
  } > "$_outdir/reast.sh"
}
# Thin wrapper: forwards all arguments unchanged to gen_bnf and returns its
# exit status.
bnf_gen () {
  gen_bnf "$@"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment