andy0130tw · February 19, 2025 06:03
diff --git a/iv-template-line-parser.ts b/iv-template-line-parser.ts
 import { blockFunctionNameSet } from './names'

 interface InputConfig {
  debug?: boolean
  locations?: boolean
  allowServiceRules?: boolean
 }

 type IVLanguageVersion = '1.0' | '2.0' | '2.1'

 export class IVTemplate {
  constructor(
    readonly version: IVLanguageVersion,
    readonly blocks: Block[]
  ) {}
 }

 class IVSyntaxError extends Error {}

 export const ST = Object.freeze({
  Option: 'option',
  Condition: 'condition',
  Property: 'property',
  Variable: 'variable',
  Function: 'function',
  BlockFunction: 'blockFunction',
  Include: 'include',
  // auxiliary types
  Eof: '_eof',
  AtRule: '_atRule',  // func or blockFunc, based on name
  ReplaceTagAbbr: '_replaceTagAbbr',
  BlockFuncStart: '_blockFuncStart',
  BlockFuncEnd: '_blockFuncEnd',
 })

 type StatementType = typeof ST[keyof typeof ST]
 type AuxStatementType = StatementType & `_${string}`
 type ASTNodeType = Exclude<StatementType, AuxStatementType>

 interface IVDiagnostic {
  message: string
  pos: number
  lineRange: [number, number]
 }

 // FIXME: refine for each type
 export interface IVASTNode<T extends ASTNodeType = ASTNodeType> {
  type: ASTNodeType & T
  _source?: string
  pos?: number
  lineRange?: [number, number]
 }

 interface Block {
  conditions: IVASTNode<typeof ST.Condition>[]
  body: IVASTNode[]
 }

 export class Input {
  readonly config: InputConfig

  type: typeof ST.Eof | ReturnType<typeof inferStatementType> = ST.Eof
  pos = 0
  startPos = 0
  startLineNum = 0
  endLineNum = 0
  content = ''
  seenFirstStatement: boolean = false

  languageVersion: IVLanguageVersion | undefined
  warnings: IVDiagnostic[] = []

  constructor(
    readonly source: string,
    config: InputConfig = {}
  ) {
    this.config = {
      debug: false,
      locations: true,
      allowServiceRules: false,
      ...config
    }
    this.nextStatement()
  }

  get eof() { return this.type == ST.Eof }

  emitWarning(message: string) {
    this.warnings.push({
      message,
      pos: this.startPos,
      lineRange: [this.startLineNum, this.endLineNum],
    })
  }

  throw(message: string): never {
    throw new IVSyntaxError(`${formatLineRange(this.startLineNum, this.endLineNum)}: ${message}`)
  }

  nextStatement() {
    const posMax = this.source.length
    if (this.pos == posMax) {
      this.type = ST.Eof
      this.content = ''
      this.endLineNum = this.startLineNum
      return
    }

    let isLastLine = false
    let pos = this.pos
    let content: string | null = null
    let lineNum = this.endLineNum + 1
    let startPos = -1
    let startLineNum = -1

    while (pos < posMax) {
      const frags = []
      let done = false
      while (!done) {
        let lineEnd = this.source.indexOf('\n', pos)
        if (lineEnd < 0) {
          isLastLine = true
          lineEnd = posMax
        }

        // find backslash to determine whether to look up the next line
        const line = this.source.slice(pos, lineEnd).trimStart()
        const [found, frag] = extractBackslash(line)
        done = !found
        if (frag != '') {
          if (!frags.length) {
            // a fresh start
            startLineNum = lineNum
            startPos = pos
          }
          frags.push(frag)
        }
        if (isLastLine) {
          pos = lineEnd
          done = true
        } else {
          // skip past the new line char
          pos = lineEnd + 1
          lineNum++
        }
      }

      if (frags.length) {
        const joined = stripTrailingComment(frags.join(''))
        frags.length = 0
        if (joined != '') {
          content = joined
          break
        }
      }
    }

    this.pos = pos
    this.startPos = startPos
    this.startLineNum = startLineNum
    this.endLineNum = isLastLine ? lineNum : lineNum - 1
    if (content != null) {
      this.content = content
      if (this.type != ST.Eof) {
        this.seenFirstStatement = true
      }
      this.type = inferStatementType(content)
    } else {
      // do an extra cycle to set EOF
      this.nextStatement()
    }
  }

  parse() {
    const template = parseTemplate(this)
    return {
      ...template,
      warnings: this.warnings,
    }
  }
 }

 function inferStatementType(str: string) {
  if (str === '}') return ST.BlockFuncEnd

  switch (str[0]) {
  case '~': return ST.Option
  case '?':
  case '!':
    return ST.Condition
  case '@': return ST.AtRule
  case '<': return ST.ReplaceTagAbbr
  case '$': return ST.Variable
  case '+': return ST.Include
  }

  return ST.Property
 }

 function extractBackslash(str: string): [boolean, string] {
  // "!": it MUST match something
  const match = str.match(/(\s*)(\\?)(\s*)$/)!
  if (match[2]) {
    return [true, str.slice(0, str.length - match[0].length)]
  } else {
    return [false, str.slice(0, str.length - match[1].length)]
  }
 }

 function parseTemplate(input: Input) {
  const blocks: Block[] = []
  let activeBlock: Block = { conditions: [], body: [] }
  while (!input.eof) {
    if (input.type === ST.Condition) {
      if (activeBlock.body.length) {
        blocks.push(activeBlock)
        activeBlock = { conditions: [], body: [] }
      }
      activeBlock.conditions.push(parseStatement(input) as IVASTNode<typeof ST.Condition>)
    } else {
      activeBlock.body.push(parseStatement(input))
    }
  }
  if (activeBlock.conditions.length || activeBlock.body.length) {
    blocks.push(activeBlock)
  }

  return new IVTemplate(input.languageVersion ?? '1.0', blocks)
 }

 function parseStatement(input: Input, parentBlockName?: string): IVASTNode {
  function doParse() {
    switch (input.type) {
    case ST.Option:
      return parseOption(input)
    case ST.Condition:
      if (parentBlockName != null) {
        const { name } = readRuleParts(input, 1)
        input.throw(`Conditions are not allowed to appear inside a block func: ${input.content[0]}${name} in @${parentBlockName}`)
      }
      return parseCondition(input)
    case ST.AtRule:
      return parseAtRule(input)
    case ST.ReplaceTagAbbr:
      return parseReplaceTagAbbr(input)
    case ST.Variable:
      return parseVariable(input)
    case ST.Include:
      if (!input.config.allowServiceRules) {
        return input.throw('Include rules are not supported in user templates.')
      }
      return parseInclude(input)
    case ST.BlockFuncEnd:
      if (parentBlockName == null) {
        input.throw('Unexpected }')
      }
      break
    case ST.Property: return parseProperty(input)
    }

    return input.throw(input.eof ?
      'Expected a statement' :
      `Unexpected statement type ${input.type}`)
  }

  const result = doParse()

  if (input.languageVersion === undefined) {
    input.languageVersion = '1.0'
  }

  if (!input.seenFirstStatement && input.languageVersion === '1.0') {
    input.emitWarning('Version 1.0 is outdated. Please specify at least 2.0.')
  }

  let meta: Partial<IVASTNode> | undefined
  if (input.config.debug) {
    meta = Object.assign(meta ?? {}, { _source: input.content })
  }
  if (input.config.locations) {
    meta = Object.assign(meta ?? {}, {
      pos: input.startPos,
      lineRange: [input.startLineNum, input.endLineNum],
    })
  }

  // we recurse AFTER the version is resolved
  // here is the only nested construct
  if (result.type == ST.BlockFuncStart) {
    input.nextStatement()

    let stmts = []
    while (input.type != ST.BlockFuncEnd) {
      stmts.push(parseStatement(input, result.name))
      if (input.eof) {
        input.throw('Expected } but found EOF')
      }
    }

    input.nextStatement()

    const outer = {
      type: ST.BlockFunction,
      name: result.name,
      query: result.query,
      body: stmts,
    }

    return Object.assign(outer, meta)
  }

  input.nextStatement()

  return Object.assign(result, meta)
 }

 function parseOption(input: Input) {
  const { assertOpeningBrace, name, content } = readRuleParts(input, 1)

  if (content == null || content == '')
    input.throw('Option value is required.')
  if (!name.match(/^[a-zA-Z]\w*$/))
    input.throw(`Invalid option name: [${name}]`)

  assertOpeningBrace(false)

  if (name === 'version') {
    const str = readJSONString(content)

    if (str == null) {
      input.throw(`Version should be a string: [${content}]`)
    }

    let normalized: IVLanguageVersion

    if (str.match(/^1(\.0?)?$/)) {
      normalized = '1.0'
    } else if (str.match(/^2(\.0?)?$/)) {
      normalized = '2.0'
    } else if (str === '2.1') {
      normalized = '2.1'
    } else {
      input.throw(`Invalid version: [${str}]`)
    }

    // interpret version early because it *might* affect parsing
    // (but I have not found any in the reference engine)
    if (input.languageVersion !== undefined) {
      input.emitWarning('Version can only be specified once at the beginning.')
    } else {
      input.languageVersion = normalized
    }

    return { type: ST.Option, name: 'version', value: normalized }
  } else {
    input.throw(`Unhandled option: [${name}]`)
  }
 }

 function parseCondition(input: Input) {
  const conditionType = input.content[0] === '?' ? 'OR' : 'AND'
  const { assertOpeningBrace, name, content } = readRuleParts(input, 1)

  if (!name.match(/^[a-zA-Z_][-\w\.]*$/))
    input.throw(`Invalid condition name: [${name}]`)

  assertOpeningBrace(false)

  return {
    type: ST.Condition,
    conditionType,
    name,
    // depending on condition name, the content is eitherxpath or regexp (but never string)
    content,
  }
 }

 function parseVariable(input: Input) {
  const { assertOpeningBrace, name: nameWithSuffix, content } = readRuleParts(input, 1)

  let name = nameWithSuffix

  let strategy = { append: false, weak: false }
  // the order matters! "?+" is allowed but NOT "+?"
  if (nameWithSuffix.at(-1) == '+') {
    // name+ means append
    strategy.append = true
    name = name.slice(0, -1)
  }
  if (nameWithSuffix.at(-1) == '?') {
    // name? means do not overwrite if non-empty
    strategy.weak = true
    name = name.slice(0, -1)
  }

  if (!name.match(/^[a-zA-Z]\w*$/))
    input.throw(`Invalid variable name: [${name}]`)

  assertOpeningBrace(false)

  return {
    type: ST.Variable,
    name,
    strategy: Object.keys(strategy).some(x => x) ? strategy : null,
    content,
  }
 }

 function parseProperty(input: Input) {
  const { assertOpeningBrace, name: nameWithSuffix, content } = readRuleParts(input)
  let name = nameWithSuffix

  let force: 0 | 1 | 2 = 0
  for (let i = 0; i < 2; i++) {
    if (nameWithSuffix.at(-1) == '!') {
      force++
      name = name.slice(0, -1)
    }
  }

  if (!name.match(/^[a-zA-Z]\w*$/))
    input.throw(`Invalid property name: [${name}]`)

  assertOpeningBrace(false)

  return {
    type: ST.Property,
    name,
    force,
    content,
  }
 }

 function parseAtRule(input: Input) {
  const { assertOpeningBrace, name: nameWithArgs, content } = readRuleParts(input, 1)
  let name = nameWithArgs

  let args: IVArgument[] | null = null
  // XXX: validate function signatures?
  const lparenPos = findCharToken(nameWithArgs, '(')
  if (lparenPos >= 0) {
    if (nameWithArgs.at(-1) == ')') {
      name = nameWithArgs.slice(0, lparenPos).trimEnd()
      args = readArguments(nameWithArgs.slice(lparenPos + 1, -1))
    }
  }

  if (!name.match(/^\w+$/))
    input.throw(`Invalid function name: [${name}]`)

  const isBlockFuncName = blockFunctionNameSet.has(name)
  assertOpeningBrace(isBlockFuncName)

  if (isBlockFuncName) {
    if (args == null || args.length == 0) {
      input.throw(`@${name} should have 1 argument`)
    }
    return {
      type: ST.BlockFuncStart,
      name,
      query: args[0],  // excess args are dropped
    }
  }

  return {
    type: ST.Function,
    name,
    content,
    args,
  }
 }

 function parseReplaceTagAbbr(input: Input) {
  const tagEndPos = input.content.indexOf('>')
  if (tagEndPos < 0) {
    input.throw('> expected but not found')
  }

  const tag = input.content.slice(0, tagEndPos + 1)
  const rest = input.content.slice(tagEndPos + 1)
  // parse as if the current token were...
  input.content = `@replace_tag(${tag})${rest}`

  return parseAtRule(input)
 }

 function parseInclude(input: Input) {
  // naive implementation that should not be used in real cases
  return {
    type: ST.Include,
    content: input.content.slice(1).trimStart(),
  }
 }

 function formatLineRange(a: number, b: number) {
  return a == b ? `${a}` : `${a}-${b}`
 }

 function skipByPattern(str: string, regex: RegExp, pos: number) {
  if (!regex.sticky) throw new Error('Cannot match without sticky flag set')
  regex.lastIndex = pos
  return regex.exec(str) ? regex.lastIndex : -1
 }

 function findStringExtent(str: string, pos: number = 0) {
  const pat =
    str[pos] == `"` ? /(?:\\.|[^"\\])*"/y :
    str[pos] == `'` ? /(?:\\.|[^'\\])*'/y : null
  if (pat == null) return -1
  return skipByPattern(str, pat, pos + 1)
 }

 function readJSONString(str: string) {
  if (str[0] != '"') return null
  const extent = findStringExtent(str)
  // assert that the quote char is at the end
  if (extent != str.length) return null
  try {
    // use the built-in JSON parser to do the heavy lifting
    return JSON.parse(str) as string
  } catch (err) {
    if (err instanceof SyntaxError) return null
    throw err
  }
 }

 function readString(str: string) {
  // wow this is more convoluted than I have thought
  const esc = (s: string) => `"${s.slice(1, -1).replace(/"/g, `\\"`).replace(/\\\'/g, `'`)}"`
  try {
    const ret =
      str[0] == `"` ? JSON.parse(str) as string :
      str[0] == `'` ? JSON.parse(esc(str)) as string : undefined
    if (ret === undefined)
      throw new Error('the input does not appear to be a valid string literal.')
    return ret
  } catch (err) {
    if (err instanceof SyntaxError) return null
    throw err
  }
 }

 export function findCharToken(str: string, chr: string, pos: number = 0) {
  if (chr.length != 1) throw new Error('Illegal char')
  // FIXME: is this airtight?
  if (chr == '\\' || chr == ']') chr = '\\' + chr
  const pat = new RegExp(`[^'"${chr}]*`, 'y')
  while (pos < str.length) {
    let nextPos = skipByPattern(str, pat, pos)
    if (nextPos == str.length) break
    // report if the chr (instead of a quote) found
    if (str[nextPos] == chr) {
      pos = nextPos
      break
    }

    // skip to where the string literal ends
    nextPos = findStringExtent(str, nextPos)
    // if there is no pairing quote, it does not count as a string literal
    if (nextPos < 0) break
    pos = nextPos
  }
  return str.indexOf(chr, pos)
 }

 export function stripTrailingComment(str: string) {
  const commentStart = findCharToken(str, '#')
  return commentStart >= 0 ? str.slice(0, commentStart).trimEnd() : str
 }

 /**
 * We rely on the fact that the input and all the output parts are trimmed.
 */
 export function readRuleParts(input: Input, pos: number = 0):
  { assertOpeningBrace: (yes: boolean) => void, name: string, content: string | null } {

  const str = input.content.slice(pos)
  const expectTrue = (t: boolean) => t || input.throw('Unexpected {')
  const expectFalse = (f: boolean) => !f || input.throw('Expected { but found none')

  // comment: the ref impl is super weird as you can see
  if (str.at(-1) === '{') {
    const name = str.slice(0, -1).trimEnd()
    return {
      assertOpeningBrace: expectTrue,
      name,
      content: null,
    }
  }

  let colonPos = findCharToken(str, ':')
  if (colonPos < 0) {
    return {
      assertOpeningBrace: expectFalse,
      name: str,
      content: null,
    }
  }

  return {
    assertOpeningBrace: expectFalse,
    name: str.slice(0, colonPos).trimEnd(),
    content: str.slice(colonPos + 1).trimStart(),
  }
 }

 type IVArgument = string

 export function readArguments(str: string) {
  const args: IVArgument[] = []
  let pos = 0
  let trailingComma = false
  while (pos < str.length) {
    const startPos = skipByPattern(str, /\s*/y, pos)
    let endPos
    let content: IVArgument

    let stringExtent = -1
    // `"` or `""""` are treated as unquoted strings, WTF
    if (str[startPos] == `'` || str[startPos] == `"`) {
      const extent = findStringExtent(str, startPos)
      if (extent == str.length || (extent >= 0 && str[extent].match(/[\s,]/))) {
        stringExtent = extent
      }
    }

    if (stringExtent >= 0) {
      endPos = stringExtent
      // the official engine treats it as an empty string when the parsing fails,
      // which leads to some confusing error message like "Regexp pattern is empty"
      content = readString(str.slice(startPos, endPos)) ?? ''
    } else {
      endPos = skipByPattern(str, /[^\s,]*/y, startPos)
      if (endPos < 0) {
        endPos = str.length
      }
      content = str.slice(startPos, endPos)
    }
    args.push(content)


    // skip to the start of the next argument, consuming a comma if met
    const nextPos = skipByPattern(str, /\s*/y, endPos)
    if (nextPos < str.length && str[nextPos] == ',') {
      trailingComma = true
      pos = nextPos + 1
    } else {
      trailingComma = false
      pos = nextPos
    }
  }

  if (trailingComma) {
    args.push('')
  }
  return args
 }
diff --git a/~fuzzer.ts b/~fuzzer.ts
 import { Input } from './iv-template-line-parser'

 const rand = (n: number) => Math.floor(Math.random() * n)

 function referenceParser(str: string) {
  function trimCont(s: string) {
    const mat = s.match(/(\s*(?:\\|\\\r))$/)
    if (mat) {
      s = s.slice(0, s.length - mat[0].length)
    }
    return s
  }

  const xs = (str + '\n').split('\n').map(x => x.trim())
  const result: string[] = []
  for (const x of xs) {
    if (result.length) {
      const last = result[result.length - 1]
      let s = trimCont(last)
      if (last != s) {
        result[result.length - 1] = s + x
        continue
      }
    }
    result.push(x)
  }
  return result.filter(x => x)
 }

 function fuzz() {
  const source = Array.from({length: rand(3)}).map(() =>
    ' '.repeat(rand(3)) + 'A'.repeat(rand(3)) + ' '.repeat(rand(3)) +
    ['', '\\', '\\\r'][rand(3)] + ' '.repeat(rand(3))).join('\n') + '\n'.repeat(rand(3))

  const ref = referenceParser(source)
  let result: string[] | undefined

  let err
  try {
    result = new Input(source, {debug: true}).parse().blocks.flatMap(blk => [...blk.conditions, ...blk.body]).map(x => x._source!)
  } catch (e) {
    err = (e as Error).message
  }

  if (result == null) {
    // when parse fails
  } else if (ref.length != result.length) {
    err = 'Array length differs'
  } else {
    for (let i = 0; i < ref.length; i++) {
      if (ref[i] != result[i]) {
        err = `Incorrect line #${i}`
        break
      }
    }
  }

  if (err) {
    console.log('FAIL --> ' + err)
    console.log(source.split('\n').map(s => `[${s}]`))
    console.log('expected =', ref)
    console.log('actual   =', result)
    throw 0
  }
 }

 for (let i = 0; i < 100000; i++) {
  fuzz()
 }
	import { blockFunctionNameSet } from './names'

	interface InputConfig {
	debug?: boolean
	locations?: boolean
	allowServiceRules?: boolean
	}

	type IVLanguageVersion = '1.0' \| '2.0' \| '2.1'

	export class IVTemplate {
	constructor(
	readonly version: IVLanguageVersion,
	readonly blocks: Block[]
	) {}
	}

	class IVSyntaxError extends Error {}

	export const ST = Object.freeze({
	Option: 'option',
	Condition: 'condition',
	Property: 'property',
	Variable: 'variable',
	Function: 'function',
	BlockFunction: 'blockFunction',
	Include: 'include',
	// auxiliary types
	Eof: '_eof',
	AtRule: '_atRule', // func or blockFunc, based on name
	ReplaceTagAbbr: '_replaceTagAbbr',
	BlockFuncStart: '_blockFuncStart',
	BlockFuncEnd: '_blockFuncEnd',
	})

	type StatementType = typeof ST[keyof typeof ST]
	type AuxStatementType = StatementType & `_${string}`
	type ASTNodeType = Exclude<StatementType, AuxStatementType>

	interface IVDiagnostic {
	message: string
	pos: number
	lineRange: [number, number]
	}

	// FIXME: refine for each type
	export interface IVASTNode<T extends ASTNodeType = ASTNodeType> {
	type: ASTNodeType & T
	_source?: string
	pos?: number
	lineRange?: [number, number]
	}

	interface Block {
	conditions: IVASTNode<typeof ST.Condition>[]
	body: IVASTNode[]
	}

	export class Input {
	readonly config: InputConfig

	type: typeof ST.Eof \| ReturnType<typeof inferStatementType> = ST.Eof
	pos = 0
	startPos = 0
	startLineNum = 0
	endLineNum = 0
	content = ''
	seenFirstStatement: boolean = false

	languageVersion: IVLanguageVersion \| undefined
	warnings: IVDiagnostic[] = []

	constructor(
	readonly source: string,
	config: InputConfig = {}
	) {
	this.config = {
	debug: false,
	locations: true,
	allowServiceRules: false,
	...config
	}
	this.nextStatement()
	}

	get eof() { return this.type == ST.Eof }

	emitWarning(message: string) {
	this.warnings.push({
	message,
	pos: this.startPos,
	lineRange: [this.startLineNum, this.endLineNum],
	})
	}

	throw(message: string): never {
	throw new IVSyntaxError(`${formatLineRange(this.startLineNum, this.endLineNum)}: ${message}`)
	}

	nextStatement() {
	const posMax = this.source.length
	if (this.pos == posMax) {
	this.type = ST.Eof
	this.content = ''
	this.endLineNum = this.startLineNum
	return
	}

	let isLastLine = false
	let pos = this.pos
	let content: string \| null = null
	let lineNum = this.endLineNum + 1
	let startPos = -1
	let startLineNum = -1

	while (pos < posMax) {
	const frags = []
	let done = false
	while (!done) {
	let lineEnd = this.source.indexOf('\n', pos)
	if (lineEnd < 0) {
	isLastLine = true
	lineEnd = posMax
	}

	// find backslash to determine whether to look up the next line
	const line = this.source.slice(pos, lineEnd).trimStart()
	const [found, frag] = extractBackslash(line)
	done = !found
	if (frag != '') {
	if (!frags.length) {
	// a fresh start
	startLineNum = lineNum
	startPos = pos
	}
	frags.push(frag)
	}
	if (isLastLine) {
	pos = lineEnd
	done = true
	} else {
	// skip past the new line char
	pos = lineEnd + 1
	lineNum++
	}
	}

	if (frags.length) {
	const joined = stripTrailingComment(frags.join(''))
	frags.length = 0
	if (joined != '') {
	content = joined
	break
	}
	}
	}

	this.pos = pos
	this.startPos = startPos
	this.startLineNum = startLineNum
	this.endLineNum = isLastLine ? lineNum : lineNum - 1
	if (content != null) {
	this.content = content
	if (this.type != ST.Eof) {
	this.seenFirstStatement = true
	}
	this.type = inferStatementType(content)
	} else {
	// do an extra cycle to set EOF
	this.nextStatement()
	}
	}

	parse() {
	const template = parseTemplate(this)
	return {
	...template,
	warnings: this.warnings,
	}
	}
	}

	function inferStatementType(str: string) {
	if (str === '}') return ST.BlockFuncEnd

	switch (str[0]) {
	case '~': return ST.Option
	case '?':
	case '!':
	return ST.Condition
	case '@': return ST.AtRule
	case '<': return ST.ReplaceTagAbbr
	case '$': return ST.Variable
	case '+': return ST.Include
	}

	return ST.Property
	}

	function extractBackslash(str: string): [boolean, string] {
	// "!": it MUST match something
	const match = str.match(/(\s)(\\?)(\s)$/)!
	if (match[2]) {
	return [true, str.slice(0, str.length - match[0].length)]
	} else {
	return [false, str.slice(0, str.length - match[1].length)]
	}
	}

	function parseTemplate(input: Input) {
	const blocks: Block[] = []
	let activeBlock: Block = { conditions: [], body: [] }
	while (!input.eof) {
	if (input.type === ST.Condition) {
	if (activeBlock.body.length) {
	blocks.push(activeBlock)
	activeBlock = { conditions: [], body: [] }
	}
	activeBlock.conditions.push(parseStatement(input) as IVASTNode<typeof ST.Condition>)
	} else {
	activeBlock.body.push(parseStatement(input))
	}
	}
	if (activeBlock.conditions.length \|\| activeBlock.body.length) {
	blocks.push(activeBlock)
	}

	return new IVTemplate(input.languageVersion ?? '1.0', blocks)
	}

	function parseStatement(input: Input, parentBlockName?: string): IVASTNode {
	function doParse() {
	switch (input.type) {
	case ST.Option:
	return parseOption(input)
	case ST.Condition:
	if (parentBlockName != null) {
	const { name } = readRuleParts(input, 1)
	input.throw(`Conditions are not allowed to appear inside a block func: ${input.content[0]}${name} in @${parentBlockName}`)
	}
	return parseCondition(input)
	case ST.AtRule:
	return parseAtRule(input)
	case ST.ReplaceTagAbbr:
	return parseReplaceTagAbbr(input)
	case ST.Variable:
	return parseVariable(input)
	case ST.Include:
	if (!input.config.allowServiceRules) {
	return input.throw('Include rules are not supported in user templates.')
	}
	return parseInclude(input)
	case ST.BlockFuncEnd:
	if (parentBlockName == null) {
	input.throw('Unexpected }')
	}
	break
	case ST.Property: return parseProperty(input)
	}

	return input.throw(input.eof ?
	'Expected a statement' :
	`Unexpected statement type ${input.type}`)
	}

	const result = doParse()

	if (input.languageVersion === undefined) {
	input.languageVersion = '1.0'
	}

	if (!input.seenFirstStatement && input.languageVersion === '1.0') {
	input.emitWarning('Version 1.0 is outdated. Please specify at least 2.0.')
	}

	let meta: Partial<IVASTNode> \| undefined
	if (input.config.debug) {
	meta = Object.assign(meta ?? {}, { _source: input.content })
	}
	if (input.config.locations) {
	meta = Object.assign(meta ?? {}, {
	pos: input.startPos,
	lineRange: [input.startLineNum, input.endLineNum],
	})
	}

	// we recurse AFTER the version is resolved
	// here is the only nested construct
	if (result.type == ST.BlockFuncStart) {
	input.nextStatement()

	let stmts = []
	while (input.type != ST.BlockFuncEnd) {
	stmts.push(parseStatement(input, result.name))
	if (input.eof) {
	input.throw('Expected } but found EOF')
	}
	}

	input.nextStatement()

	const outer = {
	type: ST.BlockFunction,
	name: result.name,
	query: result.query,
	body: stmts,
	}

	return Object.assign(outer, meta)
	}

	input.nextStatement()

	return Object.assign(result, meta)
	}

	function parseOption(input: Input) {
	const { assertOpeningBrace, name, content } = readRuleParts(input, 1)

	if (content == null \|\| content == '')
	input.throw('Option value is required.')
	if (!name.match(/^[a-zA-Z]\w*$/))
	input.throw(`Invalid option name: [${name}]`)

	assertOpeningBrace(false)

	if (name === 'version') {
	const str = readJSONString(content)

	if (str == null) {
	input.throw(`Version should be a string: [${content}]`)
	}

	let normalized: IVLanguageVersion

	if (str.match(/^1(\.0?)?$/)) {
	normalized = '1.0'
	} else if (str.match(/^2(\.0?)?$/)) {
	normalized = '2.0'
	} else if (str === '2.1') {
	normalized = '2.1'
	} else {
	input.throw(`Invalid version: [${str}]`)
	}

	// interpret version early because it might affect parsing
	// (but I have not found any in the reference engine)
	if (input.languageVersion !== undefined) {
	input.emitWarning('Version can only be specified once at the beginning.')
	} else {
	input.languageVersion = normalized
	}

	return { type: ST.Option, name: 'version', value: normalized }
	} else {
	input.throw(`Unhandled option: [${name}]`)
	}
	}

	function parseCondition(input: Input) {
	const conditionType = input.content[0] === '?' ? 'OR' : 'AND'
	const { assertOpeningBrace, name, content } = readRuleParts(input, 1)

	if (!name.match(/^[a-zA-Z_][-\w\.]*$/))
	input.throw(`Invalid condition name: [${name}]`)

	assertOpeningBrace(false)

	return {
	type: ST.Condition,
	conditionType,
	name,
	// depending on condition name, the content is eitherxpath or regexp (but never string)
	content,
	}
	}

	function parseVariable(input: Input) {
	const { assertOpeningBrace, name: nameWithSuffix, content } = readRuleParts(input, 1)

	let name = nameWithSuffix

	let strategy = { append: false, weak: false }
	// the order matters! "?+" is allowed but NOT "+?"
	if (nameWithSuffix.at(-1) == '+') {
	// name+ means append
	strategy.append = true
	name = name.slice(0, -1)
	}
	if (nameWithSuffix.at(-1) == '?') {
	// name? means do not overwrite if non-empty
	strategy.weak = true
	name = name.slice(0, -1)
	}

	if (!name.match(/^[a-zA-Z]\w*$/))
	input.throw(`Invalid variable name: [${name}]`)

	assertOpeningBrace(false)

	return {
	type: ST.Variable,
	name,
	strategy: Object.keys(strategy).some(x => x) ? strategy : null,
	content,
	}
	}

	function parseProperty(input: Input) {
	const { assertOpeningBrace, name: nameWithSuffix, content } = readRuleParts(input)
	let name = nameWithSuffix

	let force: 0 \| 1 \| 2 = 0
	for (let i = 0; i < 2; i++) {
	if (nameWithSuffix.at(-1) == '!') {
	force++
	name = name.slice(0, -1)
	}
	}

	if (!name.match(/^[a-zA-Z]\w*$/))
	input.throw(`Invalid property name: [${name}]`)

	assertOpeningBrace(false)

	return {
	type: ST.Property,
	name,
	force,
	content,
	}
	}

	function parseAtRule(input: Input) {
	const { assertOpeningBrace, name: nameWithArgs, content } = readRuleParts(input, 1)
	let name = nameWithArgs

	let args: IVArgument[] \| null = null
	// XXX: validate function signatures?
	const lparenPos = findCharToken(nameWithArgs, '(')
	if (lparenPos >= 0) {
	if (nameWithArgs.at(-1) == ')') {
	name = nameWithArgs.slice(0, lparenPos).trimEnd()
	args = readArguments(nameWithArgs.slice(lparenPos + 1, -1))
	}
	}

	if (!name.match(/^\w+$/))
	input.throw(`Invalid function name: [${name}]`)

	const isBlockFuncName = blockFunctionNameSet.has(name)
	assertOpeningBrace(isBlockFuncName)

	if (isBlockFuncName) {
	if (args == null \|\| args.length == 0) {
	input.throw(`@${name} should have 1 argument`)
	}
	return {
	type: ST.BlockFuncStart,
	name,
	query: args[0], // excess args are dropped
	}
	}

	return {
	type: ST.Function,
	name,
	content,
	args,
	}
	}

	function parseReplaceTagAbbr(input: Input) {
	const tagEndPos = input.content.indexOf('>')
	if (tagEndPos < 0) {
	input.throw('> expected but not found')
	}

	const tag = input.content.slice(0, tagEndPos + 1)
	const rest = input.content.slice(tagEndPos + 1)
	// parse as if the current token were...
	input.content = `@replace_tag(${tag})${rest}`

	return parseAtRule(input)
	}

	function parseInclude(input: Input) {
	// naive implementation that should not be used in real cases
	return {
	type: ST.Include,
	content: input.content.slice(1).trimStart(),
	}
	}

	function formatLineRange(a: number, b: number) {
	return a == b ? `${a}` : `${a}-${b}`
	}

	function skipByPattern(str: string, regex: RegExp, pos: number) {
	if (!regex.sticky) throw new Error('Cannot match without sticky flag set')
	regex.lastIndex = pos
	return regex.exec(str) ? regex.lastIndex : -1
	}

	function findStringExtent(str: string, pos: number = 0) {
	const pat =
	str[pos] == `"` ? /(?:\\.\|[^"\\])*"/y :
	str[pos] == `'` ? /(?:\\.\|[^'\\])*'/y : null
	if (pat == null) return -1
	return skipByPattern(str, pat, pos + 1)
	}

	function readJSONString(str: string) {
	if (str[0] != '"') return null
	const extent = findStringExtent(str)
	// assert that the quote char is at the end
	if (extent != str.length) return null
	try {
	// use the built-in JSON parser to do the heavy lifting
	return JSON.parse(str) as string
	} catch (err) {
	if (err instanceof SyntaxError) return null
	throw err
	}
	}

	function readString(str: string) {
	// wow this is more convoluted than I have thought
	const esc = (s: string) => `"${s.slice(1, -1).replace(/"/g, `\\"`).replace(/\\\'/g, `'`)}"`
	try {
	const ret =
	str[0] == `"` ? JSON.parse(str) as string :
	str[0] == `'` ? JSON.parse(esc(str)) as string : undefined
	if (ret === undefined)
	throw new Error('the input does not appear to be a valid string literal.')
	return ret
	} catch (err) {
	if (err instanceof SyntaxError) return null
	throw err
	}
	}

	export function findCharToken(str: string, chr: string, pos: number = 0) {
	if (chr.length != 1) throw new Error('Illegal char')
	// FIXME: is this airtight?
	if (chr == '\\' \|\| chr == ']') chr = '\\' + chr
	const pat = new RegExp(`[^'"${chr}]*`, 'y')
	while (pos < str.length) {
	let nextPos = skipByPattern(str, pat, pos)
	if (nextPos == str.length) break
	// report if the chr (instead of a quote) found
	if (str[nextPos] == chr) {
	pos = nextPos
	break
	}

	// skip to where the string literal ends
	nextPos = findStringExtent(str, nextPos)
	// if there is no pairing quote, it does not count as a string literal
	if (nextPos < 0) break
	pos = nextPos
	}
	return str.indexOf(chr, pos)
	}

	export function stripTrailingComment(str: string) {
	const commentStart = findCharToken(str, '#')
	return commentStart >= 0 ? str.slice(0, commentStart).trimEnd() : str
	}

	/**
	* We rely on the fact that the input and all the output parts are trimmed.
	*/
	export function readRuleParts(input: Input, pos: number = 0):
	{ assertOpeningBrace: (yes: boolean) => void, name: string, content: string \| null } {

	const str = input.content.slice(pos)
	const expectTrue = (t: boolean) => t \|\| input.throw('Unexpected {')
	const expectFalse = (f: boolean) => !f \|\| input.throw('Expected { but found none')

	// comment: the ref impl is super weird as you can see
	if (str.at(-1) === '{') {
	const name = str.slice(0, -1).trimEnd()
	return {
	assertOpeningBrace: expectTrue,
	name,
	content: null,
	}
	}

	let colonPos = findCharToken(str, ':')
	if (colonPos < 0) {
	return {
	assertOpeningBrace: expectFalse,
	name: str,
	content: null,
	}
	}

	return {
	assertOpeningBrace: expectFalse,
	name: str.slice(0, colonPos).trimEnd(),
	content: str.slice(colonPos + 1).trimStart(),
	}
	}

	type IVArgument = string

	export function readArguments(str: string) {
	const args: IVArgument[] = []
	let pos = 0
	let trailingComma = false
	while (pos < str.length) {
	const startPos = skipByPattern(str, /\s*/y, pos)
	let endPos
	let content: IVArgument

	let stringExtent = -1
	// `"` or `""""` are treated as unquoted strings, WTF
	if (str[startPos] == `'` \|\| str[startPos] == `"`) {
	const extent = findStringExtent(str, startPos)
	if (extent == str.length \|\| (extent >= 0 && str[extent].match(/[\s,]/))) {
	stringExtent = extent
	}
	}

	if (stringExtent >= 0) {
	endPos = stringExtent
	// the official engine treats it as an empty string when the parsing fails,
	// which leads to some confusing error message like "Regexp pattern is empty"
	content = readString(str.slice(startPos, endPos)) ?? ''
	} else {
	endPos = skipByPattern(str, /[^\s,]*/y, startPos)
	if (endPos < 0) {
	endPos = str.length
	}
	content = str.slice(startPos, endPos)
	}
	args.push(content)


	// skip to the start of the next argument, consuming a comma if met
	const nextPos = skipByPattern(str, /\s*/y, endPos)
	if (nextPos < str.length && str[nextPos] == ',') {
	trailingComma = true
	pos = nextPos + 1
	} else {
	trailingComma = false
	pos = nextPos
	}
	}

	if (trailingComma) {
	args.push('')
	}
	return args
	}
	import { Input } from './iv-template-line-parser'

	const rand = (n: number) => Math.floor(Math.random() * n)

	function referenceParser(str: string) {
	function trimCont(s: string) {
	const mat = s.match(/(\s*(?:\\\|\\\r))$/)
	if (mat) {
	s = s.slice(0, s.length - mat[0].length)
	}
	return s
	}

	const xs = (str + '\n').split('\n').map(x => x.trim())
	const result: string[] = []
	for (const x of xs) {
	if (result.length) {
	const last = result[result.length - 1]
	let s = trimCont(last)
	if (last != s) {
	result[result.length - 1] = s + x
	continue
	}
	}
	result.push(x)
	}
	return result.filter(x => x)
	}

	function fuzz() {
	const source = Array.from({length: rand(3)}).map(() =>
	' '.repeat(rand(3)) + 'A'.repeat(rand(3)) + ' '.repeat(rand(3)) +
	['', '\\', '\\\r'][rand(3)] + ' '.repeat(rand(3))).join('\n') + '\n'.repeat(rand(3))

	const ref = referenceParser(source)
	let result: string[] \| undefined

	let err
	try {
	result = new Input(source, {debug: true}).parse().blocks.flatMap(blk => [...blk.conditions, ...blk.body]).map(x => x._source!)
	} catch (e) {
	err = (e as Error).message
	}

	if (result == null) {
	// when parse fails
	} else if (ref.length != result.length) {
	err = 'Array length differs'
	} else {
	for (let i = 0; i < ref.length; i++) {
	if (ref[i] != result[i]) {
	err = `Incorrect line #${i}`
	break
	}
	}
	}

	if (err) {
	console.log('FAIL --> ' + err)
	console.log(source.split('\n').map(s => `[${s}]`))
	console.log('expected =', ref)
	console.log('actual =', result)
	throw 0
	}
	}

	for (let i = 0; i < 100000; i++) {
	fuzz()
	}