patrickgalbraith · May 31, 2024 00:17
diff --git a/jsonpath.grammar b/jsonpath.grammar
 //
 // RFC 9535 JSONPath Grammar for Lezer
 //
 // Parses all selectors defined in https://github.com/jsonpath-standard/jsonpath-compliance-test-suite
 // Note however it is a bit looser than the grammar defined in the standard when it comes
 // to allowed escape characters and certain nested path segments.
 //

 // Entry point of the grammar: a document is exactly one JSONPath query.
 // Capitalized rule names (Query, JsonPathQuery) produce nodes in the tree.
 @top Query { JsonPathQuery }

 // A full query: the root identifier "$" followed by its segments.
 JsonPathQuery { RootIdentifier segments }

 // Zero or more segments. The lowercase name keeps this helper rule out of
 // the syntax tree.
 segments { segment* }

 // One selector inside a bracketed selection. The @isGroup=Selector prop tags
 // the node of every alternative as a member of the "Selector" node group.
 // NameSelector and IndexSelector are declared inline; the other alternatives
 // are defined elsewhere in the grammar.
 // NOTE(review): IndexSelector is assembled from separate `digit` tokens in a
 // nonterminal rule, so skipped whitespace between digits (e.g. `[1 2]`) looks
 // like it is accepted as a single index -- confirm that this is acceptable.
 selector[@isGroup=Selector] { 
  NameSelector { stringLiteral }
  | WildcardSelector
  | SliceSelector
  | IndexSelector { "-"? digit+ }
  | FilterSelector 
 }

 // Array slice `start:end:step`. The first ":" is mandatory; Start, End, the
 // second ":" and Step are all optional. Each bound is an optionally negative
 // run of digits and gets its own node (Start / End / Step).
 SliceSelector { 
  Start { "-"? digit+ }?
  ":" End { "-"? digit+ }?
  (":" Step { "-"? digit+ }?)? 
 }

 // A literal value usable in comparisons and function arguments: an
 // optionally negated number, a quoted string, or one of the keywords
 // true / false / null. Lowercase rule name: no node of its own in the tree.
 literal { 
  ("-"? number)
  | stringLiteral
  | true 
  | false 
  | null 
 }

 // Logical negation prefix, used by ParenExpr and TestExpr.
 logicalNotOp { "!" }

 // "@": starts a query relative to the node currently being filtered.
 CurrentNodeIdentifier { "@" }

 // -------------
 // Filters
 // -------------
 // Filter selector: "?" followed by either a chain of expressions or a single
 // basic expression.
 FilterSelector  { "?" (LogicalExpr | basicExpr) }
 // Two or more basic expressions joined by "&&" or "||". The chain is flat:
 // the rule encodes no precedence between "&&" and "||".
 LogicalExpr { basicExpr (("&&" | "||") basicExpr)+ }
 // A single operand of a logical expression.
 basicExpr { ParenExpr | ComparisonExpr | TestExpr }

 // Parenthesized expression, optionally negated with a leading "!". The
 // parentheses may wrap a whole logical chain or a single basic expression.
 ParenExpr { logicalNotOp? "(" (LogicalExpr | basicExpr) ")" }

 // Test expression: an optionally negated query or function call used as a
 // condition on its own (no comparison operator).
 // `~funcExpr` is a Lezer ambiguity marker. The same marker appears in
 // FunctionArgument, where a FilterQuery / FunctionExpr can be reached both
 // directly and through basicExpr -> TestExpr; presumably the marker exists
 // to let the parser tolerate that overlap.
 TestExpr { logicalNotOp? (FilterQuery | FunctionExpr) ~funcExpr }
 // A query inside a filter: relative to the current node ("@" + segments) or
 // absolute (a full JsonPathQuery starting at "$").
 FilterQuery {
  (CurrentNodeIdentifier segments) // relQuery
  | JsonPathQuery
 }

 // Comparison expression: exactly two comparables around one operator.
 ComparisonExpr { Comparable ComparisonOp Comparable }
 // Either side of a comparison: a literal, a query, or a function call.
 Comparable { literal | singularQuery | FunctionExpr }
 // The six comparison operators.
 ComparisonOp { "==" | "!=" | "<=" | ">=" | "<" | ">" }

 // Query used as a comparison operand, starting at "$" or "@".
 // It reuses `segments`, so any segment kind is accepted here rather than
 // only name/index segments (see the looseness note in the file header).
 singularQuery { 
  (RootIdentifier | CurrentNodeIdentifier) segments
 }

 // Function expression
 // Function name: a lowercase ASCII letter followed by any number of
 // lowercase letters, underscores or digits.
 // NOTE(review): the name is built from single-character tokens in a
 // nonterminal rule, so skipped whitespace between characters appears to be
 // accepted -- confirm that this is acceptable.
 FunctionName { lcAlpha (lcAlpha | "_" | digit)* }
 // One argument of a function call: a literal, a query or nested function
 // call, a logical chain, or a single basic expression.
 // The second alternative carries the `~funcExpr` ambiguity marker because
 // the same FilterQuery / FunctionExpr is also reachable through
 // basicExpr -> TestExpr.
 FunctionArgument {
  literal
  | (FilterQuery | FunctionExpr) ~funcExpr
  | LogicalExpr
  | basicExpr
 }
 // Function call: a name followed by a parenthesized, comma-separated
 // argument list. The list may be empty.
 FunctionExpr { 
  FunctionName "(" (FunctionArgument ("," FunctionArgument)*)? ")" 
 }
 // -------------
 // End Filters
 // -------------

 // One path segment. Both alternatives are tagged as members of the
 // "Segment" node group via @isGroup.
 segment[@isGroup=Segment] { ChildSegment | DescendantSegment }

 // Child segment: either a bracketed selection (`[...]`) or the dot
 // shorthand, i.e. "." followed by a wildcard or a bare member name.
 ChildSegment { 
  BracketedSelection |
  ("."
   (WildcardSelector | MemberNameShorthand)
  )
 }

 // "[" ... "]" holding one or more comma-separated selectors.
 BracketedSelection { "[" selector ("," selector)* "]"  }

 // Bare member name used after "." or "..": one name-start character followed
 // by any number of name-start characters or digits.
 // NOTE(review): the name is built from single-character tokens in a
 // nonterminal rule, so skipped whitespace between characters (e.g.
 // `$.foo bar`) appears to be accepted as one name -- confirm this is intended.
 MemberNameShorthand { nameFirst (nameFirst | digit)* }

 // Descendant segment: ".." followed by a bracketed selection, a wildcard,
 // or a bare member name.
 DescendantSegment { 
  ".." (
    BracketedSelection |
    WildcardSelector |
    MemberNameShorthand
  ) 
 }

 @tokens {
  // Unsigned number: integer part, optional fraction, optional exponent.
  // A fraction needs at least one digit after "." and the exponent is a plain
  // integer, following the `frac` / `exp` productions of RFC 9535; inputs such
  // as "1." or "1e2.5" are therefore not a single number token. The optional
  // leading "-" is handled by the `literal` rule, not here.
  number { digit+ ("." digit+)? (exp digit+)? }

  // Lowercase ASCII letter; building block of FunctionName.
  lcAlpha { $[a-z] }

  // A character allowed in a member-name shorthand: an ASCII letter, "_", or
  // any code point from U+0080 upwards outside the surrogate range.
  nameFirst {
    lcAlpha |
    $[A-Z] |
    "_" |
    $[\u{80}-\u{d7ff}] |
    // Skip surrogate codepoints
    $[\u{e000}-\u{10ffff}]
  }

  // Double- or single-quoted string. Between the quotes any character other
  // than the closing quote, a newline or a backslash is allowed; a backslash
  // escapes whatever single character follows it (`_` matches any character),
  // which is looser than the escape set in the standard.
  stringLiteral { "\"" (!["\n\\] | "\\" _)* "\"" | "'" (!['\n\\] | "\\" _)* "'" }

  // "$": the root of the queried document.
  RootIdentifier { "$" }

  // "*": selects every child.
  WildcardSelector { "*" }

  // Keyword literals.
  true { "true" }
  false { "false" }
  null { "null" }

  // Exponent marker with an optional sign; its digits are matched in `number`.
  exp  { $[eE] $[+\-]? }

  // @digit is Lezer's built-in character set for the ASCII digits 0-9.
  digit { @digit }

  // Whitespace characters; referenced by the @skip rule.
  whitespace { $[ \n\r\t] }
 }

 // Whitespace tokens are skipped wherever they occur between the tokens of
 // the nonterminal rules in this grammar.
 @skip { whitespace }
	//
	// RFC 9535 JSONPath Grammar for Lezer
	//
	// Parses all selectors defined in https://github.com/jsonpath-standard/jsonpath-compliance-test-suite
	// Note however it is a bit looser than the grammar defined in the standard when it comes
	// to allowed escape characters and certain nested path segments.
	//

	// Entry point of the grammar: a document is exactly one JSONPath query.
	// Capitalized rule names (Query, JsonPathQuery) produce nodes in the tree.
	@top Query { JsonPathQuery }

	// A full query: the root identifier "$" followed by its segments.
	JsonPathQuery { RootIdentifier segments }

	// Zero or more segments. The lowercase name keeps this helper rule out of
	// the syntax tree.
	segments { segment* }

	// One selector inside a bracketed selection. The @isGroup=Selector prop tags
	// the node of every alternative as a member of the "Selector" node group.
	// The choice operator is a plain "|"; a backslash-escaped pipe is not valid
	// Lezer syntax.
	// NOTE(review): IndexSelector is assembled from separate `digit` tokens in a
	// nonterminal rule, so skipped whitespace between digits (e.g. `[1 2]`) looks
	// like it is accepted as a single index -- confirm that this is acceptable.
	selector[@isGroup=Selector] {
	  NameSelector { stringLiteral }
	  | WildcardSelector
	  | SliceSelector
	  | IndexSelector { "-"? digit+ }
	  | FilterSelector
	}

	// Array slice `start:end:step`. The first ":" is mandatory; Start, End, the
	// second ":" and Step are all optional. Each bound is an optionally negative
	// run of digits and gets its own node (Start / End / Step).
	SliceSelector {
	Start { "-"? digit+ }?
	":" End { "-"? digit+ }?
	(":" Step { "-"? digit+ }?)?
	}

	// A literal value usable in comparisons and function arguments: an
	// optionally negated number, a quoted string, or one of the keywords
	// true / false / null. Lowercase rule name: no node of its own in the tree.
	// Alternatives are separated by a plain "|" (an escaped pipe is not valid
	// Lezer syntax).
	literal {
	  ("-"? number)
	  | stringLiteral
	  | true
	  | false
	  | null
	}

	// Logical negation prefix, used by ParenExpr and TestExpr.
	logicalNotOp { "!" }

	// "@": starts a query relative to the node currently being filtered.
	CurrentNodeIdentifier { "@" }

	// -------------
	// Filters
	// -------------
	// Filter selector: "?" followed by either a chain of expressions or a single
	// basic expression.
	FilterSelector { "?" (LogicalExpr | basicExpr) }
	// Two or more basic expressions joined by "&&" or "||". The chain is flat:
	// the rule encodes no precedence between "&&" and "||".
	LogicalExpr { basicExpr (("&&" | "||") basicExpr)+ }
	// A single operand of a logical expression.
	basicExpr { ParenExpr | ComparisonExpr | TestExpr }

	// Parenthesized expression, optionally negated with a leading "!". The
	// parentheses may wrap a whole logical chain or a single basic expression.
	ParenExpr { logicalNotOp? "(" (LogicalExpr | basicExpr) ")" }

	// Test expression: an optionally negated query or function call used as a
	// condition on its own (no comparison operator).
	// `~funcExpr` is a Lezer ambiguity marker. The same marker appears in
	// FunctionArgument, where a FilterQuery / FunctionExpr can be reached both
	// directly and through basicExpr -> TestExpr; presumably the marker exists
	// to let the parser tolerate that overlap.
	TestExpr { logicalNotOp? (FilterQuery | FunctionExpr) ~funcExpr }
	// A query inside a filter: relative to the current node ("@" + segments) or
	// absolute (a full JsonPathQuery starting at "$").
	FilterQuery {
	  (CurrentNodeIdentifier segments) // relQuery
	  | JsonPathQuery
	}

	// Comparison expression: exactly two comparables around one operator.
	ComparisonExpr { Comparable ComparisonOp Comparable }
	// Either side of a comparison: a literal, a query, or a function call.
	Comparable { literal | singularQuery | FunctionExpr }
	// The six comparison operators.
	ComparisonOp { "==" | "!=" | "<=" | ">=" | "<" | ">" }

	// Query used as a comparison operand, starting at "$" or "@".
	// It reuses `segments`, so any segment kind is accepted here rather than
	// only name/index segments (see the looseness note in the file header).
	singularQuery {
	  (RootIdentifier | CurrentNodeIdentifier) segments
	}

	// Function expression
	// Function name: a lowercase ASCII letter followed by any number of
	// lowercase letters, underscores or digits.
	// NOTE(review): the name is built from single-character tokens in a
	// nonterminal rule, so skipped whitespace between characters appears to be
	// accepted -- confirm that this is acceptable.
	FunctionName { lcAlpha (lcAlpha | "_" | digit)* }
	// One argument of a function call: a literal, a query or nested function
	// call, a logical chain, or a single basic expression.
	// The second alternative carries the `~funcExpr` ambiguity marker because
	// the same FilterQuery / FunctionExpr is also reachable through
	// basicExpr -> TestExpr.
	FunctionArgument {
	  literal
	  | (FilterQuery | FunctionExpr) ~funcExpr
	  | LogicalExpr
	  | basicExpr
	}
	// Function call: a name followed by a parenthesized, comma-separated
	// argument list. The list may be empty.
	FunctionExpr {
	FunctionName "(" (FunctionArgument ("," FunctionArgument)*)? ")"
	}
	// -------------
	// End Filters
	// -------------

	// One path segment. Both alternatives are tagged as members of the
	// "Segment" node group via @isGroup.
	segment[@isGroup=Segment] { ChildSegment | DescendantSegment }

	// Child segment: either a bracketed selection (`[...]`) or the dot
	// shorthand, i.e. "." followed by a wildcard or a bare member name.
	ChildSegment {
	  BracketedSelection |
	  ("."
	    (WildcardSelector | MemberNameShorthand)
	  )
	}

	// "[" ... "]" holding one or more comma-separated selectors.
	BracketedSelection { "[" selector ("," selector)* "]" }

	// Bare member name used after "." or "..": one name-start character followed
	// by any number of name-start characters or digits.
	// NOTE(review): the name is built from single-character tokens in a
	// nonterminal rule, so skipped whitespace between characters (e.g.
	// `$.foo bar`) appears to be accepted as one name -- confirm this is intended.
	MemberNameShorthand { nameFirst (nameFirst | digit)* }

	// Descendant segment: ".." followed by a bracketed selection, a wildcard,
	// or a bare member name.
	DescendantSegment {
	  ".." (
	    BracketedSelection |
	    WildcardSelector |
	    MemberNameShorthand
	  )
	}

	@tokens {
	  // Unsigned number: integer part, optional fraction, optional exponent.
	  // A fraction needs at least one digit after "." (any number of digits is
	  // allowed) and the exponent is a plain integer, following the `frac` /
	  // `exp` productions of RFC 9535. The optional leading "-" is handled by
	  // the `literal` rule, not here.
	  number { digit+ ("." digit+)? (exp digit+)? }

	  // Lowercase ASCII letter; building block of FunctionName.
	  lcAlpha { $[a-z] }

	  // A character allowed in a member-name shorthand: an ASCII letter, "_", or
	  // any code point from U+0080 upwards outside the surrogate range.
	  nameFirst {
	    lcAlpha |
	    $[A-Z] |
	    "_" |
	    $[\u{80}-\u{d7ff}] |
	    // Skip surrogate codepoints
	    $[\u{e000}-\u{10ffff}]
	  }

	  // Double- or single-quoted string. Between the quotes any character other
	  // than the closing quote, a newline or a backslash is allowed; a backslash
	  // escapes whatever single character follows it (`_` matches any
	  // character), which is looser than the escape set in the standard.
	  stringLiteral { "\"" (!["\n\\] | "\\" _)* "\"" | "'" (!['\n\\] | "\\" _)* "'" }

	  // "$": the root of the queried document.
	  RootIdentifier { "$" }

	  // "*": selects every child.
	  WildcardSelector { "*" }

	  // Keyword literals.
	  true { "true" }
	  false { "false" }
	  null { "null" }

	  // Exponent marker with an optional sign; its digits are matched in `number`.
	  exp { $[eE] $[+\-]? }

	  // @digit is Lezer's built-in character set for the ASCII digits 0-9.
	  digit { @digit }

	  // Whitespace characters; referenced by the @skip rule.
	  whitespace { $[ \n\r\t] }
	}

	// Whitespace tokens are skipped wherever they occur between the tokens of
	// the nonterminal rules in this grammar.
	@skip { whitespace }