Created
September 19, 2025 18:18
-
-
Save kmicinski/67869b6ec7383d04656be07cd8e96892 to your computer and use it in GitHub Desktop.
Parsing left-associative operators in LL(k) grammars
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #lang racket | |
| ;; When we say an operator "associates to the left," | |
| ;; we mean that we interpret a construct like | |
| ;; 10 - 2 - 2 is interpreted as (10 - 2) - 2 rather | |
| ;; than the (incorrect) 10 - (2 - 2). Most typical | |
| ;; arithmetic operators like +, *, /, and - associate | |
| ;; to the left. | |
| ;; Unfortunately, LL parsers do not make this natural | |
| ;; The issue is this. If we try to write a grammar | |
| ;; like the following: | |
| ;; E → E - N | N | |
| ;; N → num | |
| ;; (((20 - 10) - 2) - 1) | |
| ;; E → E - N → E - N - N → E - N - N - N → ... → 20 - 10 - 2 - 1 | |
| ;; This grammar is not LL(k), from the point of view | |
| ;; of programming, the issue is that we would need | |
| ;; left recursion: | |
| ;; (define (parse-E stream) | |
| ;; (match (parse-E stream) <-- INFINITE LOOP | |
| ;; [`(,parsed-E ,rst) ...])) | |
| (define ex '((INT 3) MINUS (INT 5) MINUS (INT 1) MINUS (INT 12) MINUS (INT 20) eof)) | |
| ;; E → num - E | num | |
| (define (parse-E stream) | |
| (match stream | |
| [`((INT ,v) MINUS ,rst ...) | |
| ;; I saw an integer and a -, I need to parse an E | |
| ;; assume parse-E returns a two-element list | |
| ;; `(,ast ,rst) where ast is the returned ast | |
| ;; and rst is the rest of the input | |
| (match (parse-E rst) | |
| [`(,parsed-E ,rst+) | |
| `((- ,v ,parsed-E) ,rst+)])] | |
| [`((INT ,v) ,rst ...) | |
| `(,v ,rst)])) | |
| ;; Basic trick: to recognize that really | |
| ;; we wrote a regular expression which was | |
| ;; E → num (- num)* | |
| ;; and it's not too hard to implement this | |
| ;; using a loop (either a recursive function, etc.) | |
| ;; | |
| ;; Basically this means the result of parsing E | |
| ;; is a list of numbers like '(3 5 1) | |
| ;; | |
| ;; Well then, it's not too hard to write a little | |
| ;; function that walks over this list and emits | |
| ;; an expression of the desired structure. If we | |
| ;; have the values as a list, we can then fix | |
| ;; up the associativity post-hoc. | |
| ;; | |
| ;; '(3 5 1) -> '(- (- 3 5) 1) | |
| (define (fixup lst) | |
| (foldl (lambda (x acc) `(- ,acc ,x)) | |
| (first lst) | |
| (rest lst))) | |
| ;; Goal: recognize G → num (- num)* | |
| ;; returns `(,parsed-G ,rst) | |
| (define (parse-G stream) | |
| (match stream | |
| [`((INT ,n) ,rst ...) | |
| (parse-G-helper rst (list n))])) | |
| ;; recognizes (- num)* | |
| (define (parse-G-helper stream acc) | |
| (match stream | |
| [`(MINUS (INT ,n) ,rst ...) | |
| (parse-G-helper rst (cons n acc))] | |
| ;; we didn't match, now we're done | |
| [_ | |
| ;; our result is a two-element list | |
| ;; `(,parsed-E ,rst) | |
| `(,(fixup (reverse acc)) ,stream)])) | |
| ;; [email protected] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment