Created
June 22, 2022 13:13
-
-
Save swatson555/0d2fca887b70aba9de66b1eaf35c571e to your computer and use it in GitHub Desktop.
nanopass compiler for r0 language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env scheme --script | |
(import (nanopass)) | |
(define unique-var | |
(let () | |
(define count 0) | |
(lambda (name) | |
(let ([c count]) | |
(set! count (+ count 1)) | |
(string->symbol | |
(string-append (symbol->string name) "." (number->string c))))))) | |
;;; | |
;;; The r0 language from the book Essentials of Compiliation. | |
;;; | |
(define program-info? list?) | |
(define-language r0 | |
(terminals | |
(program-info (info)) | |
(integer (n))) | |
(Program (p) | |
(program info e)) | |
(Expr (e) | |
n | |
(- e) | |
(+ e0 e1) | |
(read))) | |
;; (define-parser parse r0) | |
;; or ... | |
(define-pass parse : * (p) -> r0 () | |
(Program : * (p) -> Program () | |
(cond | |
[(eq? 'program (car p)) `(program ,(cadr p) ,(Expr (caddr p)))])) | |
(Expr : * (e) -> Expr () | |
(cond | |
[(integer? e) e] | |
[(eq? 'read (car e)) `(read)] | |
[(eq? '+ (car e)) `(+ ,(Expr (cadr e)) ,(Expr (caddr e)))] | |
[(eq? '- (car e)) `(- ,(Expr (cadr e)))])) | |
(Program p)) | |
;;; | |
;;; Extends the r0 language with variables and let exprssions. | |
;;; | |
(define variable? symbol?) | |
(define-language r0-let | |
(extends r0) | |
(terminals | |
(+ (variable (x)))) | |
(Expr (e body) | |
(+ x) | |
(+ (let ((x e)) body)))) | |
;;; | |
;;; Remove complex operations. The code from this pass is based on | |
;;; the same pass from Keep's scheme to C nanopass compiler. | |
;;; | |
;;; Also called flatten. Takes an application exprsssion and makes | |
;;; all arguments atomic and without structure. | |
;;; | |
;;; So that, (+ (+ 1 2) 2) -> (let ((tmp (+ 1 2))) (+ tmp 2)) | |
;;; | |
(define-pass remove-complex : r0 (p) -> r0-let () | |
(definitions | |
(with-output-language (r0-let Expr) | |
(define build-let | |
(lambda (x* e* body) | |
(if (null? x*) | |
body | |
`(let ([,(car x*) ,(Expr (car e*))]) | |
,(build-let (cdr x*) (cdr e*) body))))) | |
(define simplify* | |
(lambda (e* f) | |
(let loop ([e* e*] [t* '()] [te* '()] [re* '()]) | |
(if (null? e*) | |
(build-let t* te* (f (reverse re*))) | |
(let ([e (car e*)]) | |
(nanopass-case (r0-let Expr) e | |
[,n (loop (cdr e*) t* te* (cons n re*))] | |
[else (let ([t (unique-var 'tmp)]) | |
(loop (cdr e*) (cons t t*) | |
(cons e te*) (cons t re*)))])))))))) | |
(Program : Program (p) -> Program () | |
[(program ,info ,e) `(program ,info ,(Expr e))]) | |
(Expr : Expr (e) -> Expr () | |
[,n n] | |
[(read) `(read)] | |
[(+ ,e0 ,e1) | |
(simplify* (list e0 e1) | |
(lambda (e*) `(+ ,(car e*) ,(cadr e*))))] | |
[(- ,e0) | |
(simplify* (list e0) | |
(lambda (e*) `(- ,(car e*))))]) | |
(Program p)) | |
;;; | |
;;; The c0 language from the book Essentials of Compilation. | |
;;; | |
(define-language c0 | |
(terminals | |
(integer (n)) | |
(variable (x)) | |
(program-info (info))) | |
(Program (p) | |
(program info t)) | |
(Arg (a) n x) | |
(Expr (e) | |
a | |
(read) | |
(- a) | |
(+ a0 a1)) | |
(Seq (se) | |
(seq s t)) | |
(Stmt (s) | |
(assign x e)) | |
(Tail (t) | |
se | |
(return e))) | |
;;; | |
;;; Make control paths explicit. | |
;;; | |
(define-pass explicate-control : r0-let (p) -> c0 () | |
(definitions | |
(with-output-language (c0 Seq) | |
(define (explicate-assign x e t) | |
(nanopass-case (r0-let Expr) e | |
[(let ((,x0 ,e0)) ,body) | |
(explicate-assign x0 e0 (explicate-assign x body t))] | |
[else | |
`(seq (assign ,x ,(Expr-let e)) ,t)])))) | |
(Program : Program (p) -> Program () | |
[(program ,info ,e) `(program ,info ,(Expr e))]) | |
(Expr-let : Expr (e) -> Expr () | |
[,n n] | |
[(read) `(read)] | |
[(- ,e0) `(- ,e0)] | |
[(+ ,e0 ,e1) `(+ ,e0 ,e1)]) | |
(Expr : Expr (e) -> Tail () | |
[,n | |
`(return ,n)] | |
[(read) | |
`(return (read))] | |
[(- ,e0) | |
`(return (- ,e0))] | |
[(+ ,e0 ,e1) | |
`(return (+ ,e0 ,e1))] | |
[(let ((,x ,e)) ,body) | |
(explicate-assign x e (Expr body))]) | |
(Program p)) | |
;;; | |
;;; aarch64 language. | |
;;; | |
;;; A subset of the aarch64 language. | |
;;; | |
;;; note! deref form is in this langauge | |
;;; even tho it probably shouldn't be. | |
;;; | |
(define register? | |
(lambda (x) | |
(memq x '(x0 x1 x2 x3 x4 | |
x5 x6 x7 x8 x9 | |
x10 x11 x12 x13 x14 | |
x15 x16 x17 x18 x19 | |
x20 x21 x22 x23 x24 | |
x25 x26 x27 x28 x29 | |
x30 x31 sp xzr)))) | |
(define-language aarch64 | |
(terminals | |
(integer (n)) | |
(register (r)) | |
(program-info (info))) | |
(Program (p) | |
(program info instrs)) | |
(Instrs (instrs) (instr ...)) | |
(Arg (a) (int n) (reg r) (deref r n)) | |
(Instr (instr) | |
(mov a0 a1) | |
(str a0 a1) | |
(ldr a0 a1) | |
(add a0 a1 a2) | |
(sub a0 a1 a2))) | |
;;; | |
;;; aarch64 variable language. | |
;;; | |
;;; The aarch64 langauge with locations as | |
;;; variables instead of registers as locations. | |
;;; | |
(define-language aarch64-var | |
(extends aarch64) | |
(terminals | |
(+ (variable (x)))) | |
(Arg (a) | |
(- (deref r n)) | |
(+ (var x)))) | |
;;; | |
;;; Select instructions. | |
;;; | |
(define-pass select-instructions : c0 (p) -> aarch64-var () | |
(definitions | |
(with-output-language (aarch64-var Instrs) | |
(define append | |
(lambda (instrs instrs*) | |
(nanopass-case (aarch64-var Instrs) instrs | |
[(,instr ...) | |
(define left-instrs instr) | |
(nanopass-case (aarch64-var Instrs) instrs* | |
[(,instr ...) | |
(define right-instrs instr) | |
`(,left-instrs ... ,right-instrs ...)])])))) | |
(with-output-language (aarch64-var Arg) | |
(define select-argument | |
(lambda (arg) | |
(nanopass-case (c0 Arg) arg | |
[,n `(int ,n)] | |
[,x `(var ,x)]))))) | |
(Program : Program (p) -> Program () | |
[(program ,info ,t) `(program ,info ,(select t))]) | |
(select : Tail (t) -> Instrs | |
[(return ,a) | |
`((mov (reg x0) ,(select-argument a)))] | |
[(return (- ,a)) | |
`((sub (reg x0) (int 0) ,(select-argument a)))] | |
[(return (+ ,a0 ,a1)) | |
`((mov (reg x0) ,(select-argument a0)) | |
(add (reg x0) (reg x0) ,(select-argument a1)))] | |
[(seq (assign ,x ,a) ,t) | |
(append `((mov ,(select-argument x) ,(select-argument a))) | |
(select t))] | |
[(seq (assign ,x (- ,a)) ,t) | |
(append `((sub ,(select-argument x) (int 0) ,(select-argument a))) | |
(select t))] | |
[(seq (assign ,x (+ ,a0 ,a1)) ,t) | |
(append `((add ,(select-argument x) ,(select-argument a0) ,(select-argument a1))) | |
(select t))]) | |
(Program p)) | |
;;; | |
;;; Assign homes to variable locations. | |
;;; | |
;;; Unfortunatly there are a lot of combinations | |
;;; since aarch64 uses 3 argument positions for | |
;;; arithmetic. | |
;;; | |
(define-pass assign-homes : aarch64-var (p) -> aarch64 () | |
(definitions | |
(define allocated 0) | |
(define variables '()) | |
(define location | |
(lambda (var) | |
(define stack-location (assq var variables)) | |
(if stack-location | |
(cdr stack-location) | |
(begin | |
(set! variables (cons (cons var (* allocated 8)) variables)) | |
(set! allocated (+ allocated 1)) | |
(location var))))) | |
(with-output-language (aarch64 Instr) | |
(define replace | |
(lambda (instr) | |
(nanopass-case (aarch64-var Instr) instr | |
[(mov (reg ,r) (var ,x)) | |
`(mov (reg ,r) (deref sp ,(location x)))] | |
[(mov (var ,x) (reg ,r)) | |
`(mov (deref sp ,(location x)) (reg ,r))] | |
[(mov (var ,x0) (var ,x1)) | |
`(mov (deref sp ,(location x0)) (deref sp ,(location x1)))] | |
[(add (var ,x) (int ,n0) (int ,n1)) | |
`(add (deref sp ,(location x)) (int ,n0) (int ,n1))] | |
[(add (var ,x0) (var ,x1) (int ,n)) | |
`(add (deref sp ,(location x0)) | |
(deref sp ,(location x1)) | |
(int ,n))] | |
[(add (var ,x0) (int ,n) (var ,x1)) | |
`(add (deref sp ,(location x0)) | |
(int ,n) | |
(deref sp ,(location x1)))] | |
[(add (reg ,r0) (reg ,r1) (var ,x)) | |
`(add (reg ,r0) | |
(reg ,r1) | |
(deref sp ,(location x)))] | |
[(add (var ,x0) (var ,x1) (var ,x2)) | |
`(add (deref sp ,(location x0)) | |
(deref sp ,(location x1)) | |
(deref sp ,(location x2)))] | |
[(add (reg ,r0) (reg ,r1) (int ,n)) | |
`(add (reg ,r0) | |
(reg ,r1) | |
(int ,n))] | |
[(sub (var ,x0) (int ,n0) (int ,n1)) | |
`(sub (deref sp ,(location x0)) | |
(int ,n0) | |
(int ,n1))] | |
[(sub (var ,x0) (int ,n) (var ,x1)) | |
`(sub (deref sp ,(location x0)) | |
(int ,n) | |
(deref sp ,(location x1)))] | |
[(sub (reg ,r) (int ,n) (var ,x)) | |
`(sub (reg ,r) | |
(int ,n) | |
(deref sp ,(location x)))] | |
[else | |
(Instr instr)]))))) | |
(Program : Program (p) -> Program () | |
[(program ,info ,instrs) | |
(define aarch64-instrs (assign instrs)) | |
(define aarch64-info (cons (cons 'stack-size (* allocated 8)) info)) | |
`(program ,aarch64-info ,aarch64-instrs)]) | |
(Instr : Instr (instr) -> Instr () | |
[(mov ,[a0] ,[a1]) `(mov ,a0 ,a1)] | |
[(str ,[a0] ,[a1]) `(str ,a0 ,a1)] | |
[(ldr ,[a0] ,[a1]) `(ldr ,a0 ,a1)] | |
[(add ,[a0] ,[a1] ,[a2]) `(add ,a0 ,a1 ,a2)] | |
[(sub ,[a0] ,[a1] ,[a2]) `(sub ,a0 ,a1 ,a2)]) | |
(assign : Instrs (instrs) -> Instrs () | |
[(,instr ...) | |
`(,(map replace instr) ...)]) | |
(Program p)) | |
;;; | |
;;; Patch instructions for assembly. | |
;;; | |
(define-pass patch-instructions : aarch64 (p) -> aarch64 () | |
(definitions | |
(with-output-language (aarch64 Instr) | |
(define replace | |
(lambda (instrs) | |
(if (null? instrs) | |
'() | |
(nanopass-case (aarch64 Instr) (car instrs) | |
[(add (deref ,r0 ,n0) (deref ,r1 ,n1) (deref ,r2 ,n2)) | |
(append (list `(ldr (reg x0) (deref ,r1 ,n1)) | |
`(ldr (reg x1) (deref ,r2 ,n2)) | |
`(add (reg x1) (reg x0) (reg x1)) | |
`(str (reg x1) (deref ,r0 ,n0))) | |
(replace (cdr instrs)))] | |
[(add (deref ,r ,n) (int ,n0) (int ,n1)) | |
(append (list `(mov (reg x0) (int ,n0)) | |
`(add (reg x1) (reg x0) (int ,n1)) | |
`(str (reg x1) (deref ,r ,n))) | |
(replace (cdr instrs)))] | |
[(add (deref ,r0 ,n0) (deref ,r1 ,n1) (int ,n2)) | |
(append (list `(ldr (reg x1) (deref ,r1 ,n1)) | |
`(add (reg x1) (reg x1) (int ,n2)) | |
`(str (reg x1) (deref ,r0 ,n0))) | |
(replace (cdr instrs)))] | |
[(add (reg ,r0) (reg ,r1) (deref ,r ,n)) | |
(append (list `(ldr (reg x1) (deref ,r ,n)) | |
`(add (reg ,r0) (reg ,r1) (reg x1))) | |
(replace (cdr instrs)))] | |
[(add (deref ,r0 ,n0) (int ,n) (deref ,r1 ,n1)) | |
(append (list `(ldr (reg x1) (deref ,r1 ,n1)) | |
`(add (reg x1) (reg x1) (int ,n)) | |
`(str (reg x1) (deref ,r0 ,n0))) | |
(replace (cdr instrs)))] | |
[(sub (deref ,r0 ,n0) | |
(int ,n1) | |
(int ,n2)) | |
(append (list `(mov (reg x1) (reg xzr)) | |
`(sub (reg x1) (reg x1) (int ,n2)) | |
`(str (reg x1) (deref ,r0 ,n0))) | |
(replace (cdr instrs)))] | |
[(sub (deref ,r0 ,n0) | |
(int ,n) | |
(deref ,r1 ,n1)) | |
(append (list `(mov (reg x0) (reg xzr)) | |
`(ldr (reg x1) (deref ,r1 ,n1)) | |
`(sub (reg x1) (reg x0) (reg x1)) | |
`(str (reg x1) (deref ,r0 ,n0))) | |
(replace (cdr instrs)))] | |
[(sub (reg ,r) | |
(int ,n) | |
(deref ,r0 ,n0)) | |
(append (list `(mov (reg x0) (reg xzr)) | |
`(ldr (reg x1) (deref ,r0 ,n0)) | |
`(sub (reg ,r) (reg x0) (reg x1))) | |
(replace (cdr instrs)))] | |
[(sub (reg ,r) (int ,n0) (int ,n1)) | |
(append (list `(mov (reg x1) (reg xzr)) | |
`(sub (reg ,r) (reg x1) (int ,n1))) | |
(replace (cdr instrs)))] | |
[(mov (reg ,r0) (deref ,r1 ,n)) | |
(cons `(ldr (reg ,r0) (deref ,r1 ,n)) | |
(replace (cdr instrs)))] | |
[else | |
(cons (car instrs) (replace (cdr instrs)))])))))) | |
(Program : Program (p) -> Program () | |
[(program ,info ,instrs) `(program ,info ,(patch instrs))]) | |
(patch : Instrs (instrs) -> Instrs () | |
[(,instr ...) | |
`(,(replace instr) ...)]) | |
(Program p)) | |
;;; | |
;;; Emit assembly listing. | |
;;; | |
(define-pass print-instructions : aarch64 (port p) -> * () | |
(definitions | |
(define emit-prelude | |
(lambda (size) | |
(define text | |
(format | |
" | |
.globl _main | |
.align 2 | |
.text | |
_main: | |
sub sp, sp, 16 | |
str x29, [sp, 0] | |
str x30, [sp, 8] | |
sub sp, sp, ~a ;; we're not really using the stack pointer properly here. | |
b start | |
" size)) | |
(fprintf port text))) | |
(define emit-conclusion | |
(lambda (size) | |
(define text | |
(format | |
" | |
conclusion: | |
str x0, [sp] | |
adrp x0, result_string@PAGE | |
add x0, x0, result_string@PAGEOFF | |
bl _printf | |
add sp, sp, ~a | |
ldr x30, [sp, 8] | |
ldr x29, [sp, 0] | |
add sp, sp, 16 | |
mov x0, 0 | |
ret | |
.data | |
result_string: | |
.asciz \"arithmetic value: %ld\\n\" | |
" size)) | |
(fprintf port text))) | |
(define emit-instruction | |
(lambda (instr) | |
(nanopass-case (aarch64 Instr) instr | |
[(mov (reg ,r) (int ,n)) (fprintf port " mov ~a, ~a~n" r n)] | |
[(mov (reg ,r0) (reg ,r1)) (fprintf port " mov ~a, ~a~n" r0 r1)] | |
[(str (reg ,r) (deref ,r0 ,n)) (fprintf port " str ~a, [~a, ~a]~n" r r0 n)] | |
[(ldr (reg ,r) (deref ,r0 ,n)) (fprintf port " ldr ~a, [~a, ~a]~n" r r0 n)] | |
[(add (reg ,r) (int ,n0) (int ,n1)) (fprintf port " add ~a, ~a, ~a~n" r n0 n1)] | |
[(add (reg ,r0) (reg ,r1) (int ,n)) (fprintf port " add ~a, ~a, ~a~n" r0 r1 n)] | |
[(add (reg ,r0) (reg ,r1) (reg ,r2)) (fprintf port " add ~a, ~a, ~a~n" r0 r1 r2)] | |
[(sub (reg ,r0) (reg ,r1) (int ,n0)) (fprintf port " sub ~a, ~a, ~a~n" r0 r1 n0)] | |
[(sub (reg ,r0) (reg ,r1) (reg ,r2)) (fprintf port " sub ~a, ~a, ~a~n" r0 r1 r2)]))) | |
(define emit-instructions | |
(lambda (instrs) | |
(fprintf port "start:~n") | |
(for-each emit-instruction instrs) | |
(fprintf port " b conclusion~n")))) | |
(Program : Program (p) -> * () | |
[(program ,info ,instrs) | |
`(program ,info ,(emit instrs (cdr (assq 'stack-size info))))]) | |
(emit : Instrs (instrs stack-size) -> * () | |
[(,instr ...) | |
;; macintosh uses a 16-byte alignment... | |
(define size | |
(if (= (remainder stack-size 16) 0) | |
stack-size | |
(+ 8 stack-size))) | |
(emit-prelude size) | |
(emit-instructions instr) | |
(emit-conclusion size)]) | |
(Program p)) | |
;;;; | |
;;;; Command Line Interface | |
;;;; | |
;;;; Either pass the program text to the invocation like, | |
;;;; $ ./compile "(+ 1 2)" | |
;;;; or ... | |
;;;; $ ./compile | |
;;;; (+ 1 2) | |
;;;; $ | |
;;;; | |
(define script-args (command-line)) | |
(define program-text | |
(if (>= (length script-args) 2) | |
(read (open-string-input-port (cadr script-args))) | |
(read))) | |
(define compiled-text | |
(patch-instructions | |
(assign-homes | |
(select-instructions | |
(explicate-control | |
(remove-complex | |
(parse `(program () ,program-text)))))))) | |
(define-values (to-stdin from-stdout from-stderr process-id) | |
(open-process-ports "gcc -o r0 -x assembler -" | |
(buffer-mode block) | |
(native-transcoder))) | |
(print-instructions to-stdin compiled-text) | |
(close-output-port to-stdin) | |
(unless (port-eof? from-stdout) | |
(printf (get-string-all from-stdout))) | |
(unless (port-eof? from-stderr) | |
(printf (get-string-all from-stderr))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment