Skip to content

Instantly share code, notes, and snippets.

@swatson555
Created June 22, 2022 13:13
Show Gist options
  • Save swatson555/0d2fca887b70aba9de66b1eaf35c571e to your computer and use it in GitHub Desktop.
Save swatson555/0d2fca887b70aba9de66b1eaf35c571e to your computer and use it in GitHub Desktop.
nanopass compiler for r0 language
#!/usr/bin/env scheme --script
(import (nanopass))
(define unique-var
(let ()
(define count 0)
(lambda (name)
(let ([c count])
(set! count (+ count 1))
(string->symbol
(string-append (symbol->string name) "." (number->string c)))))))
;;;
;;; The r0 language from the book Essentials of Compiliation.
;;;
(define program-info? list?)
(define-language r0
(terminals
(program-info (info))
(integer (n)))
(Program (p)
(program info e))
(Expr (e)
n
(- e)
(+ e0 e1)
(read)))
;; (define-parser parse r0)
;; or ...
(define-pass parse : * (p) -> r0 ()
(Program : * (p) -> Program ()
(cond
[(eq? 'program (car p)) `(program ,(cadr p) ,(Expr (caddr p)))]))
(Expr : * (e) -> Expr ()
(cond
[(integer? e) e]
[(eq? 'read (car e)) `(read)]
[(eq? '+ (car e)) `(+ ,(Expr (cadr e)) ,(Expr (caddr e)))]
[(eq? '- (car e)) `(- ,(Expr (cadr e)))]))
(Program p))
;;;
;;; Extends the r0 language with variables and let exprssions.
;;;
(define variable? symbol?)
(define-language r0-let
(extends r0)
(terminals
(+ (variable (x))))
(Expr (e body)
(+ x)
(+ (let ((x e)) body))))
;;;
;;; Remove complex operations. The code from this pass is based on
;;; the same pass from Keep's scheme to C nanopass compiler.
;;;
;;; Also called flatten. Takes an application exprsssion and makes
;;; all arguments atomic and without structure.
;;;
;;; So that, (+ (+ 1 2) 2) -> (let ((tmp (+ 1 2))) (+ tmp 2))
;;;
(define-pass remove-complex : r0 (p) -> r0-let ()
(definitions
(with-output-language (r0-let Expr)
(define build-let
(lambda (x* e* body)
(if (null? x*)
body
`(let ([,(car x*) ,(Expr (car e*))])
,(build-let (cdr x*) (cdr e*) body)))))
(define simplify*
(lambda (e* f)
(let loop ([e* e*] [t* '()] [te* '()] [re* '()])
(if (null? e*)
(build-let t* te* (f (reverse re*)))
(let ([e (car e*)])
(nanopass-case (r0-let Expr) e
[,n (loop (cdr e*) t* te* (cons n re*))]
[else (let ([t (unique-var 'tmp)])
(loop (cdr e*) (cons t t*)
(cons e te*) (cons t re*)))]))))))))
(Program : Program (p) -> Program ()
[(program ,info ,e) `(program ,info ,(Expr e))])
(Expr : Expr (e) -> Expr ()
[,n n]
[(read) `(read)]
[(+ ,e0 ,e1)
(simplify* (list e0 e1)
(lambda (e*) `(+ ,(car e*) ,(cadr e*))))]
[(- ,e0)
(simplify* (list e0)
(lambda (e*) `(- ,(car e*))))])
(Program p))
;;;
;;; The c0 language from the book Essentials of Compilation.
;;;
(define-language c0
(terminals
(integer (n))
(variable (x))
(program-info (info)))
(Program (p)
(program info t))
(Arg (a) n x)
(Expr (e)
a
(read)
(- a)
(+ a0 a1))
(Seq (se)
(seq s t))
(Stmt (s)
(assign x e))
(Tail (t)
se
(return e)))
;;;
;;; Make control paths explicit.
;;;
(define-pass explicate-control : r0-let (p) -> c0 ()
(definitions
(with-output-language (c0 Seq)
(define (explicate-assign x e t)
(nanopass-case (r0-let Expr) e
[(let ((,x0 ,e0)) ,body)
(explicate-assign x0 e0 (explicate-assign x body t))]
[else
`(seq (assign ,x ,(Expr-let e)) ,t)]))))
(Program : Program (p) -> Program ()
[(program ,info ,e) `(program ,info ,(Expr e))])
(Expr-let : Expr (e) -> Expr ()
[,n n]
[(read) `(read)]
[(- ,e0) `(- ,e0)]
[(+ ,e0 ,e1) `(+ ,e0 ,e1)])
(Expr : Expr (e) -> Tail ()
[,n
`(return ,n)]
[(read)
`(return (read))]
[(- ,e0)
`(return (- ,e0))]
[(+ ,e0 ,e1)
`(return (+ ,e0 ,e1))]
[(let ((,x ,e)) ,body)
(explicate-assign x e (Expr body))])
(Program p))
;;;
;;; aarch64 language.
;;;
;;; A subset of the aarch64 language.
;;;
;;; note! deref form is in this langauge
;;; even tho it probably shouldn't be.
;;;
(define register?
(lambda (x)
(memq x '(x0 x1 x2 x3 x4
x5 x6 x7 x8 x9
x10 x11 x12 x13 x14
x15 x16 x17 x18 x19
x20 x21 x22 x23 x24
x25 x26 x27 x28 x29
x30 x31 sp xzr))))
(define-language aarch64
(terminals
(integer (n))
(register (r))
(program-info (info)))
(Program (p)
(program info instrs))
(Instrs (instrs) (instr ...))
(Arg (a) (int n) (reg r) (deref r n))
(Instr (instr)
(mov a0 a1)
(str a0 a1)
(ldr a0 a1)
(add a0 a1 a2)
(sub a0 a1 a2)))
;;;
;;; aarch64 variable language.
;;;
;;; The aarch64 langauge with locations as
;;; variables instead of registers as locations.
;;;
(define-language aarch64-var
(extends aarch64)
(terminals
(+ (variable (x))))
(Arg (a)
(- (deref r n))
(+ (var x))))
;;;
;;; Select instructions.
;;;
(define-pass select-instructions : c0 (p) -> aarch64-var ()
(definitions
(with-output-language (aarch64-var Instrs)
(define append
(lambda (instrs instrs*)
(nanopass-case (aarch64-var Instrs) instrs
[(,instr ...)
(define left-instrs instr)
(nanopass-case (aarch64-var Instrs) instrs*
[(,instr ...)
(define right-instrs instr)
`(,left-instrs ... ,right-instrs ...)])]))))
(with-output-language (aarch64-var Arg)
(define select-argument
(lambda (arg)
(nanopass-case (c0 Arg) arg
[,n `(int ,n)]
[,x `(var ,x)])))))
(Program : Program (p) -> Program ()
[(program ,info ,t) `(program ,info ,(select t))])
(select : Tail (t) -> Instrs
[(return ,a)
`((mov (reg x0) ,(select-argument a)))]
[(return (- ,a))
`((sub (reg x0) (int 0) ,(select-argument a)))]
[(return (+ ,a0 ,a1))
`((mov (reg x0) ,(select-argument a0))
(add (reg x0) (reg x0) ,(select-argument a1)))]
[(seq (assign ,x ,a) ,t)
(append `((mov ,(select-argument x) ,(select-argument a)))
(select t))]
[(seq (assign ,x (- ,a)) ,t)
(append `((sub ,(select-argument x) (int 0) ,(select-argument a)))
(select t))]
[(seq (assign ,x (+ ,a0 ,a1)) ,t)
(append `((add ,(select-argument x) ,(select-argument a0) ,(select-argument a1)))
(select t))])
(Program p))
;;;
;;; Assign homes to variable locations.
;;;
;;; Unfortunatly there are a lot of combinations
;;; since aarch64 uses 3 argument positions for
;;; arithmetic.
;;;
(define-pass assign-homes : aarch64-var (p) -> aarch64 ()
(definitions
(define allocated 0)
(define variables '())
(define location
(lambda (var)
(define stack-location (assq var variables))
(if stack-location
(cdr stack-location)
(begin
(set! variables (cons (cons var (* allocated 8)) variables))
(set! allocated (+ allocated 1))
(location var)))))
(with-output-language (aarch64 Instr)
(define replace
(lambda (instr)
(nanopass-case (aarch64-var Instr) instr
[(mov (reg ,r) (var ,x))
`(mov (reg ,r) (deref sp ,(location x)))]
[(mov (var ,x) (reg ,r))
`(mov (deref sp ,(location x)) (reg ,r))]
[(mov (var ,x0) (var ,x1))
`(mov (deref sp ,(location x0)) (deref sp ,(location x1)))]
[(add (var ,x) (int ,n0) (int ,n1))
`(add (deref sp ,(location x)) (int ,n0) (int ,n1))]
[(add (var ,x0) (var ,x1) (int ,n))
`(add (deref sp ,(location x0))
(deref sp ,(location x1))
(int ,n))]
[(add (var ,x0) (int ,n) (var ,x1))
`(add (deref sp ,(location x0))
(int ,n)
(deref sp ,(location x1)))]
[(add (reg ,r0) (reg ,r1) (var ,x))
`(add (reg ,r0)
(reg ,r1)
(deref sp ,(location x)))]
[(add (var ,x0) (var ,x1) (var ,x2))
`(add (deref sp ,(location x0))
(deref sp ,(location x1))
(deref sp ,(location x2)))]
[(add (reg ,r0) (reg ,r1) (int ,n))
`(add (reg ,r0)
(reg ,r1)
(int ,n))]
[(sub (var ,x0) (int ,n0) (int ,n1))
`(sub (deref sp ,(location x0))
(int ,n0)
(int ,n1))]
[(sub (var ,x0) (int ,n) (var ,x1))
`(sub (deref sp ,(location x0))
(int ,n)
(deref sp ,(location x1)))]
[(sub (reg ,r) (int ,n) (var ,x))
`(sub (reg ,r)
(int ,n)
(deref sp ,(location x)))]
[else
(Instr instr)])))))
(Program : Program (p) -> Program ()
[(program ,info ,instrs)
(define aarch64-instrs (assign instrs))
(define aarch64-info (cons (cons 'stack-size (* allocated 8)) info))
`(program ,aarch64-info ,aarch64-instrs)])
(Instr : Instr (instr) -> Instr ()
[(mov ,[a0] ,[a1]) `(mov ,a0 ,a1)]
[(str ,[a0] ,[a1]) `(str ,a0 ,a1)]
[(ldr ,[a0] ,[a1]) `(ldr ,a0 ,a1)]
[(add ,[a0] ,[a1] ,[a2]) `(add ,a0 ,a1 ,a2)]
[(sub ,[a0] ,[a1] ,[a2]) `(sub ,a0 ,a1 ,a2)])
(assign : Instrs (instrs) -> Instrs ()
[(,instr ...)
`(,(map replace instr) ...)])
(Program p))
;;;
;;; Patch instructions for assembly.
;;;
(define-pass patch-instructions : aarch64 (p) -> aarch64 ()
(definitions
(with-output-language (aarch64 Instr)
(define replace
(lambda (instrs)
(if (null? instrs)
'()
(nanopass-case (aarch64 Instr) (car instrs)
[(add (deref ,r0 ,n0) (deref ,r1 ,n1) (deref ,r2 ,n2))
(append (list `(ldr (reg x0) (deref ,r1 ,n1))
`(ldr (reg x1) (deref ,r2 ,n2))
`(add (reg x1) (reg x0) (reg x1))
`(str (reg x1) (deref ,r0 ,n0)))
(replace (cdr instrs)))]
[(add (deref ,r ,n) (int ,n0) (int ,n1))
(append (list `(mov (reg x0) (int ,n0))
`(add (reg x1) (reg x0) (int ,n1))
`(str (reg x1) (deref ,r ,n)))
(replace (cdr instrs)))]
[(add (deref ,r0 ,n0) (deref ,r1 ,n1) (int ,n2))
(append (list `(ldr (reg x1) (deref ,r1 ,n1))
`(add (reg x1) (reg x1) (int ,n2))
`(str (reg x1) (deref ,r0 ,n0)))
(replace (cdr instrs)))]
[(add (reg ,r0) (reg ,r1) (deref ,r ,n))
(append (list `(ldr (reg x1) (deref ,r ,n))
`(add (reg ,r0) (reg ,r1) (reg x1)))
(replace (cdr instrs)))]
[(add (deref ,r0 ,n0) (int ,n) (deref ,r1 ,n1))
(append (list `(ldr (reg x1) (deref ,r1 ,n1))
`(add (reg x1) (reg x1) (int ,n))
`(str (reg x1) (deref ,r0 ,n0)))
(replace (cdr instrs)))]
[(sub (deref ,r0 ,n0)
(int ,n1)
(int ,n2))
(append (list `(mov (reg x1) (reg xzr))
`(sub (reg x1) (reg x1) (int ,n2))
`(str (reg x1) (deref ,r0 ,n0)))
(replace (cdr instrs)))]
[(sub (deref ,r0 ,n0)
(int ,n)
(deref ,r1 ,n1))
(append (list `(mov (reg x0) (reg xzr))
`(ldr (reg x1) (deref ,r1 ,n1))
`(sub (reg x1) (reg x0) (reg x1))
`(str (reg x1) (deref ,r0 ,n0)))
(replace (cdr instrs)))]
[(sub (reg ,r)
(int ,n)
(deref ,r0 ,n0))
(append (list `(mov (reg x0) (reg xzr))
`(ldr (reg x1) (deref ,r0 ,n0))
`(sub (reg ,r) (reg x0) (reg x1)))
(replace (cdr instrs)))]
[(sub (reg ,r) (int ,n0) (int ,n1))
(append (list `(mov (reg x1) (reg xzr))
`(sub (reg ,r) (reg x1) (int ,n1)))
(replace (cdr instrs)))]
[(mov (reg ,r0) (deref ,r1 ,n))
(cons `(ldr (reg ,r0) (deref ,r1 ,n))
(replace (cdr instrs)))]
[else
(cons (car instrs) (replace (cdr instrs)))]))))))
(Program : Program (p) -> Program ()
[(program ,info ,instrs) `(program ,info ,(patch instrs))])
(patch : Instrs (instrs) -> Instrs ()
[(,instr ...)
`(,(replace instr) ...)])
(Program p))
;;;
;;; Emit assembly listing.
;;;
(define-pass print-instructions : aarch64 (port p) -> * ()
(definitions
(define emit-prelude
(lambda (size)
(define text
(format
"
.globl _main
.align 2
.text
_main:
sub sp, sp, 16
str x29, [sp, 0]
str x30, [sp, 8]
sub sp, sp, ~a ;; we're not really using the stack pointer properly here.
b start
" size))
(fprintf port text)))
(define emit-conclusion
(lambda (size)
(define text
(format
"
conclusion:
str x0, [sp]
adrp x0, result_string@PAGE
add x0, x0, result_string@PAGEOFF
bl _printf
add sp, sp, ~a
ldr x30, [sp, 8]
ldr x29, [sp, 0]
add sp, sp, 16
mov x0, 0
ret
.data
result_string:
.asciz \"arithmetic value: %ld\\n\"
" size))
(fprintf port text)))
(define emit-instruction
(lambda (instr)
(nanopass-case (aarch64 Instr) instr
[(mov (reg ,r) (int ,n)) (fprintf port " mov ~a, ~a~n" r n)]
[(mov (reg ,r0) (reg ,r1)) (fprintf port " mov ~a, ~a~n" r0 r1)]
[(str (reg ,r) (deref ,r0 ,n)) (fprintf port " str ~a, [~a, ~a]~n" r r0 n)]
[(ldr (reg ,r) (deref ,r0 ,n)) (fprintf port " ldr ~a, [~a, ~a]~n" r r0 n)]
[(add (reg ,r) (int ,n0) (int ,n1)) (fprintf port " add ~a, ~a, ~a~n" r n0 n1)]
[(add (reg ,r0) (reg ,r1) (int ,n)) (fprintf port " add ~a, ~a, ~a~n" r0 r1 n)]
[(add (reg ,r0) (reg ,r1) (reg ,r2)) (fprintf port " add ~a, ~a, ~a~n" r0 r1 r2)]
[(sub (reg ,r0) (reg ,r1) (int ,n0)) (fprintf port " sub ~a, ~a, ~a~n" r0 r1 n0)]
[(sub (reg ,r0) (reg ,r1) (reg ,r2)) (fprintf port " sub ~a, ~a, ~a~n" r0 r1 r2)])))
(define emit-instructions
(lambda (instrs)
(fprintf port "start:~n")
(for-each emit-instruction instrs)
(fprintf port " b conclusion~n"))))
(Program : Program (p) -> * ()
[(program ,info ,instrs)
`(program ,info ,(emit instrs (cdr (assq 'stack-size info))))])
(emit : Instrs (instrs stack-size) -> * ()
[(,instr ...)
;; macintosh uses a 16-byte alignment...
(define size
(if (= (remainder stack-size 16) 0)
stack-size
(+ 8 stack-size)))
(emit-prelude size)
(emit-instructions instr)
(emit-conclusion size)])
(Program p))
;;;;
;;;; Command Line Interface
;;;;
;;;; Either pass the program text to the invocation like,
;;;; $ ./compile "(+ 1 2)"
;;;; or ...
;;;; $ ./compile
;;;; (+ 1 2)
;;;; $
;;;;
(define script-args (command-line))
(define program-text
(if (>= (length script-args) 2)
(read (open-string-input-port (cadr script-args)))
(read)))
(define compiled-text
(patch-instructions
(assign-homes
(select-instructions
(explicate-control
(remove-complex
(parse `(program () ,program-text))))))))
(define-values (to-stdin from-stdout from-stderr process-id)
(open-process-ports "gcc -o r0 -x assembler -"
(buffer-mode block)
(native-transcoder)))
(print-instructions to-stdin compiled-text)
(close-output-port to-stdin)
(unless (port-eof? from-stdout)
(printf (get-string-all from-stdout)))
(unless (port-eof? from-stderr)
(printf (get-string-all from-stderr)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment