Last active
August 15, 2021 20:05
-
-
Save trufae/6ddb4a6abff1000fd61d2e91095cbb52 to your computer and use it in GitHub Desktop.
Assembler proposal for the V compiler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module main | |
| import os | |
| import encoding.hex | |
// AsmLabel is a named position inside an assembled block.
struct AsmLabel {
	// label identifier; assemble() stores it without the trailing `:`
	name string
	// length of the emitted code at the moment the label was declared
	off int
}
// AsmBlock collects the bytes produced by the assembler together with the
// labels and relocations gathered while producing them.
struct AsmBlock {
mut:
	// not read or written by any code visible in this file
	addr u64
	// emitted instruction bytes
	code []byte
	// fixups recorded by assemble_instruction() and applied by patch_relocs()
	relocs []AsmReloc
	// labels declared so far, in source order
	labels []AsmLabel
}
// RelocType tells patch_relocs() how a relocation has to be applied.
enum RelocType {
	// absolute fixup; patch_relocs() only reports it as a TODO
	abs
	// fixup relative to the code position
	rel
}
// AsmReloc describes one spot in the emitted code whose value depends on a
// label and therefore has to be filled in after assembling.
struct AsmReloc {
	// kind of fixup
	typ RelocType
	// index into the block's code where the value is written
	off int
	// number of bytes to write
	siz int
	// label the fixup refers to
	name string
	// constant supplied by the encoder: 2 for `jmp`, 7 for `lea`
	delta int
}
// AsmArg names the operand kinds that check_syntax() knows how to validate.
enum AsmArg {
	// register; must be listed in `cpuregs`
	reg
	// immediate value
	imm
	// immediate value that has to fit in 8 bits
	imm8
	// address operand; accepted without checks
	adr
	// presumably an 8-bit address operand; accepted without checks
	adr8
}
// cpuregs lists the supported 64-bit registers.
// The position of a name in this list is used as the register number in
// instruction encodings (`push` adds it to its base opcode), so the order
// must follow the x86-64 numbering: rax=0, rcx=1, rdx=2, rbx=3, rsp=4,
// rbp=5, rsi=6, rdi=7.
const cpuregs = [
	'rax',
	'rcx',
	'rdx',
	'rbx',
	'rsp',
	'rbp',
	'rsi',
	'rdi',
]
// check_syntax validates one tokenised instruction.
// `code[0]` is the mnemonic and the remaining entries are its operands.
// The call succeeds when there are exactly `nargs` operands and operand `i`
// is acceptable for `argtyp[i]`; otherwise an error describing the first
// problem is returned. `code` must not be empty and `argtyp` must hold at
// least `nargs` entries.
fn check_syntax(code []string, nargs int, argtyp []AsmArg) ? {
	ins := code[0]
	if code.len != nargs + 1 {
		return error('expected $nargs arguments for $ins')
	}
	for i in 0 .. nargs {
		cod := code[i + 1]
		// an empty operand (e.g. `mov rax,`) would make `cod[0]` below panic
		if cod.len == 0 {
			return error('missing operand ${i + 1} for $ins')
		}
		match argtyp[i] {
			.reg {
				if cod !in cpuregs {
					return error('invalid register name for $ins')
				}
			}
			.imm, .imm8 {
				n := cod.int()
				// int() yields 0 for text it cannot parse, so a zero result is
				// only trusted when the text starts with `0`. Text starting
				// with a letter is let through (presumably so that symbol
				// names can be used where an immediate is expected).
				if n == 0 && cod[0] != `0` && !cod[0].is_letter() {
					return error('invalid immediate $cod for $ins')
				}
				// accept both the signed and the unsigned reading of a byte
				if argtyp[i] == .imm8 && (n < -128 || n > 0xff) {
					return error('invalid 8bit immediate $cod for $ins')
				}
			}
			.adr, .adr8 {
				// no checks at assembly time, relocation must happen later
			}
		}
	}
}
// reg_code returns the 3-bit x86-64 register number of a 64-bit register
// name. The table is local so the result does not depend on how any other
// register list in the file is ordered.
fn reg_code(name string) ?int {
	idx := ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi'].index(name)
	if idx < 0 {
		return error('unknown register: $name')
	}
	return idx
}

// assemble_instruction encodes one tokenised instruction and returns its
// bytes. `code[0]` is the mnemonic, the remaining entries are the operands.
// The bytes are NOT appended to `block.code`; the caller does that. For
// instructions that refer to a label (`jmp`, `lea`) a relocation pointing at
// the displacement field is appended to `block.relocs`, positioned relative
// to the current `block.code.len`.
// An error is returned for unknown mnemonics and for invalid operands.
fn (mut block AsmBlock) assemble_instruction(code []string) ?[]byte {
	match code[0] {
		'nop' {
			check_syntax(code, 0, []) ?
			return [byte(0x90)]
		}
		'syscall' {
			check_syntax(code, 0, []) ?
			return [byte(0x0f), 0x05]
		}
		'jmp' { // jmp label
			// the operand is a label, not a number: validate it as an address
			check_syntax(code, 1, [.adr8]) ?
			// only short jump for now: EB rel8, displacement is the 2nd byte
			block.relocs << AsmReloc{
				typ: .rel
				off: block.code.len + 1
				name: code[1]
				siz: 1
				delta: 2
			}
			return [byte(0xeb), 0]
		}
		'int' { // int 0x80
			check_syntax(code, 1, [.imm]) ?
			vector := code[1].int()
			// the vector is a single unsigned byte; refuse to truncate
			if vector < 0 || vector > 0xff {
				return error('interrupt vector out of range: ${code[1]}')
			}
			return [byte(0xcd), byte(vector)]
		}
		'int3' { // int3
			check_syntax(code, 0, []) ?
			return [byte(0xcc)]
		}
		'push' { // push reg|imm
			if code.len == 2 && code[1] in cpuregs {
				reg := reg_code(code[1]) ?
				return [byte(0x50 + reg)]
			}
			check_syntax(code, 1, [.imm]) ?
			// check_syntax lets names through as immediates, but a name that
			// is not a register has no meaning for push
			if code[1][0].is_letter() {
				return error('invalid register name for push')
			}
			val := code[1].int()
			// 6A ib sign-extends its operand, so it only covers -128..127
			if val >= -128 && val <= 127 {
				return [byte(0x6a), byte(val)]
			}
			// 68 id: 32-bit immediate, little-endian
			return [byte(0x68), byt(val, 0), byt(val, 1), byt(val, 2), byt(val, 3)]
		}
		'lea' { // lea reg, label (RIP-relative)
			check_syntax(code, 2, [.reg, .adr]) ?
			reg := reg_code(code[1]) ?
			// the disp32 field follows the REX prefix, the opcode and the
			// ModRM byte, i.e. it starts 3 bytes into the instruction
			block.relocs << AsmReloc{
				typ: .rel
				off: block.code.len + 3
				name: code[2]
				siz: 4
				delta: 7
			}
			// ModRM: mod=00, reg=destination, rm=101 selects [rip + disp32]
			return [byte(0x48), 0x8d, byte(0x05 | (reg << 3)), 0, 0, 0, 0]
		}
		'mov' { // mov reg, imm
			check_syntax(code, 2, [.reg, .imm]) ?
			reg := reg_code(code[1]) ?
			imm := code[2].int()
			// B8+r id writes the 32-bit register, which zero-extends into the
			// 64-bit one; the immediate is stored little-endian
			return [byte(0xb8 + reg), byt(imm, 0), byt(imm, 1), byt(imm, 2), byt(imm, 3)]
		}
		else {
			return error('unknown instruction: ${code[0]}')
		}
	}
}
// byt extracts byte number `s` of `n`; byte 0 is the least significant one.
[inline]
fn byt(n int, s int) byte {
	shift := s * 8
	return byte((n >> shift) & 0xff)
}
// resolve returns the offset of the first label called `name`.
// When no such label exists it returns 0, which callers cannot tell apart
// from a label declared at offset 0.
fn (block AsmBlock) resolve(name string) int {
	for i := 0; i < block.labels.len; i++ {
		if block.labels[i].name == name {
			return block.labels[i].off
		}
	}
	return 0
}
// patch_relocs fills in the displacement of every recorded relocation.
//
// For a relative relocation the value written is the distance from the end
// of the displacement field to the label: `label.off - (rel.off + rel.siz)`.
// This assumes the displacement is the last field of its instruction, which
// holds for the two forms that emit relocations in this file (`jmp rel8`
// and `lea reg, [rip + disp32]`). Multi-byte values are stored
// little-endian, as x86 requires.
//
// Problems are reported on stderr and the affected relocation is skipped,
// leaving its bytes untouched: absolute relocations (not implemented),
// unsupported sizes, unknown labels, fields lying outside the code and
// short jumps whose target is out of range.
fn (mut block AsmBlock) patch_relocs() {
	for rel in block.relocs {
		if rel.typ == .abs {
			if rel.siz == 1 {
				eprintln('TODO: abs size')
			} else {
				eprintln('TODO: unsupported rel $rel')
			}
			continue
		}
		if rel.siz != 1 && rel.siz != 4 {
			eprintln('TODO: unsupported rel $rel')
			continue
		}
		// look the label up here: resolve() cannot report a missing label
		mut target := -1
		for lab in block.labels {
			if lab.name == rel.name {
				target = lab.off
				break
			}
		}
		if target < 0 {
			eprintln('unresolved label: ${rel.name}')
			continue
		}
		if rel.off < 0 || rel.off + rel.siz > block.code.len {
			eprintln('relocation for ${rel.name} lies outside the code')
			continue
		}
		// relative to the address of the next instruction
		n := target - (rel.off + rel.siz)
		if rel.siz == 1 && (n < -128 || n > 127) {
			eprintln('label ${rel.name} is out of range for a short jump')
			continue
		}
		for i in 0 .. rel.siz {
			block.code[rel.off + i] = byt(n, i)
		}
	}
}
// to_cstring is a placeholder: it does not look at `block` and always
// returns an empty string.
fn (block AsmBlock) to_cstring() string {
	return ''
}
// trim_comment cuts `res` at the first occurrence of `token` and strips the
// whitespace around what is left. When `token` does not occur, `res` is
// returned exactly as it was passed in.
fn trim_comment(res string, token string) string {
	if cut := res.index(token) {
		return res[..cut].trim_space()
	}
	return res
}
// assemble turns assembly source text into an AsmBlock.
//
// The text is processed line by line:
// - everything after `//`, `;` or `#` is a comment and is dropped;
// - a line may start with `name:`, which declares a label at the current
//   end of the code; an instruction may follow on the same line;
// - an instruction is a mnemonic, separated by spaces or tabs from a
//   comma-separated operand list.
//
// Relocations are only recorded, not applied; call patch_relocs() on the
// result. The first instruction that fails to assemble aborts the whole
// run and its error is returned.
fn assemble(code string) ?AsmBlock {
	mut block := AsmBlock{}
	for curline in code.split_into_lines() {
		mut line := curline
		for token in ['//', ';', '#'] {
			line = trim_comment(line, token)
		}
		// tabs separate tokens just like spaces do
		line = line.replace('\t', ' ').trim_space()
		// peel off a leading label so that an instruction sharing the line
		// with it is still assembled
		head := line.all_before(' ')
		if head.ends_with(':') {
			block.labels << AsmLabel{
				name: head[..head.len - 1]
				off: block.code.len
			}
			line = line[head.len..].trim_space()
		}
		if line.len == 0 {
			continue
		}
		// `mnemonic a, b` -> `mnemonic,a,b` so one split yields every token
		mut words := line.replace_once(' ', ',').split(',')
		for i in 0 .. words.len {
			words[i] = words[i].trim_space()
		}
		if words.len == 0 || words[0].len == 0 {
			continue
		}
		res := block.assemble_instruction(words) ?
		block.code << res
	}
	return block
}
// main assembles the instruction given on the command line (`nop` when no
// arguments are passed) and prints the resulting bytes, then assembles a
// built-in sample program and prints it hex-encoded.
fn main() {
	cstr := if os.args.len < 2 { 'nop' } else { os.args[1..].join(' ') }
	mut block := assemble(cstr) or { panic(err) }
	// relocations have to be applied here as well, otherwise `jmp` and `lea`
	// are printed with an all-zero displacement
	block.patch_relocs()
	println(block.code)
	cs := '
mov rax, 33
syscall
label:
int 0x80
nop
jmp label
mov rax, 21
'
	mut bb := assemble(cs) or { panic(err) }
	bb.patch_relocs()
	println(hex.encode(bb.code))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment