Skip to content

Instantly share code, notes, and snippets.

@trufae
Last active August 15, 2021 20:05
Show Gist options
  • Select an option

  • Save trufae/6ddb4a6abff1000fd61d2e91095cbb52 to your computer and use it in GitHub Desktop.

Select an option

Save trufae/6ddb4a6abff1000fd61d2e91095cbb52 to your computer and use it in GitHub Desktop.
Assembler proposal for the V compiler
module main
import os
import encoding.hex
// AsmLabel records a named position inside an AsmBlock.
struct AsmLabel {
	// label text without the trailing `:`
	name string
	// length of the block's code at the moment the label was declared
	off int
}
// AsmBlock is the assembler's working state: the machine code emitted so far
// together with the labels declared and the relocations still to be patched.
struct AsmBlock {
mut:
	// base address of the block; not read or written by the code in this file
	addr u64
	// machine code bytes emitted so far
	code []byte
	// fields inside `code` that patch_relocs() fills in later
	relocs []AsmReloc
	// labels declared in the source, with their offsets into `code`
	labels []AsmLabel
}
// RelocType selects how a relocation is resolved.
enum RelocType {
	// absolute reference; patch_relocs() only prints a TODO for these
	abs
	// reference relative to the code position, as used by `jmp` and `lea`
	rel
}
// AsmReloc describes one field inside AsmBlock.code that can only be filled in
// once label offsets are known.
struct AsmReloc {
	// kind of relocation (absolute or relative)
	typ RelocType
	// index into AsmBlock.code of the first byte to patch
	off int
	// width of the patched field in bytes (1 and 4 are handled)
	siz int
	// name of the label the field refers to
	name string
	// set by the assembler to the byte length of the owning instruction
	// (2 for the short `jmp`, 7 for `lea`)
	delta int
}
// AsmArg names the operand kinds that check_syntax() can validate.
enum AsmArg {
	// register name; must be listed in `cpuregs`
	reg
	// immediate value
	imm
	// immediate value that must fit in 8 bits
	imm8
	// address / label operand; not validated at assembly time
	adr
	// short (8-bit) address / label operand; not validated at assembly time
	adr8
}
// cpuregs lists the supported 64-bit general purpose registers.
// The position of a name in this list is used as its register number when
// encoding instructions (e.g. `0x50 + index` for `push`), so the order MUST
// follow the x86 hardware encoding: rax=0, rcx=1, rdx=2, rbx=3, rsp=4, rbp=5,
// rsi=6, rdi=7.
const cpuregs = [
	'rax',
	'rcx',
	'rdx',
	'rbx',
	'rsp',
	'rbp',
	'rsi',
	'rdi',
]
// check_syntax validates the operands of a tokenized instruction.
//
// `code` is the mnemonic followed by its operands, `nargs` the number of
// operands the instruction takes and `argtyp` the expected kind of each one.
// Returns an error when the operand count is wrong, an operand is empty, or an
// operand does not match its expected kind.
//
// Immediates are checked loosely: a token is rejected only when it does not
// start with `0`, converts to 0 and does not start with a letter, so
// letter-leading tokens are accepted (presumably to allow symbolic names).
fn check_syntax(code []string, nargs int, argtyp []AsmArg) ? {
	if code.len == 0 {
		return error('missing instruction')
	}
	ins := code[0]
	if code.len != nargs + 1 {
		return error('expected $nargs for $ins')
	}
	if argtyp.len < nargs {
		// guards the argtyp[i] lookups below against a caller mistake
		return error('missing operand types for $ins')
	}
	for i := 0; i < nargs; i++ {
		cod := code[i + 1]
		if cod.len == 0 {
			// e.g. `mov rax,` - cod[0] below would otherwise panic
			return error('empty operand for $ins')
		}
		match argtyp[i] {
			.reg {
				if cod !in cpuregs {
					return error('invalid register name for $ins')
				}
			}
			.imm {
				if cod[0] != `0` && cod.int() == 0 && !cod[0].is_letter() {
					return error('invalid immediate $cod for $ins')
				}
			}
			.imm8 {
				n := cod.int()
				if cod[0] != `0` && n == 0 && !cod[0].is_letter() {
					return error('invalid immediate $cod for $ins')
				}
				// anything with bits set above the low byte does not fit
				if (n >> 8) > 0 {
					return error('invalid 8bit immediate $cod for $ins')
				}
			}
			.adr, .adr8 {
				// no checks at assembly time, relocation must happen later
			}
		}
	}
}
// x86_reg_code maps a 64-bit register name to its 3-bit x86 hardware encoding.
// Returns -1 when the name is not one of the eight supported registers.
fn x86_reg_code(name string) int {
	return match name {
		'rax' { 0 }
		'rcx' { 1 }
		'rdx' { 2 }
		'rbx' { 3 }
		'rsp' { 4 }
		'rbp' { 5 }
		'rsi' { 6 }
		'rdi' { 7 }
		else { -1 }
	}
}

// assemble_instruction encodes one tokenized instruction (`code[0]` is the
// mnemonic, the remaining entries are its operands) and returns its bytes.
//
// The bytes are NOT appended to `block.code`; the caller does that. For
// instructions that reference a label (`jmp`, `lea`) a relocation is queued in
// `block.relocs`, with `off` computed from the current `block.code.len`, so the
// caller must append the returned bytes before assembling the next instruction.
//
// Returns an error for unknown mnemonics or operands rejected by check_syntax.
fn (mut block AsmBlock) assemble_instruction(code []string) ?[]byte {
	if code.len == 0 {
		return error('empty instruction')
	}
	match code[0] {
		'nop' {
			check_syntax(code, 0, []) ?
			return [byte(0x90)]
		}
		'syscall' {
			check_syntax(code, 0, []) ?
			return [byte(0x0f), 0x05]
		}
		'jmp' { // jmp label
			// the operand is a label, not a number: validate it as a short address
			check_syntax(code, 1, [.adr8]) ?
			// only short jump for now: EB rel8, the displacement is the 2nd byte
			block.relocs << AsmReloc{
				typ: .rel
				off: block.code.len + 1
				name: code[1]
				siz: 1
				delta: 2
			}
			return [byte(0xeb), 0]
		}
		'int' { // int 0x80
			check_syntax(code, 1, [.imm]) ?
			imm := byte(code[1].int())
			return [byte(0xcd), imm]
		}
		'int3' { // int3
			check_syntax(code, 0, []) ?
			return [byte(0xcc)]
		}
		'push' { // push reg|imm
			check_syntax(code, 1, [.reg]) or {
				check_syntax(code, 1, [.imm]) or { return err }
				// XXX only 0-255 value is supported
				imm := byte(code[1].int())
				return [byte(0x6a), imm]
			}
			// 50+r: the register number is its hardware encoding
			reg := x86_reg_code(code[1])
			return [byte(0x50 + reg)]
		}
		'lea' { // lea reg, addr
			check_syntax(code, 2, [.reg, .adr]) ?
			reg := x86_reg_code(code[1])
			// REX.W 8D /r with ModRM mod=00 rm=101 (RIP-relative): the
			// destination register goes into ModRM bits 3..5
			modrm := byte(0x05 | (reg << 3))
			block.relocs << AsmReloc{
				typ: .rel
				// disp32 follows the 3 bytes 48 8D ModRM
				off: block.code.len + 3
				name: code[2]
				siz: 4
				delta: 7
			}
			return [byte(0x48), 0x8d, modrm, 0, 0, 0, 0]
		}
		'mov' { // mov reg, imm
			check_syntax(code, 2, [.reg, .imm]) ?
			reg := x86_reg_code(code[1])
			imm := code[2].int()
			// B8+r imm32 writes the 32-bit register; the CPU zero-extends the
			// result into the full 64-bit register. The immediate is stored
			// little-endian (lowest byte first).
			i0 := byte(imm & 0xff)
			i1 := byte((imm >> 8) & 0xff)
			i2 := byte((imm >> 16) & 0xff)
			i3 := byte((imm >> 24) & 0xff)
			return [byte(0xb8 + reg), i0, i1, i2, i3]
		}
		else {
			return error('unknown instruction: ${code[0]}')
		}
	}
}
// byt extracts byte number `s` of `n`, where byte 0 is the least significant.
[inline]
fn byt(n int, s int) byte {
	shift := s * 8
	shifted := n >> shift
	return byte(shifted & 0xff)
}
// resolve looks up a label by name and returns its offset.
// The first label with a matching name wins. When no label matches, 0 is
// returned, which callers cannot tell apart from a label declared at offset 0.
fn (block AsmBlock) resolve(name string) int {
	for i := 0; i < block.labels.len; i++ {
		candidate := block.labels[i]
		if candidate.name != name {
			continue
		}
		return candidate.off
	}
	return 0
}
// patch_relocs fills in every queued relocation in `block.code`.
//
// Relative relocations of 1 or 4 bytes are written as little-endian signed
// displacements measured from the end of the patched field. This assumes the
// displacement is the last field of its instruction, so that the end of the
// field is the address of the next instruction; that holds for the `jmp` and
// `lea` encodings emitted in this file.
//
// Problems (unknown label, field outside the code, short jump out of range,
// unsupported kind or size) are reported on stderr and the relocation is left
// untouched, because this function has no way to return an error.
fn (mut block AsmBlock) patch_relocs() {
	for rel in block.relocs {
		match rel.typ {
			.abs {
				if rel.siz == 1 {
					eprintln('TODO: abs size')
				} else {
					eprintln('TODO: unsupported rel $rel')
				}
			}
			.rel {
				if rel.siz != 1 && rel.siz != 4 {
					eprintln('TODO: unsupported rel $rel')
					continue
				}
				// never write outside the emitted code
				if rel.off < 0 || rel.off + rel.siz > block.code.len {
					eprintln('relocation out of bounds: $rel')
					continue
				}
				// look the label up here so an unknown name can be told apart
				// from a label that really lives at offset 0
				mut target := -1
				for lab in block.labels {
					if lab.name == rel.name {
						target = lab.off
						break
					}
				}
				if target < 0 {
					eprintln('undefined label: ${rel.name}')
					continue
				}
				// displacement = target - address of the next instruction
				n := target - (rel.off + rel.siz)
				if rel.siz == 1 && (n < -128 || n > 127) {
					eprintln('short jump out of range: ${rel.name}')
					continue
				}
				// little-endian: lowest byte first
				for i := 0; i < rel.siz; i++ {
					block.code[rel.off + i] = byt(n, i)
				}
			}
		}
	}
}
// to_cstring is an unimplemented placeholder: it ignores the block contents
// and always returns an empty string.
// NOTE(review): presumably meant to render `code` as a C string literal - confirm intent.
fn (block AsmBlock) to_cstring() string {
return ''
}
// trim_comment cuts `res` at the first occurrence of the comment marker
// `token` and returns what precedes it, stripped of surrounding whitespace.
// When the marker does not occur, `res` is returned unchanged (not stripped).
fn trim_comment(res string, token string) string {
	if pos := res.index(token) {
		kept := res[..pos]
		return kept.trim_space()
	}
	return res
}
// assemble turns assembly source text into an AsmBlock.
//
// Each line may hold a comment (introduced by `//`, `;` or `#`), a label
// (`name:`), an instruction (`mnemonic op1, op2`), or a label followed by an
// instruction. Blank lines are skipped. Labels record the code offset at which
// they appear. Relocations are only queued here; call patch_relocs() on the
// result to fill them in.
//
// Returns the error of the first instruction that fails to assemble.
fn assemble(code string) ?AsmBlock {
	mut block := AsmBlock{}
	for curline in code.trim_space().split_into_lines() {
		// remove comments
		mut line := trim_comment(curline.trim_space(), '//')
		line = trim_comment(line, ';')
		line = trim_comment(line, '#')
		// treat tabs like spaces so `mov<TAB>rax, 1` is tokenized correctly
		line = line.replace('\t', ' ').trim_space()
		if line.len == 0 {
			continue
		}
		// a leading `name:` token declares a label at the current offset; the
		// rest of the line, if any, is still assembled as an instruction
		first := line.all_before(' ')
		if first.ends_with(':') {
			block.labels << AsmLabel{
				name: first[..first.len - 1]
				off: block.code.len
			}
			line = line[first.len..].trim_space()
			if line.len == 0 {
				continue
			}
		}
		// `mnemonic op1, op2` -> `mnemonic,op1, op2`, so that a single split
		// on `,` yields the mnemonic and every operand
		mut words := line.replace_once(' ', ',').split(',')
		for i := 0; i < words.len; i++ {
			words[i] = words[i].trim_space()
		}
		if words.len == 0 || words[0].len == 0 {
			continue
		}
		res := block.assemble_instruction(words) ?
		block.code << res
	}
	return block
}
// main assembles the command line arguments (joined with spaces, or `nop` when
// none are given) and prints the resulting bytes, then assembles a built-in
// sample program, patches its relocations and prints the code as hex.
fn main() {
	mut cstr := 'nop'
	if os.args.len >= 2 {
		user_args := os.args[1..]
		cstr = user_args.join(' ')
	}
	block := assemble(cstr) or { panic(err) }
	println(block.code)

	// built-in sample exercising a label and a backwards short jump
	cs := '
mov rax, 33
syscall
label:
int 0x80
nop
jmp label
mov rax, 21
'
	mut bb := assemble(cs) or { panic(err) }
	bb.patch_relocs()
	encoded := hex.encode(bb.code)
	println(encoded)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment