An arm64 assembler for Linux that can assemble itself. Written by Claude Opus 4.6
¯\_(ツ)_/¯
| // asm.s — self-hosting aarch64 assembler | |
| // | |
| // reads an aarch64 assembly source file (GAS-compatible subset), | |
| // emits a static PIE ELF binary directly. no linker required. | |
| // | |
| // usage: asm <input.s> -o <output> | |
// ── Linux syscall numbers used by the assembler ─────────────────────
    .equ    SYS_fchmod,             52
    .equ    SYS_openat,             56
    .equ    SYS_close,              57
    .equ    SYS_read,               63
    .equ    SYS_write,              64
    .equ    SYS_exit,               93

// ── file / descriptor constants ─────────────────────────────────────
    .equ    AT_FDCWD,               -100
    .equ    O_RDONLY,               0
    .equ    O_WRONLY_CREAT_TRUNC,   577     // O_WRONLY|O_CREAT|O_TRUNC = 1|64|512
    .equ    STDERR,                 2

// ── ELF layout ──────────────────────────────────────────────────────
    .equ    ELF_HEADER_SIZE,        64
    .equ    PHDR_SIZE,              56
    .equ    CODE_START,             120     // ELF_HEADER_SIZE + PHDR_SIZE

// ── section identifiers (values of ST_CUR_SEC) ──────────────────────
    .equ    SEC_TEXT,               0
    .equ    SEC_RODATA,             1
    .equ    SEC_BSS,                2

// ── state block field offsets (every field is a u64) ────────────────
    .equ    ST_CUR_SEC,             0       // active section: SEC_TEXT/RODATA/BSS
    .equ    ST_TEXT_POS,            8       // write offset inside .text
    .equ    ST_RODATA_POS,          16      // write offset inside .rodata
    .equ    ST_BSS_POS,             24      // write offset inside .bss
    .equ    ST_TEXT_BASE,           32      // virtual address where .text begins
    .equ    ST_RODATA_BASE,         40      // virtual address where .rodata begins
    .equ    ST_BSS_BASE,            48      // virtual address where .bss begins
    .equ    ST_PASS,                56      // pass being run (1 or 2)
    .equ    ST_LINE_NUM,            64      // source line currently processed
    .equ    ST_INPUT_LEN,           72      // number of input bytes read
    .equ    ST_FILE_SIZE,           80      // size of the output file
    .equ    ST_MEM_SIZE,            88      // in-memory size (file + bss)
    .equ    ST_ENTRY,               96      // entry point address (_start)
    .equ    ST_INPUT_NAME,          104     // pointer to the input filename
    .equ    ST_OUTPUT_NAME,         112     // pointer to the output filename
    .equ    ST_SYM_NPOS,            120     // next free offset in sym_names
    .equ    ST_SIZE,                128

// ── symbol table entry (24 bytes) ───────────────────────────────────
//   +0   u32  name_off   offset into sym_names; 0 marks an empty slot
//   +4   u32  name_len   length of the name in bytes
//   +8   u64  flags      SYMF_* bits
//   +16  u64  value      address or .equ value
    .equ    SYM_NAME_OFF,           0
    .equ    SYM_NAME_LEN,           4
    .equ    SYM_FLAGS,              8
    .equ    SYM_VALUE,              16
    .equ    SYM_ENT_SIZE,           24
    .equ    SYM_TBL_SLOTS,          1024    // must be a power of 2

// ── symbol flag bits ────────────────────────────────────────────────
    .equ    SYMF_DEFINED,           1
    .equ    SYMF_GLOBAL,            2
    .equ    SYMF_EQU,               4
    .equ    SYMF_SEC_SHIFT,         4       // section id lives in flag bits 5:4

// ── buffer capacities ───────────────────────────────────────────────
    .equ    INPUT_BUF_SIZE,         262144  // 256 KB
    .equ    TEXT_BUF_SIZE,          131072  // 128 KB
    .equ    RODATA_BUF_SIZE,        65536   // 64 KB
    .equ    SYM_NAMES_SIZE,         32768   // 32 KB
    .equ    SYM_TBL_BYTES,          24576   // SYM_TBL_SLOTS * SYM_ENT_SIZE

// ── numeric (1:, 2:, …) label bookkeeping ───────────────────────────
    .equ    NUMLAB_MAX_DEFS,        128     // definitions allowed per digit
    .equ    NUMLAB_DIGITS,          10      // digits 0-9
| // ═════════════════════════════════════════════════════════════════════ | |
| // BSS | |
| // ═════════════════════════════════════════════════════════════════════ | |
| .bss | |
| .align 4 | |
| state: .skip ST_SIZE | |
| input_buf: .skip INPUT_BUF_SIZE | |
| text_buf: .skip TEXT_BUF_SIZE | |
| rodata_buf: .skip RODATA_BUF_SIZE | |
| sym_table: .skip SYM_TBL_BYTES | |
| sym_names: .skip SYM_NAMES_SIZE | |
| // numeric label storage: 10 digits × 128 defs × 8 bytes | |
| numlab_defs: .skip NUMLAB_DIGITS * NUMLAB_MAX_DEFS * 8 | |
| numlab_cnts: .skip NUMLAB_DIGITS * 8 | |
| numlab_curs: .skip NUMLAB_DIGITS * 8 | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // Read-only data | |
| // ═════════════════════════════════════════════════════════════════════ | |
| .section .rodata | |
| msg_usage: .ascii "usage: asm <input.s> -o <output>\n" | |
| .equ msg_usage_len, . - msg_usage | |
| msg_open: .ascii "asm: cannot open input file\n" | |
| .equ msg_open_len, . - msg_open | |
| msg_create: .ascii "asm: cannot create output file\n" | |
| .equ msg_create_len, . - msg_create | |
| msg_syntax: .ascii "syntax error" | |
| .equ msg_syntax_len, . - msg_syntax | |
| msg_undef: .ascii "undefined symbol" | |
| .equ msg_undef_len, . - msg_undef | |
| msg_colon: .ascii ":" | |
| msg_space: .ascii " " | |
| msg_newline: .ascii "\n" | |
| // directive name strings | |
| str_text: .asciz "text" | |
| str_bss: .asciz "bss" | |
| str_section: .asciz "section" | |
| str_rodata: .asciz ".rodata" | |
| str_align: .asciz "align" | |
| str_skip: .asciz "skip" | |
| str_ascii: .asciz "ascii" | |
| str_asciz: .asciz "asciz" | |
| str_equ: .asciz "equ" | |
| str_global: .asciz "global" | |
| // entry point symbol name | |
| str__start: .asciz "_start" | |
| msg_badins: .ascii "unknown instruction" | |
| .equ msg_badins_len, . - msg_badins | |
| msg_badimm: .ascii "invalid immediate" | |
| .equ msg_badimm_len, . - msg_badimm | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // Code | |
| // ═════════════════════════════════════════════════════════════════════ | |
| .text | |
| .global _start | |
// ─────────────────────────────────────────────────────────────────────
// _start — program entry point
//
//   On entry [sp] = argc and [sp+8] = argv[0].
//   Flow: check argc → read the input file → pass 1 → lay out sections
//         → rebase symbols → resolve the entry point → pass 2 →
//         build the ELF header on the stack → write the output → exit(0).
//   x28 is pointed at the state block here and is used by the helpers.
//   Never returns: failures leave through err_usage/err_open/err_create.
//
//   The input is read in a loop (read may return fewer bytes than asked
//   for) and at most INPUT_BUF_SIZE-1 bytes are accepted, so the byte
//   run_pass temporarily overwrites at end-of-input is always inside
//   input_buf.  Longer inputs are truncated at that limit.
//   A failed write to the output file is reported through err_create
//   instead of being ignored.
// ─────────────────────────────────────────────────────────────────────
_start:
    // argc / argv from the initial stack
    ldr     x19, [sp]                   // x19 = argc
    add     x20, sp, #8                 // x20 = &argv[0]

    // need: asm <input> -o <output>  →  argc >= 4
    cmp     x19, #4
    b.lt    err_usage

    // x28 = &state for the rest of the program
    adrp    x28, state
    add     x28, x28, :lo12:state

    ldr     x0, [x20, #8]               // argv[1] = input filename
    str     x0, [x28, #ST_INPUT_NAME]
    ldr     x0, [x20, #24]              // argv[3] = output filename
    str     x0, [x28, #ST_OUTPUT_NAME]

    // name pool starts at offset 1; offset 0 means "empty slot"
    mov     x0, #1
    str     x0, [x28, #ST_SYM_NPOS]

    // ── open the input file ─────────────────────────────────────────
    mov     x0, #AT_FDCWD
    ldr     x1, [x28, #ST_INPUT_NAME]
    mov     x2, #O_RDONLY
    mov     x3, #0
    mov     x8, #SYS_openat
    svc     #0
    tbnz    x0, #63, err_open           // negative return = error
    mov     x19, x0                     // x19 = input fd

    // ── read until EOF or until the buffer (minus one byte) is full ─
    mov     x20, #0                     // x20 = bytes read so far
1:  mov     x2, #INPUT_BUF_SIZE
    sub     x2, x2, #1                  // keep one byte for the line sentinel
    sub     x2, x2, x20                 // x2 = room left
    cbz     x2, 2f                      // full: stop reading
    adrp    x1, input_buf
    add     x1, x1, :lo12:input_buf
    add     x1, x1, x20                 // x1 = next free byte
    mov     x0, x19
    mov     x8, #SYS_read
    svc     #0
    tbnz    x0, #63, err_open
    cbz     x0, 2f                      // 0 = end of file
    add     x20, x20, x0
    b       1b
2:  str     x20, [x28, #ST_INPUT_LEN]

    mov     x0, x19
    mov     x8, #SYS_close
    svc     #0

    // ── pass 1: collect symbols and measure sections ────────────────
    mov     x0, #1
    bl      run_pass

    // ── section layout: header | .text | .rodata | .bss ─────────────
    mov     x0, #CODE_START
    str     x0, [x28, #ST_TEXT_BASE]
    ldr     x1, [x28, #ST_TEXT_POS]
    add     x1, x0, x1                  // rodata_base = text_base + text_size
    str     x1, [x28, #ST_RODATA_BASE]
    ldr     x2, [x28, #ST_RODATA_POS]
    add     x2, x1, x2                  // bss_base = rodata_base + rodata_size
    str     x2, [x28, #ST_BSS_BASE]
    str     x2, [x28, #ST_FILE_SIZE]    // file ends where .bss begins
    ldr     x3, [x28, #ST_BSS_POS]
    add     x3, x2, x3                  // mem_size = bss_base + bss_size
    str     x3, [x28, #ST_MEM_SIZE]

    // ── add the section bases to the label addresses ────────────────
    bl      rebase_symbols

    // ── entry point: value of _start, else start of .text ───────────
    adrp    x0, str__start
    add     x0, x0, :lo12:str__start
    mov     x1, #6                      // strlen("_start")
    bl      sym_lookup
    cbz     x1, 3f                      // not found
    ldr     x0, [x0, #SYM_VALUE]
    b       4f
3:  ldr     x0, [x28, #ST_TEXT_BASE]
4:  str     x0, [x28, #ST_ENTRY]

    // ── pass 2: encode instructions and emit data ───────────────────
    mov     x0, #2
    bl      run_pass

    // ── ELF header (64 bytes) + program header (56 bytes) on stack ──
    sub     sp, sp, #128
    mov     x0, sp
    mov     x1, #0
    mov     x2, #128
    bl      memset                      // every field not set below stays 0

    // e_ident[0..3] = 7f 'E' 'L' 'F'
    movz    w9, #0x457f
    movk    w9, #0x464c, lsl #16
    str     w9, [sp]
    // e_ident[4..7]: class=2 (64-bit), data=1 (LE), version=1, osabi=0
    movz    w9, #0x0102
    movk    w9, #0x0001, lsl #16
    str     w9, [sp, #4]
    // e_type = 3 (ET_DYN), e_machine = 0xB7 (EM_AARCH64)
    movz    w9, #3
    movk    w9, #0x00B7, lsl #16
    str     w9, [sp, #0x10]
    // e_version = 1
    mov     w9, #1
    str     w9, [sp, #0x14]
    // e_entry
    ldr     x9, [x28, #ST_ENTRY]
    str     x9, [sp, #0x18]
    // e_phoff = 64
    mov     x9, #64
    str     x9, [sp, #0x20]
    // e_ehsize = 64, e_phentsize = 56
    movz    w9, #64
    movk    w9, #56, lsl #16
    str     w9, [sp, #0x34]
    // e_phnum = 1
    mov     w9, #1
    str     w9, [sp, #0x38]

    // ── program header at sp+64 ─────────────────────────────────────
    // p_type = 1 (PT_LOAD)
    mov     w9, #1
    str     w9, [sp, #64]
    // p_flags = 7 (PF_R|PF_W|PF_X)
    mov     w9, #7
    str     w9, [sp, #68]
    // p_offset, p_vaddr, p_paddr stay 0
    // p_filesz
    ldr     x9, [x28, #ST_FILE_SIZE]
    str     x9, [sp, #96]
    // p_memsz
    ldr     x9, [x28, #ST_MEM_SIZE]
    str     x9, [sp, #104]
    // p_align = 0x10000
    mov     x9, #0x10000
    str     x9, [sp, #112]

    // ── create the output file ──────────────────────────────────────
    mov     x0, #AT_FDCWD
    ldr     x1, [x28, #ST_OUTPUT_NAME]
    mov     x2, #O_WRONLY_CREAT_TRUNC
    mov     w3, #493                    // 0755 octal
    mov     x8, #SYS_openat
    svc     #0
    tbnz    x0, #63, err_create
    mov     x19, x0                     // x19 = output fd

    // headers (CODE_START = 120 bytes)
    mov     x0, x19
    mov     x1, sp
    mov     x2, #CODE_START
    mov     x8, #SYS_write
    svc     #0
    tbnz    x0, #63, err_create

    // .text
    mov     x0, x19
    adrp    x1, text_buf
    add     x1, x1, :lo12:text_buf
    ldr     x2, [x28, #ST_TEXT_POS]
    mov     x8, #SYS_write
    svc     #0
    tbnz    x0, #63, err_create

    // .rodata
    mov     x0, x19
    adrp    x1, rodata_buf
    add     x1, x1, :lo12:rodata_buf
    ldr     x2, [x28, #ST_RODATA_POS]
    mov     x8, #SYS_write
    svc     #0
    tbnz    x0, #63, err_create

    // mark the file executable (result not checked, as before)
    mov     x0, x19
    mov     x1, #493
    mov     x8, #SYS_fchmod
    svc     #0

    mov     x0, x19
    mov     x8, #SYS_close
    svc     #0

    add     sp, sp, #128
    mov     x0, #0
    mov     x8, #SYS_exit
    svc     #0
// ─────────────────────────────────────────────────────────────────────
// Fatal exits: err_usage / err_open / err_create
//
//   Each entry selects its message (x1 = text, x2 = length) and then
//   shares one tail that writes it to stderr and calls exit(1).
//   None of them return.
// ─────────────────────────────────────────────────────────────────────
err_usage:
    adrp    x1, msg_usage
    add     x1, x1, :lo12:msg_usage
    mov     x2, #msg_usage_len
    b       err_write_and_exit

err_open:
    adrp    x1, msg_open
    add     x1, x1, :lo12:msg_open
    mov     x2, #msg_open_len
    b       err_write_and_exit

err_create:
    adrp    x1, msg_create
    add     x1, x1, :lo12:msg_create
    mov     x2, #msg_create_len
    // fall through

err_write_and_exit:
    mov     x0, #STDERR
    mov     x8, #SYS_write
    svc     #0
    mov     x0, #1
    mov     x8, #SYS_exit
    svc     #0
| // ═════════════════════════════════════════════════════════════════════ | |
| // Utility functions (spec §8.5) | |
| // | |
| // Calling convention: args in x0-x7, return in x0 (x1 for pairs). | |
| // Leaf functions — no stack frame needed. | |
| // ═════════════════════════════════════════════════════════════════════ | |
// ─────────────────────────────────────────────────────────────────────
// strlen — count the bytes before the terminating NUL
//   in:       x0 = pointer to a NUL-terminated string
//   out:      x0 = length (terminator not counted)
//   clobbers: x9, x10
// ─────────────────────────────────────────────────────────────────────
strlen:
    mov     x9, #0                      // x9 = index, becomes the length
1:  ldrb    w10, [x0, x9]
    cbz     w10, 2f                     // reached the terminator
    add     x9, x9, #1
    b       1b
2:  mov     x0, x9
    ret
// ─────────────────────────────────────────────────────────────────────
// str_eq — equality test for two NUL-terminated strings
//   in:       x0 = string a, x1 = string b
//   out:      x0 = 1 when every byte up to and including the NUL
//                  matches, 0 at the first difference
//   clobbers: x1, x9, x10
// ─────────────────────────────────────────────────────────────────────
str_eq:
1:  ldrb    w9, [x0], #1                // next byte of a
    ldrb    w10, [x1], #1               // next byte of b
    sub     w10, w9, w10
    cbnz    w10, 2f                     // bytes differ
    cbnz    w9, 1b                      // same and not NUL: keep going
    mov     x0, #1                      // both strings ended together
    ret
2:  mov     x0, #0
    ret
// ─────────────────────────────────────────────────────────────────────
// str_eq_n — compare the first n bytes of two buffers
//   in:       x0 = buffer a, x1 = buffer b, x2 = n (unsigned byte count)
//   out:      x0 = 1 if all n bytes match (always 1 for n = 0), else 0
//   clobbers: x9, x10, x11
//
//   The index runs 0 → n and the loop ends on equality with n, so n is
//   treated as unsigned (a signed "less than" would mis-handle counts
//   with the top bit set).
// ─────────────────────────────────────────────────────────────────────
str_eq_n:
    mov     x9, #0                      // x9 = index
1:  cmp     x9, x2
    b.eq    2f                          // every byte compared: equal
    ldrb    w10, [x0, x9]
    ldrb    w11, [x1, x9]
    add     x9, x9, #1
    cmp     w10, w11
    b.eq    1b
    mov     x0, #0                      // first mismatch
    ret
2:  mov     x0, #1
    ret
// ─────────────────────────────────────────────────────────────────────
// memcpy — copy n bytes, lowest address first
//   in:       x0 = dst, x1 = src, x2 = n (unsigned byte count)
//   out:      x0 = dst (never modified)
//   clobbers: x9, x10
//
//   The loop ends when the index equals n, so n is treated as unsigned.
// ─────────────────────────────────────────────────────────────────────
memcpy:
    mov     x9, #0                      // x9 = index
1:  cmp     x9, x2
    b.eq    2f
    ldrb    w10, [x1, x9]
    strb    w10, [x0, x9]
    add     x9, x9, #1
    b       1b
2:  ret
// ─────────────────────────────────────────────────────────────────────
// memset — store one byte value into n consecutive bytes
//   in:       x0 = dst, x1 = value (low 8 bits used), x2 = n (unsigned)
//   out:      x0 = dst (never modified)
//   clobbers: x9
//
//   The loop ends when the index equals n, so n is treated as unsigned.
// ─────────────────────────────────────────────────────────────────────
memset:
    mov     x9, #0                      // x9 = index
1:  cmp     x9, x2
    b.eq    2f
    strb    w1, [x0, x9]
    add     x9, x9, #1
    b       1b
2:  ret
// ─────────────────────────────────────────────────────────────────────
// write_u8 — store the low byte of a value
//   in:  x0 = destination pointer, x1 = value
//   x0 is left unchanged.
// ─────────────────────────────────────────────────────────────────────
write_u8:
    strb    w1, [x0]
    ret
// ─────────────────────────────────────────────────────────────────────
// write_u16 — store a 16-bit value, least significant byte first
//   in:       x0 = destination pointer, x1 = value
//   clobbers: x9
//   Written byte by byte, so the destination needs no alignment.
// ─────────────────────────────────────────────────────────────────────
write_u16:
    lsr     w9, w1, #8                  // w9 = bits 15:8 (and above)
    strb    w1, [x0]                    // byte 0
    strb    w9, [x0, #1]                // byte 1
    ret
// ─────────────────────────────────────────────────────────────────────
// write_u32 — store a 32-bit value, least significant byte first
//   in:       x0 = destination pointer, x1 = value
//   clobbers: x9
//   Each byte is shifted out of w1 directly and stored on its own, so
//   the destination needs no alignment.
// ─────────────────────────────────────────────────────────────────────
write_u32:
    strb    w1, [x0]                    // bits 7:0
    lsr     w9, w1, #8
    strb    w9, [x0, #1]                // bits 15:8
    lsr     w9, w1, #16
    strb    w9, [x0, #2]                // bits 23:16
    lsr     w9, w1, #24
    strb    w9, [x0, #3]                // bits 31:24
    ret
// ─────────────────────────────────────────────────────────────────────
// write_u64 — store a 64-bit value, least significant byte first
//   in:       x0 = destination pointer, x1 = value
//   clobbers: x9
//   Each byte is shifted out of x1 directly and stored on its own, so
//   the destination needs no alignment.
// ─────────────────────────────────────────────────────────────────────
write_u64:
    strb    w1, [x0]                    // bits 7:0
    lsr     x9, x1, #8
    strb    w9, [x0, #1]                // bits 15:8
    lsr     x9, x1, #16
    strb    w9, [x0, #2]                // bits 23:16
    lsr     x9, x1, #24
    strb    w9, [x0, #3]                // bits 31:24
    lsr     x9, x1, #32
    strb    w9, [x0, #4]                // bits 39:32
    lsr     x9, x1, #40
    strb    w9, [x0, #5]                // bits 47:40
    lsr     x9, x1, #48
    strb    w9, [x0, #6]                // bits 55:48
    lsr     x9, x1, #56
    strb    w9, [x0, #7]                // bits 63:56
    ret
// ─────────────────────────────────────────────────────────────────────
// skip_ws — step over blanks (space and tab only)
//   in:       x0 = pointer into a NUL-terminated line
//   out:      x0 = address of the first byte that is neither ' ' nor '\t'
//   clobbers: x9
// ─────────────────────────────────────────────────────────────────────
skip_ws:
    b       2f                          // test the first byte before stepping
1:  add     x0, x0, #1
2:  ldrb    w9, [x0]
    cmp     w9, #' '
    b.eq    1b
    cmp     w9, #'\t'
    b.eq    1b
    ret
// ─────────────────────────────────────────────────────────────────────
// parse_int — parse a decimal, hex, or character literal
//   in:       x0 = pointer to the first character of the literal
//   out:      x0 = value, x1 = pointer just past the literal
//   clobbers: x9, x10, x11, x12, x13
//
//   Accepted forms:  123  -42  0x1F  0XFF  -0x10  'A'  '\n'  '\t'  '\0'
//   Any other escape ('\\', '\'' …) yields the escaped character itself.
//   When no digit is present the value is 0 and, for decimal, x1 equals
//   the position where digits were expected.
//
//   A character literal cut off by the end of the line ("'" or "'\"
//   directly before the NUL) yields 0 and leaves x1 ON the NUL, so the
//   returned pointer never moves past the line terminator.
// ─────────────────────────────────────────────────────────────────────
parse_int:
    ldrb    w9, [x0]
    cmp     w9, #'\''
    b.eq    parse_int_char

    mov     x12, #0                     // x12 = accumulator
    mov     x11, #0                     // x11 = 1 when a leading '-' was seen
    cmp     w9, #'-'
    b.ne    1f
    mov     x11, #1
    add     x0, x0, #1
    ldrb    w9, [x0]
1:  // "0x" / "0X" selects hex, anything else is decimal
    cmp     w9, #'0'
    b.ne    parse_int_dec
    ldrb    w10, [x0, #1]
    cmp     w10, #'x'
    b.eq    parse_int_hex
    cmp     w10, #'X'
    b.eq    parse_int_hex

parse_int_dec:
    mov     x13, #10                    // radix, loop-invariant
2:  ldrb    w9, [x0]
    sub     w10, w9, #'0'
    cmp     w10, #9
    b.hi    parse_int_done              // not a digit
    mul     x12, x12, x13
    add     x12, x12, x10
    add     x0, x0, #1
    b       2b

parse_int_hex:
    add     x0, x0, #2                  // skip "0x"
3:  ldrb    w9, [x0]
    sub     w10, w9, #'0'
    cmp     w10, #9
    b.ls    4f                          // 0-9
    sub     w10, w9, #'a'
    cmp     w10, #5
    b.ls    5f                          // a-f
    sub     w10, w9, #'A'
    cmp     w10, #5
    b.hi    parse_int_done              // not a hex digit
5:  add     w10, w10, #10               // letter → 10..15
4:  lsl     x12, x12, #4
    add     x12, x12, x10
    add     x0, x0, #1
    b       3b

parse_int_done:
    cbz     x11, 6f
    neg     x12, x12
6:  mov     x1, x0
    mov     x0, x12
    ret

parse_int_char:
    add     x0, x0, #1                  // skip the opening quote
    ldrb    w9, [x0]
    mov     x12, x9                     // plain character is its own value
    cbz     w9, parse_int_char_end      // line ended: stay on the NUL
    cmp     w9, #'\\'
    b.eq    parse_int_esc
    add     x0, x0, #1                  // skip the character
    b       parse_int_char_end

parse_int_esc:
    add     x0, x0, #1                  // skip the backslash
    ldrb    w9, [x0]
    mov     x12, x9                     // default: the escaped char itself
    cbz     w9, parse_int_char_end      // line ended: stay on the NUL
    add     x0, x0, #1                  // skip the escape character
    cmp     w9, #'n'
    b.eq    7f
    cmp     w9, #'t'
    b.eq    8f
    cmp     w9, #'0'
    b.eq    9f
    b       parse_int_char_end
7:  mov     x12, #10                    // '\n'
    b       parse_int_char_end
8:  mov     x12, #9                     // '\t'
    b       parse_int_char_end
9:  mov     x12, #0                     // '\0'

parse_int_char_end:
    ldrb    w9, [x0]
    cmp     w9, #'\''
    b.ne    1f
    add     x0, x0, #1                  // skip the closing quote if present
1:  mov     x1, x0
    mov     x0, x12
    ret
// ─────────────────────────────────────────────────────────────────────
// parse_ident — scan an identifier  [a-zA-Z_][a-zA-Z0-9_]*
//   in:       x0 = pointer
//   out:      x0 = start of the identifier (same as the input pointer)
//             x1 = length in bytes, 0 when no identifier starts here
//             x2 = pointer to the first byte after the identifier
//                  (equal to x0 when x1 = 0)
//   clobbers: x10, x11
// ─────────────────────────────────────────────────────────────────────
parse_ident:
    mov     x2, x0                      // x2 = scan cursor, x0 stays put
    ldrb    w10, [x2]

    // first character: a letter or '_'
    sub     w11, w10, #'A'
    cmp     w11, #25
    b.ls    2f
    sub     w11, w10, #'a'
    cmp     w11, #25
    b.ls    2f
    cmp     w10, #'_'
    b.eq    2f
    mov     x1, #0                      // nothing parsed
    ret

    // following characters: letters, digits or '_'
1:  sub     w11, w10, #'0'
    cmp     w11, #9
    b.ls    2f
    sub     w11, w10, #'A'
    cmp     w11, #25
    b.ls    2f
    sub     w11, w10, #'a'
    cmp     w11, #25
    b.ls    2f
    cmp     w10, #'_'
    b.eq    2f
    sub     x1, x2, x0                  // length = end - start
    ret

2:  add     x2, x2, #1                  // accept the byte, fetch the next
    ldrb    w10, [x2]
    b       1b
// ─────────────────────────────────────────────────────────────────────
// parse_register — parse a general-purpose register name
//   in:       x0 = pointer
//   out:      x0 = register number 0-31 (sp, xzr and wzr give 31)
//             x1 = 1 for a 64-bit name (xN, xzr, sp), 0 for wN / wzr
//             x2 = pointer just past the name
//   on error: x0 = -1; x1 and x2 are not written
//   clobbers: x9, x10, x11, x12, x13, x14
//
//   Accepted: sp, xzr, wzr, x0-x30, w0-w30.
//   Every form must be followed by a non-identifier character, so names
//   such as "x1foo", "xzr_tmp" or "spaghetti" are rejected rather than
//   having a register prefix peeled off them (the original applied this
//   check to "sp" only).  The register number is range-checked while
//   digits are accumulated, so an over-long digit string cannot wrap
//   around into a valid number.
// ─────────────────────────────────────────────────────────────────────
parse_register:
    ldrb    w9, [x0]
    mov     x13, #1                     // x13 = width flag, 64-bit by default
    cmp     w9, #'x'
    b.eq    parse_reg_xw
    cmp     w9, #'s'
    b.eq    parse_reg_sp
    cmp     w9, #'w'
    b.ne    parse_reg_fail              // not a register name
    mov     x13, #0                     // 'w' → 32-bit

parse_reg_xw:
    // "zr" after the width letter names the zero register
    ldrb    w10, [x0, #1]
    cmp     w10, #'z'
    b.ne    parse_reg_num
    ldrb    w10, [x0, #2]
    cmp     w10, #'r'
    b.ne    parse_reg_num
    add     x9, x0, #3                  // x9 = end of name
    mov     x12, #31
    b       parse_reg_end

parse_reg_sp:
    ldrb    w10, [x0, #1]
    cmp     w10, #'p'
    b.ne    parse_reg_fail
    add     x9, x0, #2
    mov     x12, #31
    b       parse_reg_end

parse_reg_num:
    add     x9, x0, #1                  // x9 = cursor, past the width letter
    mov     x12, #0                     // x12 = register number
    mov     x14, #10
1:  ldrb    w10, [x9]
    sub     w11, w10, #'0'
    cmp     w11, #9
    b.hi    2f                          // not a digit
    mul     x12, x12, x14
    add     x12, x12, x11
    cmp     x12, #30
    b.hi    parse_reg_fail              // 31 and above are not valid
    add     x9, x9, #1
    b       1b
2:  sub     x11, x9, x0
    cmp     x11, #1
    b.eq    parse_reg_fail              // no digit followed the letter

parse_reg_end:
    // the name must not run on into a longer identifier
    ldrb    w10, [x9]
    cmp     w10, #'_'
    b.eq    parse_reg_fail
    sub     w11, w10, #'a'
    cmp     w11, #25
    b.ls    parse_reg_fail
    sub     w11, w10, #'A'
    cmp     w11, #25
    b.ls    parse_reg_fail
    sub     w11, w10, #'0'
    cmp     w11, #9
    b.ls    parse_reg_fail
    mov     x2, x9
    mov     x0, x12
    mov     x1, x13
    ret

parse_reg_fail:
    mov     x0, #-1
    ret
// ─────────────────────────────────────────────────────────────────────
// is_ident_char — classify one character
//   in:       w0 = character
//   out:      x0 = 1 for [0-9A-Za-z_], 0 for anything else
//   clobbers: x9
// ─────────────────────────────────────────────────────────────────────
is_ident_char:
    sub     w9, w0, #'0'
    cmp     w9, #9
    b.ls    1f                          // digit
    sub     w9, w0, #'A'
    cmp     w9, #25
    b.ls    1f                          // upper-case letter
    sub     w9, w0, #'a'
    cmp     w9, #25
    b.ls    1f                          // lower-case letter
    cmp     w0, #'_'
    b.eq    1f
    mov     x0, #0
    ret
1:  mov     x0, #1
    ret
// ─────────────────────────────────────────────────────────────────────
// sym_hash — 64-bit FNV-1a hash of a name
//   in:       x0 = name pointer, x1 = length in bytes (unsigned)
//   out:      x0 = hash
//   clobbers: x9, x10, x11, x12
//
//   h = 0xcbf29ce484222325            (FNV-1a 64-bit offset basis)
//   for each byte b:  h = (h ^ b) * 0x00000100000001b3   (FNV prime)
//
//   The hash is only used in memory to choose a sym_table slot, so
//   using the standard constants does not affect any stored data.
// ─────────────────────────────────────────────────────────────────────
sym_hash:
    // x9 = offset basis 0xcbf2_9ce4_8422_2325
    movz    x9, #0x2325
    movk    x9, #0x8422, lsl #16
    movk    x9, #0x9ce4, lsl #32
    movk    x9, #0xcbf2, lsl #48
    // x10 = prime 0x0000_0100_0000_01b3
    movz    x10, #0x01b3
    movk    x10, #0x0100, lsl #32
    mov     x11, #0                     // x11 = index
1:  cmp     x11, x1
    b.eq    2f                          // all bytes mixed in
    ldrb    w12, [x0, x11]
    eor     x9, x9, x12
    mul     x9, x9, x10
    add     x11, x11, #1
    b       1b
2:  mov     x0, x9
    ret
// message used when every slot has been probed without success
    .section .rodata
msg_symfull:    .ascii  "symbol table full"
    .equ    msg_symfull_len, . - msg_symfull
    .text

// ─────────────────────────────────────────────────────────────────────
// sym_lookup — find a symbol, or the slot where it would be inserted
//   in:   x0 = name pointer, x1 = name length
//   out:  x0 = pointer to a sym_table entry
//         x1 = 1 if that entry holds the name,
//              0 if it is the empty slot that ended the probe sequence
//
//   Open addressing with linear probing: start at
//   hash & (SYM_TBL_SLOTS-1) and step one slot at a time, wrapping.
//   A slot is empty when its name_off field is 0.
//
//   At most SYM_TBL_SLOTS slots are examined.  If none is empty and
//   none matches, the table is full and the routine reports
//   "symbol table full" through error_at (which exits) instead of
//   probing forever.
//
//   x19-x23 and x30 are saved and restored.
// ─────────────────────────────────────────────────────────────────────
sym_lookup:
    sub     sp, sp, #48
    str     x30, [sp]
    str     x19, [sp, #8]
    str     x20, [sp, #16]
    str     x21, [sp, #24]
    str     x22, [sp, #32]
    str     x23, [sp, #40]
    mov     x19, x0                     // x19 = name pointer
    mov     x20, x1                     // x20 = name length

    bl      sym_hash
    and     x21, x0, #(SYM_TBL_SLOTS - 1)   // x21 = slot index
    adrp    x22, sym_table
    add     x22, x22, :lo12:sym_table   // x22 = table base
    mov     x23, #SYM_TBL_SLOTS         // x23 = probes still allowed

sym_lookup_probe:
    // x9 = &sym_table[slot]
    mov     x9, #SYM_ENT_SIZE
    mul     x9, x21, x9
    add     x9, x22, x9
    ldr     w10, [x9, #SYM_NAME_OFF]
    cbz     w10, sym_lookup_empty       // name_off == 0: free slot
    // cheap check first: lengths must agree
    ldr     w11, [x9, #SYM_NAME_LEN]
    cmp     w11, w20
    b.ne    sym_lookup_next
    // then compare the bytes stored in the name pool
    adrp    x12, sym_names
    add     x12, x12, :lo12:sym_names
    add     x0, x12, x10                // stored name
    mov     x1, x19                     // wanted name
    mov     x2, x20
    bl      str_eq_n
    cbz     x0, sym_lookup_next
    // match: x9 was clobbered by str_eq_n, rebuild the entry pointer
    mov     x9, #SYM_ENT_SIZE
    mul     x9, x21, x9
    add     x0, x22, x9
    mov     x1, #1
    b       sym_lookup_ret

sym_lookup_next:
    sub     x23, x23, #1
    cbz     x23, sym_lookup_full        // every slot has been examined
    add     x21, x21, #1
    and     x21, x21, #(SYM_TBL_SLOTS - 1)
    b       sym_lookup_probe

sym_lookup_full:
    adrp    x0, msg_symfull
    add     x0, x0, :lo12:msg_symfull
    mov     x1, #msg_symfull_len
    b       error_at                    // does not return

sym_lookup_empty:
    mov     x0, x9                      // the free slot
    mov     x1, #0

sym_lookup_ret:
    ldr     x30, [sp]
    ldr     x19, [sp, #8]
    ldr     x20, [sp, #16]
    ldr     x21, [sp, #24]
    ldr     x22, [sp, #32]
    ldr     x23, [sp, #40]
    add     sp, sp, #48
    ret
// message used when the name pool cannot hold another name
    .section .rodata
msg_namefull:   .ascii  "symbol name pool full"
    .equ    msg_namefull_len, . - msg_namefull
    .text

// ─────────────────────────────────────────────────────────────────────
// sym_define — insert a symbol or update an existing one
//   in:  x0 = name pointer, x1 = name length, x2 = value, x3 = flags
//
//   Existing symbol: the value is overwritten and the flags are OR-ed
//   into the stored flags.
//   New symbol: the name is appended to the sym_names pool at
//   ST_SYM_NPOS, the entry's name_off/name_len are filled in and
//   ST_SYM_NPOS advances by the name length; value and flags are then
//   stored the same way.
//
//   If appending the name would run past SYM_NAMES_SIZE the routine
//   reports "symbol name pool full" through error_at (which exits)
//   rather than writing beyond the pool.
//
//   x19-x23 and x30 are saved and restored; x28 must point at state.
// ─────────────────────────────────────────────────────────────────────
sym_define:
    sub     sp, sp, #64
    str     x30, [sp]
    str     x19, [sp, #8]
    str     x20, [sp, #16]
    str     x21, [sp, #24]
    str     x22, [sp, #32]
    str     x23, [sp, #40]
    mov     x19, x0                     // x19 = name pointer
    mov     x20, x1                     // x20 = name length
    mov     x21, x2                     // x21 = value
    mov     x22, x3                     // x22 = flags

    bl      sym_lookup
    mov     x23, x0                     // x23 = entry (found or free slot)
    cbnz    x1, sym_define_update

    // ── new symbol: make sure the name fits, then copy it ───────────
    ldr     x9, [x28, #ST_SYM_NPOS]
    add     x10, x9, x20                // x10 = pool offset after the copy
    mov     x11, #SYM_NAMES_SIZE
    cmp     x10, x11
    b.hi    sym_define_full
    adrp    x0, sym_names
    add     x0, x0, :lo12:sym_names
    add     x0, x0, x9                  // dst = sym_names + npos
    mov     x1, x19
    mov     x2, x20
    bl      memcpy
    // memcpy clobbers x9, so fetch npos again
    ldr     x9, [x28, #ST_SYM_NPOS]
    str     w9, [x23, #SYM_NAME_OFF]
    str     w20, [x23, #SYM_NAME_LEN]
    add     x9, x9, x20
    str     x9, [x28, #ST_SYM_NPOS]

sym_define_update:
    str     x21, [x23, #SYM_VALUE]
    ldr     x9, [x23, #SYM_FLAGS]
    orr     x9, x9, x22                 // keep bits that were already set
    str     x9, [x23, #SYM_FLAGS]

    ldr     x30, [sp]
    ldr     x19, [sp, #8]
    ldr     x20, [sp, #16]
    ldr     x21, [sp, #24]
    ldr     x22, [sp, #32]
    ldr     x23, [sp, #40]
    add     sp, sp, #64
    ret

sym_define_full:
    adrp    x0, msg_namefull
    add     x0, x0, :lo12:msg_namefull
    mov     x1, #msg_namefull_len
    b       error_at                    // does not return
// ─────────────────────────────────────────────────────────────────────
// error_at — print "<input file>:<line>: <message>\n" on stderr, exit(1)
//   in:  x0 = message pointer, x1 = message length
//
//   The filename comes from ST_INPUT_NAME and the line number from
//   ST_LINE_NUM (x28 must point at the state block).
//   Does not return, so no registers are saved.
//
//   The line number is converted with a divide-by-10 loop that runs at
//   least once, so line 0 prints as "0" without a special case.
//   error_at_put is a local helper: write(STDERR, x1, x2).
// ─────────────────────────────────────────────────────────────────────
error_at:
    sub     sp, sp, #32                 // scratch for the decimal digits
    mov     x19, x0                     // x19 = message pointer
    mov     x20, x1                     // x20 = message length

    // "<input file>"
    ldr     x0, [x28, #ST_INPUT_NAME]
    bl      strlen
    mov     x2, x0
    ldr     x1, [x28, #ST_INPUT_NAME]
    bl      error_at_put

    // ":"
    adrp    x1, msg_colon
    add     x1, x1, :lo12:msg_colon
    mov     x2, #1
    bl      error_at_put

    // "<line>" — digits are produced last-first, filling downwards
    ldr     x9, [x28, #ST_LINE_NUM]
    add     x1, sp, #31                 // x1 = one past the last digit slot
    mov     x2, #0                      // x2 = digit count
    mov     x12, #10
1:  udiv    x13, x9, x12                // x13 = quotient
    mul     x14, x13, x12
    sub     x14, x9, x14                // x14 = remainder
    add     w14, w14, #'0'
    sub     x1, x1, #1
    strb    w14, [x1]
    add     x2, x2, #1
    mov     x9, x13
    cbnz    x9, 1b
    bl      error_at_put                // x1 = first digit, x2 = count

    // ": " (two one-byte writes)
    adrp    x1, msg_colon
    add     x1, x1, :lo12:msg_colon
    mov     x2, #1
    bl      error_at_put
    adrp    x1, msg_space
    add     x1, x1, :lo12:msg_space
    mov     x2, #1
    bl      error_at_put

    // "<message>"
    mov     x1, x19
    mov     x2, x20
    bl      error_at_put

    // "\n"
    adrp    x1, msg_newline
    add     x1, x1, :lo12:msg_newline
    mov     x2, #1
    bl      error_at_put

    mov     x0, #1
    mov     x8, #SYS_exit
    svc     #0

// write x2 bytes starting at x1 to stderr; clobbers x0 and x8
error_at_put:
    mov     x0, #STDERR
    mov     x8, #SYS_write
    svc     #0
    ret
| // ═════════════════════════════════════════════════════════════════════ | |
| // Pass driver and line processing | |
| // ═════════════════════════════════════════════════════════════════════ | |
// ─────────────────────────────────────────────────────────────────────
// get_sec_pos — state-block offset of the active section's position
//   in:       x28 = state block
//   out:      x0 = ST_RODATA_POS when ST_CUR_SEC is SEC_RODATA,
//                  ST_BSS_POS    when it is SEC_BSS,
//                  ST_TEXT_POS   for any other value
//   clobbers: x9
// ─────────────────────────────────────────────────────────────────────
get_sec_pos:
    ldr     x9, [x28, #ST_CUR_SEC]
    mov     x0, #ST_TEXT_POS            // default answer
    cmp     x9, #SEC_RODATA
    b.ne    1f
    mov     x0, #ST_RODATA_POS
    ret
1:  cmp     x9, #SEC_BSS
    b.ne    2f
    mov     x0, #ST_BSS_POS
2:  ret
// ─────────────────────────────────────────────────────────────────────
// get_sec_buf — address of the next byte to write in the active section
//   in:       x28 = state block
//   out:      x0 = rodata_buf + ST_RODATA_POS when ST_CUR_SEC is
//                  SEC_RODATA, otherwise text_buf + ST_TEXT_POS
//   clobbers: x9
//
//   .bss has no buffer, so the result is only meaningful while the
//   active section is .text or .rodata.
// ─────────────────────────────────────────────────────────────────────
get_sec_buf:
    ldr     x9, [x28, #ST_CUR_SEC]
    cmp     x9, #SEC_RODATA
    b.ne    1f
    adrp    x0, rodata_buf
    add     x0, x0, :lo12:rodata_buf
    ldr     x9, [x28, #ST_RODATA_POS]
    b       2f
1:  adrp    x0, text_buf
    add     x0, x0, :lo12:text_buf
    ldr     x9, [x28, #ST_TEXT_POS]
2:  add     x0, x0, x9                  // base + current offset
    ret
// ─────────────────────────────────────────────────────────────────────
// run_pass — feed every source line to process_line
//   in:  x0 = pass number (1 or 2), x28 = state block
//
//   Stores the pass number, zeroes the three section positions and the
//   current section, sets the line number to 1 and clears the numeric
//   label cursors.  Then, for each line of input_buf, the byte at the
//   end of the line (the '\n', or the byte at end-of-input) is replaced
//   by NUL while process_line runs and put back afterwards.
//
//   Registers while looping:
//     x19 = start of the current line
//     x20 = end of input (input_buf + ST_INPUT_LEN)
//     x22 = end of the current line
//   Frame slot [sp, #40] holds the byte displaced by the NUL.
// ─────────────────────────────────────────────────────────────────────
run_pass:
    sub     sp, sp, #48
    str     x30, [sp]
    str     x19, [sp, #8]
    str     x20, [sp, #16]
    str     x22, [sp, #32]

    str     x0, [x28, #ST_PASS]
    str     xzr, [x28, #ST_CUR_SEC]
    str     xzr, [x28, #ST_TEXT_POS]
    str     xzr, [x28, #ST_RODATA_POS]
    str     xzr, [x28, #ST_BSS_POS]
    mov     x9, #1
    str     x9, [x28, #ST_LINE_NUM]

    // numeric label cursors start from zero in every pass
    adrp    x0, numlab_curs
    add     x0, x0, :lo12:numlab_curs
    mov     x1, #0
    mov     x2, #(NUMLAB_DIGITS * 8)
    bl      memset

    adrp    x19, input_buf
    add     x19, x19, :lo12:input_buf
    ldr     x9, [x28, #ST_INPUT_LEN]
    add     x20, x19, x9

run_pass_loop:
    cmp     x19, x20
    b.ge    run_pass_done

    // x22 = first '\n' at or after x19, or end of input
    mov     x22, x19
1:  cmp     x22, x20
    b.ge    2f
    ldrb    w10, [x22]
    cmp     w10, #'\n'
    b.eq    2f
    add     x22, x22, #1
    b       1b

2:  // terminate the line in place, remembering the displaced byte
    ldrb    w9, [x22]
    strb    w9, [sp, #40]
    strb    wzr, [x22]

    mov     x0, x19
    bl      process_line

    // put the displaced byte back
    ldrb    w9, [sp, #40]
    strb    w9, [x22]

    add     x19, x22, #1                // next line starts after the '\n'
    ldr     x9, [x28, #ST_LINE_NUM]
    add     x9, x9, #1
    str     x9, [x28, #ST_LINE_NUM]
    b       run_pass_loop

run_pass_done:
    ldr     x30, [sp]
    ldr     x19, [sp, #8]
    ldr     x20, [sp, #16]
    ldr     x22, [sp, #32]
    add     sp, sp, #48
    ret
| // ───────────────────────────────────────────────────────────────────── | |
| // process_line — handle one null-terminated source line | |
| // x0 = line start (null-terminated) | |
| // | |
| // A line is, in order: optional label ("N:" numeric or "name:"), then | |
| // either nothing, a "//" comment, a directive (".name ...") or an | |
| // instruction. Anything unrecognised is silently skipped via pl_done. | |
| // | |
| // Register roles inside this frame (all restored at pl_done): | |
| // x19 = current parse position | |
| // x20 = name start (label / directive / mnemonic) | |
| // x21 = name length | |
| // x22 = operands start (instruction path) | |
| // The directive handlers (dir_*) are entered by branch, run inside this | |
| // frame, and leave through pl_done. | |
| // | |
| // Helper contracts as used here: skip_ws takes and returns a pointer in | |
| // x0; parse_ident returns x0 = name start, x1 = length (0 = none), | |
| // x2 = pointer past the name; a nonzero x0 from str_eq_n is a match. | |
| // ───────────────────────────────────────────────────────────────────── | |
| process_line: | |
| sub sp, sp, #48 | |
| str x30, [sp] | |
| str x19, [sp, #8] // current parse position | |
| str x20, [sp, #16] // scratch (name start) | |
| str x21, [sp, #24] // scratch (name length) | |
| str x22, [sp, #32] | |
| bl skip_ws | |
| mov x19, x0 | |
| // empty line? | |
| ldrb w9, [x19] | |
| cbz w9, pl_done | |
| // comment? ( // ) | |
| cmp w9, #'/' | |
| b.ne pl_not_comment | |
| ldrb w10, [x19, #1] | |
| cmp w10, #'/' | |
| b.eq pl_done | |
| pl_not_comment: | |
| // ── check for numeric label (digit followed by ':') ───────── | |
| // w10 = first char - '0'; the unsigned compare rejects non-digits | |
| sub w10, w9, #'0' | |
| cmp w10, #9 | |
| b.hi pl_not_numlab | |
| ldrb w11, [x19, #1] | |
| cmp w11, #':' | |
| b.ne pl_not_numlab | |
| // numeric label — record in pass 1 | |
| mov x0, x10 // digit (0-9) | |
| bl handle_numlab | |
| add x19, x19, #2 | |
| b pl_after_label | |
| pl_not_numlab: | |
| // ── check for named label or mnemonic ─────────────────────── | |
| // w9 still holds the first character of the statement | |
| cmp w9, #'.' | |
| b.eq pl_directive | |
| mov x0, x19 | |
| bl parse_ident | |
| cbz x1, pl_done // no identifier → skip | |
| // is it a label (followed by ':')? | |
| ldrb w9, [x2] | |
| cmp w9, #':' | |
| b.ne pl_instruction | |
| // ── named label ───────────────────────────────────────────── | |
| mov x20, x0 // name start | |
| mov x21, x1 // name length | |
| add x19, x2, #1 // past ':' | |
| // only define in pass 1 (pass 2 uses rebased values) | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.eq pl_after_label | |
| // value = current section offset | |
| bl get_sec_pos | |
| ldr x2, [x28, x0] | |
| // flags = DEFINED | (cur_section << SEC_SHIFT) | |
| ldr x3, [x28, #ST_CUR_SEC] | |
| lsl x3, x3, #SYMF_SEC_SHIFT | |
| orr x3, x3, #SYMF_DEFINED | |
| // sym_define(x0 = name, x1 = length, x2 = value, x3 = flags) | |
| mov x0, x20 | |
| mov x1, x21 | |
| bl sym_define | |
| b pl_after_label | |
| pl_after_label: | |
| // a label may be followed on the same line by nothing, a comment, | |
| // a directive or an instruction | |
| mov x0, x19 | |
| bl skip_ws | |
| mov x19, x0 | |
| ldrb w9, [x19] | |
| cbz w9, pl_done | |
| cmp w9, #'/' | |
| b.ne 1f | |
| ldrb w10, [x19, #1] | |
| cmp w10, #'/' | |
| b.eq pl_done | |
| 1: cmp w9, #'.' | |
| b.eq pl_directive | |
| // must be instruction after label | |
| mov x0, x19 | |
| bl parse_ident | |
| cbz x1, pl_done | |
| b pl_instruction | |
| // ── directive (starts with '.') ───────────────────────────── | |
| pl_directive: | |
| add x19, x19, #1 // skip '.' | |
| mov x0, x19 | |
| bl parse_ident | |
| cbz x1, pl_done | |
| // x0 = name start, x1 = name length, x2 = end pointer | |
| mov x20, x0 // directive name | |
| mov x21, x1 // directive length | |
| mov x19, x2 // position after directive name | |
| // dispatch on directive name — check first char then length | |
| // (unknown directives fall out to pl_done and are ignored) | |
| ldrb w9, [x20] | |
| // 't' → .text | |
| cmp w9, #'t' | |
| b.ne 3f | |
| cmp x21, #4 | |
| b.ne pl_done | |
| mov x0, x20 | |
| adrp x1, str_text | |
| add x1, x1, :lo12:str_text | |
| mov x2, #4 | |
| bl str_eq_n | |
| cbnz x0, dir_text | |
| b pl_done | |
| // 'b' → .bss | |
| 3: cmp w9, #'b' | |
| b.ne 4f | |
| cmp x21, #3 | |
| b.ne pl_done | |
| mov x0, x20 | |
| adrp x1, str_bss | |
| add x1, x1, :lo12:str_bss | |
| mov x2, #3 | |
| bl str_eq_n | |
| cbnz x0, dir_bss | |
| b pl_done | |
| // 's' → .section (length 7) or .skip (length 4) | |
| 4: cmp w9, #'s' | |
| b.ne 5f | |
| cmp x21, #7 | |
| b.eq dir_s_section | |
| cmp x21, #4 | |
| b.eq dir_s_skip | |
| b pl_done | |
| dir_s_section: | |
| mov x0, x20 | |
| adrp x1, str_section | |
| add x1, x1, :lo12:str_section | |
| mov x2, #7 | |
| bl str_eq_n | |
| cbnz x0, dir_section | |
| b pl_done | |
| dir_s_skip: | |
| mov x0, x20 | |
| adrp x1, str_skip | |
| add x1, x1, :lo12:str_skip | |
| mov x2, #4 | |
| bl str_eq_n | |
| cbnz x0, dir_skip | |
| b pl_done | |
| // 'a' → .align / .ascii / .asciz (all length 5) | |
| 5: cmp w9, #'a' | |
| b.ne 6f | |
| cmp x21, #5 | |
| b.eq dir_a_dispatch | |
| b pl_done | |
| dir_a_dispatch: | |
| // could be "align", "ascii", or "asciz" | |
| // second character 'l' selects the "align" comparison | |
| ldrb w10, [x20, #1] | |
| cmp w10, #'l' | |
| b.ne dir_a_asc | |
| mov x0, x20 | |
| adrp x1, str_align | |
| add x1, x1, :lo12:str_align | |
| mov x2, #5 | |
| bl str_eq_n | |
| cbnz x0, dir_align | |
| b pl_done | |
| dir_a_asc: | |
| // "ascii" or "asciz" | |
| // fifth character 'z' selects the "asciz" comparison | |
| ldrb w10, [x20, #4] | |
| cmp w10, #'z' | |
| b.eq dir_a_asciz | |
| mov x0, x20 | |
| adrp x1, str_ascii | |
| add x1, x1, :lo12:str_ascii | |
| mov x2, #5 | |
| bl str_eq_n | |
| cbnz x0, dir_ascii | |
| b pl_done | |
| dir_a_asciz: | |
| mov x0, x20 | |
| adrp x1, str_asciz | |
| add x1, x1, :lo12:str_asciz | |
| mov x2, #5 | |
| bl str_eq_n | |
| cbnz x0, dir_asciz | |
| b pl_done | |
| // 'e' → .equ | |
| 6: cmp w9, #'e' | |
| b.ne 7f | |
| cmp x21, #3 | |
| b.ne pl_done | |
| mov x0, x20 | |
| adrp x1, str_equ | |
| add x1, x1, :lo12:str_equ | |
| mov x2, #3 | |
| bl str_eq_n | |
| cbnz x0, dir_equ | |
| b pl_done | |
| // 'g' → .global | |
| 7: cmp w9, #'g' | |
| b.ne pl_done | |
| cmp x21, #6 | |
| b.ne pl_done | |
| mov x0, x20 | |
| adrp x1, str_global | |
| add x1, x1, :lo12:str_global | |
| mov x2, #6 | |
| bl str_eq_n | |
| cbnz x0, dir_global | |
| b pl_done | |
| // ── instruction ───────────────────────────────────────────── | |
| pl_instruction: | |
| // x0 = mnemonic start, x1 = mnemonic length, x2 = position after | |
| mov x20, x0 // mnemonic start | |
| mov x21, x1 // mnemonic length | |
| mov x22, x2 // operands start | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.eq pl_inst_encode | |
| // pass 1: advance section pos by 4 | |
| // (the instruction is only sized here, never parsed) | |
| bl get_sec_pos | |
| ldr x9, [x28, x0] | |
| add x9, x9, #4 | |
| str x9, [x28, x0] | |
| b pl_done | |
| pl_inst_encode: | |
| // pass 2: encode instruction and emit | |
| mov x0, x20 // mnemonic start | |
| mov x1, x21 // mnemonic length | |
| mov x2, x22 // operands start | |
| bl encode_instruction | |
| b pl_done | |
| // common exit, also the return point of every dir_* handler | |
| pl_done: | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| ldr x21, [sp, #24] | |
| ldr x22, [sp, #32] | |
| add sp, sp, #48 | |
| ret | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // Directive handlers | |
| // | |
| // On entry: x19 = parse position after directive name | |
| // x20, x21 available (saved by process_line's frame) | |
| // Must jump to pl_done when finished. | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // Section-switch directives. Each picks a section ID in x9 and shares | |
| // the store at the end of dir_section. | |
| // .text — make the text section current | |
| dir_text: | |
| mov x9, #SEC_TEXT | |
| b 1f | |
| // .bss — make the bss section current | |
| dir_bss: | |
| mov x9, #SEC_BSS | |
| b 1f | |
| // .section .rodata — make the rodata section current | |
| // x19 = parse position after "section" | |
| dir_section: | |
| mov x0, x19 | |
| bl skip_ws | |
| // the operand must begin with ".rodata"; anything else is ignored | |
| adrp x1, str_rodata | |
| add x1, x1, :lo12:str_rodata | |
| mov x2, #7 | |
| bl str_eq_n | |
| cbz x0, pl_done | |
| mov x9, #SEC_RODATA | |
| 1: str x9, [x28, #ST_CUR_SEC] | |
| b pl_done | |
| // .align N — round the current section position up to a 2^N boundary | |
| // x19 = parse position after "align" | |
| // Only the position is moved; no padding bytes are written here. | |
| dir_align: | |
| mov x0, x19 | |
| bl skip_ws | |
| bl parse_expr | |
| mov x10, x0 // N (alignment power) | |
| bl get_sec_pos | |
| mov x22, x0 // state offset of the position field | |
| // mask = (1 << N) - 1 | |
| mov x9, #1 | |
| lsl x9, x9, x10 | |
| sub x9, x9, #1 | |
| // aligned = (pos + mask) & ~mask | |
| ldr x11, [x28, x22] | |
| add x11, x11, x9 | |
| mvn x9, x9 | |
| and x11, x11, x9 | |
| str x11, [x28, x22] | |
| b pl_done | |
| // Size-advancing directives. Each leaves the byte count in x10 and | |
| // joins the shared tail at 9: which adds it to the current section | |
| // position. On entry x19 = parse position after the directive name. | |
| // | |
| // .skip N — reserve N bytes | |
| dir_skip: | |
| mov x0, x19 | |
| bl skip_ws | |
| bl parse_expr | |
| mov x10, x0 // N | |
| b 9f | |
| // .ascii "string" — string bytes, no terminator | |
| dir_ascii: | |
| mov x0, x19 | |
| bl skip_ws | |
| mov x19, x0 // x19 = pointer to the string operand | |
| mov x1, #0 // pass 1: NULL destination (count only) | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.ne 1f | |
| bl get_sec_buf // pass 2: write into the section buffer | |
| mov x1, x0 | |
| 1: mov x0, x19 | |
| bl parse_string | |
| mov x10, x0 // bytes produced | |
| b 9f | |
| // .asciz "string" — string bytes followed by a NUL | |
| dir_asciz: | |
| mov x0, x19 | |
| bl skip_ws | |
| mov x19, x0 // x19 = pointer to the string operand | |
| mov x20, #0 // x20 = destination, NULL in pass 1 | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.ne 1f | |
| bl get_sec_buf | |
| mov x20, x0 | |
| 1: mov x0, x19 | |
| mov x1, x20 | |
| bl parse_string | |
| add x10, x0, #1 // string bytes + terminator | |
| cbz x20, 9f // nothing to write when only counting | |
| strb wzr, [x20, x0] // terminator goes right after the string | |
| 9: // shared tail: position += x10 | |
| bl get_sec_pos | |
| ldr x11, [x28, x0] | |
| add x11, x11, x10 | |
| str x11, [x28, x0] | |
| b pl_done | |
| // .equ name, expr — define a constant symbol | |
| // x19 = parse position after "equ" | |
| // A missing name or missing comma makes the line a silent no-op. | |
| dir_equ: | |
| mov x0, x19 | |
| bl skip_ws | |
| bl parse_ident | |
| cbz x1, pl_done // no name | |
| mov x20, x0 // x20 = name start | |
| mov x21, x1 // x21 = name length | |
| // expect ',' after the name | |
| mov x0, x2 | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #',' | |
| b.ne pl_done | |
| add x0, x0, #1 | |
| // value = expression after the comma | |
| bl skip_ws | |
| bl parse_expr | |
| mov x2, x0 | |
| // sym_define(name, length, value, DEFINED | EQU) | |
| mov x0, x20 | |
| mov x1, x21 | |
| mov x3, #SYMF_DEFINED | |
| orr x3, x3, #SYMF_EQU | |
| bl sym_define | |
| b pl_done | |
| // .global name — mark a symbol as global | |
| // x19 = parse position after "global" | |
| dir_global: | |
| mov x0, x19 | |
| bl skip_ws | |
| bl parse_ident | |
| cbz x1, pl_done // no name → nothing to do | |
| // x0 / x1 already hold name start / length for sym_define | |
| mov x2, #0 // value 0 (only the flag matters) | |
| mov x3, #SYMF_GLOBAL | |
| bl sym_define | |
| b pl_done | |
| // ───────────────────────────────────────────────────────────────────── | |
| // handle_numlab — process a numeric label definition ("N:") | |
| // x0 = digit (0-9) | |
| // | |
| // pass 1: append the current section offset to the digit's definition | |
| // list: numlab_defs[digit * NUMLAB_MAX_DEFS + numlab_cnts[digit]] | |
| // pass 2: bump numlab_curs[digit], so Nf / Nb references resolve | |
| // relative to the labels already passed | |
| // | |
| // The pass-1 store is bounds-checked: once a digit already has | |
| // NUMLAB_MAX_DEFS definitions, a further one would land in the next | |
| // digit's slots (or past the table), so it is reported instead of | |
| // stored. No dedicated message is visible for this condition, so the | |
| // generic syntax-error message is used. | |
| // ───────────────────────────────────────────────────────────────────── | |
| handle_numlab: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| mov x19, x0 // digit (callee-saved) | |
| // pass 2: advance cursor instead of recording | |
| ldr x10, [x28, #ST_PASS] | |
| cmp x10, #2 | |
| b.eq handle_numlab_p2 | |
| // pass 1: get current address (section offset) | |
| bl get_sec_pos | |
| ldr x10, [x28, x0] // current offset | |
| // count = numlab_cnts[digit] | |
| adrp x11, numlab_cnts | |
| add x11, x11, :lo12:numlab_cnts | |
| ldr x12, [x11, x19, lsl #3] // count | |
| // refuse to write past this digit's slice of numlab_defs | |
| mov x14, #NUMLAB_MAX_DEFS | |
| cmp x12, x14 | |
| b.ge 8f | |
| // store address: numlab_defs[digit * MAX_DEFS + count] | |
| adrp x13, numlab_defs | |
| add x13, x13, :lo12:numlab_defs | |
| mul x14, x19, x14 | |
| add x14, x14, x12 | |
| str x10, [x13, x14, lsl #3] | |
| // increment count | |
| add x12, x12, #1 | |
| str x12, [x11, x19, lsl #3] | |
| b 9f | |
| 8: // definition table for this digit is full | |
| adrp x0, msg_syntax | |
| add x0, x0, :lo12:msg_syntax | |
| mov x1, #msg_syntax_len | |
| bl error_at | |
| b 9f // reached only if error_at returns | |
| handle_numlab_p2: | |
| adrp x11, numlab_curs | |
| add x11, x11, :lo12:numlab_curs | |
| ldr x10, [x11, x19, lsl #3] | |
| add x10, x10, #1 | |
| str x10, [x11, x19, lsl #3] | |
| 9: ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // rebase_symbols — turn section-relative label values into addresses | |
| // Run after pass 1. For every occupied sym_table slot that is not a | |
| // .equ constant, adds the base of the section recorded in its flags. | |
| // Every recorded numeric label then gets the text base added. | |
| // in: x28 = state block (ST_*_BASE already filled in) | |
| // scratch: x0, x9-x17 (leaf routine, no stack frame needed) | |
| // ───────────────────────────────────────────────────────────────────── | |
| rebase_symbols: | |
| adrp x9, sym_table | |
| add x9, x9, :lo12:sym_table | |
| mov x10, #0 // index of the next slot to visit | |
| rebase_loop: | |
| cmp x10, #SYM_TBL_SLOTS | |
| b.ge rebase_done | |
| mov x11, #SYM_ENT_SIZE | |
| mul x11, x10, x11 | |
| add x12, x9, x11 // x12 = entry pointer | |
| add x10, x10, #1 // step now so skips can loop directly | |
| ldr w13, [x12, #SYM_NAME_OFF] | |
| cbz w13, rebase_loop // empty slot | |
| ldr x14, [x12, #SYM_FLAGS] | |
| tbnz x14, #2, rebase_loop // bit 2 = SYMF_EQU: constants stay as-is | |
| // section ID lives in flags bits 5:4 | |
| lsr x15, x14, #SYMF_SEC_SHIFT | |
| and x15, x15, #3 | |
| // x16 = base of that section (text unless rodata or bss) | |
| ldr x16, [x28, #ST_TEXT_BASE] | |
| cmp x15, #SEC_RODATA | |
| b.ne 1f | |
| ldr x16, [x28, #ST_RODATA_BASE] | |
| 1: cmp x15, #SEC_BSS | |
| b.ne 2f | |
| ldr x16, [x28, #ST_BSS_BASE] | |
| 2: ldr x17, [x12, #SYM_VALUE] | |
| add x17, x17, x16 | |
| str x17, [x12, #SYM_VALUE] | |
| b rebase_loop | |
| rebase_done: | |
| // numeric labels: all of them are rebased against the text section | |
| adrp x9, numlab_defs | |
| add x9, x9, :lo12:numlab_defs | |
| adrp x10, numlab_cnts | |
| add x10, x10, :lo12:numlab_cnts | |
| ldr x16, [x28, #ST_TEXT_BASE] | |
| mov x11, #0 // digit | |
| rebase_numlab_digit: | |
| cmp x11, #NUMLAB_DIGITS | |
| b.ge rebase_numlab_done | |
| ldr x12, [x10, x11, lsl #3] // definitions recorded for this digit | |
| mov x13, #NUMLAB_MAX_DEFS | |
| mul x14, x11, x13 // x14 = first slot of this digit | |
| add x12, x14, x12 // x12 = one past its last used slot | |
| add x11, x11, #1 // next digit for the outer loop | |
| 4: cmp x14, x12 | |
| b.ge rebase_numlab_digit | |
| ldr x0, [x9, x14, lsl #3] | |
| add x0, x0, x16 | |
| str x0, [x9, x14, lsl #3] | |
| add x14, x14, #1 | |
| b 4b | |
| rebase_numlab_done: | |
| ret | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // Expression evaluator — recursive descent | |
| // | |
| // Each function: x0 = pointer → x0 = value, x1 = pointer past expr | |
| // | |
| // Precedence (low to high): | & +/- * <</>> unary(~ -) atom | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr — expression entry point; lowest precedence level ('|') | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer to the first unconsumed character | |
| // x19 = running value, x20 = position of the last operator probe | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl parse_expr_and | |
| mov x19, x0 // left operand | |
| mov x0, x1 // resume scanning after it | |
| pe_or_loop: | |
| bl skip_ws | |
| mov x20, x0 | |
| ldrb w9, [x0] | |
| cmp w9, #'|' | |
| b.ne pe_or_done | |
| add x0, x0, #1 // step over '|' | |
| bl parse_expr_and | |
| orr x19, x19, x0 | |
| mov x0, x1 | |
| b pe_or_loop | |
| pe_or_done: | |
| mov x0, x19 | |
| mov x1, x20 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr_and — precedence level for '&' | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer to the first unconsumed character | |
| // x19 = running value, x20 = position of the last operator probe | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr_and: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl parse_expr_add | |
| mov x19, x0 // left operand | |
| mov x0, x1 // resume scanning after it | |
| pe_and_loop: | |
| bl skip_ws | |
| mov x20, x0 | |
| ldrb w9, [x0] | |
| cmp w9, #'&' | |
| b.ne pe_and_done | |
| add x0, x0, #1 // step over '&' | |
| bl parse_expr_add | |
| and x19, x19, x0 | |
| mov x0, x1 | |
| b pe_and_loop | |
| pe_and_done: | |
| mov x0, x19 | |
| mov x1, x20 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr_add — precedence level for binary '+' and '-' | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer to the first unconsumed character | |
| // x19 = running value, x20 = position of the last operator probe | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr_add: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl parse_expr_mul | |
| mov x19, x0 // left operand | |
| mov x0, x1 // resume scanning after it | |
| pe_add_loop: | |
| bl skip_ws | |
| mov x20, x0 | |
| ldrb w9, [x0] | |
| cmp w9, #'-' | |
| b.eq pe_add_minus | |
| cmp w9, #'+' | |
| b.ne pe_add_done | |
| // '+' | |
| add x0, x0, #1 | |
| bl parse_expr_mul | |
| add x19, x19, x0 | |
| mov x0, x1 | |
| b pe_add_loop | |
| pe_add_minus: | |
| add x0, x0, #1 | |
| bl parse_expr_mul | |
| sub x19, x19, x0 | |
| mov x0, x1 | |
| b pe_add_loop | |
| pe_add_done: | |
| mov x0, x19 | |
| mov x1, x20 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr_mul — precedence level for '*' | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer to the first unconsumed character | |
| // x19 = running value, x20 = position of the last operator probe | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr_mul: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl parse_expr_shift | |
| mov x19, x0 // left operand | |
| mov x0, x1 // resume scanning after it | |
| pe_mul_loop: | |
| bl skip_ws | |
| mov x20, x0 | |
| ldrb w9, [x0] | |
| cmp w9, #'*' | |
| b.ne pe_mul_done | |
| add x0, x0, #1 // step over '*' | |
| bl parse_expr_shift | |
| mul x19, x19, x0 | |
| mov x0, x1 | |
| b pe_mul_loop | |
| pe_mul_done: | |
| mov x0, x19 | |
| mov x1, x20 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr_shift — precedence level for '<<' and '>>' | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer to the first unconsumed character | |
| // x19 = running value, x20 = position of the last operator probe | |
| // A lone '<' or '>' is not an operator and ends the expression. | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr_shift: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl parse_expr_unary | |
| mov x19, x0 // left operand | |
| mov x0, x1 // resume scanning after it | |
| pe_shift_loop: | |
| bl skip_ws | |
| mov x20, x0 | |
| ldrb w9, [x0] | |
| cmp w9, #'<' | |
| b.eq pe_shift_check_left | |
| cmp w9, #'>' | |
| b.ne pe_shift_done | |
| pe_shift_check_right: | |
| ldrb w10, [x20, #1] | |
| cmp w10, #'>' | |
| b.ne pe_shift_done | |
| add x0, x20, #2 // step over ">>" | |
| bl parse_expr_unary | |
| lsr x19, x19, x0 | |
| mov x0, x1 | |
| b pe_shift_loop | |
| pe_shift_check_left: | |
| ldrb w10, [x20, #1] | |
| cmp w10, #'<' | |
| b.ne pe_shift_done | |
| add x0, x20, #2 // step over "<<" | |
| bl parse_expr_unary | |
| lsl x19, x19, x0 | |
| mov x0, x1 | |
| b pe_shift_loop | |
| pe_shift_done: | |
| mov x0, x19 | |
| mov x1, x20 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr_unary — prefix '~' (bitwise not) and '-' (negate) | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer past the operand (as returned by the | |
| // nested call) | |
| // Prefix operators nest by recursion; anything else is an atom. | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr_unary: | |
| sub sp, sp, #16 | |
| str x30, [sp] | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #'-' | |
| b.eq pe_unary_neg | |
| cmp w9, #'~' | |
| b.eq pe_unary_not | |
| // no prefix operator | |
| bl parse_expr_atom | |
| b 1f | |
| pe_unary_not: | |
| add x0, x0, #1 // step over '~' | |
| bl parse_expr_unary | |
| mvn x0, x0 | |
| b 1f | |
| pe_unary_neg: | |
| add x0, x0, #1 // step over '-' | |
| bl parse_expr_unary | |
| sub x0, xzr, x0 // 0 - value | |
| 1: ldr x30, [sp] | |
| add sp, sp, #16 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_expr_atom — numbers, symbols, '.', '(expr)' | |
| // in: x0 = pointer | |
| // out: x0 = value, x1 = pointer past the atom | |
| // | |
| // Dispatch on the first non-blank character: | |
| // '(' grouped sub-expression, must be closed by ')' | |
| // '.' current location: section offset in pass 1, section base | |
| // plus offset in pass 2 | |
| // digit, '-', '\'' numeric literal, delegated to parse_int | |
| // otherwise identifier looked up in the symbol table; an unknown | |
| // symbol is 0 in pass 1 and an error in pass 2 | |
| // | |
| // Every path leaves through the single epilogue at 9:. That includes | |
| // the syntax-error path, which yields value 0 and an unadvanced | |
| // pointer, so control can never run on into the next routine should | |
| // error_at return. | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_expr_atom: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| // '(' — grouped expression | |
| cmp w9, #'(' | |
| b.eq pe_atom_paren | |
| // '.' — current location counter | |
| cmp w9, #'.' | |
| b.eq pe_atom_dot | |
| // digit or '-' or '\'' — numeric literal | |
| // (the unsigned range check covers '0' through '9') | |
| sub w10, w9, #'0' | |
| cmp w10, #9 | |
| b.ls pe_atom_num | |
| cmp w9, #'-' | |
| b.eq pe_atom_num | |
| cmp w9, #'\'' | |
| b.eq pe_atom_num | |
| // identifier — symbol reference | |
| bl parse_ident | |
| cbz x1, pe_atom_err | |
| mov x20, x2 // end pointer (return this) | |
| // look up symbol (x0 = name, x1 = length from parse_ident) | |
| bl sym_lookup | |
| cbz x1, pe_atom_undef | |
| // return value | |
| ldr x0, [x0, #SYM_VALUE] | |
| mov x1, x20 | |
| b 9f | |
| pe_atom_undef: | |
| // in pass 1, undefined symbols get 0 (forward ref in instruction) | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #1 | |
| b.eq 1f | |
| // pass 2: error | |
| adrp x0, msg_undef | |
| add x0, x0, :lo12:msg_undef | |
| mov x1, #msg_undef_len | |
| bl error_at | |
| 1: mov x0, #0 | |
| mov x1, x20 | |
| b 9f | |
| pe_atom_paren: | |
| add x0, x0, #1 // skip '(' | |
| bl parse_expr | |
| mov x19, x0 // value | |
| mov x0, x1 | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #')' | |
| b.ne pe_atom_err | |
| add x1, x0, #1 // pointer past ')' | |
| mov x0, x19 | |
| b 9f | |
| pe_atom_dot: | |
| add x20, x0, #1 // pointer past '.', kept across the call | |
| // value = current section offset (pass 1) or base+offset (pass 2) | |
| bl get_sec_pos | |
| ldr x0, [x28, x0] // section offset | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #1 | |
| b.eq 1f | |
| // pass 2: add section base | |
| ldr x10, [x28, #ST_CUR_SEC] | |
| cmp x10, #SEC_RODATA | |
| b.eq 2f | |
| cmp x10, #SEC_BSS | |
| b.eq 3f | |
| ldr x11, [x28, #ST_TEXT_BASE] | |
| b 4f | |
| 2: ldr x11, [x28, #ST_RODATA_BASE] | |
| b 4f | |
| 3: ldr x11, [x28, #ST_BSS_BASE] | |
| 4: add x0, x0, x11 | |
| 1: mov x1, x20 | |
| b 9f | |
| pe_atom_num: | |
| bl parse_int | |
| // x0 = value, x1 = pointer past | |
| b 9f | |
| pe_atom_err: | |
| mov x20, x0 // pointer at the time of the failure | |
| adrp x0, msg_syntax | |
| add x0, x0, :lo12:msg_syntax | |
| mov x1, #msg_syntax_len | |
| bl error_at | |
| // reached only if error_at returns: value 0, pointer not advanced | |
| mov x0, #0 | |
| mov x1, x20 | |
| 9: ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // String parsing | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_string — parse a quoted string, count or emit bytes | |
| // x0 = pointer (at the opening '"') | |
| // x1 = destination (NULL to just count) | |
| // returns x0 = byte count, x1 = pointer past closing '"' | |
| // | |
| // Escapes: \n → 10, \t → 9, \0 → 0; any other escaped character is | |
| // taken literally (covers \\ \' \"). | |
| // | |
| // The scan never moves past the line's NUL terminator: | |
| // - if x0 is not at a '"' (including an empty operand), a syntax | |
| // error is reported and the result is 0 bytes with x1 = x0; | |
| // - an unterminated string stops at the NUL (x1 points at it); | |
| // - a backslash directly before the NUL also stops at the NUL and | |
| // contributes no byte. | |
| // | |
| // frame (32 bytes): x30, x19 = dest, x20 = count, [sp,#24] = scratch | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_string: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| mov x19, x1 // dest (or NULL) | |
| mov x20, #0 // byte count | |
| // the operand must start with a double quote | |
| ldrb w9, [x0] | |
| cmp w9, #'"' | |
| b.ne 9f | |
| add x0, x0, #1 // skip opening '"' | |
| ps_loop: | |
| ldrb w9, [x0] | |
| cbz w9, ps_done // unterminated string | |
| cmp w9, #'"' | |
| b.eq ps_close | |
| cmp w9, #'\\' | |
| b.eq ps_escape | |
| // plain character | |
| cbz x19, 1f | |
| strb w9, [x19, x20] | |
| 1: add x20, x20, #1 | |
| add x0, x0, #1 | |
| b ps_loop | |
| ps_escape: | |
| add x0, x0, #1 // skip backslash | |
| ldrb w9, [x0] | |
| cbz w9, ps_done // backslash at end of line: stay on the NUL | |
| add x0, x0, #1 | |
| cmp w9, #'n' | |
| b.eq 2f | |
| cmp w9, #'t' | |
| b.eq 3f | |
| cmp w9, #'0' | |
| b.eq 4f | |
| // '\\' '\'' '"' or unknown → literal | |
| b 5f | |
| 2: mov w9, #10 | |
| b 5f | |
| 3: mov w9, #9 | |
| b 5f | |
| 4: mov w9, #0 | |
| 5: cbz x19, 6f | |
| strb w9, [x19, x20] | |
| 6: add x20, x20, #1 | |
| b ps_loop | |
| ps_close: | |
| add x0, x0, #1 // skip closing '"' | |
| ps_done: | |
| mov x1, x0 | |
| mov x0, x20 // byte count | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| 9: // no opening quote: report it, consume nothing | |
| str x0, [sp, #24] // keep the pointer across error_at | |
| adrp x0, msg_syntax | |
| add x0, x0, :lo12:msg_syntax | |
| mov x1, #msg_syntax_len | |
| bl error_at | |
| ldr x0, [sp, #24] // reached only if error_at returns | |
| b ps_done | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // Pass 2 infrastructure | |
| // ═════════════════════════════════════════════════════════════════════ | |
| // ───────────────────────────────────────────────────────────────────── | |
| // emit_inst — append one 32-bit instruction word to text_buf | |
| // in: x0 = instruction word (low 32 bits are stored) | |
| // effect: word written at text_buf + ST_TEXT_POS; ST_TEXT_POS += 4 | |
| // scratch: x9, x10 | |
| // ───────────────────────────────────────────────────────────────────── | |
| emit_inst: | |
| ldr x10, [x28, #ST_TEXT_POS] // x10 = write offset | |
| adrp x9, text_buf | |
| add x9, x9, :lo12:text_buf | |
| str w0, [x9, x10] | |
| add x10, x10, #4 | |
| str x10, [x28, #ST_TEXT_POS] | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_hash_imm — parse "#expr", "#:lo12:expr" or bare ":lo12:expr" | |
| // in: x0 = pointer (at '#', or at ':' for the bare form) | |
| // out: x0 = value (masked to the low 12 bits for :lo12:) | |
| // x1 = pointer past the expression | |
| // x2 = 1 if a :lo12: prefix was present, else 0 | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_hash_imm: | |
| sub sp, sp, #16 | |
| str x30, [sp] | |
| ldrb w9, [x0] | |
| cmp w9, #':' | |
| b.eq phi_lo12_check // bare :lo12:expr | |
| add x0, x0, #1 // step over the leading character ('#') | |
| ldrb w9, [x0] | |
| cmp w9, #':' | |
| b.ne phi_plain // ordinary #expr | |
| phi_lo12_check: | |
| // x0 is at ':' — require the exact text "lo12:" after it | |
| ldrb w9, [x0, #1] | |
| cmp w9, #'l' | |
| b.ne phi_plain | |
| ldrb w9, [x0, #2] | |
| cmp w9, #'o' | |
| b.ne phi_plain | |
| ldrb w9, [x0, #3] | |
| cmp w9, #'1' | |
| b.ne phi_plain | |
| ldrb w9, [x0, #4] | |
| cmp w9, #'2' | |
| b.ne phi_plain | |
| ldrb w9, [x0, #5] | |
| cmp w9, #':' | |
| b.ne phi_plain | |
| add x0, x0, #6 // step over ":lo12:" | |
| bl parse_expr | |
| and x0, x0, #0xFFF // keep the low 12 bits | |
| mov x2, #1 | |
| b 1f | |
| phi_plain: | |
| bl parse_expr | |
| mov x2, #0 | |
| 1: ldr x30, [sp] | |
| add sp, sp, #16 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_label_ref — parse a branch target: named label, "Nf" or "Nb" | |
| // in: x0 = pointer | |
| // out: x0 = target address, x1 = pointer past the reference | |
| // A digit followed by 'f' / 'b' is a numeric label reference; anything | |
| // else is looked up as a named symbol. A missing or unknown name is | |
| // reported through error_at. | |
| // x20 = pointer to return in x1 | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_label_ref: | |
| sub sp, sp, #32 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| sub w10, w9, #'0' // x10 = digit value if this is "N?" | |
| cmp w10, #9 | |
| b.hi plr_named | |
| ldrb w11, [x0, #1] | |
| add x20, x0, #2 // pointer past "Nf" / "Nb" (if it is one) | |
| cmp w11, #'f' | |
| b.eq plr_forward | |
| cmp w11, #'b' | |
| b.ne plr_named | |
| plr_backward: | |
| mov x0, x10 // digit | |
| bl resolve_numlab_b | |
| mov x1, x20 | |
| b plr_done | |
| plr_forward: | |
| mov x0, x10 // digit | |
| bl resolve_numlab_f | |
| mov x1, x20 | |
| b plr_done | |
| plr_named: | |
| bl parse_ident | |
| cbz x1, plr_err // no identifier here | |
| mov x20, x2 // pointer past the name | |
| bl sym_lookup | |
| cbz x1, plr_err // not in the symbol table | |
| ldr x0, [x0, #SYM_VALUE] | |
| mov x1, x20 | |
| b plr_done | |
| plr_err: | |
| adrp x0, msg_undef | |
| add x0, x0, :lo12:msg_undef | |
| mov x1, #msg_undef_len | |
| bl error_at | |
| plr_done: | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| add sp, sp, #32 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // resolve_numlab_f — next forward definition of numeric label | |
| // x0 = digit (0-9) | |
| // returns x0 = address | |
| // | |
| // The target is numlab_defs[digit * NUMLAB_MAX_DEFS + cursor], where | |
| // cursor = numlab_curs[digit] is the number of "N:" labels already | |
| // passed. If cursor is not below numlab_cnts[digit] there is no later | |
| // definition: the result is 0, and in pass 2 the reference is also | |
| // reported as undefined (the same pass-1 / pass-2 split that | |
| // parse_expr_atom uses for unknown symbols). | |
| // scratch: x9-x12 (x1 as well on the error path) | |
| // ───────────────────────────────────────────────────────────────────── | |
| resolve_numlab_f: | |
| adrp x9, numlab_curs | |
| add x9, x9, :lo12:numlab_curs | |
| ldr x10, [x9, x0, lsl #3] // cursor | |
| adrp x9, numlab_cnts | |
| add x9, x9, :lo12:numlab_cnts | |
| ldr x9, [x9, x0, lsl #3] // definitions recorded for this digit | |
| cmp x10, x9 | |
| b.ge 1f // nothing defined ahead | |
| adrp x11, numlab_defs | |
| add x11, x11, :lo12:numlab_defs | |
| mov x12, #NUMLAB_MAX_DEFS | |
| mul x12, x0, x12 | |
| add x12, x12, x10 // digit * MAX_DEFS + cursor | |
| ldr x0, [x11, x12, lsl #3] | |
| ret | |
| 1: // no forward definition exists | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.ne 2f // outside pass 2: quietly yield 0 | |
| sub sp, sp, #16 | |
| str x30, [sp] // error_at is a call; keep the return address | |
| adrp x0, msg_undef | |
| add x0, x0, :lo12:msg_undef | |
| mov x1, #msg_undef_len | |
| bl error_at | |
| ldr x30, [sp] | |
| add sp, sp, #16 | |
| 2: mov x0, #0 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // resolve_numlab_b — most recent backward definition of numeric label | |
| // x0 = digit (0-9) | |
| // returns x0 = address | |
| // | |
| // The target is numlab_defs[digit * NUMLAB_MAX_DEFS + cursor - 1], | |
| // where cursor = numlab_curs[digit] is the number of "N:" labels | |
| // already passed. A cursor of 0 means nothing has been defined yet; | |
| // indexing slot -1 would read another digit's entry or memory before | |
| // the table, so the result is 0 instead, and in pass 2 the reference | |
| // is also reported as undefined. | |
| // scratch: x9-x12 (x1 as well on the error path) | |
| // ───────────────────────────────────────────────────────────────────── | |
| resolve_numlab_b: | |
| adrp x9, numlab_curs | |
| add x9, x9, :lo12:numlab_curs | |
| ldr x10, [x9, x0, lsl #3] // cursor | |
| cbz x10, 1f // no earlier definition | |
| sub x10, x10, #1 // previous definition | |
| adrp x11, numlab_defs | |
| add x11, x11, :lo12:numlab_defs | |
| mov x12, #NUMLAB_MAX_DEFS | |
| mul x12, x0, x12 | |
| add x12, x12, x10 | |
| ldr x0, [x11, x12, lsl #3] | |
| ret | |
| 1: // no backward definition exists | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.ne 2f // outside pass 2: quietly yield 0 | |
| sub sp, sp, #16 | |
| str x30, [sp] // error_at is a call; keep the return address | |
| adrp x0, msg_undef | |
| add x0, x0, :lo12:msg_undef | |
| mov x1, #msg_undef_len | |
| bl error_at | |
| ldr x30, [sp] | |
| add sp, sp, #16 | |
| 2: mov x0, #0 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // encode_logical_imm — encode bitmask immediate for logical instructions | |
| // x0 = value, x1 = is_32bit (1=replicate low 32 to full 64) | |
| // returns x0 = (N << 12) | (immr << 6) | imms, or -1 if unencodable | |
| // | |
| // Method: rotate the value so its run of ones sits at bit 0, measure | |
| // the run of leading zeros and trailing ones to get the repeating | |
| // element size, then confirm the value really repeats with that | |
| // period before packing the fields. | |
| // | |
| // Any nonzero x1 selects the 32-bit path (the test is cbz). | |
| // The rotates are built from an lsr/lsl pair whose left-shift amount | |
| // is 64 - n; for n = 0 or n = 64 this relies on register-specified | |
| // shifts taking their amount modulo 64. | |
| // The routine makes no calls; x30 and x19-x22 are saved and restored | |
| // on both exit paths. | |
| // ───────────────────────────────────────────────────────────────────── | |
| encode_logical_imm: | |
| sub sp, sp, #48 | |
| str x30, [sp] | |
| str x19, [sp, #8] | |
| str x20, [sp, #16] | |
| str x21, [sp, #24] | |
| str x22, [sp, #32] | |
| // for 32-bit, replicate low 32 bits | |
| cbz x1, eli_start | |
| and x0, x0, #0xFFFFFFFF | |
| orr x0, x0, x0, lsl #32 | |
| eli_start: | |
| mov x19, x0 // val | |
| // reject all-zeros and all-ones | |
| cbz x19, eli_fail | |
| mvn x9, x19 | |
| cbz x9, eli_fail | |
| // rotation = ctz(val & (val + 1)) | |
| // (ctz is computed as clz of the bit-reversed value) | |
| add x9, x19, #1 | |
| and x9, x19, x9 | |
| rbit x10, x9 | |
| clz x20, x10 // rotation | |
| // normalized = ror(val, rotation) — done as shift pair | |
| lsr x9, x19, x20 | |
| mov x10, #64 | |
| sub x10, x10, x20 | |
| lsl x11, x19, x10 | |
| orr x9, x9, x11 // normalized | |
| // zeroes = clz(normalized) | |
| clz x10, x9 | |
| // ones = ctz(~normalized) = clz(rbit(~normalized)) | |
| mvn x11, x9 | |
| rbit x11, x11 | |
| clz x21, x11 // ones | |
| // size = zeroes + ones | |
| add x22, x10, x21 | |
| // validate: ror(val, size) == val | |
| // (fails when the value is not a repetition of one size-bit element) | |
| lsr x9, x19, x22 | |
| mov x10, #64 | |
| sub x10, x10, x22 | |
| lsl x11, x19, x10 | |
| orr x9, x9, x11 | |
| cmp x9, x19 | |
| b.ne eli_fail | |
| // immr = (-rotation) & (size - 1) | |
| neg x9, x20 | |
| sub x10, x22, #1 | |
| and x9, x9, x10 // immr | |
| // imms = (-(size << 1) | (ones - 1)) & 0x3F | |
| lsl x11, x22, #1 | |
| neg x11, x11 | |
| sub x12, x21, #1 | |
| orr x11, x11, x12 | |
| and x11, x11, #0x3F // imms | |
| // N = size >> 6 | |
| // (1 only when the element size is 64) | |
| lsr x12, x22, #6 | |
| // result = (N << 12) | (immr << 6) | imms | |
| lsl x12, x12, #12 | |
| lsl x9, x9, #6 | |
| orr x0, x12, x9 | |
| orr x0, x0, x11 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| ldr x21, [sp, #24] | |
| ldr x22, [sp, #32] | |
| add sp, sp, #48 | |
| ret | |
| // unencodable value: return -1 | |
| eli_fail: | |
| mov x0, #-1 | |
| ldr x30, [sp] | |
| ldr x19, [sp, #8] | |
| ldr x20, [sp, #16] | |
| ldr x21, [sp, #24] | |
| ldr x22, [sp, #32] | |
| add sp, sp, #48 | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // parse_cond — map a two-letter condition suffix to its condition code | |
| // x0 = pointer to the first letter of the condition | |
| // returns x0 = condition code (0-14), or -1 if the pair is not known | |
| // x1 = pointer just past the two letters (set on failure too) | |
| // scratch: w9, w10 | |
| // ───────────────────────────────────────────────────────────────────── | |
| parse_cond: | |
| ldrb w9, [x0] // first letter | |
| ldrb w10, [x0, #1] // second letter | |
| add x1, x0, #2 // every code is two letters long | |
| // Each group below loads a candidate code into x0 and leaves through | |
| // pc_ret as soon as the second letter confirms it. | |
| cmp w9, #'a' | |
| b.eq pc_a | |
| cmp w9, #'c' | |
| b.eq pc_c | |
| cmp w9, #'e' | |
| b.eq pc_e | |
| cmp w9, #'g' | |
| b.eq pc_g | |
| cmp w9, #'h' | |
| b.eq pc_h | |
| cmp w9, #'l' | |
| b.eq pc_l | |
| cmp w9, #'m' | |
| b.eq pc_m | |
| cmp w9, #'n' | |
| b.eq pc_n | |
| cmp w9, #'p' | |
| b.eq pc_p | |
| cmp w9, #'v' | |
| b.eq pc_v | |
| b pc_fail | |
| pc_a: | |
| mov x0, #14 // AL | |
| cmp w10, #'l' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_c: | |
| mov x0, #2 // CS (alias HS) | |
| cmp w10, #'s' | |
| b.eq pc_ret | |
| mov x0, #3 // CC (alias LO) | |
| cmp w10, #'c' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_e: | |
| mov x0, #0 // EQ | |
| cmp w10, #'q' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_g: | |
| mov x0, #10 // GE | |
| cmp w10, #'e' | |
| b.eq pc_ret | |
| mov x0, #12 // GT | |
| cmp w10, #'t' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_h: | |
| mov x0, #2 // HS (alias CS) | |
| cmp w10, #'s' | |
| b.eq pc_ret | |
| mov x0, #8 // HI | |
| cmp w10, #'i' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_l: | |
| mov x0, #9 // LS | |
| cmp w10, #'s' | |
| b.eq pc_ret | |
| mov x0, #11 // LT | |
| cmp w10, #'t' | |
| b.eq pc_ret | |
| mov x0, #13 // LE | |
| cmp w10, #'e' | |
| b.eq pc_ret | |
| mov x0, #3 // LO (alias CC) | |
| cmp w10, #'o' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_m: | |
| mov x0, #4 // MI | |
| cmp w10, #'i' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_n: | |
| mov x0, #1 // NE | |
| cmp w10, #'e' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_p: | |
| mov x0, #5 // PL | |
| cmp w10, #'l' | |
| b.eq pc_ret | |
| b pc_fail | |
| pc_v: | |
| mov x0, #6 // VS | |
| cmp w10, #'s' | |
| b.eq pc_ret | |
| mov x0, #7 // VC | |
| cmp w10, #'c' | |
| b.eq pc_ret | |
| pc_fail: | |
| mov x0, #-1 // not a condition this routine knows | |
| pc_ret: | |
| ret | |
| // ───────────────────────────────────────────────────────────────────── | |
| // encode_instruction — dispatch mnemonic, parse operands, emit | |
| // x0 = mnemonic start, x1 = mnemonic length, x2 = operands start | |
| // | |
| // Frame (64 bytes): [sp] x30, [sp,#8]..[sp,#48] caller's x19-x24, | |
| // [sp,#56] scratch slot used by the encoders. ei_done unwinds it. | |
| // | |
| // The mnemonic has to match a supported name exactly — same length and | |
| // every character. Anything else goes to ei_bad instead of being | |
| // encoded as a similarly spelled instruction (previously e.g. "nop" | |
| // was taken for neg, "ldp" for ldr, "strh" for strb, "cmn" for cmp). | |
| // ───────────────────────────────────────────────────────────────────── | |
| encode_instruction: | |
| sub sp, sp, #64 | |
| str x30, [sp] | |
| str x19, [sp, #8] // x19 will hold the mnemonic start | |
| str x20, [sp, #16] // x20 will hold the mnemonic length | |
| str x21, [sp, #24] // x21 will hold the operands pointer | |
| str x22, [sp, #32] | |
| str x23, [sp, #40] | |
| str x24, [sp, #48] | |
| mov x19, x0 | |
| mov x20, x1 | |
| mov x21, x2 | |
| // dispatch on first character of mnemonic | |
| ldrb w9, [x19] | |
| cmp w9, #'a' | |
| b.eq ei_a | |
| cmp w9, #'b' | |
| b.eq ei_b | |
| cmp w9, #'c' | |
| b.eq ei_c | |
| cmp w9, #'e' | |
| b.eq ei_e | |
| cmp w9, #'l' | |
| b.eq ei_l | |
| cmp w9, #'m' | |
| b.eq ei_m | |
| cmp w9, #'n' | |
| b.eq ei_n | |
| cmp w9, #'o' | |
| b.eq ei_o | |
| cmp w9, #'r' | |
| b.eq ei_r | |
| cmp w9, #'s' | |
| b.eq ei_s | |
| cmp w9, #'t' | |
| b.eq ei_t | |
| cmp w9, #'u' | |
| b.eq ei_u | |
| b ei_bad | |
| // In every group: the length is checked first, then w10/w11/w12 are | |
| // loaded with characters 1/2/3 and compared in full. | |
| // ── 'a' mnemonics: add, and, asr, adrp ────────────────── | |
| ei_a: | |
| cmp x20, #4 | |
| b.eq ei_a4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'d' | |
| b.eq ei_a_d | |
| cmp w10, #'n' | |
| b.eq ei_a_n | |
| cmp w10, #'s' | |
| b.ne ei_bad | |
| cmp w11, #'r' // asr | |
| b.eq ei_asr | |
| b ei_bad | |
| ei_a_d: | |
| cmp w11, #'d' // add | |
| b.eq ei_add | |
| b ei_bad | |
| ei_a_n: | |
| cmp w11, #'d' // and | |
| b.eq ei_and | |
| b ei_bad | |
| ei_a4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'d' | |
| b.ne ei_bad | |
| cmp w11, #'r' | |
| b.ne ei_bad | |
| cmp w12, #'p' // adrp | |
| b.eq ei_adrp | |
| b ei_bad | |
| // ── 'b' mnemonics: b (also b.cond), bl, bic ───────────── | |
| ei_b: | |
| cmp x20, #1 | |
| b.eq ei_branch | |
| ldrb w10, [x19, #1] | |
| cmp x20, #2 | |
| b.eq ei_b2 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'i' | |
| b.ne ei_bad | |
| cmp w11, #'c' // bic | |
| b.eq ei_bic | |
| b ei_bad | |
| ei_b2: | |
| cmp w10, #'l' // bl | |
| b.eq ei_bl | |
| b ei_bad | |
| // ── 'c' mnemonics: cmp, cbz, clz, cbnz ────────────────── | |
| ei_c: | |
| cmp x20, #4 | |
| b.eq ei_c4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'m' | |
| b.eq ei_c_m | |
| cmp w10, #'b' | |
| b.eq ei_c_b | |
| cmp w10, #'l' | |
| b.ne ei_bad | |
| cmp w11, #'z' // clz | |
| b.eq ei_clz | |
| b ei_bad | |
| ei_c_m: | |
| cmp w11, #'p' // cmp | |
| b.eq ei_cmp | |
| b ei_bad | |
| ei_c_b: | |
| cmp w11, #'z' // cbz | |
| b.eq ei_cbz | |
| b ei_bad | |
| ei_c4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'b' | |
| b.ne ei_bad | |
| cmp w11, #'n' | |
| b.ne ei_bad | |
| cmp w12, #'z' // cbnz | |
| b.eq ei_cbnz | |
| b ei_bad | |
| // ── 'e' mnemonics: eor ────────────────────────────────── | |
| ei_e: | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'o' | |
| b.ne ei_bad | |
| cmp w11, #'r' // eor | |
| b.eq ei_eor | |
| b ei_bad | |
| // ── 'l' mnemonics: ldr, lsl, lsr, ldrb ────────────────── | |
| ei_l: | |
| cmp x20, #4 | |
| b.eq ei_l4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'d' | |
| b.eq ei_l_d | |
| cmp w10, #'s' | |
| b.ne ei_bad | |
| // lsl / lsr share one encoder, which re-reads character 2 itself | |
| cmp w11, #'l' | |
| b.eq ei_ls_shift | |
| cmp w11, #'r' | |
| b.eq ei_ls_shift | |
| b ei_bad | |
| ei_l_d: | |
| cmp w11, #'r' // ldr | |
| b.eq ei_ldr | |
| b ei_bad | |
| ei_l4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'d' | |
| b.ne ei_bad | |
| cmp w11, #'r' | |
| b.ne ei_bad | |
| cmp w12, #'b' // ldrb | |
| b.eq ei_ldrb | |
| b ei_bad | |
| // ── 'm' mnemonics: mov, mul, mvn, movz, movn, movk ────── | |
| ei_m: | |
| cmp x20, #4 | |
| b.eq ei_m4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'o' | |
| b.eq ei_m_o | |
| cmp w10, #'u' | |
| b.eq ei_m_u | |
| cmp w10, #'v' | |
| b.ne ei_bad | |
| cmp w11, #'n' // mvn | |
| b.eq ei_mvn | |
| b ei_bad | |
| ei_m_o: | |
| cmp w11, #'v' // mov | |
| b.eq ei_mov | |
| b ei_bad | |
| ei_m_u: | |
| cmp w11, #'l' // mul | |
| b.eq ei_mul | |
| b ei_bad | |
| ei_m4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'o' | |
| b.ne ei_bad | |
| cmp w11, #'v' | |
| b.ne ei_bad | |
| cmp w12, #'z' // movz | |
| b.eq ei_movz | |
| cmp w12, #'n' // movn | |
| b.eq ei_movn | |
| cmp w12, #'k' // movk | |
| b.eq ei_movk | |
| b ei_bad | |
| // ── 'n' mnemonics: neg ────────────────────────────────── | |
| ei_n: | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'e' | |
| b.ne ei_bad | |
| cmp w11, #'g' // neg | |
| b.eq ei_neg | |
| b ei_bad | |
| // ── 'o' mnemonics: orr ────────────────────────────────── | |
| ei_o: | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'r' | |
| b.ne ei_bad | |
| cmp w11, #'r' // orr | |
| b.eq ei_orr | |
| b ei_bad | |
| // ── 'r' mnemonics: ret, rbit ──────────────────────────── | |
| ei_r: | |
| cmp x20, #4 | |
| b.eq ei_r4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'e' | |
| b.ne ei_bad | |
| cmp w11, #'t' // ret | |
| b.eq ei_ret | |
| b ei_bad | |
| ei_r4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'b' | |
| b.ne ei_bad | |
| cmp w11, #'i' | |
| b.ne ei_bad | |
| cmp w12, #'t' // rbit | |
| b.eq ei_rbit | |
| b ei_bad | |
| // ── 's' mnemonics: sub, str, svc, strb ────────────────── | |
| ei_s: | |
| cmp x20, #4 | |
| b.eq ei_s4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'u' | |
| b.eq ei_s_u | |
| cmp w10, #'t' | |
| b.eq ei_s_t | |
| cmp w10, #'v' | |
| b.ne ei_bad | |
| cmp w11, #'c' // svc | |
| b.eq ei_svc | |
| b ei_bad | |
| ei_s_u: | |
| cmp w11, #'b' // sub | |
| b.eq ei_sub | |
| b ei_bad | |
| ei_s_t: | |
| cmp w11, #'r' // str | |
| b.eq ei_str | |
| b ei_bad | |
| ei_s4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'t' | |
| b.ne ei_bad | |
| cmp w11, #'r' | |
| b.ne ei_bad | |
| cmp w12, #'b' // strb | |
| b.eq ei_strb | |
| b ei_bad | |
| // ── 't' mnemonics: tbz, tbnz ──────────────────────────── | |
| ei_t: | |
| cmp x20, #4 | |
| b.eq ei_t4 | |
| cmp x20, #3 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| cmp w10, #'b' | |
| b.ne ei_bad | |
| cmp w11, #'z' // tbz | |
| b.eq ei_tbz | |
| b ei_bad | |
| ei_t4: | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'b' | |
| b.ne ei_bad | |
| cmp w11, #'n' | |
| b.ne ei_bad | |
| cmp w12, #'z' // tbnz | |
| b.eq ei_tbnz | |
| b ei_bad | |
| // ── 'u' mnemonics: ubfx, udiv ─────────────────────────── | |
| ei_u: | |
| cmp x20, #4 | |
| b.ne ei_bad | |
| ldrb w10, [x19, #1] | |
| ldrb w11, [x19, #2] | |
| ldrb w12, [x19, #3] | |
| cmp w10, #'b' | |
| b.eq ei_u_b | |
| cmp w10, #'d' | |
| b.ne ei_bad | |
| cmp w11, #'i' | |
| b.ne ei_bad | |
| cmp w12, #'v' // udiv | |
| b.eq ei_udiv | |
| b ei_bad | |
| ei_u_b: | |
| cmp w11, #'f' | |
| b.ne ei_bad | |
| cmp w12, #'x' // ubfx | |
| b.eq ei_ubfx | |
| b ei_bad | |
| // unknown mnemonic: report it | |
| ei_bad: | |
| adrp x0, msg_badins | |
| add x0, x0, :lo12:msg_badins | |
| mov x1, #msg_badins_len | |
| bl error_at | |
| // error_at presumably does not return (not visible here); if it ever | |
| // does, leave cleanly instead of sliding into the next encoder. | |
| b ei_done | |
| // ── instruction encoders ──────────────────────────────────── | |
| // ret — emits the fixed word 0xD65F03C0; operands are not examined | |
| ei_ret: | |
| movz w0, #0xD65F, lsl #16 | |
| movk w0, #0x03C0 | |
| bl emit_inst | |
| b ei_done | |
| // svc #imm16 — 0xD4000001 | (imm16 << 5) | |
| ei_svc: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_hash_imm // x0 = immediate value | |
| and w0, w0, #0xFFFF // keep 16 bits | |
| lsl w0, w0, #5 // imm16 field | |
| movz w9, #0xD400, lsl #16 | |
| orr w0, w0, w9 | |
| orr w0, w0, #1 // low opcode bit of 0xD4000001 | |
| bl emit_inst | |
| b ei_done | |
| // b label / bl label / b.cond label | |
| // B: 0x14000000 | imm26 | |
| // BL: 0x94000000 | imm26 | |
| // B.cond: 0x54000000 | (imm19 << 5) | cond | |
| // The mnemonic token of "b.eq" is just "b"; the operand text then starts | |
| // with '.', which is how the conditional form is recognised here. | |
| // Offsets are (target - PC) / 4 with PC = text_base + text_pos. | |
| ei_branch: | |
| mov x0, x21 | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #'.' | |
| b.eq ei_bcond | |
| movz w22, #0x1400, lsl #16 // B opcode | |
| b ei_branch_imm26 | |
| ei_bl: | |
| mov x0, x21 | |
| bl skip_ws | |
| movz w22, #0x9400, lsl #16 // BL opcode | |
| ei_branch_imm26: | |
| // x0 = label text, w22 = opcode bits (x22 is restored by ei_done) | |
| bl parse_label_ref // x0 = target address | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x9, x9, x10 // address of this instruction | |
| sub x0, x0, x9 | |
| asr x0, x0, #2 | |
| and w0, w0, #0x3FFFFFF // imm26 | |
| orr w0, w0, w22 | |
| bl emit_inst | |
| b ei_done | |
| ei_bcond: | |
| add x0, x0, #1 // skip '.' | |
| bl parse_cond // x0 = cond or -1, x1 = ptr past | |
| mov x22, x0 // cond code | |
| // parse_cond answers -1 for an unknown suffix. Compared unsigned, | |
| // -1 is above 14, so this one test rejects it before it can be | |
| // OR-ed into the instruction word as 0xFFFFFFFF. | |
| cmp x22, #14 | |
| b.hi ei_bad | |
| mov x0, x1 | |
| bl skip_ws | |
| bl parse_label_ref // x0 = target address | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x9, x9, x10 // address of this instruction | |
| sub x0, x0, x9 | |
| asr x0, x0, #2 | |
| and w0, w0, #0x7FFFF // imm19 | |
| orr w22, w22, w0, lsl #5 // cond | (imm19 << 5) | |
| movz w0, #0x5400, lsl #16 | |
| orr w0, w0, w22 | |
| bl emit_inst | |
| b ei_done | |
| // clz Rd, Rn — 64-bit: 0xDAC01000, 32-bit: 0x5AC01000 | |
| // rbit Rd, Rn — 64-bit: 0xDAC00000, 32-bit: 0x5AC00000 | |
| // Both share one tail; w22 starts as the 32-bit opcode and collects the | |
| // remaining fields as the operands are parsed. | |
| ei_clz: | |
| movz w22, #0x5AC0, lsl #16 | |
| movk w22, #0x1000 | |
| b ei_clz_rbit | |
| ei_rbit: | |
| movz w22, #0x5AC0, lsl #16 | |
| ei_clz_rbit: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rd, x1 = sf, x2 = next | |
| orr w22, w22, w0 // | Rd | |
| cbz x1, 1f | |
| orr w22, w22, #0x80000000 // sf=1 turns 0x5AC0.. into 0xDAC0.. | |
| 1: mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rn | |
| orr w0, w22, w0, lsl #5 // | Rn | |
| bl emit_inst | |
| b ei_done | |
| // adrp Rd, symbol — 1 immlo 10000 immhi Rd | |
| // imm21 = page(target) - page(PC), counted in 4 KiB pages | |
| ei_adrp: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rd | |
| movz w22, #0x9000, lsl #16 // ADRP opcode | |
| orr w22, w22, w0 // | Rd | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_label_ref // x0 = target address | |
| asr x23, x0, #12 // page number of the target | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x9, x9, x10 // PC = text_base + text_pos | |
| asr x9, x9, #12 // page number of PC | |
| sub x23, x23, x9 // signed page delta; low 21 bits are used | |
| and w9, w23, #3 // immlo = imm21[1:0] | |
| orr w22, w22, w9, lsl #29 | |
| ubfx w10, w23, #2, #19 // immhi = imm21[20:2] | |
| orr w0, w22, w10, lsl #5 | |
| bl emit_inst | |
| b ei_done | |
| // add/sub Rd, Rn, #imm | |
| // add/sub Rd, Rn, :lo12:sym (also #:lo12:sym) | |
| // add/sub Rd, Rn, Rm [, lsl|lsr|asr #N] | |
| // | |
| // Immediate form: sf op 0 100010 sh imm12 Rn Rd | |
| // - a negative immediate flips ADD <-> SUB and uses the magnitude | |
| // - a value above 4095 with its low 12 bits clear uses sh=1 (imm12 << 12) | |
| // - anything else above 4095 is reported (in pass 2) instead of being | |
| // silently cut down to 12 bits | |
| // Register form: sf op 0 01011 shift 0 Rm imm6 Rn Rd | |
| // | |
| // x19/x20 (mnemonic start/length) are not needed after dispatch and are | |
| // restored by ei_done, so they serve as call-surviving temporaries here. | |
| ei_add: | |
| mov x22, #0 // op=0 (ADD) | |
| b ei_addsub | |
| ei_sub: | |
| mov x22, #1 // op=1 (SUB) | |
| ei_addsub: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x23, x0 // Rd | |
| mov x24, x1 // sf | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register | |
| str x0, [sp, #56] // Rn, parked in the scratch slot | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| // is the third operand a register or immediate? | |
| ldrb w9, [x0] | |
| cmp w9, #'#' | |
| b.eq ei_addsub_imm | |
| cmp w9, #':' | |
| b.eq ei_addsub_imm // :lo12: treated as immediate | |
| // ── register form ── | |
| bl parse_register | |
| mov x21, x0 // Rm | |
| mov x0, x2 | |
| bl skip_ws | |
| mov x19, #0 // shift type, default 00 = LSL | |
| mov x20, #0 // shift amount, default 0 | |
| ldrb w10, [x0] | |
| cmp w10, #',' | |
| b.ne ei_addsub_reg_emit | |
| add x0, x0, #1 | |
| bl skip_ws | |
| // Three-letter shift name. "lsr" and "asr" are told apart from | |
| // "lsl" by their last and first letters; any other spelling keeps | |
| // the previous behaviour of being treated as LSL. | |
| ldrb w9, [x0] | |
| ldrb w10, [x0, #2] | |
| cmp w10, #'r' | |
| b.ne 1f | |
| mov x19, #1 // 01 = LSR | |
| cmp w9, #'a' | |
| b.ne 1f | |
| mov x19, #2 // 10 = ASR | |
| 1: add x0, x0, #3 // skip the shift name | |
| bl skip_ws | |
| bl parse_hash_imm | |
| mov x20, x0 // shift amount | |
| ei_addsub_reg_emit: | |
| ldr x10, [sp, #56] // Rn | |
| mov w0, w23 // Rd | |
| orr w0, w0, w10, lsl #5 // Rn | |
| and w11, w20, #0x3F | |
| orr w0, w0, w11, lsl #10 // imm6 | |
| orr w0, w0, w21, lsl #16 // Rm | |
| orr w0, w0, w19, lsl #22 // shift type | |
| orr w0, w0, w22, lsl #30 // op | |
| cbz x24, 2f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 2: movz w9, #0x0B00, lsl #16 | |
| orr w0, w0, w9 // 01011 in bits 28:24 | |
| bl emit_inst | |
| b ei_done | |
| // ── immediate form ── | |
| ei_addsub_imm: | |
| // x0 -> "#expr", "#:lo12:sym" or ":lo12:sym"; w9 = first character. | |
| // Remember whether this is a :lo12: reference: those keep the old | |
| // "low 12 bits" treatment and skip the sign/range handling. | |
| mov x19, #1 // assume :lo12: | |
| cmp w9, #':' | |
| b.eq 3f | |
| ldrb w10, [x0, #1] | |
| cmp w10, #':' | |
| b.eq 3f | |
| mov x19, #0 // plain numeric immediate | |
| 3: bl parse_hash_imm // x0 = value | |
| mov x21, x0 | |
| mov x20, #0 // sh = 0 | |
| cbnz x19, ei_addsub_imm_emit | |
| tbz x21, #63, 4f | |
| neg x21, x21 // add #-n == sub #n and vice versa | |
| eor x22, x22, #1 | |
| 4: lsr x9, x21, #12 | |
| cbz x9, ei_addsub_imm_emit // fits in 12 bits | |
| and x10, x21, #0xFFF | |
| cbnz x10, ei_addsub_imm_range // low bits set: not imm12 << 12 | |
| lsr x10, x9, #12 | |
| cbnz x10, ei_addsub_imm_range // wider than 24 bits | |
| mov x21, x9 // imm12 = value >> 12 | |
| mov x20, #1 // sh = 1 | |
| b ei_addsub_imm_emit | |
| ei_addsub_imm_range: | |
| // Report only in pass 2. In any other pass keep the old truncating | |
| // encoding: the instruction is 4 bytes either way. | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.ne ei_addsub_imm_emit | |
| adrp x0, msg_badimm | |
| add x0, x0, :lo12:msg_badimm | |
| mov x1, #msg_badimm_len | |
| bl error_at | |
| b ei_done // only reached if error_at returns | |
| ei_addsub_imm_emit: | |
| ldr x10, [sp, #56] // Rn | |
| and w0, w21, #0xFFF | |
| lsl w0, w0, #10 // imm12 field | |
| orr w0, w0, w23 // Rd | |
| orr w0, w0, w10, lsl #5 // Rn | |
| orr w0, w0, w20, lsl #22 // sh | |
| orr w0, w0, w22, lsl #30 // op | |
| cbz x24, 5f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 5: movz w9, #0x1100, lsl #16 | |
| orr w0, w0, w9 // 10001 in bits 28:24 | |
| bl emit_inst | |
| b ei_done | |
| // cmp Rn, Rm — SUBS zr, Rn, Rm | |
| // cmp Rn, #imm — SUBS zr, Rn, #imm | |
| // cmp Rn, #-imm — ADDS zr, Rn, #imm (the CMN encoding) | |
| // | |
| // A negative immediate used to be masked to 12 bits, so "cmp x0, #-1" | |
| // compared against 4095. It is now negated and encoded as ADDS. | |
| // A magnitude above 4095 uses sh=1 when its low 12 bits are clear and is | |
| // reported (in pass 2) otherwise. | |
| ei_cmp: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x23, x1 // sf from first reg | |
| str x0, [sp, #56] // Rn, parked in the scratch slot | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #'#' | |
| b.eq ei_cmp_imm | |
| // register form: SUBS zr, Rn, Rm | |
| bl parse_register | |
| mov x21, x0 // Rm | |
| ldr x10, [sp, #56] // Rn | |
| mov w0, #31 // Rd = xzr | |
| orr w0, w0, w10, lsl #5 | |
| orr w0, w0, w21, lsl #16 | |
| // sf 1 1 01011 00 0 Rm 000000 Rn Rd (S=1, op=1) | |
| movz w9, #0x6B00, lsl #16 | |
| orr w0, w0, w9 | |
| cbz x23, 1f | |
| orr w0, w0, #0x80000000 | |
| 1: bl emit_inst | |
| b ei_done | |
| ei_cmp_imm: | |
| bl parse_hash_imm // x0 = value | |
| mov x21, x0 | |
| // sf op 1 100010 sh imm12 Rn 11111 | |
| movz w22, #0x7100, lsl #16 // op=1: SUBS (CMP) | |
| tbz x21, #63, 2f | |
| neg x21, x21 | |
| movz w22, #0x3100, lsl #16 // op=0: ADDS (CMN) | |
| 2: lsr x9, x21, #12 | |
| cbz x9, ei_cmp_imm_emit // fits in 12 bits | |
| and x10, x21, #0xFFF | |
| cbnz x10, ei_cmp_imm_range // low bits set: not imm12 << 12 | |
| lsr x10, x9, #12 | |
| cbnz x10, ei_cmp_imm_range // wider than 24 bits | |
| mov x21, x9 // imm12 = value >> 12 | |
| orr w22, w22, #0x400000 // sh = 1 | |
| b ei_cmp_imm_emit | |
| ei_cmp_imm_range: | |
| // Report only in pass 2. In any other pass keep the old truncating | |
| // encoding: the instruction is 4 bytes either way. | |
| ldr x9, [x28, #ST_PASS] | |
| cmp x9, #2 | |
| b.ne ei_cmp_imm_emit | |
| adrp x0, msg_badimm | |
| add x0, x0, :lo12:msg_badimm | |
| mov x1, #msg_badimm_len | |
| bl error_at | |
| b ei_done // only reached if error_at returns | |
| ei_cmp_imm_emit: | |
| ldr x10, [sp, #56] // Rn | |
| mov w0, #31 // Rd = xzr | |
| orr w0, w0, w10, lsl #5 | |
| and w9, w21, #0xFFF | |
| orr w0, w0, w9, lsl #10 // imm12 | |
| orr w0, w0, w22 // opcode (+ sh) | |
| cbz x23, 3f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 3: bl emit_inst | |
| b ei_done | |
| // and/eor/orr Rd, Rn, #bitmask | |
| // and/eor/orr Rd, Rn, Rm [, lsl|lsr|asr|ror #N] | |
| // | |
| // Immediate form: sf opc 100100 N immr imms Rn Rd | |
| // Register form: sf opc 01010 shift 0 Rm imm6 Rn Rd | |
| // | |
| // The shift type and amount live in x19/x20. Those hold the mnemonic | |
| // start/length, are not needed after dispatch and are restored by | |
| // ei_done — unlike x25, which this frame never saves and which the | |
| // previous code overwrote. | |
| ei_and: | |
| mov x22, #0 // opc=00 | |
| b ei_logical | |
| ei_eor: | |
| mov x22, #2 // opc=10 | |
| b ei_logical | |
| ei_orr: | |
| mov x22, #1 // opc=01 | |
| ei_logical: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x23, x0 // Rd | |
| mov x24, x1 // sf | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register | |
| str x0, [sp, #56] // Rn, parked in the scratch slot | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #'#' | |
| b.eq ei_logical_imm | |
| // ── register form ── | |
| bl parse_register | |
| mov x21, x0 // Rm | |
| mov x0, x2 | |
| bl skip_ws | |
| mov x19, #0 // shift type, default 00 = LSL | |
| mov x20, #0 // shift amount, default 0 | |
| ldrb w9, [x0] | |
| cmp w9, #',' | |
| b.ne ei_logical_reg_emit | |
| add x0, x0, #1 | |
| bl skip_ws | |
| // Three-letter shift name. Names that do not end in 'r' keep the | |
| // previous behaviour of being treated as LSL. | |
| ldrb w9, [x0] | |
| ldrb w10, [x0, #2] | |
| cmp w10, #'r' | |
| b.ne 1f | |
| mov x19, #1 // 01 = LSR | |
| cmp w9, #'l' | |
| b.eq 1f | |
| mov x19, #2 // 10 = ASR | |
| cmp w9, #'a' | |
| b.eq 1f | |
| mov x19, #3 // 11 = ROR | |
| 1: add x0, x0, #3 // skip the shift name | |
| bl skip_ws | |
| bl parse_hash_imm | |
| and x20, x0, #0x3F // imm6 | |
| ei_logical_reg_emit: | |
| ldr x10, [sp, #56] // Rn | |
| mov w0, w23 // Rd | |
| orr w0, w0, w10, lsl #5 | |
| orr w0, w0, w20, lsl #10 // imm6 (shift amount) | |
| orr w0, w0, w21, lsl #16 // Rm | |
| orr w0, w0, w19, lsl #22 // shift type | |
| orr w0, w0, w22, lsl #29 // opc | |
| movz w9, #0x0A00, lsl #16 | |
| orr w0, w0, w9 | |
| cbz x24, 2f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 2: bl emit_inst | |
| b ei_done | |
| // ── immediate form ── | |
| ei_logical_imm: | |
| bl parse_hash_imm // x0 = value | |
| mov x1, #1 // is_32bit unless sf is set | |
| cbz x24, 3f | |
| mov x1, #0 | |
| 3: bl encode_logical_imm // x0 = (N<<12)|(immr<<6)|imms or -1 | |
| // Valid results are small non-negative numbers, so the sign bit alone | |
| // identifies the -1 failure value without needing a negative compare | |
| // immediate. | |
| tbnz x0, #63, ei_logical_bad | |
| mov x21, x0 | |
| ldr x10, [sp, #56] // Rn | |
| mov w0, w23 // Rd | |
| orr w0, w0, w10, lsl #5 | |
| orr w0, w0, w21, lsl #10 // N/immr/imms at bit 10 | |
| orr w0, w0, w22, lsl #29 // opc | |
| movz w9, #0x1200, lsl #16 // 100100 in bits 28:23 | |
| orr w0, w0, w9 | |
| cbz x24, 4f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 4: bl emit_inst | |
| b ei_done | |
| ei_logical_bad: | |
| adrp x0, msg_badimm | |
| add x0, x0, :lo12:msg_badimm | |
| mov x1, #msg_badimm_len | |
| bl error_at | |
| b ei_done // only reached if error_at returns | |
| // cbz/cbnz Rt, label — sf 011010 op imm19 Rt | |
| // w22 starts as the opcode (op in bit 24) and collects Rt and sf; the | |
| // branch offset is merged in last. | |
| ei_cbz: | |
| movz w22, #0x3400, lsl #16 // op = 0 | |
| b ei_cbz_common | |
| ei_cbnz: | |
| movz w22, #0x3500, lsl #16 // op = 1 | |
| ei_cbz_common: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rt, x1 = sf, x2 = next | |
| orr w22, w22, w0 // | Rt | |
| lsl w9, w1, #31 // sf moved to bit 31 | |
| orr w22, w22, w9 | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_label_ref // x0 = target address | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x9, x9, x10 // address of this instruction | |
| sub x0, x0, x9 // byte offset to the target | |
| ubfx w0, w0, #2, #19 // imm19 = offset[20:2] | |
| orr w0, w22, w0, lsl #5 | |
| bl emit_inst | |
| b ei_done | |
| // ldr/ldrb/str/strb Rt, [Rn] | |
| // Rt, [Rn, #imm] / [Rn, :lo12:sym] | |
| // Rt, [Rn, Rm {, lsl #N}] | |
| // Rt, [Rn], #imm (post-index) | |
| // | |
| // Register roles while encoding: | |
| // x22 = opc (1 load, 0 store) x24 = byte access flag | |
| // x23 = Rt x21 = sf of Rt (picks 32/64-bit size) | |
| // [sp,#56] = Rn (scratch slot) | |
| // x19/x20 = call-surviving temporaries. They hold the mnemonic | |
| // start/length, are not needed after dispatch and are restored by | |
| // ei_done. | |
| // | |
| // Rm is kept in x20. It used to be stored to [sp,#48], which is where the | |
| // prologue keeps the caller's x24, so ei_done handed Rm back as x24. | |
| // The S bit is kept in x19 rather than in w10 across the helper calls. | |
| // | |
| // Plain '#' offsets that are negative or not a multiple of the access | |
| // size use the unscaled 9-bit form (size 111000 opc 0 imm9 00 Rn Rt); | |
| // offsets that fit neither form are reported in pass 2. | |
| ei_ldr: | |
| mov x22, #1 // opc=01 (load) | |
| mov x24, #0 // not byte | |
| b ei_ldst | |
| ei_ldrb: | |
| mov x22, #1 | |
| mov x24, #1 // byte | |
| b ei_ldst | |
| ei_str: | |
| mov x22, #0 // opc=00 (store) | |
| mov x24, #0 | |
| b ei_ldst | |
| ei_strb: | |
| mov x22, #0 | |
| mov x24, #1 | |
| ei_ldst: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x23, x0 // Rt | |
| mov x21, x1 // sf (size for non-byte) | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| add x0, x0, #1 // skip '[' | |
| bl skip_ws | |
| bl parse_register // Rn | |
| str x0, [sp, #56] // save Rn | |
| mov x0, x2 | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #']' | |
| b.eq ei_ldst_base_only | |
| cmp w9, #',' | |
| b.ne ei_ldst_err | |
| add x0, x0, #1 | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #'#' | |
| b.eq ei_ldst_uimm | |
| cmp w9, #':' | |
| b.eq ei_ldst_uimm | |
| // ── register offset: Rm [, lsl #N] ── | |
| bl parse_register | |
| mov x20, x0 // Rm | |
| mov x0, x2 | |
| bl skip_ws | |
| mov x19, #0 // S=0 | |
| ldrb w9, [x0] | |
| cmp w9, #']' | |
| b.eq ei_ldst_reg_emit | |
| add x0, x0, #1 // ',' | |
| bl skip_ws | |
| add x0, x0, #3 // "lsl" | |
| bl skip_ws | |
| bl parse_hash_imm | |
| cbz x0, ei_ldst_reg_emit // lsl #0 leaves S clear | |
| mov x19, #1 // S=1 | |
| ei_ldst_reg_emit: | |
| // size 111000 opc 1 Rm option S 10 Rn Rt | |
| ldr x9, [sp, #56] // Rn | |
| mov w0, #0 // size 00: byte | |
| cbnz x24, 1f | |
| mov w0, #2 // size 10: 32-bit | |
| cbz x21, 1f | |
| mov w0, #3 // size 11: 64-bit | |
| 1: lsl w0, w0, #30 | |
| orr w0, w0, w23 // Rt | |
| orr w0, w0, w9, lsl #5 // Rn | |
| orr w0, w0, #0x800 // bits 11:10 = 10 (fixed) | |
| orr w0, w0, w19, lsl #12 // S bit | |
| mov w12, #3 | |
| orr w0, w0, w12, lsl #13 // option=011 | |
| orr w0, w0, w20, lsl #16 // Rm | |
| orr w0, w0, #0x00200000 // bit 21 | |
| orr w0, w0, w22, lsl #22 // opc | |
| movz w9, #0x3800, lsl #16 | |
| orr w0, w0, w9 | |
| bl emit_inst | |
| b ei_done | |
| // ── [Rn] alone, or [Rn], #imm ── | |
| ei_ldst_base_only: | |
| add x0, x0, #1 // skip ']' | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #',' | |
| b.eq ei_ldst_post | |
| mov x0, #0 // offset 0 | |
| mov x19, #0 // plain (not :lo12:) | |
| b ei_ldst_uimm_encode | |
| // ── immediate offset ── | |
| ei_ldst_uimm: | |
| // x0 -> "#expr", "#:lo12:sym" or ":lo12:sym". x19 = 1 for a :lo12: | |
| // reference, which keeps the old scaled-and-masked treatment. | |
| mov x19, #1 | |
| ldrb w9, [x0] | |
| cmp w9, #':' | |
| b.eq 2f | |
| ldrb w9, [x0, #1] | |
| cmp w9, #':' | |
| b.eq 2f | |
| mov x19, #0 | |
| 2: bl parse_hash_imm | |
| ei_ldst_uimm_encode: | |
| mov x10, x0 // byte offset | |
| mov w9, #0 // size == log2(access bytes) | |
| cbnz x24, 3f | |
| mov w9, #2 | |
| cbz x21, 3f | |
| mov w9, #3 | |
| 3: cbnz x19, ei_ldst_scaled | |
| tbnz x10, #63, ei_ldst_unscaled // negative offset | |
| mov x11, #1 | |
| lsl x11, x11, x9 | |
| sub x11, x11, #1 // access size - 1 | |
| and x11, x11, x10 | |
| cbnz x11, ei_ldst_unscaled // not a multiple of the size | |
| lsr x11, x10, x9 | |
| lsr x11, x11, #12 | |
| cbnz x11, ei_ldst_range // scaled offset above 4095 | |
| ei_ldst_scaled: | |
| // size 111001 opc imm12 Rn Rt, imm12 = offset / access size | |
| lsr x10, x10, x9 | |
| and w10, w10, #0xFFF | |
| ldr x12, [sp, #56] // Rn | |
| lsl w0, w9, #30 | |
| orr w0, w0, w23 // Rt | |
| orr w0, w0, w12, lsl #5 // Rn | |
| orr w0, w0, w10, lsl #10 // imm12 | |
| orr w0, w0, w22, lsl #22 // opc | |
| movz w11, #0x3900, lsl #16 | |
| orr w0, w0, w11 | |
| bl emit_inst | |
| b ei_done | |
| ei_ldst_unscaled: | |
| // size 111000 opc 0 imm9 00 Rn Rt, imm9 = signed byte offset | |
| add x11, x10, #256 | |
| lsr x11, x11, #9 | |
| cbnz x11, ei_ldst_range // outside -256..255 | |
| and w10, w10, #0x1FF | |
| ldr x12, [sp, #56] // Rn | |
| lsl w0, w9, #30 | |
| orr w0, w0, w23 // Rt | |
| orr w0, w0, w12, lsl #5 // Rn | |
| orr w0, w0, w10, lsl #12 // imm9 | |
| orr w0, w0, w22, lsl #22 // opc | |
| movz w11, #0x3800, lsl #16 | |
| orr w0, w0, w11 | |
| bl emit_inst | |
| b ei_done | |
| ei_ldst_range: | |
| // Report only in pass 2. In any other pass keep the old truncating | |
| // encoding: the instruction is 4 bytes either way. | |
| ldr x11, [x28, #ST_PASS] | |
| cmp x11, #2 | |
| b.ne ei_ldst_scaled | |
| adrp x0, msg_badimm | |
| add x0, x0, :lo12:msg_badimm | |
| mov x1, #msg_badimm_len | |
| bl error_at | |
| b ei_done // only reached if error_at returns | |
| // ── post-index: size 111000 opc 0 imm9 01 Rn Rt ── | |
| ei_ldst_post: | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_hash_imm | |
| and w10, w0, #0x1FF // imm9 | |
| ldr x12, [sp, #56] // Rn | |
| mov w9, #0 | |
| cbnz x24, 4f | |
| mov w9, #2 | |
| cbz x21, 4f | |
| mov w9, #3 | |
| 4: lsl w0, w9, #30 | |
| orr w0, w0, w23 // Rt | |
| orr w0, w0, w12, lsl #5 // Rn | |
| orr w0, w0, #0x00000400 // post-index bits | |
| orr w0, w0, w10, lsl #12 | |
| orr w0, w0, w22, lsl #22 // opc | |
| movz w9, #0x3800, lsl #16 | |
| orr w0, w0, w9 | |
| bl emit_inst | |
| b ei_done | |
| ei_ldst_err: | |
| adrp x0, msg_syntax | |
| add x0, x0, :lo12:msg_syntax | |
| mov x1, #msg_syntax_len | |
| bl error_at | |
| b ei_done // only reached if error_at returns | |
| // mul Rd, Rn, Rm — MADD Rd, Rn, Rm, XZR | |
| // 64-bit: 0x9B007C00 | (Rm<<16) | (Rn<<5) | Rd, 32-bit: 0x1B007C00 | ... | |
| // udiv Rd, Rn, Rm | |
| // 64-bit: 0x9AC00800 | (Rm<<16) | (Rn<<5) | Rd, 32-bit: 0x1AC00800 | ... | |
| // Both share one tail; w22 starts as the 32-bit opcode and collects the | |
| // register fields as they are parsed. | |
| ei_mul: | |
| movz w22, #0x1B00, lsl #16 | |
| movk w22, #0x7C00 | |
| b ei_mul_udiv | |
| ei_udiv: | |
| movz w22, #0x1AC0, lsl #16 | |
| movk w22, #0x0800 | |
| ei_mul_udiv: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rd, x1 = sf, x2 = next | |
| orr w22, w22, w0 // | Rd | |
| cbz x1, 1f | |
| orr w22, w22, #0x80000000 // sf=1 selects the 64-bit opcode | |
| 1: mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rn | |
| orr w22, w22, w0, lsl #5 // | Rn | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rm | |
| orr w0, w22, w0, lsl #16 // | Rm | |
| bl emit_inst | |
| b ei_done | |
| // lsl/lsr Rd, Rn, #n — UBFM alias | |
| // lsl: immr = (-n) & (size-1), imms = size-1-n | |
| // lsr: immr = n, imms = size-1 | |
| // lsl/lsr Rd, Rn, Rm — LSLV (0x1AC02000) / LSRV (0x1AC02400) | |
| // w22 collects Rd and Rn; w24 keeps the third mnemonic letter ('l'/'r'). | |
| ei_ls_shift: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x22, x0 // Rd | |
| mov x23, x1 // sf | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register | |
| orr w22, w22, w0, lsl #5 // | Rn | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| ldrb w24, [x19, #2] // 'l' = left, 'r' = right | |
| ldrb w10, [x0] | |
| cmp w10, #'#' | |
| b.eq ei_shift_imm | |
| // register form | |
| bl parse_register // x0 = Rm | |
| orr w22, w22, w0, lsl #16 // | Rm | |
| movz w9, #0x1AC0, lsl #16 | |
| movk w9, #0x2000 // LSLV | |
| cmp w24, #'r' | |
| b.ne 1f | |
| movk w9, #0x2400 // LSRV | |
| 1: cbz x23, 2f | |
| orr w9, w9, #0x80000000 // sf=1 | |
| 2: orr w0, w22, w9 | |
| bl emit_inst | |
| b ei_done | |
| ei_shift_imm: | |
| bl parse_hash_imm // x0 = shift amount n | |
| mov x11, #31 // size-1 for Wd | |
| movz w12, #0x5300, lsl #16 // 32-bit UBFM | |
| cbz x23, 1f | |
| mov x11, #63 // size-1 for Xd | |
| movz w12, #0xD340, lsl #16 // 64-bit UBFM (N=1) | |
| 1: orr w22, w22, w12 | |
| cmp w24, #'r' | |
| b.eq ei_lsr_imm | |
| neg x10, x0 | |
| and x10, x10, x11 // immr = (-n) & (size-1) | |
| sub x11, x11, x0 // imms = size-1-n | |
| b ei_ubfm_emit | |
| ei_lsr_imm: | |
| mov x10, x0 // immr = n; imms stays size-1 | |
| ei_ubfm_emit: | |
| orr w0, w22, w11, lsl #10 // imms | |
| orr w0, w0, w10, lsl #16 // immr | |
| bl emit_inst | |
| b ei_done | |
| // asr Rd, Rn, #n — SBFM Rd, Rn, #n, #(size-1) | |
| // 32-bit: 0x13000000, 64-bit: 0x93400000 | |
| // asr Rd, Rn, Rm — ASRV: 0x1AC02800, 64-bit: 0x9AC02800 | |
| // w22 collects Rd, Rn and the third operand; the opcode is merged last. | |
| ei_asr: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x22, x0 // Rd | |
| mov x23, x1 // sf | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register | |
| orr w22, w22, w0, lsl #5 // | Rn | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| ldrb w10, [x0] | |
| cmp w10, #'#' | |
| b.eq ei_asr_imm | |
| // register form: ASRV | |
| bl parse_register // x0 = Rm | |
| orr w22, w22, w0, lsl #16 // | Rm | |
| movz w9, #0x1AC0, lsl #16 | |
| movk w9, #0x2800 | |
| cbz x23, 1f | |
| orr w9, w9, #0x80000000 // sf=1 | |
| 1: orr w0, w22, w9 | |
| bl emit_inst | |
| b ei_done | |
| ei_asr_imm: | |
| bl parse_hash_imm // x0 = shift amount | |
| orr w22, w22, w0, lsl #16 // immr = shift amount | |
| // the low half of the opcode word already carries imms << 10 | |
| movz w9, #0x1300, lsl #16 // 32-bit SBFM | |
| movk w9, #0x7C00 // imms = 31 | |
| cbz x23, 2f | |
| movz w9, #0x9340, lsl #16 // 64-bit SBFM (N=1) | |
| movk w9, #0xFC00 // imms = 63 | |
| 2: orr w0, w22, w9 | |
| bl emit_inst | |
| b ei_done | |
| // bic Rd, Rn, Rm — AND Rd, Rn, ~Rm | |
| // sf 00 01010 sh 1 Rm imm6 Rn Rd (32-bit 0x0A200000, 64-bit 0x8A200000) | |
| // w22 starts as the 32-bit opcode and collects the register fields. | |
| ei_bic: | |
| movz w22, #0x0A20, lsl #16 | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rd, x1 = sf, x2 = next | |
| orr w22, w22, w0 // | Rd | |
| cbz x1, 1f | |
| orr w22, w22, #0x80000000 // sf=1 | |
| 1: mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rn | |
| orr w22, w22, w0, lsl #5 // | Rn | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rm | |
| orr w0, w22, w0, lsl #16 // | Rm | |
| bl emit_inst | |
| b ei_done | |
| // mov Rd, Rm — ORR Rd, ZR, Rm, or ADD Rd, Rn, #0 when SP is involved | |
| // mov Rd, #imm — first of MOVZ, MOVN, ORR (bitmask) that can hold the value | |
| // | |
| // Register number 31 is used both for the stack pointer (ADD form below) | |
| // and for the zero register (Rd = xzr in cmp). The ADD form is therefore | |
| // chosen only when the operand text spells sp/wsp; a register 31 spelled | |
| // any other way takes the ORR form. | |
| // NOTE(review): assumes parse_register accepts "xzr"/"wzr" — confirm. | |
| // | |
| // x19/x20/x21 are call-surviving temporaries here (all restored by | |
| // ei_done). x25 is no longer touched: this frame never saves it. | |
| ei_mov: | |
| mov x0, x21 | |
| bl skip_ws | |
| mov x19, x0 // text of the destination register | |
| bl parse_register | |
| mov x22, x0 // Rd | |
| mov x23, x1 // sf | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| ldrb w9, [x0] | |
| cmp w9, #'#' | |
| b.eq ei_mov_imm | |
| // ── register form ── | |
| mov x21, x0 // text of the source register | |
| bl parse_register | |
| mov x24, x0 // Rm number | |
| // "sp" has 's' as its first letter, "wsp" as its second; bit 0x20 | |
| // is set first so an upper-case spelling is matched as well. | |
| cmp x22, #31 | |
| b.ne ei_mov_chk_src | |
| ldrb w9, [x19] | |
| orr w9, w9, #0x20 | |
| cmp w9, #'s' | |
| b.eq ei_mov_add | |
| ldrb w9, [x19, #1] | |
| orr w9, w9, #0x20 | |
| cmp w9, #'s' | |
| b.eq ei_mov_add | |
| ei_mov_chk_src: | |
| cmp x24, #31 | |
| b.ne ei_mov_orr | |
| ldrb w9, [x21] | |
| orr w9, w9, #0x20 | |
| cmp w9, #'s' | |
| b.eq ei_mov_add | |
| ldrb w9, [x21, #1] | |
| orr w9, w9, #0x20 | |
| cmp w9, #'s' | |
| b.eq ei_mov_add | |
| ei_mov_orr: | |
| // ORR Rd, ZR, Rm — 32-bit 0x2A0003E0, 64-bit 0xAA0003E0 | |
| movz w0, #0x2A00, lsl #16 | |
| movk w0, #0x03E0 | |
| cbz x23, 1f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 1: orr w0, w0, w22 // Rd | |
| orr w0, w0, w24, lsl #16 // Rm | |
| bl emit_inst | |
| b ei_done | |
| ei_mov_add: | |
| // ADD Rd, Rn, #0 | |
| mov w0, w22 | |
| orr w0, w0, w24, lsl #5 | |
| movz w9, #0x1100, lsl #16 | |
| orr w0, w0, w9 | |
| cbz x23, 2f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 2: bl emit_inst | |
| b ei_done | |
| // ── immediate form ── | |
| ei_mov_imm: | |
| bl parse_hash_imm | |
| mov x24, x0 // value | |
| cbnz x23, ei_mov_first_try | |
| and x24, x24, #0xFFFFFFFF // Wd: only the low word matters | |
| ei_mov_first_try: | |
| movz w21, #0x5280, lsl #16 // first attempt: MOVZ | |
| ei_mov_scan: | |
| // look for a halfword position x20 such that every other bit of | |
| // x24 is zero | |
| mov x20, #0 | |
| ei_mov_hw_loop: | |
| movz x10, #0xFFFF | |
| lsl x10, x10, x20 | |
| bic x11, x24, x10 // value with this halfword cleared | |
| cbz x11, ei_mov_found_hw | |
| add x20, x20, #16 | |
| cmp x20, #64 | |
| b.lt ei_mov_hw_loop | |
| // No single halfword holds it. Invert the value (within the operand | |
| // width): after a failed MOVZ scan this prepares the MOVN scan, | |
| // after a failed MOVN scan it restores the original value for the | |
| // bitmask attempt. | |
| mvn x24, x24 | |
| cbnz x23, 3f | |
| and x24, x24, #0xFFFFFFFF | |
| 3: tbz x21, #30, ei_mov_bitmask // bit 30 clear: MOVN was already tried | |
| movz w21, #0x1280, lsl #16 // second attempt: MOVN | |
| b ei_mov_scan | |
| ei_mov_bitmask: | |
| // last resort: ORR Rd, ZR, #bitmask (0x320003E0 | fields << 10) | |
| mov x0, x24 | |
| mov x1, #1 // is_32bit unless sf is set | |
| cbz x23, 4f | |
| mov x1, #0 | |
| 4: bl encode_logical_imm // x0 = (N<<12)|(immr<<6)|imms or -1 | |
| tbnz x0, #63, ei_mov_badimm | |
| lsl w0, w0, #10 | |
| orr w0, w0, w22 // Rd | |
| movz w9, #0x3200, lsl #16 | |
| movk w9, #0x03E0 // Rn = 31 | |
| orr w0, w0, w9 | |
| cbz x23, 5f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 5: bl emit_inst | |
| b ei_done | |
| ei_mov_badimm: | |
| adrp x0, msg_badimm | |
| add x0, x0, :lo12:msg_badimm | |
| mov x1, #msg_badimm_len | |
| bl error_at | |
| b ei_done // only reached if error_at returns | |
| ei_mov_found_hw: | |
| // w21 = MOVZ/MOVN opcode, x20 = bit position of the halfword | |
| // (0/16/32/48), x24 = value (already inverted for MOVN) | |
| lsr x9, x24, x20 | |
| and w9, w9, #0xFFFF // imm16 | |
| lsr w10, w20, #4 // hw = position / 16 | |
| orr w0, w21, w22 // opcode | Rd | |
| orr w0, w0, w9, lsl #5 // imm16 field | |
| orr w0, w0, w10, lsl #21 // hw field | |
| cbz x23, 6f | |
| orr w0, w0, #0x80000000 // sf=1 | |
| 6: bl emit_inst | |
| b ei_done | |
| // movz/movn/movk Rd, #imm16 [, lsl #N] | |
| // MOVZ: sf 10 100101 hw imm16 Rd (0x52800000 / 0xD2800000) | |
| // MOVN: sf 00 100101 hw imm16 Rd (0x12800000 / 0x92800000) | |
| // MOVK: sf 11 100101 hw imm16 Rd (0x72800000 / 0xF2800000) | |
| // | |
| // The 64-bit opcode is the 32-bit one with bit 31 set, so w22 starts as | |
| // the 32-bit opcode and sf is OR-ed in after the register is parsed. | |
| // imm16 is kept in x20 across the helper calls. x20 holds the mnemonic | |
| // length, is not needed after dispatch and is restored by ei_done — | |
| // unlike x25, which this frame never saves and which the previous code | |
| // overwrote. | |
| ei_movz: | |
| movz w22, #0x5280, lsl #16 | |
| b ei_movwide | |
| ei_movn: | |
| movz w22, #0x1280, lsl #16 | |
| b ei_movwide | |
| ei_movk: | |
| movz w22, #0x7280, lsl #16 | |
| ei_movwide: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rd, x1 = sf, x2 = next | |
| orr w22, w22, w0 // | Rd | |
| cbz x1, 1f | |
| orr w22, w22, #0x80000000 // sf=1 | |
| 1: mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_hash_imm // x0 = imm16, x1 = ptr past it | |
| and w20, w0, #0xFFFF // imm16 | |
| mov x0, x1 | |
| // check for optional ", lsl #N" | |
| bl skip_ws | |
| mov w10, #0 // hw = 0 default | |
| ldrb w11, [x0] | |
| cmp w11, #',' | |
| b.ne ei_movwide_emit | |
| add x0, x0, #1 | |
| bl skip_ws | |
| add x0, x0, #3 // skip "lsl" | |
| bl skip_ws | |
| bl parse_hash_imm // #shift (0/16/32/48) | |
| lsr w10, w0, #4 // hw = shift / 16 | |
| and w10, w10, #3 // hw is a 2-bit field | |
| ei_movwide_emit: | |
| orr w0, w22, w20, lsl #5 // | imm16 | |
| orr w0, w0, w10, lsl #21 // | hw | |
| bl emit_inst | |
| b ei_done | |
| // tbz/tbnz Rt, #bit, label — b5 011011 op b40 imm14 Rt | |
| // w22 starts as the opcode (op in bit 24) and collects Rt and the bit | |
| // number; the branch offset is merged in last. | |
| ei_tbz: | |
| movz w22, #0x3600, lsl #16 // op = 0 | |
| b ei_tbz_common | |
| ei_tbnz: | |
| movz w22, #0x3700, lsl #16 // op = 1 | |
| ei_tbz_common: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rt, x2 = next | |
| orr w22, w22, w0 // | Rt | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_hash_imm // x0 = bit number, x1 = ptr past it | |
| and w9, w0, #0x1F | |
| orr w22, w22, w9, lsl #19 // b40 = bit[4:0] | |
| lsr w9, w0, #5 | |
| orr w22, w22, w9, lsl #31 // b5 = bit[5] | |
| mov x0, x1 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_label_ref // x0 = target address | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x9, x9, x10 // address of this instruction | |
| sub x0, x0, x9 // byte offset to the target | |
| ubfx w0, w0, #2, #14 // imm14 = offset[15:2] | |
| orr w0, w22, w0, lsl #5 | |
| bl emit_inst | |
| b ei_done | |
| // neg Rd, Rm — alias for sub Rd, xzr, Rm | |
| // 64-bit: 0xCB0003E0 | (Rm<<16) | Rd, 32-bit: 0x4B0003E0 | (Rm<<16) | Rd | |
| // mvn Rd, Rm — alias for orn Rd, xzr, Rm | |
| // 64-bit: 0xAA2003E0 | (Rm<<16) | Rd, 32-bit: 0x2A2003E0 | (Rm<<16) | Rd | |
| // Both share one tail; w22 starts as the 32-bit opcode and collects the | |
| // register fields as they are parsed. | |
| ei_neg: | |
| movz w22, #0x4B00, lsl #16 | |
| b ei_neg_mvn | |
| ei_mvn: | |
| movz w22, #0x2A20, lsl #16 | |
| ei_neg_mvn: | |
| movk w22, #0x03E0 // Rn = 31 (zero register) | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register // x0 = Rd, x1 = sf, x2 = next | |
| orr w22, w22, w0 // | Rd | |
| cbz x1, 1f | |
| orr w22, w22, #0x80000000 // sf=1 | |
| 1: mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rm | |
| orr w0, w22, w0, lsl #16 // | Rm | |
| bl emit_inst | |
| b ei_done | |
| // ubfx Rd, Rn, #lsb, #width — UBFM Rd, Rn, #lsb, #(lsb+width-1) | |
| // 64-bit: 0xD3400000 | (immr<<16) | (imms<<10) | (Rn<<5) | Rd | |
| // 32-bit: 0x53000000 | ... | |
| // w22 collects the fields as they are parsed; x24 keeps lsb between the | |
| // two immediates. | |
| ei_ubfx: | |
| mov x0, x21 | |
| bl skip_ws | |
| bl parse_register | |
| mov x22, x0 // Rd | |
| mov x23, x1 // sf | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_register // x0 = Rn | |
| orr w22, w22, w0, lsl #5 // | Rn | |
| mov x0, x2 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_hash_imm // x0 = lsb, x1 = ptr past it | |
| mov x24, x0 | |
| orr w22, w22, w0, lsl #16 // immr = lsb | |
| mov x0, x1 | |
| bl skip_ws | |
| add x0, x0, #1 // skip ',' | |
| bl skip_ws | |
| bl parse_hash_imm // x0 = width | |
| add x9, x24, x0 | |
| sub x9, x9, #1 // imms = lsb + width - 1 | |
| orr w22, w22, w9, lsl #10 | |
| movz w9, #0x5300, lsl #16 // 32-bit base | |
| cbz x23, 1f | |
| movz w9, #0xD340, lsl #16 // 64-bit base (N=1) | |
| 1: orr w0, w22, w9 | |
| bl emit_inst | |
| b ei_done | |
| // common exit of encode_instruction: reload x19-x24 and the return | |
| // address from the 64-byte frame, release it and return | |
| ei_done: | |
| ldr x24, [sp, #48] | |
| ldr x23, [sp, #40] | |
| ldr x22, [sp, #32] | |
| ldr x21, [sp, #24] | |
| ldr x20, [sp, #16] | |
| ldr x19, [sp, #8] | |
| ldr x30, [sp] | |
| add sp, sp, #64 | |
| ret | |