Page translation cache (TLB) in dynasm

Simple JIT compiler to create a fixed-size page translation cache. Generates simple linear search code without branches. On small tables it costs about 16 cycles including the overhead of looping and calling from Lua.

Cache table layout:

struct tlb_entry { int64_t page, offset; }

JIT compiler source:

void tlb_emit(Dst_DECL, int size, int pagebits)
{
  int i;
  // Preconditions
  //   Arg 0 (rdi): Table address
  //   Arg 1 (rsi): Address
  // R9 = page
  | mov r9, rsi
  | shr r9, pagebits // r9 = address >> pagebits
  // RAX = result (or 0)
  | xor rax, rax
  for (i = 0; i < size; i++) {
    // if table[i].page == r9 then rax = table[i].offset end
    | cmp r9, qword[rdi + (i * 16)]
    | cmove rax, qword[rdi + (i * 16) + 8]
  }
  // If rax contains an offset then add the base address
  //   rax = (rax == 0) ? rax : rax + address
  | xor rcx, rcx
  | test rax, rax
  | cmovz rsi, rcx  // if rax == 0 then rsi = 0 end
  | add rax, rsi    // rax = address + offset
  | ret
}

Generated code for an 8-entry cache for 2MB page size. (Lookup costs approx. 16 cycles when called from Lua.)

00000000  4989F1            mov r9,rsi
00000003  49C1E915          shr r9,byte 0x15
00000007  4831C0            xor rax,rax
0000000A  4C3B0F            cmp r9,[rdi]
0000000D  480F444708        cmovz rax,[rdi+0x8]
00000012  4C3B4F10          cmp r9,[rdi+0x10]
00000016  480F444718        cmovz rax,[rdi+0x18]
0000001B  4C3B4F20          cmp r9,[rdi+0x20]
0000001F  480F444728        cmovz rax,[rdi+0x28]
00000024  4C3B4F30          cmp r9,[rdi+0x30]
00000028  480F444738        cmovz rax,[rdi+0x38]
0000002D  4C3B4F40          cmp r9,[rdi+0x40]
00000031  480F444748        cmovz rax,[rdi+0x48]
00000036  4C3B4F50          cmp r9,[rdi+0x50]
0000003A  480F444758        cmovz rax,[rdi+0x58]
0000003F  4C3B4F60          cmp r9,[rdi+0x60]
00000043  480F444768        cmovz rax,[rdi+0x68]
00000048  4C3B4F70          cmp r9,[rdi+0x70]
0000004C  480F444778        cmovz rax,[rdi+0x78]
00000051  4831C9            xor rcx,rcx
00000054  4885C0            test rax,rax
00000057  480F44F1          cmovz rsi,rcx
0000005B  4801F0            add rax,rsi
0000005E  C3                ret

lukego/gist:aebdb14845e52ee8d3a4

pkhuong commented Jun 16, 2014

Uh oh!

lukego commented Jun 17, 2014

Uh oh!

pkhuong commented Jun 17, 2014

Uh oh!