Last active
January 2, 2020 18:20
-
-
Save zeux/fc1d6ff696c97ec258891be6572040d6 to your computer and use it in GitHub Desktop.
luaujit: nbody.lua compiled with an experimental Luau JIT engine. All assembly snippets show only the inner loop body. Variants: scalar - uses type info, records and the basic block compiler to generate a much more efficient inner loop; vector - same as scalar, but with the scalars replaced by a first-class 3-component vector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inner loop body, first listing: all floating-point math here is scalar double precision (vmovsd / v*sd). | |
# Presumably the "scalar" variant named in the gist description -- TODO confirm. | |
# Observed register roles: | |
#   rdi = base of a run of 16-byte value slots (payload at +0, 32-bit type tag at +12) | |
#   rcx = table pointer during the array lookup, then the record pointer for the rest of the body | |
#   rbx = pointer whose [+32] -> [+176] chain is polled by the interrupt check | |
# The forward branches (.text+0x5ea, .text+0x607, .text+0x6a2) target code outside this listing, | |
# presumably slow-path / fallback exits -- confirm against the full disassembly. | |
# table type guard (memory safety): the slot at rdi+0 must carry type tag 6 | |
cmp dword ptr [rdi + 12], 6 | |
jne 1072 <.text+0x5ea> | |
# load array index and convert to integer (+ exactness check) | |
# round trip double -> int32 -> double; a mismatch means the index is not an exact int32 value | |
# NOTE(review): an unordered compare (NaN index) sets ZF, so jne falls through; cvttsd2si | |
# yields 0x80000000 in that case and the unsigned bounds check below has to reject it -- confirm intended | |
movsd xmm0, qword ptr [rdi + 256] | |
cvttsd2si eax, xmm0 | |
cvtsi2sd xmm1, eax | |
ucomisd xmm1, xmm0 | |
jne 1046 <.text+0x5ea> | |
# indices are 1-based; could remove this one with runtime changes | |
# an index of 0 wraps to 0xFFFFFFFF here and is rejected by the unsigned compare below | |
dec eax | |
# load table and do a bounds check on the lookup | |
# rcx = pointer stored in slot rdi+0, rdx = [rcx+32] (array base), [rcx+20] = value compared against the index | |
# jbe is an unsigned test: leave the fast path when [rcx+20] <= index | |
mov rcx, qword ptr [rdi] | |
mov rdx, qword ptr [rcx + 32] | |
cmp dword ptr [rcx + 20], eax | |
jbe 1028 <.text+0x5ea> | |
# actual array lookup | |
# elements are 16 bytes (shl 4); writing eax zero-extends into rax, so rdx + rax is a valid 64-bit address | |
# the whole 16-byte element (payload + tag) is copied into the slot at rdi+272 | |
shl eax, 4 | |
movups xmm0, xmmword ptr [rdx + rax] | |
movups xmmword ptr [rdi + 272], xmm0 | |
# record type guard (memory safety): tag of the slot just written (272 + 12 = 284) must be 10 | |
cmp dword ptr [rdi + 284], 10 | |
jne 1030 <.text+0x607> | |
# check that record has enough fields (memory safety) | |
# requires [rcx+12] > 6 (signed compare); the body touches seven 16-byte fields at rcx+32 .. rcx+128 | |
mov rcx, qword ptr [rdi + 272] | |
cmp dword ptr [rcx + 12], 6 | |
jle 1013 <.text+0x607> | |
# basic block compiler, load phase (loads from records and stack) | |
#   xmm12, xmm13, xmm14 = slots rdi+176/192/208 (decremented in the arith phase, stored back) | |
#   xmm1, xmm2, xmm3    = record fields rcx+96/112/128 (subtracted from slots rdi+112/128/144) | |
#   xmm15               = record field rcx+32 (scale factor for the rdi-side update) | |
#   xmm11, xmm8, xmm9   = record fields rcx+48/64/80 (incremented in the arith phase, stored back) | |
# presumably position / velocity / mass of the two bodies in nbody.lua -- TODO confirm the field mapping | |
vmovsd xmm12, qword ptr [rdi + 176] | |
vmovsd xmm13, qword ptr [rdi + 192] | |
vmovsd xmm14, qword ptr [rdi + 208] | |
vmovsd xmm1, qword ptr [rcx + 96] | |
vmovsd xmm2, qword ptr [rcx + 112] | |
vmovsd xmm3, qword ptr [rcx + 128] | |
vmovsd xmm15, qword ptr [rcx + 32] | |
vmovsd xmm11, qword ptr [rcx + 48] | |
vmovsd xmm8, qword ptr [rcx + 64] | |
vmovsd xmm9, qword ptr [rcx + 80] | |
vmovsd xmm0, qword ptr [rdi + 112] | |
# basic block compiler, arith phase (mostly devoid of memory access) | |
# delta (xmm4, xmm5, xmm6) = slots rdi+112/128/144 - record fields rcx+96/112/128 | |
vsubsd xmm4, xmm0, xmm1 | |
vmovsd xmm0, qword ptr [rdi + 128] | |
vsubsd xmm5, xmm0, xmm2 | |
vmovsd xmm0, qword ptr [rdi + 144] | |
vsubsd xmm6, xmm0, xmm3 | |
# xmm7 = xmm4^2 + xmm5^2 + xmm6^2 (squared length of delta) | |
vmulsd xmm2, xmm4, xmm4 | |
vmulsd xmm1, xmm5, xmm5 | |
vaddsd xmm3, xmm2, xmm1 | |
vmulsd xmm2, xmm6, xmm6 | |
vaddsd xmm7, xmm3, xmm2 | |
# xmm10 = sqrt(xmm7); the upper bits of the result merge from xmm10's previous contents (first source operand) | |
vsqrtsd xmm10, xmm10, xmm7 | |
# xmm3 = xmm10^3, then xmm7 = [rdi+32] / xmm10^3 | |
vmulsd xmm2, xmm10, xmm10 | |
vmulsd xmm3, xmm2, xmm10 | |
vmovsd xmm0, qword ptr [rdi + 32] | |
vdivsd xmm7, xmm0, xmm3 | |
# xmm3 = xmm7 * [rdi+160] (scale for the record-side update); xmm2 = xmm15 * xmm7 (scale for the rdi-side update) | |
vmulsd xmm3, xmm7, qword ptr [rdi + 160] | |
vmulsd xmm2, xmm15, xmm7 | |
# (xmm12, xmm13, xmm14) -= delta * xmm2 | |
vmulsd xmm1, xmm4, xmm2 | |
vsubsd xmm12, xmm12, xmm1 | |
vmulsd xmm1, xmm5, xmm2 | |
vsubsd xmm13, xmm13, xmm1 | |
vmulsd xmm1, xmm6, xmm2 | |
vsubsd xmm14, xmm14, xmm1 | |
# (xmm11, xmm8, xmm9) += delta * xmm3 | |
vmulsd xmm1, xmm4, xmm3 | |
vaddsd xmm11, xmm11, xmm1 | |
vmulsd xmm1, xmm5, xmm3 | |
vaddsd xmm8, xmm8, xmm1 | |
vmulsd xmm1, xmm6, xmm3 | |
vaddsd xmm9, xmm9, xmm1 | |
# basic block compiler, store phase (note, stores type tags for memory safety) | |
# every 8-byte payload store is paired with a store of tag 3 at slot+12 (presumably the number tag -- confirm) | |
vmovsd qword ptr [rdi + 176], xmm12 | |
mov dword ptr [rdi + 188], 3 | |
vmovsd qword ptr [rdi + 192], xmm13 | |
mov dword ptr [rdi + 204], 3 | |
vmovsd qword ptr [rdi + 208], xmm14 | |
mov dword ptr [rdi + 220], 3 | |
vmovsd qword ptr [rcx + 48], xmm11 | |
mov dword ptr [rcx + 60], 3 | |
vmovsd qword ptr [rcx + 64], xmm8 | |
mov dword ptr [rcx + 76], 3 | |
vmovsd qword ptr [rcx + 80], xmm9 | |
mov dword ptr [rcx + 92], 3 | |
# loop interrupt check, necessary to solve halting problem | |
# leaves the loop when the qword at [[rbx+32]+176] is non-zero; presumably this lets the host | |
# break into a script loop that would otherwise never terminate -- confirm | |
mov rax, qword ptr [rbx + 32] | |
mov rax, qword ptr [rax + 176] | |
test rax, rax | |
jne 860 <.text+0x6a2> | |
# loop back edge | |
# index ([rdi+256]) += [rdi+240], stored back; continue while [rdi+224] >= index | |
# jae after ucomisd is taken only when CF=0, so an unordered (NaN) compare falls through and ends the loop | |
# the target .text+0x1b0 is the table type guard at the top of this listing, so every guard re-runs per iteration | |
movsd xmm0, qword ptr [rdi + 256] | |
movsd xmm1, qword ptr [rdi + 224] | |
addsd xmm0, qword ptr [rdi + 240] | |
movsd qword ptr [rdi + 256], xmm0 | |
ucomisd xmm1, xmm0 | |
jae -448 <.text+0x1b0> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inner loop body, second listing: per-component math is packed single precision (vsubps / vmulps / vdpps), | |
# while the distance and scale factors are computed in scalar double precision. | |
# Presumably the "vector" variant named in the gist description -- TODO confirm. | |
# Observed register roles: | |
#   rdi = base of a run of 16-byte value slots (payload at +0, 32-bit type tag at +12) | |
#   rcx = table pointer during the array lookup, then the record pointer for the rest of the body | |
#   rbx = pointer whose [+32] -> [+176] chain is polled by the interrupt check | |
# The forward branches (.text+0x49c, .text+0x4b9, .text+0x52c) target code outside this listing, | |
# presumably slow-path / fallback exits -- confirm against the full disassembly. | |
# table type guard (memory safety): the slot at rdi+0 must carry type tag 6 | |
cmp dword ptr [rdi + 12], 6 | |
jne 802 <.text+0x49c> | |
# load array index and convert to integer (+ exactness check) | |
# round trip double -> int32 -> double; a mismatch means the index is not an exact int32 value | |
# NOTE(review): an unordered compare (NaN index) sets ZF, so jne falls through; cvttsd2si | |
# yields 0x80000000 in that case and the unsigned bounds check below has to reject it -- confirm intended | |
movsd xmm0, qword ptr [rdi + 192] | |
cvttsd2si eax, xmm0 | |
cvtsi2sd xmm1, eax | |
ucomisd xmm1, xmm0 | |
jne 776 <.text+0x49c> | |
# indices are 1-based; could remove this one with runtime changes | |
# an index of 0 wraps to 0xFFFFFFFF here and is rejected by the unsigned compare below | |
dec eax | |
# load table and do a bounds check on the lookup | |
# rcx = pointer stored in slot rdi+0, rdx = [rcx+32] (array base), [rcx+20] = value compared against the index | |
# jbe is an unsigned test: leave the fast path when [rcx+20] <= index | |
mov rcx, qword ptr [rdi] | |
mov rdx, qword ptr [rcx + 32] | |
cmp dword ptr [rcx + 20], eax | |
jbe 758 <.text+0x49c> | |
# actual array lookup | |
# elements are 16 bytes (shl 4); writing eax zero-extends into rax, so rdx + rax is a valid 64-bit address | |
# the whole 16-byte element (payload + tag) is copied into the slot at rdi+208 | |
shl eax, 4 | |
movups xmm0, xmmword ptr [rdx + rax] | |
movups xmmword ptr [rdi + 208], xmm0 | |
# record type guard (memory safety): tag of the slot just written (208 + 12 = 220) must be 10 | |
cmp dword ptr [rdi + 220], 10 | |
jne 760 <.text+0x4b9> | |
# check that record has enough fields (memory safety) | |
# requires [rcx+12] > 2 (signed compare); the body touches three 16-byte fields at rcx+32, rcx+48 and rcx+64 | |
mov rcx, qword ptr [rdi + 208] | |
cmp dword ptr [rcx + 12], 2 | |
jle 743 <.text+0x4b9> | |
# basic block compiler, load phase (loads from records and stack) | |
#   xmm8 = 16-byte slot rdi+128 (decremented in the arith phase, stored back) | |
#   xmm2 = 16-byte record field rcx+48 (subtracted from slot rdi+112) | |
#   xmm3 = 16-byte record field rcx+64 (incremented in the arith phase, stored back) | |
#   xmm7 = double in record field rcx+32 (scale factor for the rdi-side update) | |
# the 16-byte loads include the tag dword at +12, so lane 3 of these registers holds tag bits, not data | |
# presumably position / velocity / mass of the two bodies in nbody.lua -- TODO confirm the field mapping | |
vmovups xmm8, xmmword ptr [rdi + 128] | |
vmovups xmm2, xmmword ptr [rcx + 48] | |
vmovups xmm3, xmmword ptr [rcx + 64] | |
vmovsd xmm7, qword ptr [rcx + 32] | |
# basic block compiler, arith phase (mostly devoid of memory access) | |
# xmm1 = delta = slot rdi+112 - record field rcx+48 (packed float) | |
vmovups xmm0, xmmword ptr [rdi + 112] | |
vsubps xmm1, xmm0, xmm2 | |
# dot(delta, delta): imm 119 = 0x77 -> multiply and sum lanes 0-2 only, write the sum to lanes 0-2 (lane 3 ignored) | |
vdpps xmm0, xmm1, xmm1, 119 | |
# widen the float squared length to double, then xmm4 = sqrt of it | |
# (the upper bits of the vsqrtsd result merge from xmm4's previous contents, its first source operand) | |
vcvtss2sd xmm2, xmm0, xmm0 | |
vsqrtsd xmm4, xmm4, xmm2 | |
# xmm5 = xmm4^3, then xmm2 = [rdi+32] / xmm4^3 | |
vmulsd xmm6, xmm4, xmm4 | |
vmulsd xmm5, xmm6, xmm4 | |
vmovsd xmm0, qword ptr [rdi + 32] | |
vdivsd xmm2, xmm0, xmm5 | |
# xmm5 = xmm2 * [rdi+144] (scale for the record-side update); xmm6 = xmm7 * xmm2 (scale for the rdi-side update) | |
vmulsd xmm5, xmm2, qword ptr [rdi + 144] | |
vmulsd xmm6, xmm7, xmm2 | |
# narrow xmm6 to float, broadcast lane 0 to all lanes (vshufps imm 0), then xmm8 -= delta * scale | |
vcvtsd2ss xmm0, xmm0, xmm6 | |
vshufps xmm0, xmm0, xmm0, 0 | |
vmulps xmm2, xmm0, xmm1 | |
vsubps xmm8, xmm8, xmm2 | |
# same for xmm5: narrow, broadcast, then xmm3 += delta * scale | |
vcvtsd2ss xmm0, xmm0, xmm5 | |
vshufps xmm0, xmm0, xmm0, 0 | |
vmulps xmm2, xmm0, xmm1 | |
vaddps xmm3, xmm3, xmm2 | |
# basic block compiler, store phase (note, stores type tags for memory safety) | |
# each 16-byte store also writes lane 3 over the tag dword at +12, so tag 4 is rewritten right after it | |
# (presumably 4 is the vector tag -- confirm) | |
vmovups xmmword ptr [rdi + 128], xmm8 | |
mov dword ptr [rdi + 140], 4 | |
vmovups xmmword ptr [rcx + 64], xmm3 | |
mov dword ptr [rcx + 76], 4 | |
# loop interrupt check, necessary to solve halting problem | |
# leaves the loop when the qword at [[rbx+32]+176] is non-zero; presumably this lets the host | |
# break into a script loop that would otherwise never terminate -- confirm | |
mov rax, qword ptr [rbx + 32] | |
mov rax, qword ptr [rax + 176] | |
test rax, rax | |
jne 699 <.text+0x52c> | |
# loop back edge | |
# index ([rdi+192]) += [rdi+176], stored back; continue while [rdi+160] >= index | |
# jae after ucomisd is taken only when CF=0, so an unordered (NaN) compare falls through and ends the loop | |
# the target .text+0x170 is the table type guard at the top of this listing, so every guard re-runs per iteration | |
movsd xmm0, qword ptr [rdi + 192] | |
movsd xmm1, qword ptr [rdi + 160] | |
addsd xmm0, qword ptr [rdi + 176] | |
movsd qword ptr [rdi + 192], xmm0 | |
ucomisd xmm1, xmm0 | |
jae -299 <.text+0x170> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment