Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save define-private-public/07945570cbfe49b98bd88f0f8ea66e19 to your computer and use it in GitHub Desktop.
Save define-private-public/07945570cbfe49b98bd88f0f8ea66e19 to your computer and use it in GitHub Desktop.
PSRT Rejection vs. Analytical sampling
RNG::analytical_in_unit_disk(): ; 49 instructions
; Unoptimized listing: every intermediate value is spilled to the rbp frame.
; Flow visible below:
;   r     = sqrt(this->num(.LC2, .LC1))
;   theta = this->num(.LC2, .LC3)
;   return Vec2(r * cos(theta), r * sin(theta))
; In:    rdi = this, saved at [rbp-56]
; Out:   the two 8-byte halves of the Vec2 temporary, in xmm0 and xmm1
; Frame: [rbp-8] = r, [rbp-16] = theta, [rbp-24] = r*cos(theta),
;        [rbp-32] = r*sin(theta), [rbp-48]..[rbp-33] = Vec2 temporary
; NOTE(review): .LC1/.LC2/.LC3 are defined outside this listing; presumably
;               1.0, 0.0 and 2*pi -- confirm against the constant pool.
; NOTE(review): the body below holds 50 instructions (push rbp through ret),
;               one more than the "49" in the header -- confirm the count.
push rbp
mov rbp, rsp
sub rsp, 64
mov QWORD PTR [rbp-56], rdi
; First call: num(.LC2, .LC1) with this in rdi, arguments in xmm0 and xmm1.
movsd xmm0, QWORD PTR .LC1[rip]
mov rax, QWORD PTR [rbp-56]
movapd xmm1, xmm0
mov rdx, QWORD PTR .LC2[rip]
movq xmm0, rdx
mov rdi, rax
call RNG::num(double, double) ; 19 instructions
; The result is bounced through rax and handed straight to sqrt.
movq rax, xmm0
movq xmm0, rax
call sqrt
; [rbp-8] = r
movq rax, xmm0
mov QWORD PTR [rbp-8], rax
; Second call: num(.LC2, .LC3).
movsd xmm0, QWORD PTR .LC3[rip]
mov rax, QWORD PTR [rbp-56]
movapd xmm1, xmm0
mov rdx, QWORD PTR .LC2[rip]
movq xmm0, rdx
mov rdi, rax
call RNG::num(double, double) ; 19 instructions
; [rbp-16] = theta
movq rax, xmm0
mov QWORD PTR [rbp-16], rax
; [rbp-24] = cos(theta) * r
mov rax, QWORD PTR [rbp-16]
movq xmm0, rax
call cos
movsd xmm1, QWORD PTR [rbp-8]
mulsd xmm0, xmm1
movsd QWORD PTR [rbp-24], xmm0
; [rbp-32] = sin(theta) * r
mov rax, QWORD PTR [rbp-16]
movq xmm0, rax
call sin
movsd xmm1, QWORD PTR [rbp-8]
mulsd xmm0, xmm1
movsd QWORD PTR [rbp-32], xmm0
; Construct the Vec2 temporary at [rbp-48]:
; rdi = &temporary, xmm0 = [rbp-24], xmm1 = [rbp-32].
movsd xmm0, QWORD PTR [rbp-32]
mov rdx, QWORD PTR [rbp-24]
lea rax, [rbp-48]
movapd xmm1, xmm0
movq xmm0, rdx
mov rdi, rax
call Vec2::Vec2(double, double) [complete object constructor] ; 14 instructions
; Return the temporary by value: first 8 bytes in xmm0, next 8 bytes in xmm1.
mov rax, QWORD PTR [rbp-48]
mov rdx, QWORD PTR [rbp-40]
movq xmm0, rax
movq xmm1, rdx
leave
ret
.L44:
; Excerpt from the middle of an optimized function: it starts and ends
; mid-body, and the branch targets .L30, .L42, .L17, .L21, .L31 and .L32
; are not part of this excerpt.
; NOTE(review): presumably the optimized build of the analytical disk sampler
;               with the random engine inlined -- confirm against the full listing.
; NOTE(review): .LC1, .LC7 and .LC8 are defined elsewhere; presumably 1.0,
;               2^32 (single precision) and 2*pi -- confirm.
;
; xmm6 = xmm6 / [rsp+8]; branch away if the quotient is >= .LC1.
divsd xmm6, QWORD PTR [rsp+8]
comisd xmm6, QWORD PTR .LC1[rip]
jnb .L30
; Add 0.0, then take the square root inline. A value below zero takes the
; out-of-line path at .L42 instead.
pxor xmm0, xmm0
addsd xmm6, xmm0
ucomisd xmm0, xmm6
ja .L42
sqrtsd xmm6, xmm6
.L16:
; Set up a second accumulation: ecx = 2, xmm7 = 0.0 (running sum),
; [rsp+8] = .LC1 (running scale factor).
mov rax, QWORD PTR .LC1[rip]
mov ecx, 2
pxor xmm7, xmm7
mov QWORD PTR [rsp+8], rax
; r12 indexes the 8-byte entries at [rsp+32]; index 624 branches to .L17.
; NOTE(review): .L17 presumably regenerates the state table -- not shown here.
cmp r12, 624
je .L17
.L46:
; Fetch the next table entry and advance the index.
mov rdx, QWORD PTR [rsp+32+r12*8]
add r12, 1
.L18:
; Output tempering. The shifts (11, 7, 15, 18) and masks
; (2636928640 = 0x9D2C5680, 4022730752 = 0xEFC60000) are the MT19937
; tempering parameters.
;   y ^= (y >> 11) & 0xFFFFFFFF   ("mov eax, eax" clears the upper 32 bits)
mov rax, rdx
shr rax, 11
mov eax, eax
xor rax, rdx
;   y ^= (y << 7) & 0x9D2C5680
mov rdx, rax
sal rdx, 7
and edx, 2636928640
xor rax, rdx
;   y ^= (y << 15) & 0xEFC60000
mov rdx, rax
sal rdx, 15
and edx, 4022730752
xor rax, rdx
;   y ^= y >> 18
mov rdx, rax
shr rdx, 18
xor rax, rdx
; The final xor sets SF. If bit 63 is set, the signed cvtsi2sd cannot be used
; directly, so control goes to .L21 (not shown).
js .L21
pxor xmm0, xmm0
cvtsi2sd xmm0, rax
.L22:
; xmm0 = sample * scale; xmm7 += xmm0.
; The scale factor in [rsp+8] is multiplied by .LC7 on the x87 stack and stored
; back. The mulsd reads [rsp+8] before the fstp overwrites it.
mulsd xmm0, QWORD PTR [rsp+8]
fld DWORD PTR .LC7[rip]
fmul QWORD PTR [rsp+8]
addsd xmm7, xmm0
fstp QWORD PTR [rsp+8]
; Continue at .L31 (not shown) unless ecx == 1.
cmp ecx, 1
jne .L31
; xmm7 = sum / scale; branch away if the quotient is >= .LC1.
divsd xmm7, QWORD PTR [rsp+8]
comisd xmm7, QWORD PTR .LC1[rip]
jnb .L32
; Call sincos(angle, &[rsp+24], &[rsp+16]) with angle = (xmm7 + 0.0) * .LC8 + 0.0.
; glibc declares sincos(double x, double *sin, double *cos), so rdi receives
; the sine slot and rsi the cosine slot.
; xmm6 is spilled to [rsp+8] across the call and reloaded afterwards.
pxor xmm0, xmm0
lea rdi, [rsp+24]
lea rsi, [rsp+16]
movsd QWORD PTR [rsp+8], xmm6
addsd xmm7, xmm0
mulsd xmm7, QWORD PTR .LC8[rip]
addsd xmm0, xmm7
call sincos
; xmm0 = cosine result * xmm6; xmm1 = sine result. The excerpt ends here.
movsd xmm6, QWORD PTR [rsp+8]
movsd xmm0, QWORD PTR [rsp+16]
movsd xmm1, QWORD PTR [rsp+24]
mulsd xmm0, xmm6
RNG::rejection_in_unit_disk(): ; 41 instructions
; Unoptimized listing. Flow visible below:
;   do {
;       b = this->num(.LC3, .LC1);      // first call, parked in rbx
;       a = this->num(.LC3, .LC1);      // second call
;       v = Vec2(a, b);
;   } while (!(.LC1 > v.length_squared()));
;   return v;
; In:    rdi = this, saved at [rbp-40]
; Out:   the two 8-byte halves of v, in xmm0 and xmm1
; Frame: [rbp-8] = saved rbx, [rbp-32]..[rbp-17] = Vec2 v
; NOTE(review): .LC1/.LC3 are defined outside this listing; presumably 1.0 and
;               -1.0 -- confirm against the constant pool.
push rbp
mov rbp, rsp
; rbx is callee-saved. It carries the first sample across the second call.
push rbx
sub rsp, 40
mov QWORD PTR [rbp-40], rdi
.L12:
; First sample: num(.LC3, .LC1), kept in rbx.
movsd xmm0, QWORD PTR .LC1[rip]
mov rdx, QWORD PTR .LC3[rip]
mov rax, QWORD PTR [rbp-40]
movapd xmm1, xmm0
movq xmm0, rdx
mov rdi, rax
call RNG::num(double, double) ; 19 instructions
movq rbx, xmm0
; Second sample: num(.LC3, .LC1), left in rax.
movsd xmm0, QWORD PTR .LC1[rip]
mov rdx, QWORD PTR .LC3[rip]
mov rax, QWORD PTR [rbp-40]
movapd xmm1, xmm0
movq xmm0, rdx
mov rdi, rax
call RNG::num(double, double) ; 19 instructions
; Construct v at [rbp-32]: xmm0 = second sample, xmm1 = first sample.
movq rax, xmm0
lea rdx, [rbp-32]
movq xmm1, rbx
movq xmm0, rax
mov rdi, rdx
call Vec2::Vec2(double, double) [complete object constructor] ; 14 instructions
; xmm0 = v.length_squared()
lea rax, [rbp-32]
mov rdi, rax
call Vec2::length_squared() const ; 16 instructions
; al = (.LC1 > length_squared). seta also yields 0 on an unordered (NaN)
; compare, so a NaN length is rejected too. Retry while al == 0.
movsd xmm1, QWORD PTR .LC1[rip]
comisd xmm1, xmm0
seta al
test al, al
je .L12 ; Possible jump to repeat the above (29 instructions)
; Accepted: return v by value and restore rbx from its slot at [rbp-8].
mov rax, QWORD PTR [rbp-32]
mov rdx, QWORD PTR [rbp-24]
movq xmm0, rax
movq xmm1, rdx
mov rbx, QWORD PTR [rbp-8]
leave
ret
.L16:
; Excerpt from the middle of an optimized function: a vectorized loop that
; rewrites two 8-byte table entries per iteration, then a scalar fix-up of one
; more entry. The masks in xmm3/xmm4/xmm5, the end pointer in rsi and the
; target .L14 are set up outside this excerpt.
; NOTE(review): the constant 2567483615 (0x9908B0DF) and the 227-entry look-back
;               match the MT19937 state-regeneration ("twist") step; presumably
;               this is the inlined engine of the rejection sampler -- confirm.
;
; Load entries k+1,k+2 and k,k+1, then step rax to the next pair.
movdqa xmm1, XMMWORD PTR [rax+8]
movdqu xmm0, XMMWORD PTR [rax]
add rax, 16
; y = (x[k] & xmm5) | (x[k+1] & xmm3)
; NOTE(review): xmm5/xmm3 presumably hold the upper/lower bit masks -- confirm.
pand xmm0, xmm5
pand xmm1, xmm3
por xmm1, xmm0
; xmm2 = (y >> 1) ^ entry at [rax-1832]. After the add above, that address is
; 1816 bytes (227 entries) before the slot being rewritten.
; xmm1 = y & xmm4 (NOTE(review): xmm4 presumably holds 1 per lane -- confirm).
movdqa xmm2, xmm1
pand xmm1, xmm4
movdqa xmm0, xmm1
psrlq xmm2, 1
pxor xmm2, XMMWORD PTR [rax-1832]
; Shift/add chain computing xmm0 = xmm1 * 2567483615 per 64-bit lane.
; Running multiplier after each step:
;   8, 9, 4608, 4609, 147488, 147489, 589956, 589955, 4719640, 4719639,
;   (+ 16 * 4719639 =) 80233863, 2567483616, 2567483615.
psllq xmm0, 3
paddq xmm0, xmm1
psllq xmm0, 9
paddq xmm0, xmm1
psllq xmm0, 5
paddq xmm0, xmm1
psllq xmm0, 2
psubq xmm0, xmm1
psllq xmm0, 3
psubq xmm0, xmm1
movdqa xmm10, xmm0
psllq xmm10, 4
paddq xmm0, xmm10
psllq xmm0, 5
psubq xmm0, xmm1
; Fold the product in and store over the pair that was loaded from [rax]
; before the increment.
pxor xmm2, xmm0
movups XMMWORD PTR [rax-16], xmm2
cmp rax, rsi
jne .L16
; Scalar tail: rewrite the entry at [rsp+5000] from itself, the entry at
; [rsp+16] and the entry at [rsp+3184].
; NOTE(review): if the table starts at [rsp+16], these are elements 623, 0 and
;               396 ((5000-16)/8, 0, (3184-16)/8) -- confirm the base address.
mov rsi, QWORD PTR [rsp+16]
mov rax, QWORD PTR [rsp+5000]
; NOTE(review): r12 presumably is the next-read index consumed at .L14 -- confirm.
mov r12d, 1
mov rdi, rsi
; y = (x[last] with its low 31 bits cleared) | (x[first] & 0x7FFFFFFF)
and rax, -2147483648
and edi, 2147483647
or rax, rdi
mov rdi, rax
; rax = (y & 1) ? 2567483615 : 0. neg turns the low bit into an all-ones or
; zero mask, and the 32-bit and keeps the constant while zeroing the upper half.
and eax, 1
neg rax
; rdi = (y >> 1) ^ entry at [rsp+3184]
shr rdi
xor rdi, QWORD PTR [rsp+3184]
and eax, 2567483615
xor rax, rdi
mov QWORD PTR [rsp+5000], rax
jmp .L14
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment