Skip to content

Instantly share code, notes, and snippets.

@lydonchandra
Last active January 1, 2016 08:29
Show Gist options
  • Save lydonchandra/8118040 to your computer and use it in GitHub Desktop.
Save lydonchandra/8118040 to your computer and use it in GitHub Desktop.
parameter passing pointer and __m256
/*
 * Small probe function used to inspect how pointer arguments and a
 * by-value __m256 argument are handed to a callee.
 *
 * d1p      pointer to a double; dereferenced for the first operand
 * m256_1p  pointer to a __m256, reinterpreted as double[]; element 0 is read
 * m128_1p  pointer to a __m128, reinterpreted as double[]; element 1 is read
 * m256_1   __m256 passed by value, reinterpreted as double[]; element 2 is read
 *
 * Prints ((double*)&m256_1)[2] / (m256_dp[0] * (*d1p + *d1p) - m128_dp[1]).
 * Returns nothing.
 */
void test_parameterPassing_pointer( double *d1p,
__m256 *m256_1p, __m128 *m128_1p,
__m256 m256_1) {
/* Despite its name, mul1 holds the sum *d1p + *d1p. */
double mul1 = (*d1p) + (*d1p);
/* View both vector objects as plain arrays of doubles. */
double *m256_dp = ((double*)m256_1p);
double *m128_dp = ((double*)m128_1p);
double mul2 = m256_dp[0] * mul1 - m128_dp[1];
/* Third double (index 2) of the by-value vector, divided by mul2. */
double div1 = ((double*)&m256_1)[2] / mul2;
/* NOTE(review): the label reads "mul2" but the value printed is div1. */
printf("mul2=%f\n", div1);
}
On OS X/Linux x64 with gcc, all parameters of this function are passed in registers.
Pointer parameters are treated like ordinary integer parameters and are therefore passed in the general-purpose argument registers %rdi, %rsi, %rdx, %rcx, %r8, %r9.
The __m256 parameter, which is 32 bytes in size, is passed in a %ymm register (%ymm0 here), as can be seen below:
## void test_parameterPassing_pointer(double *d1p, __m256 *m256_1p,
##                                    __m128 *m128_1p, __m256 m256_1)
## Compiler output, AT&T operand order (source(s) first, destination last).
## Incoming arguments as used below:
##   %rdi  = d1p
##   %rsi  = m256_1p
##   %rdx  = m128_1p
##   %ymm0 = m256_1 (passed by value)
## Ends with a tail call to printf: format string in %rdi, div1 in %xmm0.
_test_parameterPassing_pointer: ## @test_parameterPassing_pointer
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp24:
.cfi_def_cfa_offset 16
Ltmp25:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp26:
.cfi_def_cfa_register %rbp
## xmm1 = *d1p
vmovsd (%rdi), %xmm1
## xmm1 = *d1p + *d1p                      (mul1)
vaddsd %xmm1, %xmm1, %xmm1
## xmm1 = m256_dp[0] * mul1
vmulsd (%rsi), %xmm1, %xmm1
## xmm1 = m256_dp[0] * mul1 - m128_dp[1]   (mul2); 8(%rdx) is double index 1
vsubsd 8(%rdx), %xmm1, %xmm1
## Take the upper 128 bits of the by-value vector; their low 64 bits are
## double index 2, i.e. ((double*)&m256_1)[2].
vextractf128 $1, %ymm0, %xmm0 # <<---- our __m256 param
//xmm0 == ymm0[128:255]
vpshufd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0,0,0]
vinsertps $16, %xmm2, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
//$16 == 0001 0000
//source 00 = 0, %xmm2[31:0]
//dest 01 = 1, %xmm0[63:32]
//mask 0000
//xmm0 == xmm0[127:96], xmm0[95:64], xmm2[31:0], xmm0[31:0]
## xmm2[31:0] is the previous xmm0[63:32], so the shuffle/insert pair writes
## dword 1 back with its own value: the low 64 bits of xmm0 are unchanged.
## xmm0 = xmm0 / xmm1 = m256_1 double[2] / mul2   (div1)
vdivsd %xmm1, %xmm0, %xmm0
## First printf argument: address of the format string
## (L_.str5 is presumably "mul2=%f\n"; its definition is not shown here).
leaq L_.str5(%rip), %rdi
## Variadic call: %al = number of vector registers carrying arguments (one, %xmm0).
movb $1, %al
popq %rbp
## Clear the upper halves of the ymm registers before leaving AVX code.
vzeroupper
## Jump instead of call/ret: printf returns directly to our caller.
jmp _printf ## TAILCALL
On Windows x64, the standard __fastcall convention applies, which means that at most 4 parameters are passed in registers.
Our last parameter, m256_1 above, is passed by reference rather than by value (because it is more than 64 bits long), so r9 holds a pointer to it.
; Compiler listing for the same function on Windows x64 (Intel operand order:
; destination first). Each instruction line shows offset, opcode bytes, then
; the instruction; lines starting with ";;;" echo the C source.
test_parameterPassing_pointer PROC
; parameter 1(d1p): rcx
; parameter 2(m256_1p): rdx
; parameter 3(m128_1p): r8
; parameter 4(m256_1): [r9]
; (r9 holds a pointer to m256_1, not the vector itself)
.B3.1:: ; Preds .B3.0
;;; void test_parameterPassing_pointer(double *d1p, __m256 *m256_1p, __m128 *m128_1p, __m256 m256_1) {
$LN290:
; Reserve 40 bytes of stack: 32 bytes of shadow space for the printf call
; plus 8, presumably to keep rsp 16-byte aligned at the call.
00000 48 83 ec 28 sub rsp, 40 ; main.c:148.98
$LN291:
;;;
;;; double mul1 = (*d1p) + (*d1p);
; xmm0 = *d1p
00004 f2 0f 10 01 movsd xmm0, QWORD PTR [rcx] ; main.c:150.18
$LN292:
;;; double *m256_dp = ((double*)m256_1p);
;;; double *m128_dp = ((double*)m128_1p);
;;; double mul2 = m256_dp[0] * mul1 - m128_dp[1];
;;; double div1 = ((double*)&m256_1)[2] / mul2;
;;; printf("mul2=%f\n", div1);
; rcx is free once *d1p has been loaded; reuse it for printf's first
; argument, the address of the format string.
00008 48 8d 0d 00 00
00 00 lea rcx, QWORD PTR [??_C@_08A@mul2?$DN?$CFf?6?$AA@] ; main.c:155.2
$LN293:
; xmm1 = ((double*)&m256_1)[2], read through the pointer in r9 (byte offset 16)
0000f f2 41 0f 10 49
10 movsd xmm1, QWORD PTR [16+r9] ; main.c:155.2 <<----- our __m256 param
$LN294:
; xmm0 = *d1p + *d1p                      (mul1)
00015 f2 0f 58 c0 addsd xmm0, xmm0 ; main.c:155.2
$LN295:
; xmm0 = mul1 * m256_dp[0]
00019 f2 0f 59 02 mulsd xmm0, QWORD PTR [rdx] ; main.c:155.2
$LN296:
; Spill r9 above the return address: 72 = 40 (this frame) + 8 (return
; address) + 24, which looks like the 4th-argument home slot in the
; caller's shadow space.
0001d 4c 89 4c 24 48 mov QWORD PTR [72+rsp], r9 ; main.c:148.98
$LN297:
; xmm0 = m256_dp[0] * mul1 - m128_dp[1]   (mul2); [8+r8] is double index 1
00022 f2 41 0f 5c 40
08 subsd xmm0, QWORD PTR [8+r8] ; main.c:155.2
$LN298:
; xmm1 = xmm1 / xmm0 = div1
00028 f2 0f 5e c8 divsd xmm1, xmm0 ; main.c:155.2
$LN299:
; Copy the bit pattern of div1 into rdx, the integer register of argument
; slot 2. printf is variadic; presumably this follows the Win64 rule that a
; floating-point variadic argument is duplicated in the matching integer
; register (confirm against the ABI documentation).
0002c 66 48 0f 7e ca movd rdx, xmm1 ; main.c:155.2
$LN300:
; Indirect call through the import table entry for printf.
00031 ff 15 fc ff ff
ff call QWORD PTR [__imp_printf] ; main.c:155.2
$LN301:
; LOE rbx rbp rsi rdi r12 r13 r14 r15 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
.B3.2:: ; Preds .B3.1
;;; }
; The excerpt stops here: no epilogue instructions (stack restore / ret)
; and no ENDP are shown.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment