Last active
January 1, 2016 08:29
-
-
Save lydonchandra/8118040 to your computer and use it in GitHub Desktop.
parameter passing pointer and __m256
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Demo function for inspecting how pointer arguments and a by-value __m256
 * argument are passed to a function.
 *
 *   d1p      pointer to a double; *d1p is read.
 *   m256_1p  pointer to a __m256, reinterpreted as double[]; element 0 is read.
 *   m128_1p  pointer to a __m128, reinterpreted as double[]; element 1 is read.
 *   m256_1   __m256 passed by value, reinterpreted as double[]; element 2 is read.
 *
 * Prints ((double*)&m256_1)[2] / (m256_dp[0] * (*d1p + *d1p) - m128_dp[1]).
 * Returns nothing; the only observable effect is the printf call.
 */
void test_parameterPassing_pointer( double *d1p,
                                    __m256 *m256_1p, __m128 *m128_1p,
                                    __m256 m256_1) {
    /* Despite the name, mul1 is a sum: *d1p added to itself. */
    double mul1 = (*d1p) + (*d1p);
    /* View the vector storage behind each pointer as an array of doubles. */
    double *m256_dp = ((double*)m256_1p);
    double *m128_dp = ((double*)m128_1p);
    double mul2 = m256_dp[0] * mul1 - m128_dp[1];
    /* Element 2 of the by-value vector is the one that forces the compiler
       to touch the __m256 argument itself. */
    double div1 = ((double*)&m256_1)[2] / mul2;
    /* NOTE: the label says "mul2" but the value printed is div1. The literal
       is kept unchanged because the compiler listings for this function
       reference this exact string. */
    printf("mul2=%f\n", div1);
}
On OS X/Linux x86-64, compiling with gcc, all four parameters of this function are passed in registers.
Pointer parameters are treated like ordinary integer parameters and are therefore passed in the general-purpose argument registers %rdi, %rsi, %rdx, %rcx, %r8, %r9 (here: d1p in %rdi, m256_1p in %rsi, m128_1p in %rdx).
__m256, which is 32 bytes in size, is passed by value in a %ymm register (here %ymm0), as can be seen below:
## Compiler output (AT&T syntax) for:
##   void test_parameterPassing_pointer(double *d1p, __m256 *m256_1p,
##                                      __m128 *m128_1p, __m256 m256_1)
## Argument locations as used by the instructions below:
##   %rdi = d1p, %rsi = m256_1p, %rdx = m128_1p, %ymm0 = m256_1 (by value)
_test_parameterPassing_pointer:         ## @test_parameterPassing_pointer
        .cfi_startproc
## BB#0:
        pushq   %rbp
Ltmp24:
        .cfi_def_cfa_offset 16
Ltmp25:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
Ltmp26:
        .cfi_def_cfa_register %rbp
        vmovsd  (%rdi), %xmm1                   ## xmm1 = *d1p
        vaddsd  %xmm1, %xmm1, %xmm1             ## mul1 = *d1p + *d1p
        vmulsd  (%rsi), %xmm1, %xmm1            ## m256_dp[0] * mul1
        vsubsd  8(%rdx), %xmm1, %xmm1           ## mul2 = ... - m128_dp[1]
        vextractf128    $1, %ymm0, %xmm0        ## <<---- our __m256 param
                                                ## xmm0 = ymm0[255:128], the upper half,
                                                ## whose low 64 bits are ((double*)&m256_1)[2]
        vpshufd $1, %xmm0, %xmm2                ## xmm2 = xmm0[1,0,0,0]
        vinsertps       $16, %xmm2, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
                                                ## $16 == 0001 0000 (binary)
                                                ##   bits 7:6 source    = 00 -> element 0 of %xmm2, xmm2[31:0]
                                                ##   bits 5:4 dest      = 01 -> element 1 of %xmm0, xmm0[63:32]
                                                ##   bits 3:0 zero mask = 0000 -> no element is zeroed
                                                ## result, high to low:
                                                ##   xmm0[127:96], xmm0[95:64], xmm2[31:0], xmm0[31:0]
                                                ## xmm2[0] is xmm0[1], so the low 64 bits of xmm0 are unchanged
        vdivsd  %xmm1, %xmm0, %xmm0             ## div1 = xmm0 / mul2, printf's double argument
        leaq    L_.str5(%rip), %rdi             ## first printf argument
        movb    $1, %al                         ## variadic call: one vector register carries an argument
        popq    %rbp
        vzeroupper
        jmp     _printf                 ## TAILCALL
On Windows x64, the standard __fastcall-style convention applies, which means at most 4 parameters are passed in registers.
Our last parameter, m256_1 above, is passed by reference, not by value (because it is more than 64 bits long): r9 holds a pointer to it.
; Compiler listing (Intel syntax, with byte offsets and encodings) for
; test_parameterPassing_pointer on Windows x64.
; Values computed below: xmm0 accumulates mul2, xmm1 holds div1.
test_parameterPassing_pointer	PROC
; parameter 1(d1p): rcx
; parameter 2(m256_1p): rdx
; parameter 3(m128_1p): r8
; parameter 4(m256_1): [r9]          (r9 is a pointer to the __m256)
.B3.1::                         ; Preds .B3.0
;;; void test_parameterPassing_pointer(double *d1p, __m256 *m256_1p, __m128 *m128_1p, __m256 m256_1) {
$LN290:
  00000 48 83 ec 28      sub rsp, 40                            ; main.c:148.98
$LN291:
;;;
;;; 	double mul1 = (*d1p) + (*d1p);
; xmm0 = *d1p
  00004 f2 0f 10 01      movsd xmm0, QWORD PTR [rcx]            ; main.c:150.18
$LN292:
;;; 	double *m256_dp = ((double*)m256_1p);
;;; 	double *m128_dp = ((double*)m128_1p);
;;; 	double mul2 = m256_dp[0] * mul1 - m128_dp[1];
;;; 	double div1 = ((double*)&m256_1)[2] / mul2;
;;; 	printf("mul2=%f\n", div1);
; rcx is reused for printf's first argument (the "mul2=%f\n" string),
; so d1p had to be read before this point.
  00008 48 8d 0d 00 00
        00 00            lea rcx, QWORD PTR [??_C@_08A@mul2?$DN?$CFf?6?$AA@] ; main.c:155.2
$LN293:
; xmm1 = ((double*)&m256_1)[2], loaded through the pointer in r9
  0000f f2 41 0f 10 49
        10               movsd xmm1, QWORD PTR [16+r9]          ; main.c:155.2  <<----- our __m256 param
$LN294:
; xmm0 = mul1 = *d1p + *d1p
  00015 f2 0f 58 c0      addsd xmm0, xmm0                       ; main.c:155.2
$LN295:
; xmm0 = m256_dp[0] * mul1
  00019 f2 0f 59 02      mulsd xmm0, QWORD PTR [rdx]            ; main.c:155.2
$LN296:
; 72 = 40 (local adjustment) + 8 (return address) + 24:
; the fourth 8-byte slot of the caller-provided 32-byte shadow space.
  0001d 4c 89 4c 24 48   mov QWORD PTR [72+rsp], r9             ; main.c:148.98
$LN297:
; xmm0 = mul2 = m256_dp[0] * mul1 - m128_dp[1]
  00022 f2 41 0f 5c 40
        08               subsd xmm0, QWORD PTR [8+r8]           ; main.c:155.2
$LN298:
; xmm1 = div1 = ((double*)&m256_1)[2] / mul2
  00028 f2 0f 5e c8      divsd xmm1, xmm0                       ; main.c:155.2
$LN299:
; printf is variadic: the double in xmm1 is also copied to rdx,
; the integer register of the same argument position.
  0002c 66 48 0f 7e ca   movd rdx, xmm1                         ; main.c:155.2
$LN300:
  00031 ff 15 fc ff ff
        ff               call QWORD PTR [__imp_printf]          ; main.c:155.2
$LN301:
                                ; LOE rbx rbp rsi rdi r12 r13 r14 r15 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
.B3.2::                         ; Preds .B3.1
;;; }
; NOTE: the listing excerpt ends here; the instructions of .B3.2
; (stack restore and return) are not shown.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment