Skip to content

Instantly share code, notes, and snippets.

@kaityo256
Last active December 19, 2017 09:22
Show Gist options
  • Save kaityo256/d0cbd43c6ddd7aa80696fa0bf45ee3ae to your computer and use it in GitHub Desktop.
Save kaityo256/d0cbd43c6ddd7aa80696fa0bf45ee3ae to your computer and use it in GitHub Desktop.
v4df and v8df
# add_v4df, element-by-element version (presumably the unoptimised -O0
# listing, test_O0.s -- TODO confirm against the build).
# Syntax: AT&T (source operand first, destination last).
# In:  %ymm0 = first vector argument, %ymm1 = second vector argument
# Out: %ymm0 = lane-wise sum of the two arguments
# Frame layout (byte offsets from %rbp):
#   -32  .. -1    copy of parameter 2 (32 bytes)
#   -64  .. -33   copy of parameter 1 (32 bytes)
#   -104 .. -73   result vector (32 bytes)
#   -108          loop index (32-bit)
#   -112          loop bound (32-bit, initialised to 4)
# The trailing "#N.M" tags look like source line.column markers written by
# the compiler -- TODO confirm.
add_v4df(double __vector, double __vector):
# parameter 1: %ymm0
# parameter 2: %ymm1
..B1.1: # Preds ..B1.0
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
..___tag_value__Z8add_v4dfU8__vectordS_.1:
..L2:
#9.25
# Prologue: establish a frame pointer and reserve 112 bytes of locals.
pushq %rbp #9.25
.cfi_def_cfa_offset 16
movq %rsp, %rbp #9.25
.cfi_def_cfa 6, 16
.cfi_offset 6, -16
subq $112, %rsp #9.25
# Spill both register arguments to the stack so lanes can be addressed
# individually.
vmovupd %ymm0, -64(%rbp) #9.25
vmovupd %ymm1, -32(%rbp) #9.25
# Loop bound = 4, loop index = 0.
movl $4, -112(%rbp) #10.14
movl $0, -108(%rbp) #10.14
# LOE rbx rbp rsp r12 r13 r14 r15 rip
# Loop body: one pass per 8-byte lane.
..B1.2: # Preds ..B1.2 ..B1.1
# %rax = address of parameter 1 copy + index * 8
lea -64(%rbp), %rax #10.14
movl -108(%rbp), %edx #10.14
movslq %edx, %rdx #10.14
imulq $8, %rdx, %rdx #10.14
addq %rdx, %rax #10.14
# %rdx = address of parameter 2 copy + index * 8
lea -32(%rbp), %rdx #10.14
movl -108(%rbp), %ecx #10.14
movslq %ecx, %rcx #10.14
imulq $8, %rcx, %rcx #10.14
addq %rcx, %rdx #10.14
# Load one double from each operand and add them as scalars.
vmovsd (%rax), %xmm0 #10.14
vmovsd (%rdx), %xmm1 #10.14
vaddsd %xmm1, %xmm0, %xmm0 #10.14
# %rax = address of result + index * 8; store the scalar sum there.
lea -104(%rbp), %rax #10.14
movl -108(%rbp), %edx #10.14
movslq %edx, %rdx #10.14
imulq $8, %rdx, %rdx #10.14
addq %rdx, %rax #10.14
vmovsd %xmm0, (%rax) #10.14
# index = index + 1, written back to its stack slot.
movl $1, %eax #10.14
addl -108(%rbp), %eax #10.14
movl %eax, -108(%rbp) #10.14
# Repeat while index < bound (signed compare).
movl -108(%rbp), %eax #10.14
movl -112(%rbp), %edx #10.14
cmpl %edx, %eax #10.14
jl ..B1.2 # Prob 50% #10.14
# LOE rbx rbp rsp r12 r13 r14 r15 rip
..B1.3: # Preds ..B1.2
# Reload the assembled 32-byte result into the return register, then tear
# down the frame (leave = mov %rbp,%rsp ; pop %rbp).
vmovupd -104(%rbp), %ymm0 #11.1
leave #10.14
.cfi_restore 6
ret #10.14
# add_v4df, single-instruction version (presumably the optimised -O3
# listing, test_O3.s -- TODO confirm against the build).
# Syntax: AT&T (source operands first, destination last).
# In:  %ymm0 = first vector argument, %ymm1 = second vector argument
# Out: %ymm0 = packed-double sum of the two arguments
# No prologue, stack frame or loop appears here: the body is one packed add
# followed by ret.
add_v4df(double __vector, double __vector):
# parameter 1: %ymm0
# parameter 2: %ymm1
..B2.1: # Preds ..B2.0
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
..___tag_value__Z8add_v4dfU8__vectordS_.35:
..L36:
#9.25
# %ymm0 = %ymm0 + %ymm1, all double lanes at once; result is already in the
# return register.
vaddpd %ymm1, %ymm0, %ymm0 #10.14
ret #10.14
# Build demangled assembly listings of test.cpp at -O0 and -O3.
#
#   test.cpp --icpc -S--> test_s{0,3}.s --c++filt--> test_O{0,3}.s

# If a recipe fails, delete its half-written target. Without this, a failed
# "c++filt < in > out" leaves an empty output file behind (the shell creates
# it before c++filt runs) and make would treat it as up to date.
.DELETE_ON_ERROR:

# all and clean name actions, not files on disk.
.PHONY: all clean

all: test_O0.s test_O3.s

# Demangle the C++ symbol names in the raw compiler output.
test_O0.s: test_s0.s
	c++filt < $< > $@

test_O3.s: test_s3.s
	c++filt < $< > $@

# Raw compiler output: assembly only (-S), tuned for the build host (-xHOST).
test_s0.s: test.cpp
	icpc -std=c++11 -xHOST -O0 -S $< -o $@

test_s3.s: test.cpp
	icpc -std=c++11 -xHOST -O3 -S $< -o $@

# Removes every .s file in the directory, generated or not.
clean:
	rm -f *.s
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
/* Compiler vector-extension types; vector_size is given in bytes. */
/* 32 bytes of double: used below as a 4-element vector. */
typedef double v4df __attribute__((vector_size(32)));
/* 64 bytes of double: used below as an 8-element vector. */
typedef double v8df __attribute__((vector_size(64)));
/* Returns the element-wise sum of two 4-element double vectors. */
v4df
add_v4df(v4df a, v4df b){
  const v4df sum = a + b;
  return sum;
}
/* Returns the element-wise sum of two 8-element double vectors. */
v8df
add_v8df(v8df a, v8df b){
  const v8df sum = a + b;
  return sum;
}
/*
 * Prints the four elements of a to stdout, each formatted with "%f " and
 * followed by a single newline.
 *
 * The vector is read through its subscript operator; the unused
 * double* alias the function used to create has been dropped.
 */
void
put_v4df(v4df &a){
  for(int i=0;i<4; i++) printf("%f ",a[i]);
  printf("\n");
}
/*
 * Prints the eight elements of a to stdout, each formatted with "%f " and
 * followed by a single newline.
 *
 * The vector is read through its subscript operator; the unused
 * double* alias the function used to create has been dropped.
 */
void
put_v8df(v8df &a){
  for(int i=0;i<8; i++) printf("%f ",a[i]);
  printf("\n");
}
/* Returns a 4-element vector with every element set to v. */
v4df
set1_v4df(double v){
  v4df filled = {v, v, v, v};
  return filled;
}
/* Returns an 8-element vector with every element set to v. */
v8df
set1_v8df(double v){
  v8df filled = {v, v, v, v, v, v, v, v};
  return filled;
}
/*
 * Usage: <program> a b
 *
 * Broadcasts the two numeric arguments into 4- and 8-element vectors, adds
 * them element-wise and prints each result on its own line.
 *
 * Returns EXIT_FAILURE with a usage message on stderr when fewer than two
 * arguments are supplied; previously argv[1]/argv[2] were handed to atof
 * unchecked. atof itself reports no conversion errors.
 */
int
main(int argc, char **argv){
  if (argc < 3) {
    fprintf(stderr, "usage: %s a b\n", argc > 0 ? argv[0] : "test");
    return EXIT_FAILURE;
  }

  const double a = atof(argv[1]);
  const double b = atof(argv[2]);

  /* 4-element case */
  v4df v4a = set1_v4df(a);
  v4df v4b = set1_v4df(b);
  v4df v4c = add_v4df(v4a,v4b);
  put_v4df(v4c);

  /* 8-element case */
  v8df v8a = set1_v8df(a);
  v8df v8b = set1_v8df(b);
  v8df v8c = add_v8df(v8a,v8b);
  put_v8df(v8c);

  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment