Created
November 28, 2019 11:20
-
-
Save horitaku1124/aa44a2c4784d544453b718cd95dfe658 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set tabstop=4
set shiftwidth=4
set expandtab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sudo apt install gcc-8 clang-8
gcc-8 -mavx512f -march=icelake-server main.c -o main
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>
/* Print `count` 32-bit lanes, one per line, as "<index> = <value>". */
static void print_lanes(const int *lanes, int count)
{
    for (int i = 0; i < count; i++) {
        printf("%d = %d\n", i, lanes[i]);
    }
}

/*
 * Small demo of two AVX-512 multiply-accumulate paths:
 *
 *   1. vpdpbusds (_mm512_dpbusds_epi32): for each 32-bit lane, multiplies
 *      four adjacent 8-bit pairs and adds the products to the accumulator
 *      with signed saturation.
 *   2. vpmaddwd + vpaddd (_mm512_madd_epi16 then _mm512_add_epi32): for each
 *      32-bit lane, multiplies two adjacent 16-bit pairs, sums them, and
 *      then adds the accumulator.
 *
 * The two paths are fed different inputs (64 bytes holding 0..63 versus
 * 32 words holding 0..31), so their printed results are not expected to
 * match. The accumulator is all zeros in both cases.
 *
 * Returns 0 unconditionally.
 */
int main(void)
{
    /* Number of 8-, 16- and 32-bit elements held by one 512-bit register. */
    enum { BYTE_LANES = 64, WORD_LANES = 32, DWORD_LANES = 16 };

    /* _mm512_load_si512 / _mm512_store_si512 need 64-byte aligned memory. */
    int8_t __attribute__((aligned(64))) op1_int8[BYTE_LANES];
    int8_t __attribute__((aligned(64))) op2_int8[BYTE_LANES];
    int __attribute__((aligned(64))) op3_int[DWORD_LANES];
    int __attribute__((aligned(64))) presult[DWORD_LANES];
    int16_t __attribute__((aligned(64))) op4_int16[WORD_LANES];
    int16_t __attribute__((aligned(64))) op5_int16[WORD_LANES];

    printf("size of int8_t is %zu\n", sizeof(int8_t));
    printf("size of int is %zu\n", sizeof(int));
    printf("size of int16_t is %zu\n", sizeof(int16_t));

    /* Both byte operands hold 0..63. */
    for (int i = 0; i < BYTE_LANES; i++) {
        op1_int8[i] = (int8_t)i;
        op2_int8[i] = (int8_t)i;
    }
    /* Zero accumulator, shared by both computations. */
    for (int i = 0; i < DWORD_LANES; i++) {
        op3_int[i] = 0;
    }
    /* Both word operands hold 0..31. */
    for (int i = 0; i < WORD_LANES; i++) {
        op4_int16[i] = (int16_t)i;
        op5_int16[i] = (int16_t)i;
    }

    __m512i v1_int8 = _mm512_load_si512(op1_int8);
    __m512i v2_int8 = _mm512_load_si512(op2_int8);
    __m512i v3_int = _mm512_load_si512(op3_int);
    __m512i v4_int16 = _mm512_load_si512(op4_int16);
    __m512i v5_int16 = _mm512_load_si512(op5_int16);

    /*
     * Argument order is (accumulator, a, b). The instruction treats the
     * bytes of `a` as unsigned and the bytes of `b` as signed; with values
     * 0..63 both interpretations coincide.
     */
    __m512i result = _mm512_dpbusds_epi32(v3_int, v1_int8, v2_int8);
    _mm512_store_si512(presult, result);
    printf("vpdpbusds\n");
    print_lanes(presult, DWORD_LANES);

    printf("vpmaddwd + vpaddd\n");
    result = _mm512_madd_epi16(v4_int16, v5_int16);
    result = _mm512_add_epi32(result, v3_int);
    _mm512_store_si512(presult, result);
    print_lanes(presult, DWORD_LANES);

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment