Last active
April 11, 2018 23:40
-
-
Save iwatake2222/7fb81c44ea8058afa34a1262505a1af2 to your computer and use it in GitHub Desktop.
ARMアセンブリ言語の実装色々とNEON命令のサンプル ref: https://qiita.com/take-iwiw/items/cea0a2cb4d2709cb7ee5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
/*
 * Driver for func1(), which is only declared here; its definition must be
 * supplied by another object file at link time.
 *
 * Calls func1() once and prints both the value it returned and the value
 * it stored through the pointer argument.
 */
int main()
{
    extern int func1(int *a);   /* resolved by the linker */
    int stored;                 /* filled in by func1() */
    int status = func1(&stored);

    printf("ret = %d, a = %d\n", status, stored);
    return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
int func1(int *a) | |
{ | |
int ret = 0; | |
__asm__ __volatile__ ( | |
"mov r1, #99;" | |
"str r1, [%1];" | |
"mov %0, #88;" | |
: "=r"(ret) // Output operands | |
: "r"(a) // Input operands | |
: // Overwritten registers | |
); | |
return ret; | |
} | |
int add(int a, int b) | |
{ | |
int ret = 0; | |
__asm__ __volatile__ ( | |
"add %0, %1, %2;" | |
: "=r"(ret) // Output operands | |
: "r"(a), "r"(b) // Input operands | |
: // Overwritten registers | |
); | |
return ret; | |
} | |
int add_label(int a, int b) | |
{ | |
int ret = 0; | |
__asm__ __volatile__ ( | |
"add %[Rret], %[Ra], %[Rb];" | |
: [Rret]"=r"(ret) // Output operands | |
: [Ra]"r"(a), [Rb]"r"(b) // Input operands | |
: // Overwritten registers | |
); | |
return ret; | |
} | |
void copy(int *src, int *dst) | |
{ | |
__asm__ __volatile__ ( | |
"ldr r2, [%0];" // r2 <- *src | |
"str r2, [%1];" // r2 -> *dst | |
: // Output operands | |
: "r"(src), "r"(dst) // Input operands | |
: "r2", "memory" // Overwritten registers | |
); | |
return; | |
} | |
/*
 * Exercises each inline-assembly helper once and prints what it produced:
 * func1() (store + return), add(), add_label() and copy().
 */
int main()
{
    int value;
    int dest;
    int result;

    /* func1 writes `value` through the pointer and returns a constant. */
    result = func1(&value);
    printf("ret = %d, a = %d\n", result, value);

    /* Positional-operand and named-operand adds. */
    result = add(1, 2);
    printf("ret = %d\n", result);
    result = add_label(1, 2);
    printf("ret = %d\n", result);

    /* Copy `value` into `dest` through memory. */
    value = 10;
    dest = 0;
    copy(&value, &dest);
    printf("a = %d b = %d\n", value, dest);

    return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <arm_neon.h> | |
/*
 * NEON intrinsics demo: adds two 8-byte vectors lane by lane with
 * vqadd_u8 and prints every lane as "lhs + rhs = sum" in hex.
 *
 * One input lane is set to 0xEE so that its sum exceeds 0xFF, which makes
 * the saturating behaviour of the add visible in the output.
 */
void add_emb()
{
    uint8_t lhs[8];
    uint8_t rhs[8];
    uint8_t sum[8];

    memset(lhs, 0x12, sizeof(lhs));
    lhs[2] = 0xEE;                      /* lane used to check saturation */
    memset(rhs, 0x34, sizeof(rhs));
    memset(sum, 0x00, sizeof(sum));

    /* 8-bit (uint8_t) x 8 lanes: load both inputs, add, store the result. */
    uint8x8_t vlhs = vld1_u8(lhs);
    uint8x8_t vrhs = vld1_u8(rhs);
    uint8x8_t vsum = vqadd_u8(vlhs, vrhs);
    vst1_u8(sum, vsum);

    for (int lane = 0; lane < 8; lane++) {
        printf("%02X + %02X = %02X\n", lhs[lane], rhs[lane], sum[lane]);
    }
}
/* Entry point: runs the NEON add demo once and exits successfully. */
int main()
{
    add_emb();
    return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
/* Number of bytes processed by add(). */
#define NUM 128

/*
 * Element-wise byte addition: c[i] = a[i] + b[i] for the first NUM
 * elements, processed in ascending index order.
 *
 * a, b: input arrays of at least NUM bytes.
 * c:    output array of at least NUM bytes.
 *
 * Each sum is stored back into a uint8_t, so only its low 8 bits are kept.
 */
void add(uint8_t * a, uint8_t * b, uint8_t * c)
{
    int idx = 0;

    while (idx < NUM) {
        c[idx] = a[idx] + b[idx];
        idx++;
    }
}
int main() | |
{ | |
uint8_t a[NUM], b[NUM], c[NUM]; | |
memset(a, 0x12, sizeof(a)); | |
memset(b, 0x34, sizeof(a)); | |
add(a, b, c); | |
printf("%08X\n", *(uint32_t*)&c[0]); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ func1: stand-alone assembly routine that stores 99 to the word addressed
@ by r0, loads 88 into r0 and returns to the caller.
@ NOTE(review): presumably meant to be called from C as `int func1(int *a)`
@ -- confirm against the caller's declaration.
.file "sub.s"
.text
.global func1 @ make the symbol visible to other object files
.type func1, %function
func1:
mov r1, #99 @ r1 = 99, the value to store
str r1, [r0] @ r0 is the first arg
mov r0, #88 @ r0 = 88; presumably the return-value register under the ARM calling convention
bx lr @ branch to the address held in lr (return to caller)
.end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
gcc simd.c -mfpu=neon | |
./a.out | |
12 + 34 = 46 | |
12 + 34 = 46 | |
EE + 34 = FF | |
12 + 34 = 46 | |
12 + 34 = 46 | |
12 + 34 = 46 | |
12 + 34 = 46 | |
12 + 34 = 46 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
gcc -mfpu=neon -O3 -S simd_auto.c | |
more simd_auto.s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment