Skip to content

Instantly share code, notes, and snippets.

@iwatake2222
Last active April 11, 2018 23:40
Show Gist options
  • Save iwatake2222/7fb81c44ea8058afa34a1262505a1af2 to your computer and use it in GitHub Desktop.
Save iwatake2222/7fb81c44ea8058afa34a1262505a1af2 to your computer and use it in GitHub Desktop.
ARMアセンブリ言語の実装色々とNEON命令のサンプル ref: https://qiita.com/take-iwiw/items/cea0a2cb4d2709cb7ee5
#include <stdio.h>
/* Calls the externally defined func1 and prints its return value and the
 * value it stored through the pointer argument. */
int main()
{
    extern int func1(int *a);   /* defined outside this translation unit */
    int stored;
    int result = func1(&stored);

    printf("ret = %d, a = %d\n", result, stored);
}
#include <stdio.h>
int func1(int *a)
{
int ret = 0;
__asm__ __volatile__ (
"mov r1, #99;"
"str r1, [%1];"
"mov %0, #88;"
: "=r"(ret) // Output operands
: "r"(a) // Input operands
: // Overwritten registers
);
return ret;
}
int add(int a, int b)
{
int ret = 0;
__asm__ __volatile__ (
"add %0, %1, %2;"
: "=r"(ret) // Output operands
: "r"(a), "r"(b) // Input operands
: // Overwritten registers
);
return ret;
}
int add_label(int a, int b)
{
int ret = 0;
__asm__ __volatile__ (
"add %[Rret], %[Ra], %[Rb];"
: [Rret]"=r"(ret) // Output operands
: [Ra]"r"(a), [Rb]"r"(b) // Input operands
: // Overwritten registers
);
return ret;
}
void copy(int *src, int *dst)
{
__asm__ __volatile__ (
"ldr r2, [%0];" // r2 <- *src
"str r2, [%1];" // r2 -> *dst
: // Output operands
: "r"(src), "r"(dst) // Input operands
: "r2", "memory" // Overwritten registers
);
return;
}
/* Exercises each inline-assembly helper in turn and prints the results. */
int main()
{
    int a;
    int b;
    int ret;

    /* func1: returns a value and writes another through the pointer. */
    ret = func1(&a);
    printf("ret = %d, a = %d\n", ret, a);

    /* Addition with positional asm operands. */
    ret = add(1, 2);
    printf("ret = %d\n", ret);

    /* Addition with named asm operands. */
    ret = add_label(1, 2);
    printf("ret = %d\n", ret);

    /* Copy a into b. */
    a = 10;
    b = 0;
    copy(&a, &b);
    printf("a = %d b = %d\n", a, b);
}
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arm_neon.h>
/* Adds two 8-byte vectors lane by lane with the NEON vqadd_u8 intrinsic and
 * prints every lane as "a + b = c" in hex. */
void add_emb()
{
    uint8_t a[8];
    uint8_t b[8];
    uint8_t c[8];   /* result */

    memset(a, 0x12, sizeof(a));
    a[2] = 0xEE;    /* lane chosen to check saturation */
    memset(b, 0x34, sizeof(b));
    memset(c, 0x00, sizeof(c));

    /* 8-bit (uint8_t) x 8 lanes: load both inputs, add, store the result. */
    uint8x8_t lhs = vld1_u8(a);
    uint8x8_t rhs = vld1_u8(b);
    uint8x8_t sum = vqadd_u8(lhs, rhs);
    vst1_u8(c, sum);

    for (int lane = 0; lane < 8; lane++) {
        printf("%02X + %02X = %02X\n", a[lane], b[lane], c[lane]);
    }
}
/* Runs the NEON intrinsic addition demo. */
int main()
{
    add_emb();
    return 0;
}
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#define NUM 128
/* Element-wise byte addition over NUM elements: c[i] = a[i] + b[i].
 * The sum is narrowed back to uint8_t, so it wraps modulo 256. */
void add(uint8_t * a, uint8_t * b, uint8_t * c)
{
    for (int idx = 0; idx != NUM; ++idx) {
        c[idx] = (uint8_t)(a[idx] + b[idx]);
    }
}
/* Fills two NUM-byte arrays with constant patterns, adds them element-wise
 * and prints the first four result bytes as one 32-bit hex word. */
int main()
{
    uint8_t a[NUM], b[NUM], c[NUM];
    uint32_t first_word;

    memset(a, 0x12, sizeof(a));
    memset(b, 0x34, sizeof(b));   /* size taken from the array being filled */
    add(a, b, c);

    /* Copy the bytes out instead of casting &c[0] to uint32_t*: a uint8_t
     * array has no uint32_t alignment guarantee, and reading it through a
     * uint32_t lvalue violates the effective-type (strict aliasing) rules. */
    memcpy(&first_word, c, sizeof(first_word));

    /* %X expects unsigned int; uint32_t is not that type on every target. */
    printf("%08X\n", (unsigned int)first_word);
    return 0;
}
@ Assembly implementation of func1.
@ Stores 99 to the address held in r0, then puts 88 in r0 and returns via lr.
@ NOTE(review): r0 is presumably also the return-value register under the
@ ARM procedure call standard -- confirm against the target ABI.
.file "sub.s"
.text
.global func1              @ make func1 visible to the linker
.type func1, %function     @ mark the symbol as a function
func1:
mov r1, #99                @ r1 <- 99 (scratch)
str r1, [r0] @ r0 is the first arg
mov r0, #88                @ r0 <- 88
bx lr                      @ branch to the address in lr (return)
.end
gcc simd.c -mfpu=neon
./a.out
12 + 34 = 46
12 + 34 = 46
EE + 34 = FF
12 + 34 = 46
12 + 34 = 46
12 + 34 = 46
12 + 34 = 46
12 + 34 = 46
gcc -mfpu=neon -O3 -S simd_auto.c
more simd_auto.s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment