Skip to content

Instantly share code, notes, and snippets.

@syohex
Created June 30, 2012 13:10
Show Gist options
  • Save syohex/3023686 to your computer and use it in GitHub Desktop.
Save syohex/3023686 to your computer and use it in GitHub Desktop.
Sample code for generating ARM NEON instructions
/*
arm-linux-gnueabi-gcc-4.6 -O2 -march=armv7-a -mtune=cortex-a9 -ftree-vectorize -mhard-float -mfloat-abi=softfp -mfpu=neon -ffast-math -mvectorize-with-neon-quad -S neon_test.c
*/
/*
 * Element-wise multiply of two arrays of 200 short ints: z[k] = a[k] * b[k].
 * The three pointers are declared __restrict, so callers must pass
 * non-overlapping arrays of at least 200 elements each.
 */
void NeonTest(short int * __restrict a, short int * __restrict b, short int * __restrict z)
{
    enum { ELEMENT_COUNT = 200 };           /* fixed trip count of the loop */
    const short int *lhs = a;
    const short int *rhs = b;
    short int *out = z;
    short int *const out_end = z + ELEMENT_COUNT;

    /* Walk all three arrays in lockstep until the output is full. */
    while (out != out_end) {
        *out++ = *lhs++ * *rhs++;
    }
}
@ Assembler preamble emitted by GCC for this translation unit: unified
@ ARM/Thumb syntax, ARMv7-A architecture, NEON FPU, then EABI build
@ attributes and the symbol set-up for NeonTest.
.syntax unified
.arch armv7-a
@ Build attributes. Tag names below follow the ARM EABI build-attribute
@ numbering and are given for orientation only; confirm the meaning of each
@ value against the ABI addenda before relying on it.
.eabi_attribute 27, 3 @ Tag_ABI_HardFP_use
.fpu neon
.eabi_attribute 23, 1 @ Tag_ABI_FP_number_model
.eabi_attribute 24, 1 @ Tag_ABI_align_needed
.eabi_attribute 25, 1 @ Tag_ABI_align_preserved
.eabi_attribute 26, 2 @ Tag_ABI_enum_size
.eabi_attribute 30, 2 @ Tag_ABI_optimization_goals
.eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
.eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t
@ Everything that follows is assembled as Thumb code.
.thumb
@ Source file name recorded by the compiler.
@ NOTE(review): the compile command quoted with the C source names
@ neon_test.c, not neon_test2.c -- this listing may come from a slightly
@ different invocation; confirm before reproducing.
.file "neon_test2.c"
.text
@ On ARM targets GNU as treats the .align operand as a power of two,
@ i.e. 4-byte alignment here.
.align 2
@ Export NeonTest and mark it as a Thumb-mode function symbol.
.global NeonTest
@ Second .thumb with no mode change since the first one; redundant.
.thumb
.thumb_func
.type NeonTest, %function
@-----------------------------------------------------------------------
@ NeonTest: element-wise 16-bit multiply over 200 halfword elements,
@ storing the low 16 bits of each product.
@ In:   r0, r1 = the two source arrays, r2 = destination array
@       (a, b, z of the C prototype, assuming AAPCS argument order).
@ Out:  nothing returned; 200 halfwords written through r2.
@ Uses: r3, ip, q8, q9 and flags as scratch; r4-r9, sl, fp are pushed on
@       entry and popped on exit; 8 bytes of stack frame, of which only
@       [sp, #4] is referenced.
@ Shape:
@   .L3  scalar peel loop  - runs until the r0 pointer is 16-byte aligned
@   .L5  NEON loop         - 8 halfwords per iteration
@   .L7  scalar tail loop  - whatever is left after the NEON loop
@-----------------------------------------------------------------------
NeonTest:
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@ Peel count: r3 = (-((r0 >> 1) & 7)) & 7, i.e. the number of leading
@ halfwords before r0 reaches a 16-byte boundary (for a 2-byte-aligned r0).
@ The push and the stack adjustment are interleaved with that computation.
ubfx r3, r0, #1, #3
push {r4, r5, r6, r7, r8, r9, sl, fp}
negs r3, r3
sub sp, sp, #8
ands r3, r3, #7
@ Flags come from the ands above: zero means r0 is already aligned.
beq .L8
@ r6 = byte offset into all three arrays, r4 = elements done so far.
movs r6, #0
mov r4, r6
@ Scalar peel loop: one halfword product per iteration.
@ ldrh zero-extends, which is harmless because only the low 16 bits of the
@ 32-bit product are stored by strh.
.L3:
ldrh r7, [r1, r6]
adds r4, r4, #1
ldrh ip, [r0, r6]
@ r5 = elements still to do once this iteration completes (200 - r4).
rsb r5, r4, #200
mul r7, ip, r7
strh r7, [r2, r6] @ movhi
adds r6, r6, #2
@ Unsigned compare: keep looping while peel count (r3) > elements done (r4).
cmp r3, r4
bhi .L3
@ Here r4 == r3 == number of peeled elements and r5 == 200 - r4.
.L2:
@ r6 = elements left after peeling, r8 = number of 8-element vector
@ iterations, fp = elements the vector loop will cover (r8 * 8).
@ r6 is spilled to [sp, #4] because r6 is reused as the loop counter below.
rsb r6, r3, #200
lsr r8, r6, #3
str r6, [sp, #4]
lsl fp, r8, #3
@ Skip the vector loop when there is no full group of 8. With a peel count
@ of at most 7, r6 is at least 193, so r8 is never zero for this trip count.
cmp r8, #0
beq .L4
@ r3 = first 16-byte-aligned element address in the r0 array.
@ sl and r9 are the byte distances from the r0 array to the r1 and r2
@ arrays, so one running pointer (r3) addresses all three.
add r3, r0, r3, lsl #1
movs r6, #0
rsb sl, r0, r1
rsb r9, r0, r2
@ r4 is overwritten inside the loop, so keep the element index in ip.
mov ip, r4
@ NEON loop: load 8 halfwords from each source, multiply lane-wise,
@ store 8 halfwords. Only the r3 (first source) pointer was aligned by the
@ peel loop; the other two addresses are derived from it by fixed offsets
@ and their alignment is not established by this code.
.L5:
add r7, r3, sl
vldmia r3, {d16-d17}
adds r6, r6, #1
add r4, r3, r9
vld1.16 {q9}, [r7]
adds r3, r3, #16
vmul.i16 q8, q9, q8 @ <= Neon instruction
cmp r6, r8
vst1.16 {q8}, [r4]
@ Unsigned compare: loop while iterations done (r6) < r8.
bcc .L5
@ After the vector loop: r3 = elements that were left after peeling,
@ r4 = index of the first element not yet processed, r5 = tail count.
ldr r3, [sp, #4]
add r4, ip, fp
rsb r5, fp, r5
@ If the vector loop consumed everything, there is no tail.
cmp r3, fp
beq .L1
@ Tail set-up: turn the element index into a byte offset and advance all
@ three base pointers by it; r3 becomes the running byte offset.
.L4:
lsls r4, r4, #1
movs r3, #0
adds r2, r2, r4
adds r1, r1, r4
adds r0, r0, r4
@ Scalar tail loop: r5 iterations, one halfword product each.
.L7:
ldrh r4, [r1, r3]
ldrh r6, [r0, r3]
mul r4, r6, r4
strh r4, [r2, r3] @ movhi
adds r3, r3, #2
subs r5, r5, #1
bne .L7
@ Epilogue: release the frame, restore the saved registers, return via lr.
.L1:
add sp, sp, #8
pop {r4, r5, r6, r7, r8, r9, sl, fp}
bx lr
@ Already-aligned entry path: nothing peeled (r3 is 0 here), so all 200
@ elements remain (r5) and the element index (r4) starts at 0.
.L8:
movs r5, #200
mov r4, r3
b .L2
.size NeonTest, .-NeonTest
@ Compiler identification string.
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
@ Empty .note.GNU-stack section (conventionally signals that the object
@ does not need an executable stack).
.section .note.GNU-stack,"",%progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment