-
-
Save sandeepkumar-skb/cf20f680ecca327a8f1f22fc5a683917 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Floating Point 4x4 Matrix Multiplication
 *
 * Target : ARM (AArch32) with NEON, GNU assembler, Linux EABI syscalls.
 *
 * Treating every matrix as four consecutive 4-float vectors, the code
 * computes, for j = 0..3:
 *
 *     matrix0.vec[j] = sum over k = 0..3 of  matrix1.vec[k] * matrix2[4*j + k]
 *
 * Read as column-major storage this is matrix0 = matrix1 x matrix2.
 *
 * Register roles:
 *     R0       -> matrix0 (result); reused for the exit status at the end
 *     R1       -> matrix1 (left operand)
 *     R2       -> matrix2 (right operand)
 *     Q8-Q11   the four vectors of matrix1   (aliases D16-D23)
 *     Q0-Q3    the four vectors of matrix2   (aliases D0-D7)
 *     Q12-Q15  the four vectors of the result (aliases D24-D31)
 */
        .fpu    neon                    @ NEON instructions are used below
        .text
        .global _start

        .equ    SYS_EXIT, 1             @ syscall number placed in R7 for exit

_start:
        LDR     R0, =matrix0            @ R0 = &matrix0 (destination)
        LDR     R1, =matrix1            @ R1 = &matrix1
        LDR     R2, =matrix2            @ R2 = &matrix2

        @ Load both operands, 32 bytes (eight floats) per instruction.
        @ Writeback is only needed on the first access of each pair.
        VLD1.32 {D16-D19}, [R1]!        @ first eight elements of matrix 1
        VLD1.32 {D20-D23}, [R1]         @ second eight elements of matrix 1
        VLD1.32 {D0-D3}, [R2]!          @ first eight elements of matrix 2
        VLD1.32 {D4-D7}, [R2]           @ second eight elements of matrix 2

        @ Result vector 0: scalars are matrix2[0..3] (D0, D1)
        VMUL.f32 Q12, Q8,  D0[0]        @ VMUL starts the sum, no zeroing needed
        VMLA.f32 Q12, Q9,  D0[1]
        VMLA.f32 Q12, Q10, D1[0]
        VMLA.f32 Q12, Q11, D1[1]

        @ Result vector 1: scalars are matrix2[4..7] (D2, D3)
        VMUL.f32 Q13, Q8,  D2[0]
        VMLA.f32 Q13, Q9,  D2[1]
        VMLA.f32 Q13, Q10, D3[0]
        VMLA.f32 Q13, Q11, D3[1]

        @ Result vector 2: scalars are matrix2[8..11] (D4, D5)
        VMUL.f32 Q14, Q8,  D4[0]
        VMLA.f32 Q14, Q9,  D4[1]
        VMLA.f32 Q14, Q10, D5[0]
        VMLA.f32 Q14, Q11, D5[1]

        @ Result vector 3: scalars are matrix2[12..15] (D6, D7)
        VMUL.f32 Q15, Q8,  D6[0]
        VMLA.f32 Q15, Q9,  D6[1]
        VMLA.f32 Q15, Q10, D7[0]
        VMLA.f32 Q15, Q11, D7[1]

        VST1.32 {D24-D27}, [R0]!        @ store first eight elements of result
        VST1.32 {D28-D31}, [R0]         @ store second eight elements of result

        @ Terminate. R0 is the exit status; clear it so the process does not
        @ exit with whatever bits the result pointer happened to hold.
        MOV     R0, #0
        MOV     R7, #SYS_EXIT
        SWI     0
.data | |
matrix0: .float 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | |
matrix1: .float 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 | |
matrix2: .float 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment