Last active
January 10, 2024 08:46
-
-
Save TrungNguyen1909/5b323edda9a21550a1621af506e8ce5f to your computer and use it in GitHub Desktop.
Apple H10 Mul53 extension
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#if 0
Apple A11 (H10) introduces 2 proprietary instructions called mul53lo.2d and mul53hi.2d, both of which belong to the Mul53 extension.
Definitions:
- mul53lo.2d Vd, Vm: Multiplies the 2 53-bit doublewords in the Vd vector by the 2 53-bit doublewords in the Vm vector and stores the lowest 53 bits of each product in the Vd vector.
- mul53hi.2d Vd, Vm: Multiplies the 2 53-bit doublewords in the Vd vector by the 2 53-bit doublewords in the Vm vector and stores each product shifted right by 53 bits in the Vd vector.
Encodings:
- mul53lo.2d Vd, Vm: 0x00200000 | (m << 5) | (d << 0)
- mul53hi.2d Vd, Vm: 0x00200400 | (m << 5) | (d << 0)
#endif
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
void write_v0(uint64_t lo, uint64_t hi) { | |
__asm__ __volatile__("mov v0.D[0], %0\n" :: "r"(lo) :); | |
__asm__ __volatile__("mov v0.D[1], %0\n" :: "r"(hi) :); | |
} | |
void read_v0(uint64_t *lo, uint64_t *hi) { | |
__asm__ __volatile__("mov %0, v0.D[0]\n" : "=r"(*lo) ::"memory"); | |
__asm__ __volatile__("mov %0, v0.D[1]\n" : "=r"(*hi) ::"memory"); | |
} | |
void write_v1(uint64_t lo, uint64_t hi) { | |
__asm__ __volatile__("mov v1.D[0], %0\n" :: "r"(lo) :); | |
__asm__ __volatile__("mov v1.D[1], %0\n" :: "r"(hi) :); | |
} | |
void read_v1(uint64_t *lo, uint64_t *hi) { | |
__asm__ __volatile__("mov %0, v1.D[0]\n" : "=r"(*lo) ::"memory"); | |
__asm__ __volatile__("mov %0, v1.D[1]\n" : "=r"(*hi) ::"memory"); | |
} | |
/*
 * Exercise mul53lo.2d and mul53hi.2d with v1 as Vd and v0 as Vm.
 *
 * The operands are loaded into v0/v1, each instruction is emitted as a raw
 * .long opcode, and the register contents before and after are printed.
 * The trailing comment on each printf is the output recorded by the
 * original author.
 *
 * NOTE(review): the register state is carried between the helper calls and
 * the raw opcodes only by convention; this assumes no intervening code
 * touches v0 or v1.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    /* Operand lanes: a_* are loaded into v0 (Vm), b_* into v1 (Vd). */
    const uint64_t a_lo = 0x06050403020100;
    const uint64_t a_hi = 0x2020202020202;
    const uint64_t b_lo = 0x2020202020202;
    const uint64_t b_hi = 1;

    uint64_t lo0_before, hi0_before;
    uint64_t lo1_before, hi1_before;
    uint64_t lo0, hi0;
    uint64_t lo1, hi1;
    uint64_t lo1_hi, hi1_hi;

    /* Load the operands and snapshot both registers before the multiply. */
    write_v0(a_lo, a_hi);
    write_v1(b_lo, b_hi);
    read_v0(&lo0_before, &hi0_before);
    read_v1(&lo1_before, &hi1_before);

    /* 0x00200000 | (m << 5) | d with m = 0, d = 1. */
    __asm__ __volatile__(".long 0x00200001\n" ::: "v0", "v1"); // mul53lo.2d v1, v0
    read_v0(&lo0, &hi0);
    read_v1(&lo1, &hi1);

    /* Reload the operands, since the first instruction overwrote v1. */
    write_v0(a_lo, a_hi);
    write_v1(b_lo, b_hi);

    /* 0x00200400 | (m << 5) | d with m = 0, d = 1. */
    __asm__ __volatile__(".long 0x00200401\n" ::: "v0", "v1"); // mul53hi.2d v1, v0
    read_v1(&lo1_hi, &hi1_hi);

    /* PRIx64 matches uint64_t regardless of its underlying integer type. */
    printf("v0: 0x%" PRIx64 " 0x%" PRIx64 "\n", lo0_before, hi0_before);    // 0x6050403020100 0x2020202020202
    printf("v1: 0x%" PRIx64 " 0x%" PRIx64 "\n", lo1_before, hi1_before);    // 0x2020202020202 0x1
    printf("v0 after: 0x%" PRIx64 " 0x%" PRIx64 "\n", lo0, hi0);            // 0x6050403020100 0x2020202020202
    printf("v1 after: 0x%" PRIx64 " 0x%" PRIx64 "\n", lo1, hi1);            // 0xa1e140c060200 0x2020202020202
    printf("v1 after hi: 0x%" PRIx64 " 0x%" PRIx64 "\n", lo1_hi, hi1_hi);   // 0x60b0f1214151 0x0

    /*
     * Cross-check of lane 0 in Python: the full product equals the low
     * result OR-ed with the high result shifted left by 53 bits.
     *
     *   >>> hex(0x06050403020100*0x02020202020202)
     *   '0xc161e24282a2a1e140c060200'
     *   >>> hex(0xa1e140c060200|(0x60b0f1214151<<53))
     *   '0xc161e24282a2a1e140c060200'
     */
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment