Created
February 20, 2023 08:25
-
-
Save TeaPoly/04dbe7a750b1f8a25826f5eece8cd736 to your computer and use it in GitHub Desktop.
Neon to RISC-V V
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copyright (c), Lucky Wong.
 *
 * MIT License (MIT), http://opensource.org/licenses/MIT
 * Full license can be found in the LICENSE file
 */
#include <riscv_vector.h> | |
typedef vfloat32m4_t float32x4_t; | |
typedef vfloat32m2_t float32x2_t; | |
const size_t kE32m4 = 4; // vsetvlmax_e32m4(); | |
const size_t kE32m2 = 2; // vsetvlmax_e32m2(); | |
/*
 * Neon vget_low_f32 stand-in: returns the LMUL=2 part at index 0 of the
 * LMUL=4 register group `x`.
 * NOTE(review): this matches Neon's "low two lanes" only if each vector
 * register holds a single float (VLEN = 32) -- confirm for the target.
 */
__inline static float32x2_t vget_low_f32(float32x4_t x) {
  const float32x2_t lower_part = vget_v_f32m4_f32m2(x, 0);
  return lower_part;
}
/*
 * Neon vget_high_f32 stand-in: returns the LMUL=2 part at index 1 of the
 * LMUL=4 register group `x`.
 * NOTE(review): this matches Neon's "high two lanes" only if each vector
 * register holds a single float (VLEN = 32) -- confirm for the target.
 */
__inline static float32x2_t vget_high_f32(float32x4_t x) {
  const float32x2_t upper_part = vget_v_f32m4_f32m2(x, 1);
  return upper_part;
}
/*
 * Neon vcombine_f32 stand-in: builds an LMUL=4 register group whose part 0
 * is `low` and whose part 1 is `high`.
 *
 * The destination is seeded with vundefined_f32m4() rather than reading the
 * variable inside its own initializer, which used an indeterminate value.
 * Both halves are overwritten before the value is returned.
 */
__inline static float32x4_t vcombine_f32(float32x2_t low, float32x2_t high) {
  float32x4_t combined = vundefined_f32m4();
  combined = vset_v_f32m2_f32m4(combined, 0, low);
  combined = vset_v_f32m2_f32m4(combined, 1, high);
  return combined;
}
/*
 * Neon vld1q_f32 stand-in: unit-stride load of kE32m4 floats starting at
 * `ptr` into an LMUL=4 register group.
 */
__inline static float32x4_t vld1q_f32(float32_t const* ptr) {
  const float32x4_t loaded = vle32_v_f32m4(ptr, kE32m4);
  return loaded;
}
/*
 * Neon vld1_f32 stand-in: unit-stride load of kE32m2 floats starting at
 * `ptr` into an LMUL=2 register group.
 */
__inline static float32x2_t vld1_f32 (float32_t const* ptr) {
  const float32x2_t loaded = vle32_v_f32m2(ptr, kE32m2);
  return loaded;
}
/*
 * Neon vaddq_f32 stand-in: element-wise a + b over the first kE32m4
 * elements.
 */
__inline static float32x4_t vaddq_f32 (float32x4_t a, float32x4_t b) {
  const float32x4_t sum = vfadd_vv_f32m4(a, b, kE32m4);
  return sum;
}
/*
 * Neon vsubq_f32 stand-in: element-wise a - b over the first kE32m4
 * elements.
 */
__inline static float32x4_t vsubq_f32 (float32x4_t a, float32x4_t b) {
  const float32x4_t difference = vfsub_vv_f32m4(a, b, kE32m4);
  return difference;
}
/*
 * Neon vmulq_f32 stand-in: element-wise a * b over the first kE32m4
 * elements.
 */
__inline static float32x4_t vmulq_f32 (float32x4_t a, float32x4_t b) {
  const float32x4_t product = vfmul_vv_f32m4(a, b, kE32m4);
  return product;
}
/*
 * Neon vmlaq_f32 stand-in: multiply-accumulate, a + (b * c) per element,
 * implemented with vfmacc using `a` as the accumulator operand.
 */
__inline static float32x4_t vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) {
  const float32x4_t accumulated = vfmacc_vv_f32m4(a, b, c, kE32m4);
  return accumulated;
}
/*
 * Neon vmlsq_f32 stand-in: multiply-subtract, a - (b * c) per element,
 * implemented with vfnmsac using `a` as the accumulator operand.
 */
__inline static float32x4_t vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) {
  const float32x4_t reduced = vfnmsac_vv_f32m4(a, b, c, kE32m4);
  return reduced;
}
/*
 * Neon vst1q_f32 stand-in: unit-stride store of the first kE32m4 elements
 * of `val` to memory starting at `ptr`.
 */
__inline static void vst1q_f32 (float32_t* ptr, float32x4_t val) {
  const size_t element_count = kE32m4;
  vse32_v_f32m4(ptr, val, element_count);
}
/*
 * Neon vst1_f32 stand-in: unit-stride store of the first kE32m2 elements
 * of `val` to memory starting at `ptr`.
 */
__inline static void vst1_f32 (float32_t* ptr, float32x2_t val) {
  const size_t element_count = kE32m2;
  vse32_v_f32m2(ptr, val, element_count);
}
/*
 * Neon vdup_lane_f32 stand-in: returns a vector whose first kE32m2 elements
 * all equal element `lane` of `vec`.
 *
 * `lane` is expected to be 0 or 1, as with the Neon intrinsic.
 *
 * Implemented with a scalar-indexed register gather, which replicates the
 * selected element into every active element. The earlier form wrote the
 * value with vfmv_s_f, which only sets element 0 and merges the remaining
 * elements from an uninitialized destination; it also passed the runtime
 * parameter `lane` to vget, whose index must be a compile-time constant.
 */
__inline static float32x2_t vdup_lane_f32 (float32x2_t vec, const int lane) {
  return vrgather_vx_f32m2(vec, (size_t)lane, kE32m2);
}
/*
 * Neon vdupq_n_f32 stand-in: returns a vector whose first kE32m4 elements
 * are all set to the scalar `a`.
 *
 * Uses the vector splat vfmv_v_f. The earlier vfmv_s_f form only wrote
 * element 0 and took the remaining elements from an uninitialized
 * destination, so the value was not actually duplicated across lanes.
 */
__inline static float32x4_t vdupq_n_f32 (float32_t a) {
  return vfmv_v_f_f32m4(a, kE32m4);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment