Last active
November 6, 2020 00:17
-
-
Save malfet/5ca00b5a5c98dda769ad200dd976563a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <arm_neon.h> | |
| #include <math.h> | |
| #include <stdio.h> | |
/**
 * Compute a per-lane reciprocal estimate of four floats with NEON.
 *
 * Loads four floats from data_in, applies the vrecpeq_f32 reciprocal-estimate
 * intrinsic, and stores the four results to data_out. No refinement is
 * applied, so the output is only the raw hardware estimate.
 *
 * For a more accurate result, one or two Newton-Raphson steps can be applied
 * to the estimate before storing it, each of the form
 *     estimate = vmulq_f32(vrecpsq_f32(x, estimate), estimate);
 * (this requires dropping the const on `estimate`).
 *
 * @param data_in   four input values; read, never written
 * @param data_out  receives the four estimated reciprocals
 */
void run_neon_reciproc(float data_in[4], float data_out[4]) {
    const float32x4_t x = vld1q_f32(data_in);
    const float32x4_t estimate = vrecpeq_f32(x);

    vst1q_f32(data_out, estimate);
}
/**
 * Compute a per-lane reciprocal-square-root estimate of four floats with NEON.
 *
 * Loads four floats from data_in, applies the vrsqrteq_f32 estimate
 * intrinsic, and stores the four results to data_out. No refinement is
 * applied, so the output is only the raw hardware estimate.
 *
 * For a more accurate result, one or two Newton-Raphson steps can be applied
 * to the estimate before storing it, each of the form
 *     estimate = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, estimate), estimate),
 *                          estimate);
 * (this requires dropping the const on `estimate`).
 *
 * @param data_in   four input values; read, never written
 * @param data_out  receives the four estimated values of 1/sqrt(x)
 */
void run_neon_rsqrt(float data_in[4], float data_out[4]) {
    const float32x4_t x = vld1q_f32(data_in);
    const float32x4_t estimate = vrsqrteq_f32(x);

    vst1q_f32(data_out, estimate);
}
/**
 * Demo driver: runs the NEON reciprocal and reciprocal-square-root estimates
 * on a fixed four-element input and prints, per lane, the reference value
 * computed with scalar math, the NEON output, and the absolute difference.
 *
 * @return always 0
 */
int main(void) {
    enum { LANES = 4 };

    /* Alternative input set to try: {137., 223., 331., 499.} */
    float inputs[LANES] = {1., 2., 3., 4.};
    float estimates[LANES];

    /* Reciprocal: compare against scalar 1/x. */
    run_neon_reciproc(inputs, estimates);
    for (int lane = 0; lane < LANES; lane++) {
        const float expected = 1. / inputs[lane];
        const double abs_err = fabs(estimates[lane] - expected);
        printf("%d: 1./%f = %f neon out %f err %e\n", lane, inputs[lane], expected, estimates[lane], abs_err);
    }

    /* Reciprocal square root: compare against scalar 1/sqrt(x). */
    run_neon_rsqrt(inputs, estimates);
    for (int lane = 0; lane < LANES; lane++) {
        const float expected = 1.f / sqrt(inputs[lane]);
        const double abs_err = fabs(estimates[lane] - expected);
        printf("%d: 1.f/sqrt(%f) = %f neon out %f err %e\n", lane, inputs[lane], expected, estimates[lane], abs_err);
    }

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment