Skip to content

Instantly share code, notes, and snippets.

@malfet
Last active November 6, 2020 00:17
Show Gist options
  • Select an option

  • Save malfet/5ca00b5a5c98dda769ad200dd976563a to your computer and use it in GitHub Desktop.

Select an option

Save malfet/5ca00b5a5c98dda769ad200dd976563a to your computer and use it in GitHub Desktop.
#include <arm_neon.h>
#include <math.h>
#include <stdio.h>
/*
 * Stores the per-lane NEON reciprocal estimate of four floats.
 *
 * data_in:  the four input values (read only).
 * data_out: receives the four vrecpeq_f32 results.
 *
 * Only the raw estimate is written; no refinement step is applied.
 */
void run_neon_reciproc(float data_in[4], float data_out[4]) {
    const float32x4_t x = vld1q_f32(data_in);
    const float32x4_t estimate = vrecpeq_f32(x);

    /*
     * Refinement is left disabled here. Each application of the statement
     * below is one vrecpsq_f32-based refinement step (two were commented
     * out in the original experiment):
     *     estimate = vmulq_f32(vrecpsq_f32(x, estimate), estimate);
     */
    vst1q_f32(data_out, estimate);
}
/*
 * Stores the per-lane NEON reciprocal-square-root estimate of four floats.
 *
 * data_in:  the four input values (read only).
 * data_out: receives the four vrsqrteq_f32 results.
 *
 * Only the raw estimate is written; no refinement step is applied.
 */
void run_neon_rsqrt(float data_in[4], float data_out[4]) {
    /*
     * Refinement is left disabled here. With `x` the loaded input and `y`
     * the estimate, each application of the statement below is one
     * vrsqrtsq_f32-based refinement step (two were commented out in the
     * original experiment):
     *     y = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, y), y), y);
     */
    vst1q_f32(data_out, vrsqrteq_f32(vld1q_f32(data_in)));
}
/*
 * Runs both NEON estimate kernels on a fixed four-element input and prints,
 * for every lane, the scalar reference value, the NEON result and the
 * absolute difference between the two.
 */
int main(void) {
    enum { LANES = 4 };
    float data_in[LANES] = {1., 2., 3., 4.};
    /* Alternative input set: {137., 223., 331., 499.} */
    float data_out[LANES];
    int lane;

    /* Reciprocal estimate versus the scalar 1./x. */
    run_neon_reciproc(data_in, data_out);
    for (lane = 0; lane < LANES; ++lane) {
        const float expected = 1. / data_in[lane];
        const float actual = data_out[lane];
        printf("%d: 1./%f = %f neon out %f err %e\n", lane, data_in[lane], expected, actual, fabs(actual - expected));
    }

    /* Reciprocal square root estimate versus the scalar 1.f/sqrt(x). */
    run_neon_rsqrt(data_in, data_out);
    for (lane = 0; lane < LANES; ++lane) {
        const float expected = 1.f/sqrt(data_in[lane]);
        const float actual = data_out[lane];
        printf("%d: 1.f/sqrt(%f) = %f neon out %f err %e\n", lane, data_in[lane], expected, actual, fabs(actual - expected));
    }

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment