Skip to content

Instantly share code, notes, and snippets.

static inline void dMax_processK(const float *bIn, const float k, float *bOut, int n) {
n &= 0x3;
#if __ARM_NEON__
const float32x4_t x = vdupq_n_f32(k);
while (n) {
vst1q_f32(bOut, vmaxq_f32(vld1q_f32(bIn), x)); // bOut = max(bIn, k)
n -= 4; bIn += 4; bOut += 4;
}
#elif __SSE__
const __m128 x = _mm_set1_ps(k);