Skip to content

Instantly share code, notes, and snippets.

@raphlinus
Created November 15, 2024 15:48
Show Gist options
  • Save raphlinus/8a39ed43ecfd5eb28a9b3bb2c9ad6dc0 to your computer and use it in GitHub Desktop.
Save raphlinus/8a39ed43ecfd5eb28a9b3bb2c9ad6dc0 to your computer and use it in GitHub Desktop.
Neon implementation of linear to sRGB transfer function
// Copyright 2024 the Color Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
/// Applies the linear-to-sRGB transfer function to an RGBA color with Neon.
///
/// Lanes 0..=2 are encoded; lane 3 (alpha, going by the `rgba` parameter name)
/// is copied from the input untouched. The curve is evaluated on the absolute
/// value of each lane and the input's sign bit is then copied back, so the
/// result is odd-symmetric around zero.
///
/// For magnitudes above `0.0031308` the curve is approximated by a polynomial
/// in `r = sqrt(|v| + BIAS)`, split into its even powers (`1, r^2, r^4`) and
/// odd powers (`r, r^3, r^5`). At or below the threshold the linear segment
/// `12.92 * |v|` is used instead.
///
/// # Safety
///
/// The executing CPU must support the `neon` target feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[inline(never)]
pub unsafe fn to_srgb(rgba: [f32; 4]) -> [f32; 4] {
    // Offset added to |v| before the square root.
    const BIAS: f32 = -5.35862651e-04;
    // Coefficients multiplying the even powers of r: 1, r^2, r^4.
    const EVEN: [f32; 3] = [-2.88143143e-02, -9.12795913e-01, -7.29192910e-01];
    // Coefficients multiplying the odd powers of r: r, r^3, r^5.
    const ODD: [f32; 3] = [1.40194533e+00, 1.06133172e+00, 2.07758287e-01];
    // Magnitudes strictly above this use the polynomial, the rest the line.
    const THRESHOLD: f32 = 0.0031308;
    const LINEAR_SLOPE: f32 = 12.92;
    // Bit pattern of the f32 sign bit.
    const SIGN_BIT: u32 = 0x80000000;

    let input = aarch64::vld1q_f32(rgba.as_ptr());
    let magnitude = aarch64::vabsq_f32(input);

    // Linear segment and the per-lane selector between the two segments.
    let linear = aarch64::vmulq_f32(magnitude, aarch64::vdupq_n_f32(LINEAR_SLOPE));
    let use_curve = aarch64::vcgtq_f32(magnitude, aarch64::vdupq_n_f32(THRESHOLD));

    // t = |v| + BIAS, r = sqrt(t). For tiny magnitudes t is negative and r is
    // NaN, but those lanes are below the threshold and take `linear` instead.
    let t = aarch64::vaddq_f32(magnitude, aarch64::vdupq_n_f32(BIAS));
    let r = aarch64::vsqrtq_f32(t);
    let t_squared = aarch64::vmulq_f32(t, t);

    // even = EVEN[0] + EVEN[1] * t + EVEN[2] * t^2
    let even = aarch64::vfmaq_f32(
        aarch64::vdupq_n_f32(EVEN[0]),
        t,
        aarch64::vdupq_n_f32(EVEN[1]),
    );
    let even = aarch64::vfmaq_f32(even, t_squared, aarch64::vdupq_n_f32(EVEN[2]));

    // odd = ODD[0] + ODD[1] * t + ODD[2] * t^2
    let odd = aarch64::vfmaq_f32(
        aarch64::vdupq_n_f32(ODD[0]),
        t,
        aarch64::vdupq_n_f32(ODD[1]),
    );
    let odd = aarch64::vfmaq_f32(odd, t_squared, aarch64::vdupq_n_f32(ODD[2]));

    // curve = even + odd * r
    let curve = aarch64::vfmaq_f32(even, odd, r);

    // Pick the segment per lane, then take the sign bit from the input and
    // every other bit from the encoded magnitude.
    let encoded = aarch64::vbslq_f32(use_curve, curve, linear);
    let signed = aarch64::vbslq_f32(aarch64::vdupq_n_u32(SIGN_BIT), input, encoded);

    // Lane 3 is passed through from the input unmodified.
    let with_alpha = aarch64::vcopyq_laneq_f32(signed, 3, input, 3);

    let mut output = [0.0f32; 4];
    aarch64::vst1q_f32(output.as_mut_ptr(), with_alpha);
    output
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment