Created
November 15, 2024 15:48
-
-
Save raphlinus/8a39ed43ecfd5eb28a9b3bb2c9ad6dc0 to your computer and use it in GitHub Desktop.
Neon implementation of linear to sRGB transfer function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2024 the Color Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
/// Applies the linear-to-sRGB transfer function to an RGBA value using NEON.
///
/// Lanes 0..=2 are encoded; lane 3 (alpha) is copied through from the input
/// unchanged. Each colour lane is processed on its magnitude and the sign of
/// the input is re-applied afterwards, so negative inputs map to the negated
/// encoding of their absolute value.
///
/// For magnitudes above `0.0031308` the result is an approximation of the form
/// `even(x) + sqrt(x) * odd(x)`, where `x = |v| + bias` and `even`/`odd` are
/// quadratic polynomials in `x`. At or below that threshold the linear segment
/// `12.92 * |v|` is used instead.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports the `neon` target
/// feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[inline(never)]
pub unsafe fn to_srgb(rgba: [f32; 4]) -> [f32; 4] {
    // SAFETY: `rgba` is a live `[f32; 4]`, so the pointer is valid for reading
    // the four contiguous `f32` values that `vld1q_f32` loads.
    let v = aarch64::vld1q_f32(rgba.as_ptr());
    let vabs = aarch64::vabsq_f32(v);

    // x = |v| + bias, y = sqrt(x), x2 = x^2.
    // For |v| < -bias, `x` is negative and `y` is NaN; every such lane is at
    // or below the linear threshold, so the bitwise select below discards it.
    let bias = aarch64::vdupq_n_f32(-5.35862651e-04);
    let x = aarch64::vaddq_f32(vabs, bias);
    let y = aarch64::vsqrtq_f32(x);
    let x2 = aarch64::vmulq_f32(x, x);

    // `vfmaq_f32(a, b, c)` computes `a + b * c`.
    // even2 = e0 + e1 * x + e2 * x^2
    let even1 = aarch64::vfmaq_f32(
        aarch64::vdupq_n_f32(-2.88143143e-02),
        x,
        aarch64::vdupq_n_f32(-9.12795913e-01),
    );
    let even2 = aarch64::vfmaq_f32(even1, x2, aarch64::vdupq_n_f32(-7.29192910e-01));
    // odd2 = o0 + o1 * x + o2 * x^2
    let odd1 = aarch64::vfmaq_f32(
        aarch64::vdupq_n_f32(1.40194533e+00),
        x,
        aarch64::vdupq_n_f32(1.06133172e+00),
    );
    let odd2 = aarch64::vfmaq_f32(odd1, x2, aarch64::vdupq_n_f32(2.07758287e-01));
    // poly = even2 + odd2 * sqrt(x)
    let poly = aarch64::vfmaq_f32(even2, odd2, y);

    // Per lane: polynomial branch where |v| > 0.0031308, linear branch
    // otherwise. A NaN input compares false and so takes the linear branch,
    // which yields NaN.
    let mask = aarch64::vcgtq_f32(vabs, aarch64::vdupq_n_f32(0.0031308));
    let lin = aarch64::vmulq_f32(vabs, aarch64::vdupq_n_f32(12.92));
    let z = aarch64::vbslq_f32(mask, poly, lin);

    // Take only the sign bit from the original input; all other bits from `z`.
    let sign_mask = aarch64::vdupq_n_u32(0x80000000);
    let z_signed = aarch64::vbslq_f32(sign_mask, v, z);

    // Overwrite lane 3 with the untouched input lane 3 (alpha).
    let out = aarch64::vcopyq_laneq_f32::<3, 3>(z_signed, v);

    let mut result = [0.0f32; 4];
    // SAFETY: `result` is a live `[f32; 4]`, so the pointer is valid for
    // writing the four contiguous `f32` values that `vst1q_f32` stores.
    aarch64::vst1q_f32(result.as_mut_ptr(), out);
    result
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment