Created
May 3, 2025 20:06
-
-
Save Const-me/5fb31968766f01f9ade5c54946b94f6f to your computer and use it in GitHub Desktop.
Numerically stable 2D angle bisector algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Numerically stable 2D angle bisector algorithm | |
// (c) 2025 Konstantin http://const.me/ | |
// This source file is subject to the terms of the MIT license https://opensource.org/license/MIT | |
#include <emmintrin.h> // SSE 1 and 2 | |
#include <pmmintrin.h> // SSE 3 | |
#include <smmintrin.h> // SSE 4.1 | |
// Dot products of two pairs of 2D vectors in xy and zw of the inputs, broadcasted across both xy and zw | |
static inline __m128 dot2( __m128 a, __m128 b ) noexcept | |
{ | |
__m128 v = _mm_mul_ps( a, b ); | |
v = _mm_add_ps( v, _mm_movehdup_ps( v ) ); | |
return _mm_moveldup_ps( v ); | |
} | |
// [ normalise( vec.xy ), normalise( vec.zw ) ] | |
static inline __m128 normalise2( __m128 vec ) noexcept | |
{ | |
__m128 tmp = dot2( vec, vec ); | |
tmp = _mm_sqrt_ps( tmp ); | |
return _mm_div_ps( vec, tmp ); | |
} | |
// Change the value to false if calling vector2Bisect() in a tight loop, | |
// when compiler can inline and load constant vectors outside of the loop keeping them in registers | |
static constexpr bool AVOID_CONSTANTS = true; | |
// Compute bisector of the angles between a.xy and b.xy 2D vectors | |
// The output is normalized, and zw lanes are zeros; when either input is a zero 2D vector, the output is NAN | |
__m128 vector2Bisect( __m128 a, __m128 b ) noexcept | |
{ | |
// Merge arguments into a single vector | |
const __m128 ab = _mm_movelh_ps( a, b ); | |
// Normalise ab.xy and ab.zw | |
__m128 res = normalise2( ab ); | |
// Flip high and low pieces | |
const __m128 flipped = _mm_shuffle_ps( res, res, _MM_SHUFFLE( 1, 0, 3, 2 ) ); | |
// [ normalise( a ) - normalise( b ), normalise( a ) + normalise( b ) ] | |
if constexpr( AVOID_CONSTANTS ) | |
res = _mm_blend_ps( _mm_add_ps( res, flipped ), _mm_sub_ps( res, flipped ), 0b0011 ); | |
else | |
res = _mm_add_ps( res, _mm_xor_ps( flipped, _mm_setr_ps( -0.0f, -0.0f, 0, 0 ) ) ); | |
// Rotate res.xy by 90 degrees | |
res = _mm_shuffle_ps( res, res, _MM_SHUFFLE( 3, 2, 0, 1 ) ); | |
const __m128 zero = _mm_setzero_ps(); | |
if constexpr( AVOID_CONSTANTS ) | |
res = _mm_move_ss( res, _mm_sub_ss( zero, res ) ); | |
else | |
res = _mm_xor_ps( res, _mm_set_ss( -0.0f ) ); | |
// Normalise res.xy and res.zw | |
res = normalise2( res ); | |
// Now we need two dot products: dot( a, rotate90( b ) ), dot( a, b ) | |
__m128 dp = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE( 1, 0, 2, 3 ) ); | |
if constexpr( AVOID_CONSTANTS ) | |
dp = _mm_move_ss( dp, _mm_sub_ss( zero, dp ) ); | |
else | |
dp = _mm_xor_ps( dp, _mm_set_ss( -0.0f ) ); | |
dp = dot2( ab, dp ); | |
// Compare for `dp < 0.0` | |
const __m128 cmp = _mm_cmplt_ps( dp, zero ); | |
// Produce the output with blendvps SSE4.1 instruction i.e. branchless conditional move | |
// For acute and straight angles the result is res.zw | |
// For obtuse angles the result is either +res.xy or -res.xy depending on orientation of the input vectors | |
const __m128 resAcute = _mm_movehl_ps( zero, res ); | |
const __m128 maskObtuse = _mm_movehl_ps( zero, cmp ); | |
const __m128 resObtuseInv = _mm_sub_ps( zero, res ); | |
const __m128 resObtuse = _mm_blendv_ps( resObtuseInv, res, cmp ); | |
res = _mm_blendv_ps( resAcute, resObtuse, maskObtuse ); | |
return res; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment