Skip to content

Instantly share code, notes, and snippets.

@Const-me
Created May 3, 2025 20:06
Show Gist options
  • Save Const-me/5fb31968766f01f9ade5c54946b94f6f to your computer and use it in GitHub Desktop.
Save Const-me/5fb31968766f01f9ade5c54946b94f6f to your computer and use it in GitHub Desktop.
Numerically stable 2D angle bisector algorithm
// Numerically stable 2D angle bisector algorithm
// (c) 2025 Konstantin http://const.me/
// This source file is subject to the terms of the MIT license https://opensource.org/license/MIT
#include <emmintrin.h> // SSE 1 and 2
#include <pmmintrin.h> // SSE 3
#include <smmintrin.h> // SSE 4.1
// Dot products of two pairs of 2D vectors in xy and zw of the inputs, broadcasted across both xy and zw
static inline __m128 dot2( __m128 a, __m128 b ) noexcept
{
__m128 v = _mm_mul_ps( a, b );
v = _mm_add_ps( v, _mm_movehdup_ps( v ) );
return _mm_moveldup_ps( v );
}
// [ normalise( vec.xy ), normalise( vec.zw ) ]
static inline __m128 normalise2( __m128 vec ) noexcept
{
__m128 tmp = dot2( vec, vec );
tmp = _mm_sqrt_ps( tmp );
return _mm_div_ps( vec, tmp );
}
// Change the value to false if calling vector2Bisect() in a tight loop,
// when compiler can inline and load constant vectors outside of the loop keeping them in registers
static constexpr bool AVOID_CONSTANTS = true;
// Compute bisector of the angles between a.xy and b.xy 2D vectors
// The output is normalized, and zw lanes are zeros; when either input is a zero 2D vector, the output is NAN
__m128 vector2Bisect( __m128 a, __m128 b ) noexcept
{
// Merge arguments into a single vector
const __m128 ab = _mm_movelh_ps( a, b );
// Normalise ab.xy and ab.zw
__m128 res = normalise2( ab );
// Flip high and low pieces
const __m128 flipped = _mm_shuffle_ps( res, res, _MM_SHUFFLE( 1, 0, 3, 2 ) );
// [ normalise( a ) - normalise( b ), normalise( a ) + normalise( b ) ]
if constexpr( AVOID_CONSTANTS )
res = _mm_blend_ps( _mm_add_ps( res, flipped ), _mm_sub_ps( res, flipped ), 0b0011 );
else
res = _mm_add_ps( res, _mm_xor_ps( flipped, _mm_setr_ps( -0.0f, -0.0f, 0, 0 ) ) );
// Rotate res.xy by 90 degrees
res = _mm_shuffle_ps( res, res, _MM_SHUFFLE( 3, 2, 0, 1 ) );
const __m128 zero = _mm_setzero_ps();
if constexpr( AVOID_CONSTANTS )
res = _mm_move_ss( res, _mm_sub_ss( zero, res ) );
else
res = _mm_xor_ps( res, _mm_set_ss( -0.0f ) );
// Normalise res.xy and res.zw
res = normalise2( res );
// Now we need two dot products: dot( a, rotate90( b ) ), dot( a, b )
__m128 dp = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE( 1, 0, 2, 3 ) );
if constexpr( AVOID_CONSTANTS )
dp = _mm_move_ss( dp, _mm_sub_ss( zero, dp ) );
else
dp = _mm_xor_ps( dp, _mm_set_ss( -0.0f ) );
dp = dot2( ab, dp );
// Compare for `dp < 0.0`
const __m128 cmp = _mm_cmplt_ps( dp, zero );
// Produce the output with blendvps SSE4.1 instruction i.e. branchless conditional move
// For acute and straight angles the result is res.zw
// For obtuse angles the result is either +res.xy or -res.xy depending on orientation of the input vectors
const __m128 resAcute = _mm_movehl_ps( zero, res );
const __m128 maskObtuse = _mm_movehl_ps( zero, cmp );
const __m128 resObtuseInv = _mm_sub_ps( zero, res );
const __m128 resObtuse = _mm_blendv_ps( resObtuseInv, res, cmp );
res = _mm_blendv_ps( resAcute, resObtuse, maskObtuse );
return res;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment