Last active
September 19, 2024 13:35
-
-
Save xenobrain/50082c93444a3a3382ae086245976842 to your computer and use it in GitHub Desktop.
simd math, no templates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef ENGINE_TYPES_H | |
#define ENGINE_TYPES_H | |
#include <immintrin.h> | |
// Scalar square root computed with the SSE scalar instruction.
// The input is placed in the low lane of an SSE register, square-rooted
// there, and the low lane is extracted as the result.
inline auto sqrt(float const s) -> float {
    __m128 const lane = _mm_set_ss(s);
    __m128 const root = _mm_sqrt_ss(lane);
    return _mm_cvtss_f32(root);
}
// Reciprocal square root (1 / sqrt(s)) via the SSE scalar rsqrt instruction.
// NOTE: _mm_rsqrt_ss is a hardware approximation, not an exactly rounded
// result; callers get reduced precision in exchange for speed.
inline auto rsqrt(float const s) -> float {
    __m128 const lane = _mm_set_ss(s);
    __m128 const approx = _mm_rsqrt_ss(lane);
    return _mm_cvtss_f32(approx);
}
// Plain aggregate of two floats, in declaration order x, y.
struct vector2 {
    float x;
    float y;
};
// Plain aggregate of three floats, in declaration order x, y, z.
struct vector3 {
    float x;
    float y;
    float z;
};
// Plain aggregate of four floats, in declaration order x, y, z, w.
struct vector4 {
    float x;
    float y;
    float z;
    float w;
};
// 4x4 float matrix stored as sixteen consecutive floats.
// Members are declared in four groups of four (x*, y*, z*, w*); the SIMD
// code in this header loads each group as one 4-float vector starting at
// its first member, so the declaration order must not change.
struct matrix4 {
    float xx, xy, xz, xw;
    float yx, yy, yz, yw;
    float zx, zy, zz, zw;
    float wx, wy, wz, ww;
};
// Component-wise sum of two 2D vectors.
static inline auto operator+(vector2 const& a, vector2 const& b) -> vector2 {
    return vector2{a.x + b.x, a.y + b.y};
}
// Component-wise difference (a minus b) of two 2D vectors.
static inline auto operator-(vector2 const& a, vector2 const& b) -> vector2 {
    return vector2{a.x - b.x, a.y - b.y};
}
// Component-wise product of two 2D vectors (not a dot product).
static inline auto operator*(vector2 const& a, vector2 const& b) -> vector2 {
    return vector2{a.x * b.x, a.y * b.y};
}
// Component-wise sum of two 3D vectors.
static inline auto operator+(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 const sum{a.x + b.x, a.y + b.y, a.z + b.z};
    return sum;
}
// Component-wise difference (a minus b) of two 3D vectors.
static inline auto operator-(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 const difference{a.x - b.x, a.y - b.y, a.z - b.z};
    return difference;
}
// Component-wise product of two 3D vectors (not a dot or cross product).
static inline auto operator*(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 const product{a.x * b.x, a.y * b.y, a.z * b.z};
    return product;
}
// Component-wise sum of two 4D vectors.
static inline auto operator+(vector4 const& a, vector4 const& b) -> vector4 {
    return vector4{
        a.x + b.x,
        a.y + b.y,
        a.z + b.z,
        a.w + b.w,
    };
}
// Component-wise difference (a minus b) of two 4D vectors.
static inline auto operator-(vector4 const& a, vector4 const& b) -> vector4 {
    return vector4{
        a.x - b.x,
        a.y - b.y,
        a.z - b.z,
        a.w - b.w,
    };
}
// Component-wise product of two 4D vectors (not a dot product).
static inline auto operator*(vector4 const& a, vector4 const& b) -> vector4 {
    return vector4{
        a.x * b.x,
        a.y * b.y,
        a.z * b.z,
        a.w * b.w,
    };
}
// Transforms a 3D vector by a 4x4 matrix.
// Each output component combines b.x, b.y, b.z with one column of matrix
// members (e.g. xx, yx, zx for the x output) and then adds the matching
// w* member unscaled, i.e. b is treated as if it had a fourth component
// equal to 1. No fourth output component is computed.
static inline auto operator*(matrix4 const& a, vector3 const& b) -> vector3 {
    float const out_x = a.xx * b.x + a.yx * b.y + a.zx * b.z + a.wx;
    float const out_y = a.xy * b.x + a.yy * b.y + a.zy * b.z + a.wy;
    float const out_z = a.xz * b.x + a.yz * b.y + a.zz * b.z + a.wz;
    return vector3{out_x, out_y, out_z};
}
// Transforms a 4D vector by a 4x4 matrix.
// Output component k is b.x, b.y, b.z, b.w weighted by the matrix members
// whose second letter is k (e.g. xx, yx, zx, wx for the x output).
static inline auto operator*(matrix4 const& a, vector4 const& b) -> vector4 {
    float const out_x = a.xx * b.x + a.yx * b.y + a.zx * b.z + b.w * a.wx;
    float const out_y = a.xy * b.x + a.yy * b.y + a.zy * b.z + b.w * a.wy;
    float const out_z = a.xz * b.x + a.yz * b.y + a.zz * b.z + b.w * a.wz;
    float const out_w = a.xw * b.x + a.yw * b.y + a.zw * b.z + b.w * a.ww;
    return vector4{out_x, out_y, out_z, out_w};
}
// 4x4 matrix product computed with SSE.
// Every 4-float group of the result (x*, y*, z*, w*) is a linear combination
// of the four 4-float groups of b, weighted by the four scalars of the
// matching group of a.
static inline auto operator*(matrix4 const& a, matrix4 const& b) -> matrix4 {
    // The four groups of b, loaded once and reused for every output group.
    // Unaligned loads are used because matrix4 carries no alignment guarantee.
    __m128 const b_x = _mm_loadu_ps(&b.xx);
    __m128 const b_y = _mm_loadu_ps(&b.yx);
    __m128 const b_z = _mm_loadu_ps(&b.zx);
    __m128 const b_w = _mm_loadu_ps(&b.wx);

    // Builds one output group from the four scalars of one group of a.
    auto const combine = [&](float const cx, float const cy, float const cz, float const cw) -> __m128 {
        __m128 const term_x = _mm_mul_ps(_mm_set1_ps(cx), b_x);
        __m128 const term_y = _mm_mul_ps(_mm_set1_ps(cy), b_y);
        __m128 const term_z = _mm_mul_ps(_mm_set1_ps(cz), b_z);
        __m128 const term_w = _mm_mul_ps(_mm_set1_ps(cw), b_w);
        // Summation order is (x + z) + (y + w); keep it to preserve the
        // exact floating-point rounding of the result.
        return _mm_add_ps(_mm_add_ps(term_x, term_z), _mm_add_ps(term_y, term_w));
    };

    matrix4 result{};
    _mm_storeu_ps(&result.xx, combine(a.xx, a.xy, a.xz, a.xw));
    _mm_storeu_ps(&result.yx, combine(a.yx, a.yy, a.yz, a.yw));
    _mm_storeu_ps(&result.zx, combine(a.zx, a.zy, a.zz, a.zw));
    _mm_storeu_ps(&result.wx, combine(a.wx, a.wy, a.wz, a.ww));
    return result;
}
// Dot product of two 2D vectors.
static inline auto dot(vector2 const& a, vector2 const& b) -> float {
    return a.x * b.x
         + a.y * b.y;
}
// Dot product of two 3D vectors.
static inline auto dot(vector3 const& a, vector3 const& b) -> float {
    return a.x * b.x
         + a.y * b.y
         + a.z * b.z;
}
// Dot product of two 4D vectors.
static inline auto dot(vector4 const& a, vector4 const& b) -> float {
    return a.x * b.x
         + a.y * b.y
         + a.z * b.z
         + a.w * b.w;
}
// 2D "cross product": the scalar a.x * b.y - a.y * b.x.
static inline auto cross(vector2 const& a, vector2 const& b) -> float {
    return a.x * b.y - a.y * b.x;
}
// Cross product of two 3D vectors.
static inline auto cross(vector3 const& a, vector3 const& b) -> vector3 {
    float const out_x = a.y * b.z - a.z * b.y;
    float const out_y = a.z * b.x - a.x * b.z;
    float const out_z = a.x * b.y - a.y * b.x;
    return vector3{out_x, out_y, out_z};
}
// Scales a 2D vector by the reciprocal square root of its squared length.
// The scale comes from rsqrt(), which is a hardware approximation, so the
// result is only approximately unit length. A zero-length input is not
// guarded against.
static inline auto normalize(vector2 const& a) -> vector2 {
    float const inv_length = rsqrt(dot(a, a));
    return vector2{a.x * inv_length, a.y * inv_length};
}
// Scales a 3D vector by the reciprocal square root of its squared length.
// The scale comes from rsqrt(), which is a hardware approximation, so the
// result is only approximately unit length. A zero-length input is not
// guarded against.
static inline auto normalize(vector3 const& a) -> vector3 {
    float const inv_length = rsqrt(dot(a, a));
    return vector3{a.x * inv_length, a.y * inv_length, a.z * inv_length};
}
// Scales a 4D vector by the reciprocal square root of its squared length.
// The scale comes from rsqrt(), which is a hardware approximation, so the
// result is only approximately unit length. A zero-length input is not
// guarded against.
static inline auto normalize(vector4 const& a) -> vector4 {
    float const inv_length = rsqrt(dot(a, a));
    return vector4{
        a.x * inv_length,
        a.y * inv_length,
        a.z * inv_length,
        a.w * inv_length,
    };
}
// Plain aggregate of four floats, in declaration order r, g, b, a.
struct color {
    float r;
    float g;
    float b;
    float a;
};
// Plain aggregate of four floats, in declaration order x, y, w, h.
struct rectangle {
    float x;
    float y;
    float w;
    float h;
};
// Plain aggregate of three vector3 members, in declaration order
// position, rotation, scale.
struct transform {
    vector3 position;
    vector3 rotation;
    vector3 scale;
};
#endif // ENGINE_TYPES_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment