Skip to content

Instantly share code, notes, and snippets.

@nadult
Created November 3, 2013 11:59
Show Gist options
  • Save nadult/7289479 to your computer and use it in GitHub Desktop.
Save nadult/7289479 to your computer and use it in GitHub Desktop.
SSE float wrapper class. Written in 2009.
#ifndef VECLIB_SSE_F32_H
#define VECLIB_SSE_F32_H
#include "sse/base.h"
class f32x4b;
class f32x4bn;
/// Four packed single-precision floats stored in one SSE register.
///
/// The same 16 bytes are reachable as the raw register (`m`) or as an array
/// of lanes (`f`). Construction from __m128 and from a single float is
/// implicit, so intrinsic results and scalar values mix freely with f32x4
/// operands.
class f32x4
{
public:
	/// Leaves all four lanes uninitialized.
	f32x4() { }
	/// Wraps an existing SSE register.
	f32x4(__m128 m) :m(m) { }
	f32x4(const f32x4 &rhs) :m(rhs.m) { }
	/// `a` goes to the lowest lane and `d` to the highest; _mm_set_ps takes
	/// its arguments highest lane first, hence the reversed order.
	f32x4(float a,float b,float c,float d) :m(_mm_set_ps(d,c,b,a)) { }
	/// Copies `v` into every lane.
	f32x4(float v) :m(_mm_set1_ps(v)) { }

	// Assignment forms return a reference to *this (the conventional
	// signature) rather than a const temporary copy of the result, so
	// chained updates such as (a += b) *= c act on `a` itself.
	f32x4 &operator=(const f32x4 &rhs) { m = rhs.m; return *this; }
	operator __m128() const { return m; }
	f32x4 &operator+=(f32x4 v) { m=_mm_add_ps(m,v.m); return *this; }
	f32x4 &operator-=(f32x4 v) { m=_mm_sub_ps(m,v.m); return *this; }
	f32x4 &operator*=(f32x4 v) { m=_mm_mul_ps(m,v.m); return *this; }
	f32x4 &operator/=(f32x4 v) { m=_mm_div_ps(m,v.m); return *this; }

	/// Lane-wise negation. Flips the sign bit of every lane instead of
	/// computing 0 - x, because 0 - (+0.0f) yields +0.0f; with the XOR,
	/// -(+0.0f) is -0.0f as scalar float negation would give.
	const f32x4 operator-() const { return _mm_xor_ps(m,_mm_set1_ps(-0.0f)); }

	/// Pointer to the four lanes, returned as float*, the declared element
	/// type of `f`.
	operator float*() { return f; }
	operator const float*() const { return f; }

	// Register view and per-lane view of the same storage.
	union {
		__m128 m;
		float f[4];
	};
};
/// Per-lane boolean mask for f32x4, kept as a raw SSE register.
///
/// The logical operators below are plain bitwise operations on the whole
/// register; being overloaded operators, && and || always evaluate both
/// operands.
class f32x4b
{
public:
	/// Leaves the mask uninitialized.
	f32x4b() { }

	//TODO: explicit?
	/// Adopts the given register bits as the mask.
	f32x4b(__m128 bits)
		:m(bits)
	{ }

	/// Every lane gets all 32 bits set when `flag` is true, all bits clear
	/// otherwise.
	f32x4b(bool flag)
		:m(_mm_set1_ps(BitCast<float, u32>(flag ? 0xffffffff : 0)))
	{ }

	/// Bitwise exclusive-or of the two masks.
	const f32x4b operator^(f32x4b rhs) const {
		return _mm_xor_ps(m, rhs.m);
	}

	/// Bitwise and of the two masks.
	const f32x4b operator&&(f32x4b rhs) const {
		return _mm_and_ps(m, rhs.m);
	}

	/// Bitwise or of the two masks.
	const f32x4b operator||(f32x4b rhs) const {
		return _mm_or_ps(m, rhs.m);
	}

	__m128 m;
};
class f32x4bn
{
public:
const f32x4bn operator^(f32x4b v) const { return _mm_xor_ps(m,v.m); }
const f32x4b operator^(f32x4bn v) const { return _mm_xor_ps(m,v.m); }
const f32x4b operator!() const { return f32x4b(m); }
const f32x4b operator&&(f32x4b v) const { return _mm_andnot_ps(m,v.m); }
const f32x4bn operator&&(f32x4bn v) const { return _mm_or_ps(m,v.m); }
const f32x4bn operator||(f32x4bn v) const { return _mm_and_ps(m,v.m); }
const f32x4b operator||(f32x4b v) const { return _mm_or_ps(_mm_andnot_ps(m,_mm_set1_ps(BitCast<float,u32>(~0))),v.m); }
operator f32x4b() const { return _mm_andnot_ps(m,_mm_set1_ps(BitCast<float,u32>(~0))); }
const __m128 &NegM() const { return m; }
private:
f32x4bn(__m128 v) :m(v) { }
friend const f32x4bn operator!(f32x4b);
friend const f32x4bn operator^(f32x4b,f32x4bn);
// To access it you have to
// convert to f32x4b first
__m128 m;
};
/// Logical NOT of a mask. No instruction is issued: the bits are simply
/// re-labelled as a negated mask.
inline const f32x4bn operator!(f32x4b mask) {
	return f32x4bn(mask.m);
}

/// a ^ (!b) == !(a ^ b): the result stays in negated form.
inline const f32x4bn operator^(f32x4b lhs,f32x4bn rhs) {
	const __m128 mixed = _mm_xor_ps(lhs.m, rhs.NegM());
	return f32x4bn(mixed);
}

/// a && (!b), computed as a single and-not.
inline const f32x4b operator&&(f32x4b lhs,f32x4bn rhs) {
	return _mm_andnot_ps(rhs.NegM(), lhs.m);
}

/// a || (!b): the complement of b is materialized, then or-ed with a.
inline const f32x4b operator||(f32x4b lhs,f32x4bn rhs) {
	const __m128 allBits = _mm_set1_ps(BitCast<float,u32>(~0));
	const __m128 complement = _mm_andnot_ps(rhs.NegM(), allBits);
	return _mm_or_ps(lhs.m, complement);
}
/// True when at least one lane of the mask has its top (sign) bit set.
inline bool ForAny(f32x4b v) {
	return _mm_movemask_ps(v.m) != 0;
}

/// True when all four lanes of the mask have their top (sign) bit set.
inline bool ForAll(f32x4b v) {
	const int allLanes = 15; // one bit per lane: 0b1111
	return _mm_movemask_ps(v.m) == allLanes;
}

/// 4-bit integer with bit i set when lane i of the mask has its top bit set.
inline int ForWhich(f32x4b v) {
	return _mm_movemask_ps(v.m);
}
// Lane-wise comparisons. Each one maps onto the matching _mm_cmp*_ps
// intrinsic and returns the resulting register as an f32x4b mask.

inline const f32x4b operator ==(f32x4 lhs,f32x4 rhs) {
	return _mm_cmpeq_ps(lhs.m, rhs.m);
}

inline const f32x4b operator !=(f32x4 lhs,f32x4 rhs) {
	return _mm_cmpneq_ps(lhs.m, rhs.m);
}

inline const f32x4b operator <=(f32x4 lhs,f32x4 rhs) {
	return _mm_cmple_ps(lhs.m, rhs.m);
}

inline const f32x4b operator >=(f32x4 lhs,f32x4 rhs) {
	return _mm_cmpge_ps(lhs.m, rhs.m);
}

inline const f32x4b operator < (f32x4 lhs,f32x4 rhs) {
	return _mm_cmplt_ps(lhs.m, rhs.m);
}

inline const f32x4b operator > (f32x4 lhs,f32x4 rhs) {
	return _mm_cmpgt_ps(lhs.m, rhs.m);
}
// Lane-wise binary arithmetic; every operator forwards to the matching
// packed-single intrinsic.

inline const f32x4 operator+(f32x4 lhs,f32x4 rhs) {
	return _mm_add_ps(lhs.m, rhs.m);
}

inline const f32x4 operator-(f32x4 lhs,f32x4 rhs) {
	return _mm_sub_ps(lhs.m, rhs.m);
}

inline const f32x4 operator*(f32x4 lhs,f32x4 rhs) {
	return _mm_mul_ps(lhs.m, rhs.m);
}

inline const f32x4 operator/(f32x4 lhs,f32x4 rhs) {
	return _mm_div_ps(lhs.m, rhs.m);
}
/// Lane-wise square root.
inline const f32x4 Sqrt(f32x4 v) {
	return _mm_sqrt_ps(v.m);
}

/// Lane-wise reciprocal via the project helper _mm_nrrcp_ps.
// NOTE(review): presumably a Newton-Raphson refined reciprocal — confirm in sse/base.h.
inline const f32x4 Inv(f32x4 v) {
	return _mm_nrrcp_ps(v.m);
}

/// Lane-wise reciprocal square root via the project helper _mm_nrrsqrt_ps.
// NOTE(review): presumably a Newton-Raphson refined rsqrt — confirm in sse/base.h.
inline const f32x4 RSqrt(f32x4 v) {
	return _mm_nrrsqrt_ps(v.m);
}

/// Lane-wise reciprocal using the hardware approximation (_mm_rcp_ps).
inline const f32x4 FastInv(f32x4 v) {
	return _mm_rcp_ps(v.m);
}

/// Lane-wise reciprocal square root using the hardware approximation
/// (_mm_rsqrt_ps).
inline const f32x4 FastRSqrt(f32x4 v) {
	return _mm_rsqrt_ps(v.m);
}

/// Lane-wise absolute value: masks off the sign bit of every lane.
inline const f32x4 Abs(f32x4 v) {
	const __m128 allButSign = _mm_set1_ps(BitCast<float,u32>(0x7fffffff));
	return _mm_and_ps(allButSign, v.m);
}

/// Lane-wise minimum (_mm_min_ps).
inline const f32x4 Min(f32x4 a,f32x4 b) {
	return _mm_min_ps(a.m, b.m);
}

/// Lane-wise maximum (_mm_max_ps).
inline const f32x4 Max(f32x4 a,f32x4 b) {
	return _mm_max_ps(a.m, b.m);
}

/// 4-bit integer with bit i set when lane i has its sign bit set.
inline int SignMask(f32x4 v) {
	return _mm_movemask_ps(v.m);
}
/// Bitwise select: takes bits from v1 where `test` is set and from v2 where
/// it is clear.
inline const f32x4 Condition(f32x4b test,f32x4 v1,f32x4 v2) __attribute__((always_inline));
inline const f32x4 Condition(f32x4b test,f32x4 v1,f32x4 v2) {
	const __m128 whenSet = _mm_and_ps(test.m, v1.m);
	const __m128 whenClear = _mm_andnot_ps(test.m, v2.m);
	return _mm_or_ps(whenSet, whenClear);
}

/// Same select for a negated mask: the stored bits are the complement of the
/// logical test, so the and / and-not roles are swapped.
inline const f32x4 Condition(f32x4bn test,f32x4 v1,f32x4 v2) __attribute__((always_inline));
inline const f32x4 Condition(f32x4bn test,f32x4 v1,f32x4 v2) {
	const __m128 whenSet = _mm_andnot_ps(test.NegM(), v1.m);
	const __m128 whenClear = _mm_and_ps(test.NegM(), v2.m);
	return _mm_or_ps(whenSet, whenClear);
}

/// Two-argument form: keeps v1 where `test` is set, zero bits elsewhere.
inline const f32x4 Condition(f32x4b test,f32x4 v1) {
	return _mm_and_ps(test.m, v1.m);
}

/// Two-argument form for a negated mask: keeps v1 where the stored bits are
/// clear, zero bits elsewhere.
inline const f32x4 Condition(f32x4bn test,f32x4 v1) {
	return _mm_andnot_ps(test.NegM(), v1.m);
}
/// ScalarInfo traits for f32x4: four floating-point lanes, with f32x4b as
/// the boolean type.
template <>
struct ScalarInfo<f32x4> {
	enum { multiplicity=4, floatingPoint=1 };
	typedef f32x4b TBool;

	/// Mask with all bits set in lane `n` and clear in the other three.
	/// `n` is used directly as an array index and is not range-checked, so it
	/// must be in 0..3.
	static f32x4b ElementMask(int n) {
		// _mm_set_ps lists lanes from highest to lowest, so the set lane moves
		// one position to the left with every table row.
		static f32x4b laneMasks[4]={
			_mm_set_ps(UValue(0).f,          UValue(0).f,          UValue(0).f,          UValue(0xffffffff).f),
			_mm_set_ps(UValue(0).f,          UValue(0).f,          UValue(0xffffffff).f, UValue(0).f),
			_mm_set_ps(UValue(0).f,          UValue(0xffffffff).f, UValue(0).f,          UValue(0).f),
			_mm_set_ps(UValue(0xffffffff).f, UValue(0).f,          UValue(0).f,          UValue(0).f),
		};
		return laneMasks[n];
	}
};
/// Fills `out` from a single float through the project helper _mm_load1.
// NOTE(review): presumably replicates `in` into every lane — confirm in sse/base.h.
inline void Broadcast(float in, f32x4 &out) {
	out = _mm_load1(in);
}

/// Loads four floats from `in` into `out` through the project helper _mm_load4.
// NOTE(review): alignment requirements depend on _mm_load4 — confirm in sse/base.h.
inline void Convert(const float in[4], f32x4 &out) {
	out = _mm_load4(in);
}

/// Stores the vector into `out` through the project helper _mm_store4.
inline void Convert(f32x4 in, float out[4]) {
	_mm_store4(in.m, out);
}

/// Packs four scalars into `out`; x lands in the lowest lane and w in the
/// highest (_mm_set_ps takes its arguments highest lane first).
inline void Convert(float x,float y,float z,float w,f32x4 &out) {
	out = _mm_set_ps(w, z, y, x);
}

/// Unpacks the vector into four scalars. _mm_store_ss writes only the lowest
/// lane, so lanes 1..3 first go through the project helper _mm_shuffle.
// NOTE(review): _mm_shuffle(i, v) presumably brings lane i into the lowest position — confirm in sse/base.h.
inline void Convert(f32x4 in,float &outX,float &outY,float &outZ,float &outW) {
	const __m128 packed = in.m;
	_mm_store_ss(&outX, packed);
	_mm_store_ss(&outY, _mm_shuffle(1, packed));
	_mm_store_ss(&outZ, _mm_shuffle(2, packed));
	_mm_store_ss(&outW, _mm_shuffle(3, packed));
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment