Skip to content

Instantly share code, notes, and snippets.

@jesperdj
Created June 10, 2011 17:51
Show Gist options
  • Save jesperdj/1019354 to your computer and use it in GitHub Desktop.
Save jesperdj/1019354 to your computer and use it in GitHub Desktop.
Simple vector math classes with SSE-optimized implementation (C++).
/*
* Copyright 2011 Jesper de Jong
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SANDBOX_VECMATH_H
#define SANDBOX_VECMATH_H
namespace vecmath {
template <typename Scalar> struct Vector;
template <typename Scalar> struct Point;
template <typename Scalar> struct Ray;
template <typename Scalar> class Matrix;
template <typename Scalar> class Transform;
// Vector template
template <typename Scalar>
struct Vector {
Scalar x, y, z, w;
Vector();
Vector(Scalar x_, Scalar y_, Scalar z_);
Scalar& operator[](int index);
Scalar operator[](int index) const;
Scalar length() const;
Scalar length_squared() const;
Vector<Scalar> normalize() const;
private:
// Constructor used by Matrix
Vector(Scalar x_, Scalar y_, Scalar z_, Scalar w_);
friend class Matrix<Scalar>;
};
template <typename Scalar> Vector<Scalar> operator+(const Vector<Scalar>& v1, const Vector<Scalar>& v2);
template <typename Scalar> Vector<Scalar>& operator+=(Vector<Scalar>& v1, const Vector<Scalar>& v2);
template <typename Scalar> Vector<Scalar> operator-(const Vector<Scalar>& v1, const Vector<Scalar>& v2);
template <typename Scalar> Vector<Scalar>& operator-=(Vector<Scalar>& v1, const Vector<Scalar>& v2);
template <typename Scalar> Vector<Scalar> operator-(const Vector<Scalar>& v);
template <typename Scalar> Vector<Scalar> operator*(const Vector<Scalar>& v, Scalar f);
template <typename Scalar> Vector<Scalar> operator*(Scalar f, const Vector<Scalar>& v);
template <typename Scalar> Vector<Scalar>& operator*=(Vector<Scalar>& v, Scalar f);
template <typename Scalar> Vector<Scalar> operator/(const Vector<Scalar>& v, Scalar f);
template <typename Scalar> Vector<Scalar>& operator/=(Vector<Scalar>& v, Scalar f);
template <typename Scalar> Scalar dot(const Vector<Scalar>& v1, const Vector<Scalar>& v2);
template <typename Scalar> Vector<Scalar> cross(const Vector<Scalar>& v1, const Vector<Scalar>& v2);
// Point template
template <typename Scalar>
struct Point {
Scalar x, y, z, w;
Point();
Point(Scalar x_, Scalar y_, Scalar z_);
Scalar& operator[](int index);
Scalar operator[](int index) const;
Scalar distance(const Point<Scalar>& p) const;
Scalar distance_squared(const Point<Scalar>& p) const;
};
template <typename Scalar> Point<Scalar> operator+(const Point<Scalar>& p, const Vector<Scalar>& v);
template <typename Scalar> Point<Scalar>& operator+=(Point<Scalar>& p, const Vector<Scalar>& v);
template <typename Scalar> Point<Scalar> operator-(const Point<Scalar>& p, const Vector<Scalar>& v);
template <typename Scalar> Point<Scalar>& operator-=(Point<Scalar>& p, const Vector<Scalar>& v);
template <typename Scalar> Vector<Scalar> operator-(const Point<Scalar>& p1, const Point<Scalar>& p2);
// Ray template
template <typename Scalar>
struct Ray {
Point<Scalar> origin;
Vector<Scalar> direction;
Ray(const Point<Scalar>& origin_, const Vector<Scalar>& direction_);
Point<Scalar> point_at(Scalar t) const;
};
// Matrix template
template <typename Scalar>
class Matrix {
public:
static Matrix<Scalar> identity();
static Matrix<Scalar> translate(const Vector<Scalar>& v);
static Matrix<Scalar> rotate_x(Scalar angle);
static Matrix<Scalar> rotate_y(Scalar angle);
static Matrix<Scalar> rotate_z(Scalar angle);
static Matrix<Scalar> rotate(const Vector<Scalar>& axis, Scalar angle);
static Matrix<Scalar> scale(Scalar f);
static Matrix<Scalar> scale(Scalar fx, Scalar fy, Scalar fz);
Matrix<Scalar> operator*(const Matrix<Scalar>& m) const;
Vector<Scalar> transform(const Vector<Scalar>& v) const;
Point<Scalar> transform(const Point<Scalar>& p) const;
Ray<Scalar> transform(const Ray<Scalar>& r) const;
private:
Matrix(const Vector<Scalar>& r0_, const Vector<Scalar>& r1_, const Vector<Scalar>& r2_);
Vector<Scalar> r0, r1, r2;
};
// Transform template
template <typename Scalar>
class Transform {
public:
static Transform<Scalar> identity();
static Transform<Scalar> translate(const Vector<Scalar>& v);
static Transform<Scalar> rotate_x(Scalar angle);
static Transform<Scalar> rotate_y(Scalar angle);
static Transform<Scalar> rotate_z(Scalar angle);
static Transform<Scalar> rotate(const Vector<Scalar>& axis, Scalar angle);
static Transform<Scalar> scale(Scalar f);
static Transform<Scalar> scale(Scalar fx, Scalar fy, Scalar fz);
Transform<Scalar> operator*(const Transform<Scalar>& t) const;
Transform<Scalar> inverse() const;
Vector<Scalar> transform(const Vector<Scalar>& v) const;
Point<Scalar> transform(const Point<Scalar>& p) const;
Ray<Scalar> transform(const Ray<Scalar>& r) const;
Vector<Scalar> inverse_transform(const Vector<Scalar>& v) const;
Point<Scalar> inverse_transform(const Point<Scalar>& p) const;
Ray<Scalar> inverse_transform(const Ray<Scalar>& r) const;
private:
Transform(const Matrix<Scalar>& matrix, const Matrix<Scalar>& inverse);
Matrix<Scalar> mat, inv;
};
} // namespace vecmath
#define SANDBOX_VECMATH_IMPL_INCLUDE
#include "vecmath_impl.hpp"
#undef SANDBOX_VECMATH_IMPL_INCLUDE
// Include SSE-optimized implementation unless disabled or compiling for CUDA
#if !defined(SANDBOX_NO_SSE) && !defined(__CUDACC__)
#include "vecmath_sse.hpp"
#endif
#endif // SANDBOX_VECMATH_H
/*
* Copyright 2011 Jesper de Jong
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SANDBOX_VECMATH_IMPL_H
#define SANDBOX_VECMATH_IMPL_H
#ifndef SANDBOX_VECMATH_IMPL_INCLUDE
#error "Do not include vecmath_impl.hpp directly; include vecmath.hpp instead."
#endif
#ifdef __CUDACC__
#define CUDA_DEVICE __host__ __device__
#define NDEBUG
#else
#define CUDA_DEVICE
#endif
#include <cassert>
#include <cmath>
namespace vecmath {
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>::Vector() { }
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>::Vector(Scalar x_, Scalar y_, Scalar z_) : x(x_), y(y_), z(z_), w(0.0) { }
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>::Vector(Scalar x_, Scalar y_, Scalar z_, Scalar w_) : x(x_), y(y_), z(z_), w(w_) { }
template <typename Scalar>
inline CUDA_DEVICE Scalar& Vector<Scalar>::operator[](int index) {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
template <typename Scalar>
inline CUDA_DEVICE Scalar Vector<Scalar>::operator[](int index) const {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
template <typename Scalar>
inline CUDA_DEVICE Scalar Vector<Scalar>::length() const {
return std::sqrt(length_squared());
}
template <typename Scalar>
inline CUDA_DEVICE Scalar Vector<Scalar>::length_squared() const {
return x * x + y * y + z * z;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> Vector<Scalar>::normalize() const {
return *this / length();
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator+(const Vector<Scalar>& v1, const Vector<Scalar>& v2) {
return Vector<Scalar>(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>& operator+=(Vector<Scalar>& v1, const Vector<Scalar>& v2) {
v1.x += v2.x; v1.y += v2.y; v1.z += v2.z;
return v1;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator-(const Vector<Scalar>& v1, const Vector<Scalar>& v2) {
return Vector<Scalar>(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>& operator-=(Vector<Scalar>& v1, const Vector<Scalar>& v2) {
v1.x -= v2.x; v1.y -= v2.y; v1.z -= v2.z;
return v1;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator-(const Vector<Scalar>& v) {
return Vector<Scalar>(-v.x, -v.y, -v.z);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator*(const Vector<Scalar>& v, Scalar f) {
return Vector<Scalar>(v.x * f, v.y * f, v.z * f);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator*(Scalar f, const Vector<Scalar>& v) {
return v * f;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>& operator*=(Vector<Scalar>& v, Scalar f) {
v.x *= f; v.y *= f; v.z *= f;
return v;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator/(const Vector<Scalar>& v, Scalar f) {
return Vector<Scalar>(v.x / f, v.y / f, v.z / f);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar>& operator/=(Vector<Scalar>& v, Scalar f) {
v.x /= f; v.y /= f; v.z /= f;
return v;
}
template <typename Scalar>
inline CUDA_DEVICE Scalar dot(const Vector<Scalar>& v1, const Vector<Scalar>& v2) {
return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> cross(const Vector<Scalar>& v1, const Vector<Scalar>& v2) {
return Vector<Scalar>(v1.y * v2.z - v1.z * v2.y, v1.z * v2.x - v1.x * v2.z, v1.x * v2.y - v1.y * v2.x);
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar>::Point() { }
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar>::Point(Scalar x_, Scalar y_, Scalar z_) : x(x_), y(y_), z(z_), w(1.0) { }
template <typename Scalar>
inline CUDA_DEVICE Scalar& Point<Scalar>::operator[](int index) {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
template <typename Scalar>
inline CUDA_DEVICE Scalar Point<Scalar>::operator[](int index) const {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
template <typename Scalar>
inline CUDA_DEVICE Scalar Point<Scalar>::distance(const Point<Scalar>& p) const {
return (*this - p).length();
}
template <typename Scalar>
inline CUDA_DEVICE Scalar Point<Scalar>::distance_squared(const Point<Scalar>& p) const {
return (*this - p).length_squared();
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar> operator+(const Point<Scalar>& p, const Vector<Scalar>& v) {
return Point<Scalar>(p.x + v.x, p.y + v.y, p.z + v.z);
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar>& operator+=(Point<Scalar>& p, const Vector<Scalar>& v) {
p.x += v.x; p.y += v.y; p.z += v.z;
return p;
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar> operator-(const Point<Scalar>& p, const Vector<Scalar>& v) {
return Point<Scalar>(p.x - v.x, p.y - v.y, p.z - v.z);
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar>& operator-=(Point<Scalar>& p, const Vector<Scalar>& v) {
p.x -= v.x; p.y -= v.y; p.z -= v.z;
return p;
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> operator-(const Point<Scalar>& p1, const Point<Scalar>& p2) {
return Vector<Scalar>(p1.x - p2.x, p1.y - p2.y, p1.z - p2.z);
}
template <typename Scalar>
inline CUDA_DEVICE Ray<Scalar>::Ray(const Point<Scalar>& origin_, const Vector<Scalar>& direction_) : origin(origin_), direction(direction_) { }
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar> Ray<Scalar>::point_at(Scalar t) const {
return origin + direction * t;
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::identity() {
return Matrix<Scalar>(
Vector<Scalar>(1.0, 0.0, 0.0, 0.0),
Vector<Scalar>(0.0, 1.0, 0.0, 0.0),
Vector<Scalar>(0.0, 0.0, 1.0, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::translate(const Vector<Scalar>& v) {
return Matrix<Scalar>(
Vector<Scalar>(1.0, 0.0, 0.0, v.x),
Vector<Scalar>(0.0, 1.0, 0.0, v.y),
Vector<Scalar>(0.0, 0.0, 1.0, v.z));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate_x(Scalar angle) {
Scalar c = std::cos(angle);
Scalar s = std::sin(angle);
return Matrix<Scalar>(
Vector<Scalar>(1.0, 0.0, 0.0, 0.0),
Vector<Scalar>(0.0, c, -s, 0.0),
Vector<Scalar>(0.0, s, c, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate_y(Scalar angle) {
Scalar c = std::cos(angle);
Scalar s = std::sin(angle);
return Matrix<Scalar>(
Vector<Scalar>( c, 0.0, s, 0.0),
Vector<Scalar>(0.0, 1.0, 0.0, 0.0),
Vector<Scalar>( -s, 0.0, c, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate_z(Scalar angle) {
Scalar c = std::cos(angle);
Scalar s = std::sin(angle);
return Matrix<Scalar>(
Vector<Scalar>( c, -s, 0.0, 0.0),
Vector<Scalar>( s, c, 0.0, 0.0),
Vector<Scalar>(0.0, 0.0, 1.0, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate(const Vector<Scalar>& axis, Scalar angle) {
Vector<Scalar> a = axis.normalize();
Scalar c = std::cos(angle);
Scalar s = std::sin(angle);
Scalar cc = 1.0 - c;
Scalar t1 = a.x * a.y * cc;
Scalar t2 = a.x * a.z * cc;
Scalar t3 = a.y * a.z * cc;
Scalar u1 = a.x * s;
Scalar u2 = a.y * s;
Scalar u3 = a.z * s;
return Matrix<Scalar>(
Vector<Scalar>(a.x * a.x * cc + c, t1 - u3, t2 + u2, 0.0),
Vector<Scalar>(t1 + u3, a.y * a.y * cc + c, t3 - u1, 0.0),
Vector<Scalar>(t2 - u2, t3 + u1, a.z * a.z * cc + c, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::scale(Scalar f) {
return Matrix<Scalar>(
Vector<Scalar>( f, 0.0, 0.0, 0.0),
Vector<Scalar>(0.0, f, 0.0, 0.0),
Vector<Scalar>(0.0, 0.0, f, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::scale(Scalar fx, Scalar fy, Scalar fz) {
return Matrix<Scalar>(
Vector<Scalar>( fx, 0.0, 0.0, 0.0),
Vector<Scalar>(0.0, fy, 0.0, 0.0),
Vector<Scalar>(0.0, 0.0, fz, 0.0));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::operator*(const Matrix<Scalar>& m) const {
Vector<Scalar> r0_(
r0.x * m.r0.x + r0.y * m.r1.x + r0.z * m.r2.x,
r0.x * m.r0.y + r0.y * m.r1.y + r0.z * m.r2.y,
r0.x * m.r0.z + r0.y * m.r1.z + r0.z * m.r2.z,
r0.x * m.r0.w + r0.y * m.r1.w + r0.z * m.r2.w + r0.w);
Vector<Scalar> r1_(
r1.x * m.r0.x + r1.y * m.r1.x + r1.z * m.r2.x,
r1.x * m.r0.y + r1.y * m.r1.y + r1.z * m.r2.y,
r1.x * m.r0.z + r1.y * m.r1.z + r1.z * m.r2.z,
r1.x * m.r0.w + r1.y * m.r1.w + r1.z * m.r2.w + r1.w);
Vector<Scalar> r2_(
r2.x * m.r0.x + r2.y * m.r1.x + r2.z * m.r2.x,
r2.x * m.r0.y + r2.y * m.r1.y + r2.z * m.r2.y,
r2.x * m.r0.z + r2.y * m.r1.z + r2.z * m.r2.z,
r2.x * m.r0.w + r2.y * m.r1.w + r2.z * m.r2.w + r2.w);
return Matrix<Scalar>(r0_, r1_, r2_);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> Matrix<Scalar>::transform(const Vector<Scalar>& v) const {
return Vector<Scalar>(dot(r0, v), dot(r1, v), dot(r2, v));
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar> Matrix<Scalar>::transform(const Point<Scalar>& p) const {
return Point<Scalar>(
r0.x * p.x + r0.y * p.y + r0.z * p.z + r0.w,
r1.x * p.x + r1.y * p.y + r1.z * p.z + r1.w,
r2.x * p.x + r2.y * p.y + r2.z * p.z + r2.w);
}
template <typename Scalar>
inline CUDA_DEVICE Ray<Scalar> Matrix<Scalar>::transform(const Ray<Scalar>& r) const {
return Ray<Scalar>(transform(r.origin), transform(r.direction));
}
template <typename Scalar>
inline CUDA_DEVICE Matrix<Scalar>::Matrix(const Vector<Scalar>& r0_, const Vector<Scalar>& r1_, const Vector<Scalar>& r2_) : r0(r0_), r1(r1_), r2(r2_) { }
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::identity() {
Matrix<Scalar> id = Matrix<Scalar>::identity();
return Transform<Scalar>(id, id);
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::translate(const Vector<Scalar>& v) {
return Transform<Scalar>(Matrix<Scalar>::translate(v), Matrix<Scalar>::translate(-v));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate_x(Scalar angle) {
return Transform<Scalar>(Matrix<Scalar>::rotate_x(angle), Matrix<Scalar>::rotate_x(-angle));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate_y(Scalar angle) {
return Transform<Scalar>(Matrix<Scalar>::rotate_y(angle), Matrix<Scalar>::rotate_y(-angle));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate_z(Scalar angle) {
return Transform<Scalar>(Matrix<Scalar>::rotate_z(angle), Matrix<Scalar>::rotate_z(-angle));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate(const Vector<Scalar>& axis, Scalar angle) {
return Transform<Scalar>(Matrix<Scalar>::rotate(axis, angle), Matrix<Scalar>::rotate(axis, -angle));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::scale(Scalar f) {
return Transform<Scalar>(Matrix<Scalar>::scale(f), Matrix<Scalar>::scale(1.0 / f));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::scale(Scalar fx, Scalar fy, Scalar fz) {
return Transform<Scalar>(Matrix<Scalar>::scale(fx, fy, fz), Matrix<Scalar>::scale(1.0 / fx, 1.0 / fy, 1.0 / fz));
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::operator*(const Transform<Scalar>& t) const {
return Transform<Scalar>(mat * t.mat, t.inv * inv);
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::inverse() const {
return Transform<Scalar>(inv, mat);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> Transform<Scalar>::transform(const Vector<Scalar>& v) const {
return mat.transform(v);
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar> Transform<Scalar>::transform(const Point<Scalar>& p) const {
return mat.transform(p);
}
template <typename Scalar>
inline CUDA_DEVICE Ray<Scalar> Transform<Scalar>::transform(const Ray<Scalar>& r) const {
return mat.transform(r);
}
template <typename Scalar>
inline CUDA_DEVICE Vector<Scalar> Transform<Scalar>::inverse_transform(const Vector<Scalar>& v) const {
return inv.transform(v);
}
template <typename Scalar>
inline CUDA_DEVICE Point<Scalar> Transform<Scalar>::inverse_transform(const Point<Scalar>& p) const {
return inv.transform(p);
}
template <typename Scalar>
inline CUDA_DEVICE Ray<Scalar> Transform<Scalar>::inverse_transform(const Ray<Scalar>& r) const {
return inv.transform(r);
}
template <typename Scalar>
inline CUDA_DEVICE Transform<Scalar>::Transform(const Matrix<Scalar>& matrix, const Matrix<Scalar>& inverse) : mat(matrix), inv(inverse) { }
} // namespace vecmath
#endif // SANDBOX_VECMATH_IMPL_H
/*
* Copyright 2011 Jesper de Jong
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SANDBOX_VECMATH_SSE_H
#define SANDBOX_VECMATH_SSE_H
// Include SSE intrinsics
#include <xmmintrin.h>
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#ifdef __SSE3__
// SSE3 includes the haddps (horizontal add) instruction
#include <pmmintrin.h>
#endif
#ifdef __SSE4_1__
// SSE4.1 includes the dpps (dot product) instruction
#include <smmintrin.h>
#endif
namespace vecmath {
// Vector template
template <>
struct Vector<float> {
union {
struct { float x, y, z, w; };
__m128 v;
};
Vector();
Vector(float x_, float y_, float z_);
explicit Vector(__m128 v_);
Vector(const Vector<float>& vector);
Vector<float>& operator=(const Vector<float>& vector);
float& operator[](int index);
float operator[](int index) const;
float length() const;
float length_squared() const;
Vector<float> normalize() const;
private:
// Constructor used by Matrix
Vector(float x_, float y_, float z_, float w_);
friend class Matrix<float>;
};
// Point template
template <>
struct Point<float> {
union {
struct { float x, y, z, w; };
__m128 v;
};
Point();
Point(float x_, float y_, float z_);
explicit Point(__m128 v_);
Point(const Point<float>& point);
Point<float>& operator=(const Point<float>& point);
float& operator[](int index);
float operator[](int index) const;
float distance(const Point<float>& p) const;
float distance_squared(const Point<float>& p) const;
};
// Matrix template
template <>
class Matrix<float> {
public:
Matrix(const Matrix<float>& m);
Matrix<float>& operator=(const Matrix<float>& m);
static Matrix<float> identity();
static Matrix<float> translate(const Vector<float>& v);
static Matrix<float> rotate_x(float angle);
static Matrix<float> rotate_y(float angle);
static Matrix<float> rotate_z(float angle);
static Matrix<float> rotate(const Vector<float>& axis, float angle);
static Matrix<float> scale(float f);
static Matrix<float> scale(float fx, float fy, float fz);
Matrix<float> operator*(const Matrix<float>& m) const;
Vector<float> transform(const Vector<float>& v) const;
Point<float> transform(const Point<float>& p) const;
Ray<float> transform(const Ray<float>& r) const;
private:
Matrix(const Vector<float>& r0_, const Vector<float>& r1_, const Vector<float>& r2_);
Vector<float> r0, r1, r2;
};
} // namespace vecmath
#define SANDBOX_VECMATH_SSE_IMPL_INCLUDE
#include "vecmath_sse_impl.hpp"
#undef SANDBOX_VECMATH_SSE_IMPL_INCLUDE
#endif // SANDBOX_VECMATH_SSE_H
/*
* Copyright 2011 Jesper de Jong
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SANDBOX_VECMATH_SSE_IMPL_H
#define SANDBOX_VECMATH_SSE_IMPL_H
#ifndef SANDBOX_VECMATH_SSE_IMPL_INCLUDE
#error "Do not include vecmath_sse_impl.hpp directly; include vecmath_sse.hpp instead."
#endif
#include <cassert>
#include <cmath>
namespace vecmath {
// Compute dot product and return it in all four components of an XMM register
inline __m128 sse_dot4_ps(__m128 a, __m128 b) {
#ifdef __SSE4_1__
__m128 dp = _mm_dp_ps(a, b, 0xff);
#elif defined(__SSE3__)
__m128 t1 = _mm_mul_ps(a, b);
__m128 t2 = _mm_hadd_ps(t1, t1);
__m128 dp = _mm_hadd_ps(t2, t2);
#else
__m128 t1 = _mm_mul_ps(a, b);
__m128 t2 = _mm_shuffle_ps(t1, t1, 0x93);
__m128 t3 = _mm_add_ps(t1, t2);
__m128 t4 = _mm_shuffle_ps(t3, t3, 0x4e);
__m128 dp = _mm_add_ps(t3, t4);
#endif
return dp;
}
// Compute dot product
inline float sse_dot1_ps(__m128 a, __m128 b) {
return _mm_cvtss_f32(sse_dot4_ps(a, b));
}
inline Vector<float>::Vector() { }
inline Vector<float>::Vector(float x_, float y_, float z_) : v(_mm_setr_ps(x_, y_, z_, 0.0f)) { }
inline Vector<float>::Vector(float x_, float y_, float z_, float w_) : v(_mm_setr_ps(x_, y_, z_, w_)) { }
inline Vector<float>::Vector(__m128 v_) : v(v_) { }
inline Vector<float>::Vector(const Vector<float>& vector) : v(vector.v) { }
inline Vector<float>& Vector<float>::operator=(const Vector<float>& vector) {
v = vector.v;
return *this;
}
inline float& Vector<float>::operator[](int index) {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
inline float Vector<float>::operator[](int index) const {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
inline float Vector<float>::length() const {
return _mm_cvtss_f32(_mm_sqrt_ss(sse_dot4_ps(v, v)));
}
inline float Vector<float>::length_squared() const {
return sse_dot1_ps(v, v);
}
inline Vector<float> Vector<float>::normalize() const {
return Vector<float>(_mm_div_ps(v, _mm_sqrt_ps(sse_dot4_ps(v, v))));
}
inline Vector<float> operator+(const Vector<float>& v1, const Vector<float>& v2) {
return Vector<float>(_mm_add_ps(v1.v, v2.v));
}
inline Vector<float>& operator+=(Vector<float>& v1, const Vector<float>& v2) {
v1.v = _mm_add_ps(v1.v, v2.v);
return v1;
}
inline Vector<float> operator-(const Vector<float>& v1, const Vector<float>& v2) {
return Vector<float>(_mm_sub_ps(v1.v, v2.v));
}
inline Vector<float>& operator-=(Vector<float>& v1, const Vector<float>& v2) {
v1.v = _mm_sub_ps(v1.v, v2.v);
return v1;
}
inline Vector<float> operator-(const Vector<float>& v) {
return Vector<float>(_mm_sub_ps(_mm_setzero_ps(), v.v));
}
inline Vector<float> operator*(const Vector<float>& v, float f) {
return Vector<float>(_mm_mul_ps(v.v, _mm_set1_ps(f)));
}
inline Vector<float> operator*(float f, const Vector<float>& v) {
return v * f;
}
inline Vector<float>& operator*=(Vector<float>& v, float f) {
v.v = _mm_mul_ps(v.v, _mm_set1_ps(f));
return v;
}
inline Vector<float> operator/(const Vector<float>& v, float f) {
return Vector<float>(_mm_div_ps(v.v, _mm_set1_ps(f)));
}
inline Vector<float>& operator/=(Vector<float>& v, float f) {
v.v = _mm_div_ps(v.v, _mm_set1_ps(f));
return v;
}
inline float dot(const Vector<float>& v1, const Vector<float>& v2) {
return sse_dot1_ps(v1.v, v2.v);
}
inline Vector<float> cross(const Vector<float>& v1, const Vector<float>& v2) {
__m128 t1 = _mm_shuffle_ps(v1.v, v1.v, 0xc9);
__m128 t2 = _mm_shuffle_ps(v1.v, v1.v, 0xd2);
__m128 t3 = _mm_shuffle_ps(v2.v, v2.v, 0xd2);
__m128 t4 = _mm_shuffle_ps(v2.v, v2.v, 0xc9);
__m128 t5 = _mm_mul_ps(t1, t3);
__m128 t6 = _mm_mul_ps(t2, t4);
return Vector<float>(_mm_sub_ps(t5, t6));
}
inline Point<float>::Point() { }
inline Point<float>::Point(float x_, float y_, float z_) : v(_mm_setr_ps(x_, y_, z_, 1.0f)) { }
inline Point<float>::Point(__m128 v_) : v(v_) { }
inline Point<float>::Point(const Point<float>& point) : v(point.v) { }
inline Point<float>& Point<float>::operator=(const Point<float>& point) {
v = point.v;
return *this;
}
inline float& Point<float>::operator[](int index) {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
inline float Point<float>::operator[](int index) const {
assert(index >= 0 && index <= 3);
return (&x)[index];
}
inline float Point<float>::distance(const Point<float>& p) const {
return (*this - p).length();
}
inline float Point<float>::distance_squared(const Point<float>& p) const {
return (*this - p).length_squared();
}
inline Point<float> operator+(const Point<float>& p, const Vector<float>& v) {
return Point<float>(_mm_add_ps(p.v, v.v));
}
inline Point<float>& operator+=(Point<float>& p, const Vector<float>& v) {
p.v = _mm_add_ps(p.v, v.v);
return p;
}
inline Point<float> operator-(const Point<float>& p, const Vector<float>& v) {
return Point<float>(_mm_sub_ps(p.v, v.v));
}
inline Point<float>& operator-=(Point<float>& p, const Vector<float>& v) {
p.v = _mm_sub_ps(p.v, v.v);
return p;
}
inline Vector<float> operator-(const Point<float>& p1, const Point<float>& p2) {
return Vector<float>(_mm_sub_ps(p1.v, p2.v));
}
inline Matrix<float>::Matrix(const Matrix<float>& m) : r0(m.r0), r1(m.r1), r2(m.r2) { }
inline Matrix<float>& Matrix<float>::operator=(const Matrix<float>& m) {
r0 = m.r0;
r1 = m.r1;
r2 = m.r2;
return *this;
}
inline Matrix<float> Matrix<float>::identity() {
return Matrix<float>(
Vector<float>(1.0f, 0.0f, 0.0f, 0.0f),
Vector<float>(0.0f, 1.0f, 0.0f, 0.0f),
Vector<float>(0.0f, 0.0f, 1.0f, 0.0f));
}
inline Matrix<float> Matrix<float>::translate(const Vector<float>& v) {
return Matrix<float>(
Vector<float>(1.0f, 0.0f, 0.0f, v.x),
Vector<float>(0.0f, 1.0f, 0.0f, v.y),
Vector<float>(0.0f, 0.0f, 1.0f, v.z));
}
inline Matrix<float> Matrix<float>::rotate_x(float angle) {
float c = std::cos(angle);
float s = std::sin(angle);
return Matrix<float>(
Vector<float>(1.0f, 0.0f, 0.0f, 0.0f),
Vector<float>(0.0f, c, -s, 0.0f),
Vector<float>(0.0f, s, c, 0.0f));
}
inline Matrix<float> Matrix<float>::rotate_y(float angle) {
float c = std::cos(angle);
float s = std::sin(angle);
return Matrix<float>(
Vector<float>( c, 0.0f, s, 0.0f),
Vector<float>(0.0f, 1.0f, 0.0f, 0.0f),
Vector<float>( -s, 0.0f, c, 0.0f));
}
inline Matrix<float> Matrix<float>::rotate_z(float angle) {
float c = std::cos(angle);
float s = std::sin(angle);
return Matrix<float>(
Vector<float>( c, -s, 0.0f, 0.0f),
Vector<float>( s, c, 0.0f, 0.0f),
Vector<float>(0.0f, 0.0f, 1.0f, 0.0f));
}
inline Matrix<float> Matrix<float>::rotate(const Vector<float>& axis, float angle) {
Vector<float> a = axis.normalize();
float c = std::cos(angle);
float s = std::sin(angle);
float cc = 1.0f - c;
float t1 = a.x * a.y * cc;
float t2 = a.x * a.z * cc;
float t3 = a.y * a.z * cc;
float u1 = a.x * s;
float u2 = a.y * s;
float u3 = a.z * s;
return Matrix<float>(
Vector<float>(a.x * a.x * cc + c, t1 - u3, t2 + u2, 0.0f),
Vector<float>(t1 + u3, a.y * a.y * cc + c, t3 - u1, 0.0f),
Vector<float>(t2 - u2, t3 + u1, a.z * a.z * cc + c, 0.0f));
}
inline Matrix<float> Matrix<float>::scale(float f) {
return Matrix<float>(
Vector<float>( f, 0.0f, 0.0f, 0.0f),
Vector<float>(0.0f, f, 0.0f, 0.0f),
Vector<float>(0.0f, 0.0f, f, 0.0f));
}
inline Matrix<float> Matrix<float>::scale(float fx, float fy, float fz) {
return Matrix<float>(
Vector<float>( fx, 0.0f, 0.0f, 0.0f),
Vector<float>(0.0f, fy, 0.0f, 0.0f),
Vector<float>(0.0f, 0.0f, fz, 0.0f));
}
inline Matrix<float> Matrix<float>::operator*(const Matrix<float>& m) const {
__m128 r3 = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);
__m128 t0 = _mm_unpacklo_ps(m.r0.v, m.r1.v);
__m128 t1 = _mm_unpacklo_ps(m.r2.v, r3);
__m128 t2 = _mm_unpackhi_ps(m.r0.v, m.r1.v);
__m128 t3 = _mm_unpackhi_ps(m.r2.v, r3);
__m128 tr0 = _mm_movelh_ps(t0, t1);
__m128 tr1 = _mm_movehl_ps(t1, t0);
__m128 tr2 = _mm_movelh_ps(t2, t3);
__m128 tr3 = _mm_movehl_ps(t3, t2);
return Matrix<float>(
Vector<float>(sse_dot1_ps(r0.v, tr0), sse_dot1_ps(r0.v, tr1), sse_dot1_ps(r0.v, tr2), sse_dot1_ps(r0.v, tr3)),
Vector<float>(sse_dot1_ps(r1.v, tr0), sse_dot1_ps(r1.v, tr1), sse_dot1_ps(r1.v, tr2), sse_dot1_ps(r1.v, tr3)),
Vector<float>(sse_dot1_ps(r2.v, tr0), sse_dot1_ps(r2.v, tr1), sse_dot1_ps(r2.v, tr2), sse_dot1_ps(r2.v, tr3)));
}
inline Vector<float> Matrix<float>::transform(const Vector<float>& v) const {
return Vector<float>(sse_dot1_ps(r0.v, v.v), sse_dot1_ps(r1.v, v.v), sse_dot1_ps(r2.v, v.v));
}
inline Point<float> Matrix<float>::transform(const Point<float>& p) const {
return Point<float>(sse_dot1_ps(r0.v, p.v), sse_dot1_ps(r1.v, p.v), sse_dot1_ps(r2.v, p.v));
}
inline Ray<float> Matrix<float>::transform(const Ray<float>& r) const {
return Ray<float>(transform(r.origin), transform(r.direction));
}
inline Matrix<float>::Matrix(const Vector<float>& r0_, const Vector<float>& r1_, const Vector<float>& r2_) : r0(r0_), r1(r1_), r2(r2_) { }
} // namespace vecmath
#endif // SANDBOX_VECMATH_SSE_IMPL_H
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment