Created
June 10, 2011 17:51
-
-
Save jesperdj/1019354 to your computer and use it in GitHub Desktop.
Simple vector math classes with SSE-optimized implementation (C++).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright 2011 Jesper de Jong | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
#ifndef SANDBOX_VECMATH_H | |
#define SANDBOX_VECMATH_H | |
namespace vecmath { | |
template <typename Scalar> struct Vector; | |
template <typename Scalar> struct Point; | |
template <typename Scalar> struct Ray; | |
template <typename Scalar> class Matrix; | |
template <typename Scalar> class Transform; | |
// Vector template | |
template <typename Scalar> | |
struct Vector { | |
Scalar x, y, z, w; | |
Vector(); | |
Vector(Scalar x_, Scalar y_, Scalar z_); | |
Scalar& operator[](int index); | |
Scalar operator[](int index) const; | |
Scalar length() const; | |
Scalar length_squared() const; | |
Vector<Scalar> normalize() const; | |
private: | |
// Constructor used by Matrix | |
Vector(Scalar x_, Scalar y_, Scalar z_, Scalar w_); | |
friend class Matrix<Scalar>; | |
}; | |
template <typename Scalar> Vector<Scalar> operator+(const Vector<Scalar>& v1, const Vector<Scalar>& v2); | |
template <typename Scalar> Vector<Scalar>& operator+=(Vector<Scalar>& v1, const Vector<Scalar>& v2); | |
template <typename Scalar> Vector<Scalar> operator-(const Vector<Scalar>& v1, const Vector<Scalar>& v2); | |
template <typename Scalar> Vector<Scalar>& operator-=(Vector<Scalar>& v1, const Vector<Scalar>& v2); | |
template <typename Scalar> Vector<Scalar> operator-(const Vector<Scalar>& v); | |
template <typename Scalar> Vector<Scalar> operator*(const Vector<Scalar>& v, Scalar f); | |
template <typename Scalar> Vector<Scalar> operator*(Scalar f, const Vector<Scalar>& v); | |
template <typename Scalar> Vector<Scalar>& operator*=(Vector<Scalar>& v, Scalar f); | |
template <typename Scalar> Vector<Scalar> operator/(const Vector<Scalar>& v, Scalar f); | |
template <typename Scalar> Vector<Scalar>& operator/=(Vector<Scalar>& v, Scalar f); | |
template <typename Scalar> Scalar dot(const Vector<Scalar>& v1, const Vector<Scalar>& v2); | |
template <typename Scalar> Vector<Scalar> cross(const Vector<Scalar>& v1, const Vector<Scalar>& v2); | |
// Point template | |
template <typename Scalar> | |
struct Point { | |
Scalar x, y, z, w; | |
Point(); | |
Point(Scalar x_, Scalar y_, Scalar z_); | |
Scalar& operator[](int index); | |
Scalar operator[](int index) const; | |
Scalar distance(const Point<Scalar>& p) const; | |
Scalar distance_squared(const Point<Scalar>& p) const; | |
}; | |
template <typename Scalar> Point<Scalar> operator+(const Point<Scalar>& p, const Vector<Scalar>& v); | |
template <typename Scalar> Point<Scalar>& operator+=(Point<Scalar>& p, const Vector<Scalar>& v); | |
template <typename Scalar> Point<Scalar> operator-(const Point<Scalar>& p, const Vector<Scalar>& v); | |
template <typename Scalar> Point<Scalar>& operator-=(Point<Scalar>& p, const Vector<Scalar>& v); | |
template <typename Scalar> Vector<Scalar> operator-(const Point<Scalar>& p1, const Point<Scalar>& p2); | |
// Ray template | |
template <typename Scalar> | |
struct Ray { | |
Point<Scalar> origin; | |
Vector<Scalar> direction; | |
Ray(const Point<Scalar>& origin_, const Vector<Scalar>& direction_); | |
Point<Scalar> point_at(Scalar t) const; | |
}; | |
// Matrix template | |
template <typename Scalar> | |
class Matrix { | |
public: | |
static Matrix<Scalar> identity(); | |
static Matrix<Scalar> translate(const Vector<Scalar>& v); | |
static Matrix<Scalar> rotate_x(Scalar angle); | |
static Matrix<Scalar> rotate_y(Scalar angle); | |
static Matrix<Scalar> rotate_z(Scalar angle); | |
static Matrix<Scalar> rotate(const Vector<Scalar>& axis, Scalar angle); | |
static Matrix<Scalar> scale(Scalar f); | |
static Matrix<Scalar> scale(Scalar fx, Scalar fy, Scalar fz); | |
Matrix<Scalar> operator*(const Matrix<Scalar>& m) const; | |
Vector<Scalar> transform(const Vector<Scalar>& v) const; | |
Point<Scalar> transform(const Point<Scalar>& p) const; | |
Ray<Scalar> transform(const Ray<Scalar>& r) const; | |
private: | |
Matrix(const Vector<Scalar>& r0_, const Vector<Scalar>& r1_, const Vector<Scalar>& r2_); | |
Vector<Scalar> r0, r1, r2; | |
}; | |
// Transform template | |
template <typename Scalar> | |
class Transform { | |
public: | |
static Transform<Scalar> identity(); | |
static Transform<Scalar> translate(const Vector<Scalar>& v); | |
static Transform<Scalar> rotate_x(Scalar angle); | |
static Transform<Scalar> rotate_y(Scalar angle); | |
static Transform<Scalar> rotate_z(Scalar angle); | |
static Transform<Scalar> rotate(const Vector<Scalar>& axis, Scalar angle); | |
static Transform<Scalar> scale(Scalar f); | |
static Transform<Scalar> scale(Scalar fx, Scalar fy, Scalar fz); | |
Transform<Scalar> operator*(const Transform<Scalar>& t) const; | |
Transform<Scalar> inverse() const; | |
Vector<Scalar> transform(const Vector<Scalar>& v) const; | |
Point<Scalar> transform(const Point<Scalar>& p) const; | |
Ray<Scalar> transform(const Ray<Scalar>& r) const; | |
Vector<Scalar> inverse_transform(const Vector<Scalar>& v) const; | |
Point<Scalar> inverse_transform(const Point<Scalar>& p) const; | |
Ray<Scalar> inverse_transform(const Ray<Scalar>& r) const; | |
private: | |
Transform(const Matrix<Scalar>& matrix, const Matrix<Scalar>& inverse); | |
Matrix<Scalar> mat, inv; | |
}; | |
} // namespace vecmath | |
#define SANDBOX_VECMATH_IMPL_INCLUDE | |
#include "vecmath_impl.hpp" | |
#undef SANDBOX_VECMATH_IMPL_INCLUDE | |
// Include SSE-optimized implementation unless disabled or compiling for CUDA | |
#if !defined(SANDBOX_NO_SSE) && !defined(__CUDACC__) | |
#include "vecmath_sse.hpp" | |
#endif | |
#endif // SANDBOX_VECMATH_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright 2011 Jesper de Jong | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
#ifndef SANDBOX_VECMATH_IMPL_H | |
#define SANDBOX_VECMATH_IMPL_H | |
#ifndef SANDBOX_VECMATH_IMPL_INCLUDE | |
#error "Do not include vecmath_impl.hpp directly; include vecmath.hpp instead." | |
#endif | |
#ifdef __CUDACC__ | |
#define CUDA_DEVICE __host__ __device__ | |
#define NDEBUG | |
#else | |
#define CUDA_DEVICE | |
#endif | |
#include <cassert> | |
#include <cmath> | |
namespace vecmath { | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>::Vector() { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>::Vector(Scalar x_, Scalar y_, Scalar z_) : x(x_), y(y_), z(z_), w(0.0) { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>::Vector(Scalar x_, Scalar y_, Scalar z_, Scalar w_) : x(x_), y(y_), z(z_), w(w_) { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar& Vector<Scalar>::operator[](int index) { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar Vector<Scalar>::operator[](int index) const { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar Vector<Scalar>::length() const { | |
return std::sqrt(length_squared()); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar Vector<Scalar>::length_squared() const { | |
return x * x + y * y + z * z; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> Vector<Scalar>::normalize() const { | |
return *this / length(); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator+(const Vector<Scalar>& v1, const Vector<Scalar>& v2) { | |
return Vector<Scalar>(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>& operator+=(Vector<Scalar>& v1, const Vector<Scalar>& v2) { | |
v1.x += v2.x; v1.y += v2.y; v1.z += v2.z; | |
return v1; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator-(const Vector<Scalar>& v1, const Vector<Scalar>& v2) { | |
return Vector<Scalar>(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>& operator-=(Vector<Scalar>& v1, const Vector<Scalar>& v2) { | |
v1.x -= v2.x; v1.y -= v2.y; v1.z -= v2.z; | |
return v1; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator-(const Vector<Scalar>& v) { | |
return Vector<Scalar>(-v.x, -v.y, -v.z); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator*(const Vector<Scalar>& v, Scalar f) { | |
return Vector<Scalar>(v.x * f, v.y * f, v.z * f); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator*(Scalar f, const Vector<Scalar>& v) { | |
return v * f; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>& operator*=(Vector<Scalar>& v, Scalar f) { | |
v.x *= f; v.y *= f; v.z *= f; | |
return v; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator/(const Vector<Scalar>& v, Scalar f) { | |
return Vector<Scalar>(v.x / f, v.y / f, v.z / f); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar>& operator/=(Vector<Scalar>& v, Scalar f) { | |
v.x /= f; v.y /= f; v.z /= f; | |
return v; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar dot(const Vector<Scalar>& v1, const Vector<Scalar>& v2) { | |
return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> cross(const Vector<Scalar>& v1, const Vector<Scalar>& v2) { | |
return Vector<Scalar>(v1.y * v2.z - v1.z * v2.y, v1.z * v2.x - v1.x * v2.z, v1.x * v2.y - v1.y * v2.x); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar>::Point() { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar>::Point(Scalar x_, Scalar y_, Scalar z_) : x(x_), y(y_), z(z_), w(1.0) { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar& Point<Scalar>::operator[](int index) { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar Point<Scalar>::operator[](int index) const { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar Point<Scalar>::distance(const Point<Scalar>& p) const { | |
return (*this - p).length(); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Scalar Point<Scalar>::distance_squared(const Point<Scalar>& p) const { | |
return (*this - p).length_squared(); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar> operator+(const Point<Scalar>& p, const Vector<Scalar>& v) { | |
return Point<Scalar>(p.x + v.x, p.y + v.y, p.z + v.z); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar>& operator+=(Point<Scalar>& p, const Vector<Scalar>& v) { | |
p.x += v.x; p.y += v.y; p.z += v.z; | |
return p; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar> operator-(const Point<Scalar>& p, const Vector<Scalar>& v) { | |
return Point<Scalar>(p.x - v.x, p.y - v.y, p.z - v.z); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar>& operator-=(Point<Scalar>& p, const Vector<Scalar>& v) { | |
p.x -= v.x; p.y -= v.y; p.z -= v.z; | |
return p; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> operator-(const Point<Scalar>& p1, const Point<Scalar>& p2) { | |
return Vector<Scalar>(p1.x - p2.x, p1.y - p2.y, p1.z - p2.z); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Ray<Scalar>::Ray(const Point<Scalar>& origin_, const Vector<Scalar>& direction_) : origin(origin_), direction(direction_) { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar> Ray<Scalar>::point_at(Scalar t) const { | |
return origin + direction * t; | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::identity() { | |
return Matrix<Scalar>( | |
Vector<Scalar>(1.0, 0.0, 0.0, 0.0), | |
Vector<Scalar>(0.0, 1.0, 0.0, 0.0), | |
Vector<Scalar>(0.0, 0.0, 1.0, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::translate(const Vector<Scalar>& v) { | |
return Matrix<Scalar>( | |
Vector<Scalar>(1.0, 0.0, 0.0, v.x), | |
Vector<Scalar>(0.0, 1.0, 0.0, v.y), | |
Vector<Scalar>(0.0, 0.0, 1.0, v.z)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate_x(Scalar angle) { | |
Scalar c = std::cos(angle); | |
Scalar s = std::sin(angle); | |
return Matrix<Scalar>( | |
Vector<Scalar>(1.0, 0.0, 0.0, 0.0), | |
Vector<Scalar>(0.0, c, -s, 0.0), | |
Vector<Scalar>(0.0, s, c, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate_y(Scalar angle) { | |
Scalar c = std::cos(angle); | |
Scalar s = std::sin(angle); | |
return Matrix<Scalar>( | |
Vector<Scalar>( c, 0.0, s, 0.0), | |
Vector<Scalar>(0.0, 1.0, 0.0, 0.0), | |
Vector<Scalar>( -s, 0.0, c, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate_z(Scalar angle) { | |
Scalar c = std::cos(angle); | |
Scalar s = std::sin(angle); | |
return Matrix<Scalar>( | |
Vector<Scalar>( c, -s, 0.0, 0.0), | |
Vector<Scalar>( s, c, 0.0, 0.0), | |
Vector<Scalar>(0.0, 0.0, 1.0, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::rotate(const Vector<Scalar>& axis, Scalar angle) { | |
Vector<Scalar> a = axis.normalize(); | |
Scalar c = std::cos(angle); | |
Scalar s = std::sin(angle); | |
Scalar cc = 1.0 - c; | |
Scalar t1 = a.x * a.y * cc; | |
Scalar t2 = a.x * a.z * cc; | |
Scalar t3 = a.y * a.z * cc; | |
Scalar u1 = a.x * s; | |
Scalar u2 = a.y * s; | |
Scalar u3 = a.z * s; | |
return Matrix<Scalar>( | |
Vector<Scalar>(a.x * a.x * cc + c, t1 - u3, t2 + u2, 0.0), | |
Vector<Scalar>(t1 + u3, a.y * a.y * cc + c, t3 - u1, 0.0), | |
Vector<Scalar>(t2 - u2, t3 + u1, a.z * a.z * cc + c, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::scale(Scalar f) { | |
return Matrix<Scalar>( | |
Vector<Scalar>( f, 0.0, 0.0, 0.0), | |
Vector<Scalar>(0.0, f, 0.0, 0.0), | |
Vector<Scalar>(0.0, 0.0, f, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::scale(Scalar fx, Scalar fy, Scalar fz) { | |
return Matrix<Scalar>( | |
Vector<Scalar>( fx, 0.0, 0.0, 0.0), | |
Vector<Scalar>(0.0, fy, 0.0, 0.0), | |
Vector<Scalar>(0.0, 0.0, fz, 0.0)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar> Matrix<Scalar>::operator*(const Matrix<Scalar>& m) const { | |
Vector<Scalar> r0_( | |
r0.x * m.r0.x + r0.y * m.r1.x + r0.z * m.r2.x, | |
r0.x * m.r0.y + r0.y * m.r1.y + r0.z * m.r2.y, | |
r0.x * m.r0.z + r0.y * m.r1.z + r0.z * m.r2.z, | |
r0.x * m.r0.w + r0.y * m.r1.w + r0.z * m.r2.w + r0.w); | |
Vector<Scalar> r1_( | |
r1.x * m.r0.x + r1.y * m.r1.x + r1.z * m.r2.x, | |
r1.x * m.r0.y + r1.y * m.r1.y + r1.z * m.r2.y, | |
r1.x * m.r0.z + r1.y * m.r1.z + r1.z * m.r2.z, | |
r1.x * m.r0.w + r1.y * m.r1.w + r1.z * m.r2.w + r1.w); | |
Vector<Scalar> r2_( | |
r2.x * m.r0.x + r2.y * m.r1.x + r2.z * m.r2.x, | |
r2.x * m.r0.y + r2.y * m.r1.y + r2.z * m.r2.y, | |
r2.x * m.r0.z + r2.y * m.r1.z + r2.z * m.r2.z, | |
r2.x * m.r0.w + r2.y * m.r1.w + r2.z * m.r2.w + r2.w); | |
return Matrix<Scalar>(r0_, r1_, r2_); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> Matrix<Scalar>::transform(const Vector<Scalar>& v) const { | |
return Vector<Scalar>(dot(r0, v), dot(r1, v), dot(r2, v)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar> Matrix<Scalar>::transform(const Point<Scalar>& p) const { | |
return Point<Scalar>( | |
r0.x * p.x + r0.y * p.y + r0.z * p.z + r0.w, | |
r1.x * p.x + r1.y * p.y + r1.z * p.z + r1.w, | |
r2.x * p.x + r2.y * p.y + r2.z * p.z + r2.w); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Ray<Scalar> Matrix<Scalar>::transform(const Ray<Scalar>& r) const { | |
return Ray<Scalar>(transform(r.origin), transform(r.direction)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Matrix<Scalar>::Matrix(const Vector<Scalar>& r0_, const Vector<Scalar>& r1_, const Vector<Scalar>& r2_) : r0(r0_), r1(r1_), r2(r2_) { } | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::identity() { | |
Matrix<Scalar> id = Matrix<Scalar>::identity(); | |
return Transform<Scalar>(id, id); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::translate(const Vector<Scalar>& v) { | |
return Transform<Scalar>(Matrix<Scalar>::translate(v), Matrix<Scalar>::translate(-v)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate_x(Scalar angle) { | |
return Transform<Scalar>(Matrix<Scalar>::rotate_x(angle), Matrix<Scalar>::rotate_x(-angle)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate_y(Scalar angle) { | |
return Transform<Scalar>(Matrix<Scalar>::rotate_y(angle), Matrix<Scalar>::rotate_y(-angle)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate_z(Scalar angle) { | |
return Transform<Scalar>(Matrix<Scalar>::rotate_z(angle), Matrix<Scalar>::rotate_z(-angle)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::rotate(const Vector<Scalar>& axis, Scalar angle) { | |
return Transform<Scalar>(Matrix<Scalar>::rotate(axis, angle), Matrix<Scalar>::rotate(axis, -angle)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::scale(Scalar f) { | |
return Transform<Scalar>(Matrix<Scalar>::scale(f), Matrix<Scalar>::scale(1.0 / f)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::scale(Scalar fx, Scalar fy, Scalar fz) { | |
return Transform<Scalar>(Matrix<Scalar>::scale(fx, fy, fz), Matrix<Scalar>::scale(1.0 / fx, 1.0 / fy, 1.0 / fz)); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::operator*(const Transform<Scalar>& t) const { | |
return Transform<Scalar>(mat * t.mat, t.inv * inv); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar> Transform<Scalar>::inverse() const { | |
return Transform<Scalar>(inv, mat); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> Transform<Scalar>::transform(const Vector<Scalar>& v) const { | |
return mat.transform(v); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar> Transform<Scalar>::transform(const Point<Scalar>& p) const { | |
return mat.transform(p); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Ray<Scalar> Transform<Scalar>::transform(const Ray<Scalar>& r) const { | |
return mat.transform(r); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Vector<Scalar> Transform<Scalar>::inverse_transform(const Vector<Scalar>& v) const { | |
return inv.transform(v); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Point<Scalar> Transform<Scalar>::inverse_transform(const Point<Scalar>& p) const { | |
return inv.transform(p); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Ray<Scalar> Transform<Scalar>::inverse_transform(const Ray<Scalar>& r) const { | |
return inv.transform(r); | |
} | |
template <typename Scalar> | |
inline CUDA_DEVICE Transform<Scalar>::Transform(const Matrix<Scalar>& matrix, const Matrix<Scalar>& inverse) : mat(matrix), inv(inverse) { } | |
} // namespace vecmath | |
#endif // SANDBOX_VECMATH_IMPL_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright 2011 Jesper de Jong | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
#ifndef SANDBOX_VECMATH_SSE_H | |
#define SANDBOX_VECMATH_SSE_H | |
// Include SSE intrinsics | |
#include <xmmintrin.h> | |
#ifdef __SSE2__ | |
#include <emmintrin.h> | |
#endif | |
#ifdef __SSE3__ | |
// SSE3 includes the haddps (horizontal add) instruction | |
#include <pmmintrin.h> | |
#endif | |
#ifdef __SSE4_1__ | |
// SSE4.1 includes the dpps (dot product) instruction | |
#include <smmintrin.h> | |
#endif | |
namespace vecmath { | |
// Vector template | |
template <> | |
struct Vector<float> { | |
union { | |
struct { float x, y, z, w; }; | |
__m128 v; | |
}; | |
Vector(); | |
Vector(float x_, float y_, float z_); | |
explicit Vector(__m128 v_); | |
Vector(const Vector<float>& vector); | |
Vector<float>& operator=(const Vector<float>& vector); | |
float& operator[](int index); | |
float operator[](int index) const; | |
float length() const; | |
float length_squared() const; | |
Vector<float> normalize() const; | |
private: | |
// Constructor used by Matrix | |
Vector(float x_, float y_, float z_, float w_); | |
friend class Matrix<float>; | |
}; | |
// Point template | |
template <> | |
struct Point<float> { | |
union { | |
struct { float x, y, z, w; }; | |
__m128 v; | |
}; | |
Point(); | |
Point(float x_, float y_, float z_); | |
explicit Point(__m128 v_); | |
Point(const Point<float>& point); | |
Point<float>& operator=(const Point<float>& point); | |
float& operator[](int index); | |
float operator[](int index) const; | |
float distance(const Point<float>& p) const; | |
float distance_squared(const Point<float>& p) const; | |
}; | |
// Matrix template | |
template <> | |
class Matrix<float> { | |
public: | |
Matrix(const Matrix<float>& m); | |
Matrix<float>& operator=(const Matrix<float>& m); | |
static Matrix<float> identity(); | |
static Matrix<float> translate(const Vector<float>& v); | |
static Matrix<float> rotate_x(float angle); | |
static Matrix<float> rotate_y(float angle); | |
static Matrix<float> rotate_z(float angle); | |
static Matrix<float> rotate(const Vector<float>& axis, float angle); | |
static Matrix<float> scale(float f); | |
static Matrix<float> scale(float fx, float fy, float fz); | |
Matrix<float> operator*(const Matrix<float>& m) const; | |
Vector<float> transform(const Vector<float>& v) const; | |
Point<float> transform(const Point<float>& p) const; | |
Ray<float> transform(const Ray<float>& r) const; | |
private: | |
Matrix(const Vector<float>& r0_, const Vector<float>& r1_, const Vector<float>& r2_); | |
Vector<float> r0, r1, r2; | |
}; | |
} // namespace vecmath | |
#define SANDBOX_VECMATH_SSE_IMPL_INCLUDE | |
#include "vecmath_sse_impl.hpp" | |
#undef SANDBOX_VECMATH_SSE_IMPL_INCLUDE | |
#endif // SANDBOX_VECMATH_SSE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright 2011 Jesper de Jong | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
#ifndef SANDBOX_VECMATH_SSE_IMPL_H | |
#define SANDBOX_VECMATH_SSE_IMPL_H | |
#ifndef SANDBOX_VECMATH_SSE_IMPL_INCLUDE | |
#error "Do not include vecmath_sse_impl.hpp directly; include vecmath_sse.hpp instead." | |
#endif | |
#include <cassert> | |
#include <cmath> | |
namespace vecmath { | |
// Compute dot product and return it in all four components of an XMM register | |
inline __m128 sse_dot4_ps(__m128 a, __m128 b) { | |
#ifdef __SSE4_1__ | |
__m128 dp = _mm_dp_ps(a, b, 0xff); | |
#elif defined(__SSE3__) | |
__m128 t1 = _mm_mul_ps(a, b); | |
__m128 t2 = _mm_hadd_ps(t1, t1); | |
__m128 dp = _mm_hadd_ps(t2, t2); | |
#else | |
__m128 t1 = _mm_mul_ps(a, b); | |
__m128 t2 = _mm_shuffle_ps(t1, t1, 0x93); | |
__m128 t3 = _mm_add_ps(t1, t2); | |
__m128 t4 = _mm_shuffle_ps(t3, t3, 0x4e); | |
__m128 dp = _mm_add_ps(t3, t4); | |
#endif | |
return dp; | |
} | |
// Compute dot product | |
inline float sse_dot1_ps(__m128 a, __m128 b) { | |
return _mm_cvtss_f32(sse_dot4_ps(a, b)); | |
} | |
inline Vector<float>::Vector() { } | |
inline Vector<float>::Vector(float x_, float y_, float z_) : v(_mm_setr_ps(x_, y_, z_, 0.0f)) { } | |
inline Vector<float>::Vector(float x_, float y_, float z_, float w_) : v(_mm_setr_ps(x_, y_, z_, w_)) { } | |
inline Vector<float>::Vector(__m128 v_) : v(v_) { } | |
inline Vector<float>::Vector(const Vector<float>& vector) : v(vector.v) { } | |
inline Vector<float>& Vector<float>::operator=(const Vector<float>& vector) { | |
v = vector.v; | |
return *this; | |
} | |
inline float& Vector<float>::operator[](int index) { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
inline float Vector<float>::operator[](int index) const { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
inline float Vector<float>::length() const { | |
return _mm_cvtss_f32(_mm_sqrt_ss(sse_dot4_ps(v, v))); | |
} | |
inline float Vector<float>::length_squared() const { | |
return sse_dot1_ps(v, v); | |
} | |
inline Vector<float> Vector<float>::normalize() const { | |
return Vector<float>(_mm_div_ps(v, _mm_sqrt_ps(sse_dot4_ps(v, v)))); | |
} | |
inline Vector<float> operator+(const Vector<float>& v1, const Vector<float>& v2) { | |
return Vector<float>(_mm_add_ps(v1.v, v2.v)); | |
} | |
inline Vector<float>& operator+=(Vector<float>& v1, const Vector<float>& v2) { | |
v1.v = _mm_add_ps(v1.v, v2.v); | |
return v1; | |
} | |
inline Vector<float> operator-(const Vector<float>& v1, const Vector<float>& v2) { | |
return Vector<float>(_mm_sub_ps(v1.v, v2.v)); | |
} | |
inline Vector<float>& operator-=(Vector<float>& v1, const Vector<float>& v2) { | |
v1.v = _mm_sub_ps(v1.v, v2.v); | |
return v1; | |
} | |
inline Vector<float> operator-(const Vector<float>& v) { | |
return Vector<float>(_mm_sub_ps(_mm_setzero_ps(), v.v)); | |
} | |
inline Vector<float> operator*(const Vector<float>& v, float f) { | |
return Vector<float>(_mm_mul_ps(v.v, _mm_set1_ps(f))); | |
} | |
inline Vector<float> operator*(float f, const Vector<float>& v) { | |
return v * f; | |
} | |
inline Vector<float>& operator*=(Vector<float>& v, float f) { | |
v.v = _mm_mul_ps(v.v, _mm_set1_ps(f)); | |
return v; | |
} | |
inline Vector<float> operator/(const Vector<float>& v, float f) { | |
return Vector<float>(_mm_div_ps(v.v, _mm_set1_ps(f))); | |
} | |
inline Vector<float>& operator/=(Vector<float>& v, float f) { | |
v.v = _mm_div_ps(v.v, _mm_set1_ps(f)); | |
return v; | |
} | |
inline float dot(const Vector<float>& v1, const Vector<float>& v2) { | |
return sse_dot1_ps(v1.v, v2.v); | |
} | |
inline Vector<float> cross(const Vector<float>& v1, const Vector<float>& v2) { | |
__m128 t1 = _mm_shuffle_ps(v1.v, v1.v, 0xc9); | |
__m128 t2 = _mm_shuffle_ps(v1.v, v1.v, 0xd2); | |
__m128 t3 = _mm_shuffle_ps(v2.v, v2.v, 0xd2); | |
__m128 t4 = _mm_shuffle_ps(v2.v, v2.v, 0xc9); | |
__m128 t5 = _mm_mul_ps(t1, t3); | |
__m128 t6 = _mm_mul_ps(t2, t4); | |
return Vector<float>(_mm_sub_ps(t5, t6)); | |
} | |
inline Point<float>::Point() { } | |
inline Point<float>::Point(float x_, float y_, float z_) : v(_mm_setr_ps(x_, y_, z_, 1.0f)) { } | |
inline Point<float>::Point(__m128 v_) : v(v_) { } | |
inline Point<float>::Point(const Point<float>& point) : v(point.v) { } | |
inline Point<float>& Point<float>::operator=(const Point<float>& point) { | |
v = point.v; | |
return *this; | |
} | |
inline float& Point<float>::operator[](int index) { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
inline float Point<float>::operator[](int index) const { | |
assert(index >= 0 && index <= 3); | |
return (&x)[index]; | |
} | |
inline float Point<float>::distance(const Point<float>& p) const { | |
return (*this - p).length(); | |
} | |
inline float Point<float>::distance_squared(const Point<float>& p) const { | |
return (*this - p).length_squared(); | |
} | |
inline Point<float> operator+(const Point<float>& p, const Vector<float>& v) { | |
return Point<float>(_mm_add_ps(p.v, v.v)); | |
} | |
inline Point<float>& operator+=(Point<float>& p, const Vector<float>& v) { | |
p.v = _mm_add_ps(p.v, v.v); | |
return p; | |
} | |
inline Point<float> operator-(const Point<float>& p, const Vector<float>& v) { | |
return Point<float>(_mm_sub_ps(p.v, v.v)); | |
} | |
inline Point<float>& operator-=(Point<float>& p, const Vector<float>& v) { | |
p.v = _mm_sub_ps(p.v, v.v); | |
return p; | |
} | |
inline Vector<float> operator-(const Point<float>& p1, const Point<float>& p2) { | |
return Vector<float>(_mm_sub_ps(p1.v, p2.v)); | |
} | |
inline Matrix<float>::Matrix(const Matrix<float>& m) : r0(m.r0), r1(m.r1), r2(m.r2) { } | |
inline Matrix<float>& Matrix<float>::operator=(const Matrix<float>& m) { | |
r0 = m.r0; | |
r1 = m.r1; | |
r2 = m.r2; | |
return *this; | |
} | |
inline Matrix<float> Matrix<float>::identity() { | |
return Matrix<float>( | |
Vector<float>(1.0f, 0.0f, 0.0f, 0.0f), | |
Vector<float>(0.0f, 1.0f, 0.0f, 0.0f), | |
Vector<float>(0.0f, 0.0f, 1.0f, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::translate(const Vector<float>& v) { | |
return Matrix<float>( | |
Vector<float>(1.0f, 0.0f, 0.0f, v.x), | |
Vector<float>(0.0f, 1.0f, 0.0f, v.y), | |
Vector<float>(0.0f, 0.0f, 1.0f, v.z)); | |
} | |
inline Matrix<float> Matrix<float>::rotate_x(float angle) { | |
float c = std::cos(angle); | |
float s = std::sin(angle); | |
return Matrix<float>( | |
Vector<float>(1.0f, 0.0f, 0.0f, 0.0f), | |
Vector<float>(0.0f, c, -s, 0.0f), | |
Vector<float>(0.0f, s, c, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::rotate_y(float angle) { | |
float c = std::cos(angle); | |
float s = std::sin(angle); | |
return Matrix<float>( | |
Vector<float>( c, 0.0f, s, 0.0f), | |
Vector<float>(0.0f, 1.0f, 0.0f, 0.0f), | |
Vector<float>( -s, 0.0f, c, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::rotate_z(float angle) { | |
float c = std::cos(angle); | |
float s = std::sin(angle); | |
return Matrix<float>( | |
Vector<float>( c, -s, 0.0f, 0.0f), | |
Vector<float>( s, c, 0.0f, 0.0f), | |
Vector<float>(0.0f, 0.0f, 1.0f, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::rotate(const Vector<float>& axis, float angle) { | |
Vector<float> a = axis.normalize(); | |
float c = std::cos(angle); | |
float s = std::sin(angle); | |
float cc = 1.0f - c; | |
float t1 = a.x * a.y * cc; | |
float t2 = a.x * a.z * cc; | |
float t3 = a.y * a.z * cc; | |
float u1 = a.x * s; | |
float u2 = a.y * s; | |
float u3 = a.z * s; | |
return Matrix<float>( | |
Vector<float>(a.x * a.x * cc + c, t1 - u3, t2 + u2, 0.0f), | |
Vector<float>(t1 + u3, a.y * a.y * cc + c, t3 - u1, 0.0f), | |
Vector<float>(t2 - u2, t3 + u1, a.z * a.z * cc + c, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::scale(float f) { | |
return Matrix<float>( | |
Vector<float>( f, 0.0f, 0.0f, 0.0f), | |
Vector<float>(0.0f, f, 0.0f, 0.0f), | |
Vector<float>(0.0f, 0.0f, f, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::scale(float fx, float fy, float fz) { | |
return Matrix<float>( | |
Vector<float>( fx, 0.0f, 0.0f, 0.0f), | |
Vector<float>(0.0f, fy, 0.0f, 0.0f), | |
Vector<float>(0.0f, 0.0f, fz, 0.0f)); | |
} | |
inline Matrix<float> Matrix<float>::operator*(const Matrix<float>& m) const { | |
__m128 r3 = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f); | |
__m128 t0 = _mm_unpacklo_ps(m.r0.v, m.r1.v); | |
__m128 t1 = _mm_unpacklo_ps(m.r2.v, r3); | |
__m128 t2 = _mm_unpackhi_ps(m.r0.v, m.r1.v); | |
__m128 t3 = _mm_unpackhi_ps(m.r2.v, r3); | |
__m128 tr0 = _mm_movelh_ps(t0, t1); | |
__m128 tr1 = _mm_movehl_ps(t1, t0); | |
__m128 tr2 = _mm_movelh_ps(t2, t3); | |
__m128 tr3 = _mm_movehl_ps(t3, t2); | |
return Matrix<float>( | |
Vector<float>(sse_dot1_ps(r0.v, tr0), sse_dot1_ps(r0.v, tr1), sse_dot1_ps(r0.v, tr2), sse_dot1_ps(r0.v, tr3)), | |
Vector<float>(sse_dot1_ps(r1.v, tr0), sse_dot1_ps(r1.v, tr1), sse_dot1_ps(r1.v, tr2), sse_dot1_ps(r1.v, tr3)), | |
Vector<float>(sse_dot1_ps(r2.v, tr0), sse_dot1_ps(r2.v, tr1), sse_dot1_ps(r2.v, tr2), sse_dot1_ps(r2.v, tr3))); | |
} | |
inline Vector<float> Matrix<float>::transform(const Vector<float>& v) const { | |
return Vector<float>(sse_dot1_ps(r0.v, v.v), sse_dot1_ps(r1.v, v.v), sse_dot1_ps(r2.v, v.v)); | |
} | |
inline Point<float> Matrix<float>::transform(const Point<float>& p) const { | |
return Point<float>(sse_dot1_ps(r0.v, p.v), sse_dot1_ps(r1.v, p.v), sse_dot1_ps(r2.v, p.v)); | |
} | |
inline Ray<float> Matrix<float>::transform(const Ray<float>& r) const { | |
return Ray<float>(transform(r.origin), transform(r.direction)); | |
} | |
inline Matrix<float>::Matrix(const Vector<float>& r0_, const Vector<float>& r1_, const Vector<float>& r2_) : r0(r0_), r1(r1_), r2(r2_) { } | |
} // namespace vecmath | |
#endif // SANDBOX_VECMATH_SSE_IMPL_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment