Created December 3, 2021 20:05
CUDA Math Helper Functions
#pragma once | |
#include <iostream> | |
#include <cuda_runtime.h> | |
#include <math.h> | |
#include <cstdio> | |
#include <cstdlib> | |
#ifndef __CUDACC__ | |
using namespace std; | |
#endif | |
// Enable run time assertion checking in kernel code | |
#define cudaAssert(condition) do { if (!(condition)) { printf("ASSERT: %s %s\n", #condition, __FILE__); } } while (0)
#define cudaSafeCall(err) _internal_cudaSafeCall(err,__FILE__,__LINE__) | |
// Adapted from the G3D innovation engine's debugAssert.h | |
# if defined(_MSC_VER) | |
# define rawBreak() __debugbreak(); | |
# elif defined(__i386__) | |
// gcc on intel | |
# define rawBreak() __asm__ __volatile__ ( "int $3" ); | |
# else | |
// some other gcc | |
# define rawBreak() ::abort() | |
# endif | |
inline void _internal_cudaSafeCall(cudaError err, const char *file, const int line){ | |
if (cudaSuccess != err) { | |
printf("%s(%i) : cudaSafeCall() error: %s\n", file, line, cudaGetErrorString(err)); | |
rawBreak(); | |
} | |
} | |
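// Usage sketch (illustrative, not part of the original gist; the function name and
// parameters are only examples): wrap CUDA runtime calls in cudaSafeCall so a
// failed call reports the call site and halts via rawBreak().
inline void exampleSafeAlloc(void** devPtr, size_t bytes) {
    cudaSafeCall(cudaMalloc(devPtr, bytes));      // allocate device memory
    cudaSafeCall(cudaMemset(*devPtr, 0, bytes));  // zero-initialize it
}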
// CUDA defines rsqrtf for device code only; provide the missing host version.
#if !defined(__CUDACC__) | |
__inline__ __host__ float rsqrtf(float x) { | |
return 1.0f / sqrtf(x); | |
} | |
#endif | |
#define CUDA_UTIL_FUNC __inline__ __host__ __device__ | |
CUDA_UTIL_FUNC float2 make_float2(float x) { | |
return make_float2(x, x); | |
} | |
CUDA_UTIL_FUNC float2 make_float2(int2 v) { | |
return make_float2(float(v.x), float(v.y));
} | |
CUDA_UTIL_FUNC float2 make_float2(float4 v) { | |
return make_float2(v.x, v.y); | |
} | |
CUDA_UTIL_FUNC float3 make_float3(float x) { | |
return make_float3(x, x, x); | |
} | |
CUDA_UTIL_FUNC float4 make_float4(float x) { | |
return make_float4(x, x, x, x); | |
} | |
CUDA_UTIL_FUNC float3 make_float3(float4 f) { | |
return make_float3(f.x, f.y, f.z); | |
} | |
CUDA_UTIL_FUNC float4 make_float4(float3 f, float s) { | |
return make_float4(f.x, f.y, f.z, s); | |
} | |
CUDA_UTIL_FUNC float4 make_float4(float2 f1, float2 f2) { | |
return make_float4(f1.x, f1.y, f2.x, f2.y); | |
} | |
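// Illustrative sketch (assumed names, not part of the original gist): the overloads
// above splat scalars and repack between vector widths, e.g. promoting an rgb color
// to rgba or converting integer pixel coordinates to floating point.
CUDA_UTIL_FUNC float4 exampleToRGBA(float3 rgb) {
    return make_float4(rgb, 1.0f);   // append alpha = 1
}
CUDA_UTIL_FUNC float2 examplePixelToFloat(int2 pixel) {
    return make_float2(pixel);       // int2 -> float2 overload above
}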
/////////////// Scalar-wise vector add //////////////////// | |
CUDA_UTIL_FUNC float2 operator+(float2 v0, float2 v1) { | |
return make_float2(v0.x + v1.x, v0.y + v1.y); | |
} | |
CUDA_UTIL_FUNC float3 operator+(float3 v0, float3 v1) { | |
return make_float3(v0.x + v1.x, v0.y + v1.y, v0.z + v1.z); | |
} | |
CUDA_UTIL_FUNC float4 operator+(float4 v0, float4 v1) { | |
return make_float4(v0.x + v1.x, v0.y + v1.y, v0.z + v1.z, v0.w + v1.w); | |
} | |
CUDA_UTIL_FUNC float2 operator+(float2 v0, float s) { | |
return make_float2(v0.x + s, v0.y + s); | |
} | |
CUDA_UTIL_FUNC float3 operator+(float3 v0, float s) { | |
return make_float3(v0.x + s, v0.y + s, v0.z + s); | |
} | |
CUDA_UTIL_FUNC float4 operator+(float4 v0, float s) { | |
return make_float4(v0.x + s, v0.y + s, v0.z + s, v0.w + s); | |
} | |
/////////////// Scalar-wise vector subtract //////////////////// | |
CUDA_UTIL_FUNC float2 operator-(float2 v0) { | |
return make_float2(-v0.x, -v0.y); | |
} | |
CUDA_UTIL_FUNC float3 operator-(float3 v0) { | |
return make_float3(-v0.x, -v0.y, -v0.z); | |
} | |
CUDA_UTIL_FUNC float4 operator-(float4 v0) { | |
return make_float4(-v0.x, -v0.y, -v0.z, -v0.w); | |
} | |
CUDA_UTIL_FUNC float2 operator-(float2 v0, float2 v1) { | |
return make_float2(v0.x - v1.x, v0.y - v1.y); | |
} | |
CUDA_UTIL_FUNC float3 operator-(float3 v0, float3 v1) { | |
return make_float3(v0.x - v1.x, v0.y - v1.y, v0.z - v1.z); | |
} | |
CUDA_UTIL_FUNC float4 operator-(float4 v0, float4 v1) { | |
return make_float4(v0.x - v1.x, v0.y - v1.y, v0.z - v1.z, v0.w - v1.w); | |
} | |
CUDA_UTIL_FUNC float2 operator-(float2 v0, float s) { | |
return make_float2(v0.x - s, v0.y - s); | |
} | |
CUDA_UTIL_FUNC float3 operator-(float3 v0, float s) { | |
return make_float3(v0.x - s, v0.y - s, v0.z - s); | |
} | |
CUDA_UTIL_FUNC float4 operator-(float4 v0, float s) { | |
return make_float4(v0.x - s, v0.y - s, v0.z - s, v0.w - s); | |
} | |
/////////////// Scalar-wise vector multiply //////////////////// | |
CUDA_UTIL_FUNC float2 operator*(float2 v0, float2 v1) { | |
return make_float2(v0.x*v1.x, v0.y*v1.y); | |
} | |
CUDA_UTIL_FUNC float3 operator*(float3 v0, float3 v1) { | |
return make_float3(v0.x*v1.x, v0.y*v1.y, v0.z*v1.z); | |
} | |
CUDA_UTIL_FUNC float4 operator*(float4 v0, float4 v1) { | |
return make_float4(v0.x*v1.x, v0.y*v1.y, v0.z*v1.z, v0.w*v1.w); | |
} | |
/////////////// Scalar-wise vector divide //////////////////// | |
CUDA_UTIL_FUNC float2 operator/(float2 v0, float2 v1) { | |
return make_float2(v0.x / v1.x, v0.y / v1.y); | |
} | |
CUDA_UTIL_FUNC float3 operator/(float3 v0, float3 v1) { | |
return make_float3(v0.x / v1.x, v0.y / v1.y, v0.z / v1.z); | |
} | |
CUDA_UTIL_FUNC float4 operator/(float4 v0, float4 v1) { | |
return make_float4(v0.x / v1.x, v0.y / v1.y, v0.z / v1.z, v0.w / v1.w); | |
} | |
/////////////// += //////////////////// | |
CUDA_UTIL_FUNC void operator+=(float2& v0, float2 v1) { | |
v0.x += v1.x; | |
v0.y += v1.y; | |
} | |
CUDA_UTIL_FUNC void operator+=(float3& v0, float3 v1) { | |
v0.x += v1.x; | |
v0.y += v1.y; | |
v0.z += v1.z; | |
} | |
CUDA_UTIL_FUNC void operator+=(float4& v0, float4 v1) { | |
v0.x += v1.x; | |
v0.y += v1.y; | |
v0.z += v1.z; | |
v0.w += v1.w; | |
} | |
CUDA_UTIL_FUNC void operator+=(float2& v0, float x) { | |
v0.x += x; | |
v0.y += x; | |
} | |
CUDA_UTIL_FUNC void operator+=(float3& v0, float x) { | |
v0.x += x; | |
v0.y += x; | |
v0.z += x; | |
} | |
CUDA_UTIL_FUNC void operator+=(float4& v0, float x) { | |
v0.x += x; | |
v0.y += x; | |
v0.z += x; | |
v0.w += x; | |
} | |
/////////////// -= //////////////////// | |
CUDA_UTIL_FUNC void operator-=(float2& v0, float2 v1) { | |
v0.x -= v1.x; | |
v0.y -= v1.y; | |
} | |
CUDA_UTIL_FUNC void operator-=(float3& v0, float3 v1) { | |
v0.x -= v1.x; | |
v0.y -= v1.y; | |
v0.z -= v1.z; | |
} | |
CUDA_UTIL_FUNC void operator-=(float4& v0, float4 v1) { | |
v0.x -= v1.x; | |
v0.y -= v1.y; | |
v0.z -= v1.z; | |
v0.w -= v1.w; | |
} | |
CUDA_UTIL_FUNC void operator-=(float2& v0, float x) { | |
v0.x -= x; | |
v0.y -= x; | |
} | |
CUDA_UTIL_FUNC void operator-=(float3& v0, float x) { | |
v0.x -= x; | |
v0.y -= x; | |
v0.z -= x; | |
} | |
CUDA_UTIL_FUNC void operator-=(float4& v0, float x) { | |
v0.x -= x; | |
v0.y -= x; | |
v0.z -= x; | |
v0.w -= x; | |
} | |
/////////////// Multiply by a scalar //////////////////// | |
CUDA_UTIL_FUNC float2 operator*(float x, float2 v) { | |
return make_float2(v.x*x, v.y*x); | |
} | |
CUDA_UTIL_FUNC float3 operator*(float x, float3 v) { | |
return make_float3(v.x*x, v.y*x, v.z*x); | |
} | |
CUDA_UTIL_FUNC float4 operator*(float x, float4 v) { | |
return make_float4(v.x*x, v.y*x, v.z*x, v.w*x); | |
} | |
CUDA_UTIL_FUNC float2 operator*(float2 v, float x) { | |
return make_float2(v.x*x, v.y*x); | |
} | |
CUDA_UTIL_FUNC float3 operator*(float3 v, float x) { | |
return make_float3(v.x*x, v.y*x, v.z*x); | |
} | |
CUDA_UTIL_FUNC float4 operator*(float4 v, float x) { | |
return make_float4(v.x*x, v.y*x, v.z*x, v.w*x); | |
} | |
/////////////// Divide with a scalar //////////////////// | |
CUDA_UTIL_FUNC float2 operator/(float x, float2 v) { | |
return make_float2(x / v.x, x / v.y); | |
} | |
CUDA_UTIL_FUNC float3 operator/(float x, float3 v) { | |
return make_float3(x / v.x, x / v.y, x / v.z); | |
} | |
CUDA_UTIL_FUNC float4 operator/(float x, float4 v) { | |
return make_float4(x / v.x, x / v.y, x / v.z, x / v.w); | |
} | |
CUDA_UTIL_FUNC float2 operator/(float2 v, float x) { | |
return make_float2(v.x / x, v.y / x); | |
} | |
CUDA_UTIL_FUNC float3 operator/(float3 v, float x) { | |
return make_float3(v.x / x, v.y / x, v.z / x); | |
} | |
CUDA_UTIL_FUNC float4 operator/(float4 v, float x) { | |
return make_float4(v.x/x, v.y/x, v.z/x, v.w/x); | |
} | |
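// Usage sketch (illustrative, assumed name): the component-wise and scalar
// operators above compose naturally, e.g. into a linear interpolation helper.
CUDA_UTIL_FUNC float3 exampleLerp(float3 a, float3 b, float t) {
    return a + t * (b - a);  // uses operator-, operator*(float, float3), operator+
}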
CUDA_UTIL_FUNC float2 min(float2 v0, float2 v1) { | |
return { | |
v0.x < v1.x ? v0.x : v1.x, | |
v0.y < v1.y ? v0.y : v1.y, | |
}; | |
} | |
CUDA_UTIL_FUNC float2 max(float2 v0, float2 v1) { | |
return { | |
v0.x > v1.x ? v0.x : v1.x, | |
v0.y > v1.y ? v0.y : v1.y, | |
}; | |
} | |
CUDA_UTIL_FUNC float3 min(float3 v0, float3 v1) { | |
return { | |
v0.x < v1.x ? v0.x : v1.x, | |
v0.y < v1.y ? v0.y : v1.y, | |
v0.z < v1.z ? v0.z : v1.z, | |
}; | |
} | |
CUDA_UTIL_FUNC float3 max(float3 v0, float3 v1) { | |
return { | |
v0.x > v1.x ? v0.x : v1.x, | |
v0.y > v1.y ? v0.y : v1.y, | |
v0.z > v1.z ? v0.z : v1.z, | |
}; | |
} | |
CUDA_UTIL_FUNC float4 max(float4 v0, float4 v1) { | |
return { | |
v0.x > v1.x ? v0.x : v1.x, | |
v0.y > v1.y ? v0.y : v1.y, | |
v0.z > v1.z ? v0.z : v1.z, | |
v0.w > v1.w ? v0.w : v1.w, | |
}; | |
} | |
CUDA_UTIL_FUNC int2 max(int2 v0, int2 v1) { | |
return { | |
v0.x > v1.x ? v0.x : v1.x, | |
v0.y > v1.y ? v0.y : v1.y, | |
}; | |
} | |
CUDA_UTIL_FUNC int2 min(int2 v0, int2 v1) { | |
return { | |
v0.x < v1.x ? v0.x : v1.x, | |
v0.y < v1.y ? v0.y : v1.y, | |
}; | |
} | |
CUDA_UTIL_FUNC float4 sqrt(float4 v) { | |
return { | |
sqrtf(v.x), | |
sqrtf(v.y), | |
sqrtf(v.z), | |
sqrtf(v.w), | |
}; | |
} | |
CUDA_UTIL_FUNC float clamp(float v, float a, float b) { | |
return min(max(v, a),b); | |
} | |
CUDA_UTIL_FUNC float2 clamp(float2 v, float2 a, float2 b) { | |
return min(max(v, a),b); | |
} | |
CUDA_UTIL_FUNC float3 clamp(float3 v, float3 a, float3 b) { | |
return min(max(v, a),b); | |
} | |
CUDA_UTIL_FUNC int2 clamp(int2 v, int2 a, int2 b) { | |
return min(max(v, a),b); | |
} | |
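// Illustrative sketch (assumed name): a saturate-style helper built from the
// clamp() overloads above, pinning each component to [0, 1].
CUDA_UTIL_FUNC float3 exampleSaturate(float3 v) {
    return clamp(v, make_float3(0.f), make_float3(1.f));
}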
CUDA_UTIL_FUNC int sign(float x) | |
{ | |
int t = x < 0 ? -1 : 0; | |
return x > 0 ? 1 : t; | |
} | |
CUDA_UTIL_FUNC float3 cross(float3 a, float3 b) | |
{ | |
return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); | |
} | |
CUDA_UTIL_FUNC float dot(float2 v0, float2 v1) { | |
return v0.x*v1.x + v0.y*v1.y; | |
} | |
CUDA_UTIL_FUNC float dot(float3 v0, float3 v1) { | |
return v0.x*v1.x + v0.y*v1.y + v0.z*v1.z; | |
} | |
CUDA_UTIL_FUNC float dot(float4 v0, float4 v1) { | |
return v0.x*v1.x + v0.y*v1.y + v0.z*v1.z + v0.w*v1.w; | |
} | |
CUDA_UTIL_FUNC float dot2(float2 v0) { | |
return v0.x*v0.x + v0.y*v0.y; | |
} | |
CUDA_UTIL_FUNC float dot2(float3 v0) { | |
return v0.x*v0.x + v0.y*v0.y + v0.z*v0.z; | |
} | |
CUDA_UTIL_FUNC float dot2(float4 v0) { | |
return v0.x*v0.x + v0.y*v0.y + v0.z*v0.z + v0.w*v0.w; | |
} | |
CUDA_UTIL_FUNC float length(float2 v) { | |
return sqrtf(dot(v, v)); | |
} | |
CUDA_UTIL_FUNC float length(float3 v) { | |
return sqrtf(dot(v, v)); | |
} | |
CUDA_UTIL_FUNC float length(float4 v) { | |
return sqrtf(dot(v, v)); | |
} | |
CUDA_UTIL_FUNC float2 normalize(float2 v) { | |
return v*rsqrtf(dot(v, v)); | |
} | |
CUDA_UTIL_FUNC float3 normalize(float3 v) { | |
return v*rsqrtf(dot(v, v)); | |
} | |
CUDA_UTIL_FUNC float4 normalize(float4 v) { | |
return v*rsqrtf(dot(v, v)); | |
} | |
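// Usage sketch (illustrative, assumed name): a unit triangle normal computed
// with the cross() and normalize() helpers above.
CUDA_UTIL_FUNC float3 exampleTriangleNormal(float3 a, float3 b, float3 c) {
    return normalize(cross(b - a, c - a));
}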
CUDA_UTIL_FUNC float2 floor(float2 i) { | |
return make_float2(floor(i.x), floor(i.y)); | |
} | |
CUDA_UTIL_FUNC float2 fract(float2 i) { | |
return i - floor(i); | |
} | |
CUDA_UTIL_FUNC float2 ceil(float2 i) { | |
return make_float2(ceil(i.x), ceil(i.y)); | |
} | |
// CUDA_UTIL_FUNC float3 reflect(float3 i, float3 n) { | |
// return i - 2.f * n * dot(i, n); | |
// } | |
// CUDA_UTIL_FUNC float3 refract( float3 i, float3 n, float eta ) | |
// { | |
// // if (eta == 1.f) return i; | |
// // if (eta <= 0.f) return make_float3(0.f); | |
// // if (isnan(eta)) return make_float3(0.f); | |
// // if (isinf(eta)) return make_float3(0.f); | |
// float cosi = dot(-1.f * i, n); | |
// float cost2 = 1.0f - eta * eta * (1.0f - cosi*cosi); | |
// float3 t = eta*i + ((eta*cosi - sqrtf(fabsf(cost2))) * n); | |
// return t * ((cost2 > 0.f) ? make_float3(1.f) : make_float3(0.f)); | |
// } | |
CUDA_UTIL_FUNC void fresnel(const float3 &I, const float3 &N, const float &ior, float &kr) | |
{ | |
float cosi = clamp(dot(I, N), -1.f, 1.f); // clamp(v, a, b) as defined above
float etai = 1, etat = ior; | |
if (cosi > 0) { | |
float tmp = etai; | |
etai = etat; | |
etat = tmp; | |
} | |
// Compute sini using Snell's law | |
float sint = etai / etat * sqrtf(max(0.f, 1.f - cosi * cosi)); | |
// Total internal reflection | |
if (sint >= 1) { | |
kr = 1; | |
} | |
else { | |
float cost = sqrtf(max(0.f, 1.f - sint * sint)); | |
cosi = fabsf(cosi); | |
float Rs = ((etat * cosi) - (etai * cost)) / ((etat * cosi) + (etai * cost)); | |
float Rp = ((etai * cosi) - (etat * cost)) / ((etai * cosi) + (etat * cost)); | |
kr = (Rs * Rs + Rp * Rp) / 2; | |
} | |
// As a consequence of the conservation of energy, transmittance is given by: | |
// kt = 1 - kr; | |
} | |
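// Usage sketch (illustrative, assumed names): splitting energy between a reflected
// and a refracted path with the fresnel() helper above; by conservation of energy
// the transmitted fraction is 1 - kr.
CUDA_UTIL_FUNC float2 exampleFresnelSplit(float3 I, float3 N, float ior) {
    float kr;
    fresnel(I, N, ior, kr);
    return make_float2(kr, 1.0f - kr);  // (reflectance, transmittance)
}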
CUDA_UTIL_FUNC bool all_zero(const float3 &v) { | |
return v.x == 0.f && v.y == 0.f && v.z == 0.f; | |
} | |
CUDA_UTIL_FUNC bool all_zero(const float4 &v) { | |
return v.x == 0.f && v.y == 0.f && v.z == 0.f && v.w == 0.f; | |
} | |
////////////// int stuff | |
/////////////// Scalar-wise vector add //////////////////// | |
CUDA_UTIL_FUNC int2 operator+(int2 v0, int2 v1) { | |
return make_int2(v0.x + v1.x, v0.y + v1.y); | |
} | |
CUDA_UTIL_FUNC int3 operator+(int3 v0, int3 v1) { | |
return make_int3(v0.x + v1.x, v0.y + v1.y, v0.z + v1.z); | |
} | |
CUDA_UTIL_FUNC int4 operator+(int4 v0, int4 v1) { | |
return make_int4(v0.x + v1.x, v0.y + v1.y, v0.z + v1.z, v0.w + v1.w); | |
} | |
#undef CUDA_UTIL_FUNC | |
#if defined(__CUDA_ARCH__)
// Note: a macro cannot expand to a #pragma directive; use the _Pragma operator instead.
#define __CONDITIONAL_UNROLL__ _Pragma("unroll")
#else
#define __CONDITIONAL_UNROLL__
#endif
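// Illustrative sketch (assumed name): __CONDITIONAL_UNROLL__ requests loop unrolling
// during the device compilation pass and expands to nothing on the host.
inline __device__ __host__ float exampleSum4(const float v[4]) {
    float s = 0.0f;
    __CONDITIONAL_UNROLL__
    for (unsigned int i = 0; i < 4; i++) s += v[i];
    return s;
}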
////////////////////////////// | |
// float2x2 | |
////////////////////////////// | |
class float2x2 | |
{ | |
public: | |
inline __device__ __host__ float2x2() | |
{ | |
} | |
inline __device__ __host__ float2x2(const float values[4]) | |
{ | |
m11 = values[0]; m12 = values[1]; | |
m21 = values[2]; m22 = values[3]; | |
} | |
inline __device__ __host__ float2x2(const float2x2& other) | |
{ | |
m11 = other.m11; m12 = other.m12; | |
m21 = other.m21; m22 = other.m22; | |
} | |
inline __device__ __host__ void setZero() | |
{ | |
m11 = 0.0f; m12 = 0.0f; | |
m21 = 0.0f; m22 = 0.0f; | |
} | |
static inline __device__ __host__ float2x2 getIdentity() | |
{ | |
float2x2 res; | |
res.setZero(); | |
res.m11 = res.m22 = 1.0f; | |
return res; | |
} | |
inline __device__ __host__ float2x2& operator=(const float2x2 &other) | |
{ | |
m11 = other.m11; m12 = other.m12; | |
m21 = other.m21; m22 = other.m22; | |
return *this; | |
} | |
inline __device__ __host__ float2x2 getInverse() | |
{ | |
float2x2 res; | |
res.m11 = m22; res.m12 = -m12; | |
res.m21 = -m21; res.m22 = m11; | |
return res*(1.0f/det()); | |
} | |
inline __device__ __host__ float det() | |
{ | |
return m11*m22-m21*m12; | |
} | |
inline __device__ __host__ float2 operator*(const float2& v) const | |
{ | |
return make_float2(m11*v.x + m12*v.y, m21*v.x + m22*v.y); | |
} | |
//! matrix scalar multiplication | |
inline __device__ __host__ float2x2 operator*(const float t) const | |
{ | |
float2x2 res; | |
res.m11 = m11 * t; res.m12 = m12 * t; | |
res.m21 = m21 * t; res.m22 = m22 * t; | |
return res; | |
} | |
//! matrix matrix multiplication | |
inline __device__ __host__ float2x2 operator*(const float2x2& other) const | |
{ | |
float2x2 res; | |
res.m11 = m11 * other.m11 + m12 * other.m21; | |
res.m12 = m11 * other.m12 + m12 * other.m22; | |
res.m21 = m21 * other.m11 + m22 * other.m21; | |
res.m22 = m21 * other.m12 + m22 * other.m22; | |
return res; | |
} | |
//! matrix matrix addition | |
inline __device__ __host__ float2x2 operator+(const float2x2& other) const | |
{ | |
float2x2 res; | |
res.m11 = m11 + other.m11; | |
res.m12 = m12 + other.m12; | |
res.m21 = m21 + other.m21; | |
res.m22 = m22 + other.m22; | |
return res; | |
} | |
inline __device__ __host__ float& operator()(int i, int j) | |
{ | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float operator()(int i, int j) const | |
{ | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
union | |
{ | |
struct | |
{ | |
float m11; float m12; | |
float m21; float m22; | |
}; | |
float entries[4]; | |
float entries2[2][2]; | |
}; | |
}; | |
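// Usage sketch (illustrative, assumed name): solving a 2x2 linear system
// A * x = b via the inverse helper above (assumes det(A) != 0).
inline __device__ __host__ float2 exampleSolve2x2(float2x2 A, float2 b) {
    return A.getInverse() * b;
}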
////////////////////////////// | |
// float2x3 | |
////////////////////////////// | |
class float2x3 | |
{ | |
public: | |
inline __device__ __host__ float2x3() | |
{ | |
} | |
inline __device__ __host__ float2x3(const float values[6]) | |
{ | |
m11 = values[0]; m12 = values[1]; m13 = values[2]; | |
m21 = values[3]; m22 = values[4]; m23 = values[5]; | |
} | |
inline __device__ __host__ float2x3(const float2x3& other) | |
{ | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; | |
} | |
inline __device__ __host__ float2x3& operator=(const float2x3 &other) | |
{ | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; | |
return *this; | |
} | |
inline __device__ __host__ float2 operator*(const float3 &v) const | |
{ | |
return make_float2(m11*v.x + m12*v.y + m13*v.z, m21*v.x + m22*v.y + m23*v.z); | |
} | |
//! matrix scalar multiplication | |
inline __device__ __host__ float2x3 operator*(const float t) const | |
{ | |
float2x3 res; | |
res.m11 = m11 * t; res.m12 = m12 * t; res.m13 = m13 * t; | |
res.m21 = m21 * t; res.m22 = m22 * t; res.m23 = m23 * t; | |
return res; | |
} | |
//! matrix scalar division | |
inline __device__ __host__ float2x3 operator/(const float t) const | |
{ | |
float2x3 res; | |
res.m11 = m11 / t; res.m12 = m12 / t; res.m13 = m13 / t; | |
res.m21 = m21 / t; res.m22 = m22 / t; res.m23 = m23 / t; | |
return res; | |
} | |
inline __device__ __host__ float& operator()(int i, int j) | |
{ | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float operator()(int i, int j) const | |
{ | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
union | |
{ | |
struct | |
{ | |
float m11; float m12; float m13; | |
float m21; float m22; float m23; | |
}; | |
float entries[6]; | |
float entries2[2][3]; // 2 rows x 3 columns, matching operator()(row, col)
}; | |
}; | |
////////////////////////////// | |
// float3x2 | |
////////////////////////////// | |
class float3x2 | |
{ | |
public: | |
inline __device__ __host__ float3x2() | |
{ | |
} | |
inline __device__ __host__ float3x2(const float values[6]) | |
{ | |
m11 = values[0]; m12 = values[1]; | |
m21 = values[2]; m22 = values[3]; | |
m31 = values[4]; m32 = values[5]; | |
} | |
inline __device__ __host__ float3x2& operator=(const float3x2& other) | |
{ | |
m11 = other.m11; m12 = other.m12; | |
m21 = other.m21; m22 = other.m22; | |
m31 = other.m31; m32 = other.m32; | |
return *this; | |
} | |
inline __device__ __host__ float3 operator*(const float2& v) const | |
{ | |
return make_float3(m11*v.x + m12*v.y, m21*v.x + m22*v.y, m31*v.x + m32*v.y); | |
} | |
inline __device__ __host__ float3x2 operator*(const float t) const | |
{ | |
float3x2 res; | |
res.m11 = m11 * t; res.m12 = m12 * t; | |
res.m21 = m21 * t; res.m22 = m22 * t; | |
res.m31 = m31 * t; res.m32 = m32 * t; | |
return res; | |
} | |
inline __device__ __host__ float& operator()(int i, int j) | |
{ | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float operator()(int i, int j) const | |
{ | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float2x3 getTranspose() | |
{ | |
float2x3 res; | |
res.m11 = m11; res.m12 = m21; res.m13 = m31; | |
res.m21 = m12; res.m22 = m22; res.m23 = m32; | |
return res; | |
} | |
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
union | |
{ | |
struct | |
{ | |
float m11; float m12; | |
float m21; float m22; | |
float m31; float m32; | |
}; | |
float entries[6]; | |
float entries2[3][2]; | |
}; | |
}; | |
inline __device__ __host__ float2x2 matMul(const float2x3& m0, const float3x2& m1) | |
{ | |
float2x2 res; | |
res.m11 = m0.m11*m1.m11+m0.m12*m1.m21+m0.m13*m1.m31; | |
res.m12 = m0.m11*m1.m12+m0.m12*m1.m22+m0.m13*m1.m32; | |
res.m21 = m0.m21*m1.m11+m0.m22*m1.m21+m0.m23*m1.m31; | |
res.m22 = m0.m21*m1.m12+m0.m22*m1.m22+m0.m23*m1.m32; | |
return res; | |
} | |
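// Illustrative sketch (assumed name): a Gauss-Newton style normal matrix
// J^T * J for a 3x2 Jacobian, combining getTranspose() and matMul() above.
inline __device__ __host__ float2x2 exampleNormalMatrix(float3x2 J) {
    return matMul(J.getTranspose(), J);
}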
class float3x3 { | |
public: | |
inline __device__ __host__ float3x3() { | |
} | |
inline __device__ __host__ float3x3(const float values[9]) { | |
m11 = values[0]; m12 = values[1]; m13 = values[2]; | |
m21 = values[3]; m22 = values[4]; m23 = values[5]; | |
m31 = values[6]; m32 = values[7]; m33 = values[8]; | |
} | |
inline __device__ __host__ float3x3(const float3x3& other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; | |
} | |
explicit inline __device__ __host__ float3x3(const float2x2& other) { | |
m11 = other.m11; m12 = other.m12; m13 = 0.0; | |
m21 = other.m21; m22 = other.m22; m23 = 0.0; | |
m31 = 0.0; m32 = 0.0; m33 = 0.0; | |
} | |
inline __device__ __host__ float3x3& operator=(const float3x3 &other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; | |
return *this; | |
} | |
inline __device__ __host__ float& operator()(int i, int j) { | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float operator()(int i, int j) const { | |
return entries2[i][j]; | |
} | |
static inline __device__ __host__ void swap(float& v0, float& v1) { | |
float tmp = v0; | |
v0 = v1; | |
v1 = tmp; | |
} | |
inline __device__ __host__ void transpose() { | |
swap(m12, m21); | |
swap(m13, m31); | |
swap(m23, m32); | |
} | |
inline __device__ __host__ float3x3 getTranspose() const { | |
float3x3 ret = *this; | |
ret.transpose(); | |
return ret; | |
} | |
//! inverts the matrix | |
inline __device__ __host__ void invert() { | |
*this = getInverse(); | |
} | |
//! computes the inverse of the matrix; the result is returned | |
inline __device__ __host__ float3x3 getInverse() const { | |
float3x3 res; | |
res.entries[0] = entries[4]*entries[8] - entries[5]*entries[7]; | |
res.entries[1] = -entries[1]*entries[8] + entries[2]*entries[7]; | |
res.entries[2] = entries[1]*entries[5] - entries[2]*entries[4]; | |
res.entries[3] = -entries[3]*entries[8] + entries[5]*entries[6]; | |
res.entries[4] = entries[0]*entries[8] - entries[2]*entries[6]; | |
res.entries[5] = -entries[0]*entries[5] + entries[2]*entries[3]; | |
res.entries[6] = entries[3]*entries[7] - entries[4]*entries[6]; | |
res.entries[7] = -entries[0]*entries[7] + entries[1]*entries[6]; | |
res.entries[8] = entries[0]*entries[4] - entries[1]*entries[3]; | |
const float invDet = 1.0f/det();
return res * invDet;
} | |
inline __device__ __host__ void setZero(float value = 0.0f) { | |
m11 = m12 = m13 = value; | |
m21 = m22 = m23 = value; | |
m31 = m32 = m33 = value; | |
} | |
inline __device__ __host__ float det() const { | |
return | |
+ m11*m22*m33 | |
+ m12*m23*m31 | |
+ m13*m21*m32 | |
- m31*m22*m13 | |
- m32*m23*m11 | |
- m33*m21*m12; | |
} | |
inline __device__ __host__ float3 getRow(unsigned int i) { | |
return make_float3(entries[3*i+0], entries[3*i+1], entries[3*i+2]); | |
} | |
inline __device__ __host__ void setRow(unsigned int i, float3& r) { | |
entries[3*i+0] = r.x; | |
entries[3*i+1] = r.y; | |
entries[3*i+2] = r.z; | |
} | |
inline __device__ __host__ void normalizeRows() | |
{ | |
for(unsigned int i = 0; i<3; i++) | |
{ | |
float3 r = normalize(getRow(i)); | |
setRow(i, r); | |
} | |
} | |
//! computes the product of two matrices (result stored in this) | |
inline __device__ __host__ void mult(const float3x3 &other) { | |
float3x3 res; | |
res.m11 = m11 * other.m11 + m12 * other.m21 + m13 * other.m31; | |
res.m12 = m11 * other.m12 + m12 * other.m22 + m13 * other.m32; | |
res.m13 = m11 * other.m13 + m12 * other.m23 + m13 * other.m33; | |
res.m21 = m21 * other.m11 + m22 * other.m21 + m23 * other.m31; | |
res.m22 = m21 * other.m12 + m22 * other.m22 + m23 * other.m32; | |
res.m23 = m21 * other.m13 + m22 * other.m23 + m23 * other.m33; | |
res.m31 = m31 * other.m11 + m32 * other.m21 + m33 * other.m31;
res.m32 = m31 * other.m12 + m32 * other.m22 + m33 * other.m32;
res.m33 = m31 * other.m13 + m32 * other.m23 + m33 * other.m33;
*this = res; | |
} | |
//! computes the sum of two matrices (result stored in this) | |
inline __device__ __host__ void add(const float3x3 &other) { | |
m11 += other.m11; m12 += other.m12; m13 += other.m13; | |
m21 += other.m21; m22 += other.m22; m23 += other.m23; | |
m31 += other.m31; m32 += other.m32; m33 += other.m33; | |
} | |
//! standard matrix matrix multiplication | |
inline __device__ __host__ float3x3 operator*(const float3x3 &other) const { | |
float3x3 res; | |
res.m11 = m11 * other.m11 + m12 * other.m21 + m13 * other.m31; | |
res.m12 = m11 * other.m12 + m12 * other.m22 + m13 * other.m32; | |
res.m13 = m11 * other.m13 + m12 * other.m23 + m13 * other.m33; | |
res.m21 = m21 * other.m11 + m22 * other.m21 + m23 * other.m31; | |
res.m22 = m21 * other.m12 + m22 * other.m22 + m23 * other.m32; | |
res.m23 = m21 * other.m13 + m22 * other.m23 + m23 * other.m33; | |
res.m31 = m31 * other.m11 + m32 * other.m21 + m33 * other.m31; | |
res.m32 = m31 * other.m12 + m32 * other.m22 + m33 * other.m32; | |
res.m33 = m31 * other.m13 + m32 * other.m23 + m33 * other.m33; | |
return res; | |
} | |
//! standard matrix matrix multiplication | |
inline __device__ __host__ float3x2 operator*(const float3x2 &other) const { | |
float3x2 res; | |
res.m11 = m11 * other.m11 + m12 * other.m21 + m13 * other.m31; | |
res.m12 = m11 * other.m12 + m12 * other.m22 + m13 * other.m32; | |
res.m21 = m21 * other.m11 + m22 * other.m21 + m23 * other.m31; | |
res.m22 = m21 * other.m12 + m22 * other.m22 + m23 * other.m32; | |
res.m31 = m31 * other.m11 + m32 * other.m21 + m33 * other.m31; | |
res.m32 = m31 * other.m12 + m32 * other.m22 + m33 * other.m32; | |
return res; | |
} | |
inline __device__ __host__ float3 operator*(const float3 &v) const { | |
return make_float3( | |
m11*v.x + m12*v.y + m13*v.z, | |
m21*v.x + m22*v.y + m23*v.z, | |
m31*v.x + m32*v.y + m33*v.z | |
); | |
} | |
inline __device__ __host__ float3x3 operator*(const float t) const { | |
float3x3 res; | |
res.m11 = m11 * t; res.m12 = m12 * t; res.m13 = m13 * t; | |
res.m21 = m21 * t; res.m22 = m22 * t; res.m23 = m23 * t; | |
res.m31 = m31 * t; res.m32 = m32 * t; res.m33 = m33 * t; | |
return res; | |
} | |
inline __device__ __host__ float3x3 operator+(const float3x3 &other) const { | |
float3x3 res; | |
res.m11 = m11 + other.m11; res.m12 = m12 + other.m12; res.m13 = m13 + other.m13; | |
res.m21 = m21 + other.m21; res.m22 = m22 + other.m22; res.m23 = m23 + other.m23; | |
res.m31 = m31 + other.m31; res.m32 = m32 + other.m32; res.m33 = m33 + other.m33; | |
return res; | |
} | |
inline __device__ __host__ float3x3 operator-(const float3x3 &other) const { | |
float3x3 res; | |
res.m11 = m11 - other.m11; res.m12 = m12 - other.m12; res.m13 = m13 - other.m13; | |
res.m21 = m21 - other.m21; res.m22 = m22 - other.m22; res.m23 = m23 - other.m23; | |
res.m31 = m31 - other.m31; res.m32 = m32 - other.m32; res.m33 = m33 - other.m33; | |
return res; | |
} | |
static inline __device__ __host__ float3x3 getIdentity() { | |
float3x3 res; | |
res.setZero(); | |
res.m11 = res.m22 = res.m33 = 1.0f; | |
return res; | |
} | |
static inline __device__ __host__ float3x3 getZeroMatrix() { | |
float3x3 res; | |
res.setZero(); | |
return res; | |
} | |
static inline __device__ __host__ float3x3 getDiagonalMatrix(float diag = 1.0f) { | |
float3x3 res; | |
res.m11 = diag; res.m12 = 0.0f; res.m13 = 0.0f; | |
res.m21 = 0.0f; res.m22 = diag; res.m23 = 0.0f; | |
res.m31 = 0.0f; res.m32 = 0.0f; res.m33 = diag; | |
return res; | |
} | |
static inline __device__ __host__ float3x3 tensorProduct(const float3 &v, const float3 &vt) { | |
float3x3 res; | |
res.m11 = v.x * vt.x; res.m12 = v.x * vt.y; res.m13 = v.x * vt.z; | |
res.m21 = v.y * vt.x; res.m22 = v.y * vt.y; res.m23 = v.y * vt.z; | |
res.m31 = v.z * vt.x; res.m32 = v.z * vt.y; res.m33 = v.z * vt.z; | |
return res; | |
} | |
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
union { | |
struct { | |
float m11; float m12; float m13; | |
float m21; float m22; float m23; | |
float m31; float m32; float m33; | |
}; | |
float entries[9]; | |
float entries2[3][3]; | |
}; | |
}; | |
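// Usage sketch (illustrative; the pointer and count are assumed inputs): summing
// outer products of points into a covariance-style matrix using tensorProduct()
// and add() from the float3x3 class above.
inline __device__ __host__ float3x3 exampleOuterProductSum(const float3* pts, int count) {
    float3x3 C = float3x3::getZeroMatrix();
    for (int i = 0; i < count; i++) C.add(float3x3::tensorProduct(pts[i], pts[i]));
    return C;
}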
inline __device__ __host__ float2x3 matMul(const float2x3& m0, const float3x3& m1) | |
{ | |
float2x3 res; | |
res.m11 = m0.m11*m1.m11+m0.m12*m1.m21+m0.m13*m1.m31; | |
res.m12 = m0.m11*m1.m12+m0.m12*m1.m22+m0.m13*m1.m32; | |
res.m13 = m0.m11*m1.m13+m0.m12*m1.m23+m0.m13*m1.m33; | |
res.m21 = m0.m21*m1.m11+m0.m22*m1.m21+m0.m23*m1.m31; | |
res.m22 = m0.m21*m1.m12+m0.m22*m1.m22+m0.m23*m1.m32; | |
res.m23 = m0.m21*m1.m13+m0.m22*m1.m23+m0.m23*m1.m33; | |
return res; | |
} | |
// (1x2) row matrix as float2 | |
inline __device__ __host__ float3 matMul(const float2& m0, const float2x3& m1) | |
{ | |
float3 res; | |
res.x = m0.x*m1.m11+m0.y*m1.m21; | |
res.y = m0.x*m1.m12+m0.y*m1.m22; | |
res.z = m0.x*m1.m13+m0.y*m1.m23; | |
return res; | |
} | |
class float3x4 { | |
public: | |
inline __device__ __host__ float3x4() { | |
} | |
inline __device__ __host__ float3x4(const float values[12]) { | |
m11 = values[0]; m12 = values[1]; m13 = values[2]; m14 = values[3]; | |
m21 = values[4]; m22 = values[5]; m23 = values[6]; m24 = values[7]; | |
m31 = values[8]; m32 = values[9]; m33 = values[10]; m34 = values[11]; | |
} | |
inline __device__ __host__ float3x4(const float3x4& other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
} | |
inline __device__ __host__ float3x4(const float3x3& other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = 0.0f; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = 0.0f; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = 0.0f; | |
} | |
inline __device__ __host__ float3x4& operator=(const float3x4 &other) {
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
return *this; | |
} | |
inline __device__ __host__ float3x4& operator=(const float3x3 &other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = 0.0f; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = 0.0f; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = 0.0f; | |
return *this; | |
} | |
//! assumes the last line of the matrix implicitly to be (0,0,0,1) | |
inline __device__ __host__ float4 operator*(const float4 &v) const { | |
return make_float4( | |
m11*v.x + m12*v.y + m13*v.z + m14*v.w, | |
m21*v.x + m22*v.y + m23*v.z + m24*v.w, | |
m31*v.x + m32*v.y + m33*v.z + m34*v.w, | |
v.w | |
); | |
} | |
//! assumes an implicit 1 in w component of the input vector | |
inline __device__ __host__ float3 operator*(const float3 &v) const { | |
return make_float3( | |
m11*v.x + m12*v.y + m13*v.z + m14, | |
m21*v.x + m22*v.y + m23*v.z + m24, | |
m31*v.x + m32*v.y + m33*v.z + m34 | |
); | |
} | |
//! matrix scalar multiplication | |
inline __device__ __host__ float3x4 operator*(const float t) const { | |
float3x4 res; | |
res.m11 = m11 * t; res.m12 = m12 * t; res.m13 = m13 * t; res.m14 = m14 * t; | |
res.m21 = m21 * t; res.m22 = m22 * t; res.m23 = m23 * t; res.m24 = m24 * t; | |
res.m31 = m31 * t; res.m32 = m32 * t; res.m33 = m33 * t; res.m34 = m34 * t; | |
return res; | |
} | |
inline __device__ __host__ float3x4& operator*=(const float t) { | |
*this = *this * t; | |
return *this; | |
} | |
//! matrix scalar division | |
inline __device__ __host__ float3x4 operator/(const float t) const { | |
float3x4 res; | |
res.m11 = m11 / t; res.m12 = m12 / t; res.m13 = m13 / t; res.m14 = m14 / t; | |
res.m21 = m21 / t; res.m22 = m22 / t; res.m23 = m23 / t; res.m24 = m24 / t; | |
res.m31 = m31 / t; res.m32 = m32 / t; res.m33 = m33 / t; res.m34 = m34 / t; | |
return res; | |
} | |
inline __device__ __host__ float3x4& operator/=(const float t) { | |
*this = *this / t; | |
return *this; | |
} | |
//! assumes the last line of the matrix implicitly to be (0,0,0,1) | |
inline __device__ __host__ float3x4 operator*(const float3x4 &other) const { | |
float3x4 res; | |
res.m11 = m11*other.m11 + m12*other.m21 + m13*other.m31; | |
res.m12 = m11*other.m12 + m12*other.m22 + m13*other.m32; | |
res.m13 = m11*other.m13 + m12*other.m23 + m13*other.m33; | |
res.m14 = m11*other.m14 + m12*other.m24 + m13*other.m34 + m14; | |
res.m21 = m21*other.m11 + m22*other.m21 + m23*other.m31; | |
res.m22 = m21*other.m12 + m22*other.m22 + m23*other.m32; | |
res.m23 = m21*other.m13 + m22*other.m23 + m23*other.m33; | |
res.m24 = m21*other.m14 + m22*other.m24 + m23*other.m34 + m24; | |
res.m31 = m31*other.m11 + m32*other.m21 + m33*other.m31; | |
res.m32 = m31*other.m12 + m32*other.m22 + m33*other.m32; | |
res.m33 = m31*other.m13 + m32*other.m23 + m33*other.m33; | |
res.m34 = m31*other.m14 + m32*other.m24 + m33*other.m34 + m34; | |
//res.m41 = m41*other.m11 + m42*other.m21 + m43*other.m31 + m44*other.m41; | |
//res.m42 = m41*other.m12 + m42*other.m22 + m43*other.m32 + m44*other.m42; | |
//res.m43 = m41*other.m13 + m42*other.m23 + m43*other.m33 + m44*other.m43; | |
//res.m44 = m41*other.m14 + m42*other.m24 + m43*other.m34 + m44*other.m44; | |
return res; | |
} | |
//! assumes the last line of the matrix implicitly to be (0,0,0,1); and a (0,0,0) translation of other | |
inline __device__ __host__ float3x4 operator*(const float3x3 &other) const { | |
float3x4 res; | |
res.m11 = m11*other.m11 + m12*other.m21 + m13*other.m31; | |
res.m12 = m11*other.m12 + m12*other.m22 + m13*other.m32; | |
res.m13 = m11*other.m13 + m12*other.m23 + m13*other.m33; | |
res.m14 = m14; | |
res.m21 = m21*other.m11 + m22*other.m21 + m23*other.m31; | |
res.m22 = m21*other.m12 + m22*other.m22 + m23*other.m32; | |
res.m23 = m21*other.m13 + m22*other.m23 + m23*other.m33; | |
res.m24 = m24; | |
res.m31 = m31*other.m11 + m32*other.m21 + m33*other.m31; | |
res.m32 = m31*other.m12 + m32*other.m22 + m33*other.m32; | |
res.m33 = m31*other.m13 + m32*other.m23 + m33*other.m33; | |
res.m34 = m34; | |
return res; | |
} | |
inline __device__ __host__ float& operator()(int i, int j) { | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float operator()(int i, int j) const { | |
return entries2[i][j]; | |
} | |
//! returns the translation part of the matrix | |
inline __device__ __host__ float3 getTranslation() { | |
return make_float3(m14, m24, m34); | |
} | |
//! sets only the translation part of the matrix (other values remain unchanged) | |
inline __device__ __host__ void setTranslation(const float3 &t) { | |
m14 = t.x; | |
m24 = t.y; | |
m34 = t.z; | |
} | |
//! returns the 3x3 part of the matrix | |
inline __device__ __host__ float3x3 getFloat3x3() { | |
float3x3 ret; | |
ret.m11 = m11; ret.m12 = m12; ret.m13 = m13; | |
ret.m21 = m21; ret.m22 = m22; ret.m23 = m23; | |
ret.m31 = m31; ret.m32 = m32; ret.m33 = m33; | |
return ret; | |
} | |
//! sets the 3x3 part of the matrix (other values remain unchanged) | |
inline __device__ __host__ void setFloat3x3(const float3x3 &other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; | |
} | |
//! inverts the matrix | |
inline __device__ __host__ void inverse() { | |
*this = getInverse(); | |
} | |
//! computes the inverse of the matrix | |
inline __device__ __host__ float3x4 getInverse() { | |
float3x3 A = getFloat3x3(); | |
A.invert(); | |
float3 t = getTranslation(); | |
t = A*t; | |
float3x4 ret; | |
ret.setFloat3x3(A); | |
ret.setTranslation(make_float3(-t.x, -t.y, -t.z)); //float3 doesn't have unary '-'... thank you cuda | |
return ret; | |
} | |
/* drops last column of 3x4 */ | |
inline __device__ __host__ float3x3 get3x3() { | |
float3x3 other; | |
other.m11 = m11; other.m12 = m12; other.m13 = m13; | |
other.m21 = m21; other.m22 = m22; other.m23 = m23; | |
other.m31 = m31; other.m32 = m32; other.m33 = m33; | |
return other; | |
} | |
//! prints the matrix; only host | |
__host__ void print() { | |
std::cout << | |
m11 << " " << m12 << " " << m13 << " " << m14 << std::endl << | |
m21 << " " << m22 << " " << m23 << " " << m24 << std::endl << | |
m31 << " " << m32 << " " << m33 << " " << m34 << std::endl << | |
std::endl; | |
} | |
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
union { | |
struct { | |
float m11; float m12; float m13; float m14; | |
float m21; float m22; float m23; float m24; | |
float m31; float m32; float m33; float m34; | |
}; | |
float entries[12];
float entries2[3][4]; | |
}; | |
}; | |
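// Usage sketch (illustrative, assumed name): float3x4 behaves as an affine transform
// with an implicit (0,0,0,1) bottom row, so multiplying a float3 applies the 3x3
// part and then adds the translation column.
inline __device__ __host__ float3 exampleTransformPoint(const float3x4& T, const float3& p) {
    return T * p;
}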
class float4x4 { | |
public: | |
inline __device__ __host__ float4x4() { | |
} | |
inline __device__ __host__ float4x4(const float values[16]) { | |
m11 = values[0]; m12 = values[1]; m13 = values[2]; m14 = values[3]; | |
m21 = values[4]; m22 = values[5]; m23 = values[6]; m24 = values[7]; | |
m31 = values[8]; m32 = values[9]; m33 = values[10]; m34 = values[11]; | |
m41 = values[12]; m42 = values[13]; m43 = values[14]; m44 = values[15]; | |
} | |
inline __device__ __host__ float4x4(const float4x4& other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
m41 = other.m41; m42 = other.m42; m43 = other.m43; m44 = other.m44; | |
} | |
//implicitly assumes last line to (0,0,0,1) | |
inline __device__ __host__ float4x4(const float3x4& other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
m41 = 0.0f; m42 = 0.0f; m43 = 0.0f; m44 = 1.0f; | |
} | |
inline __device__ __host__ float4x4(const float3x3& other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = 0.0f; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = 0.0f; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = 0.0f; | |
m41 = 0.0f; m42 = 0.0f; m43 = 0.0f; m44 = 1.0f; | |
} | |
inline __device__ __host__ float4x4& operator=(const float4x4 &other) {
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
m41 = other.m41; m42 = other.m42; m43 = other.m43; m44 = other.m44; | |
return *this; | |
} | |
inline __device__ __host__ float4x4& operator=(const float3x4 &other) {
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
m41 = 0.0f; m42 = 0.0f; m43 = 0.0f; m44 = 1.0f; | |
return *this; | |
} | |
inline __device__ __host__ float4x4& operator=(const float3x3 &other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = 0.0f; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = 0.0f; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = 0.0f; | |
m41 = 0.0f; m42 = 0.0f; m43 = 0.0f; m44 = 1.0f; | |
return *this; | |
} | |
inline __device__ __host__ bool operator==(const float4x4 &other) { | |
bool equal = true; | |
equal &= m11 == other.m11; equal &= m12 == other.m12; equal &= m13 == other.m13; equal &= m14 == other.m14; | |
equal &= m21 == other.m21; equal &= m22 == other.m22; equal &= m23 == other.m23; equal &= m24 == other.m24; | |
equal &= m31 == other.m31; equal &= m32 == other.m32; equal &= m33 == other.m33; equal &= m34 == other.m34; | |
equal &= m41 == other.m41; equal &= m42 == other.m42; equal &= m43 == other.m43; equal &= m44 == other.m44; | |
return equal; | |
} | |
inline __device__ __host__ bool operator!=(const float4x4 &other) { | |
bool equal = true; | |
equal &= m11 == other.m11; equal &= m12 == other.m12; equal &= m13 == other.m13; equal &= m14 == other.m14; | |
equal &= m21 == other.m21; equal &= m22 == other.m22; equal &= m23 == other.m23; equal &= m24 == other.m24; | |
equal &= m31 == other.m31; equal &= m32 == other.m32; equal &= m33 == other.m33; equal &= m34 == other.m34; | |
equal &= m41 == other.m41; equal &= m42 == other.m42; equal &= m43 == other.m43; equal &= m44 == other.m44; | |
return !equal; | |
} | |
//! not tested | |
inline __device__ __host__ float4x4 operator*(const float4x4 &other) const { | |
float4x4 res; | |
res.m11 = m11*other.m11 + m12*other.m21 + m13*other.m31 + m14*other.m41; | |
res.m12 = m11*other.m12 + m12*other.m22 + m13*other.m32 + m14*other.m42; | |
res.m13 = m11*other.m13 + m12*other.m23 + m13*other.m33 + m14*other.m43; | |
res.m14 = m11*other.m14 + m12*other.m24 + m13*other.m34 + m14*other.m44; | |
res.m21 = m21*other.m11 + m22*other.m21 + m23*other.m31 + m24*other.m41; | |
res.m22 = m21*other.m12 + m22*other.m22 + m23*other.m32 + m24*other.m42; | |
res.m23 = m21*other.m13 + m22*other.m23 + m23*other.m33 + m24*other.m43; | |
res.m24 = m21*other.m14 + m22*other.m24 + m23*other.m34 + m24*other.m44; | |
res.m31 = m31*other.m11 + m32*other.m21 + m33*other.m31 + m34*other.m41; | |
res.m32 = m31*other.m12 + m32*other.m22 + m33*other.m32 + m34*other.m42; | |
res.m33 = m31*other.m13 + m32*other.m23 + m33*other.m33 + m34*other.m43; | |
res.m34 = m31*other.m14 + m32*other.m24 + m33*other.m34 + m34*other.m44; | |
res.m41 = m41*other.m11 + m42*other.m21 + m43*other.m31 + m44*other.m41; | |
res.m42 = m41*other.m12 + m42*other.m22 + m43*other.m32 + m44*other.m42; | |
res.m43 = m41*other.m13 + m42*other.m23 + m43*other.m33 + m44*other.m43; | |
res.m44 = m41*other.m14 + m42*other.m24 + m43*other.m34 + m44*other.m44; | |
return res; | |
} | |
// untested | |
inline __device__ __host__ float4 operator*(const float4& v) const | |
{ | |
return make_float4( | |
m11*v.x + m12*v.y + m13*v.z + m14*v.w, | |
m21*v.x + m22*v.y + m23*v.z + m24*v.w, | |
m31*v.x + m32*v.y + m33*v.z + m34*v.w, | |
m41*v.x + m42*v.y + m43*v.z + m44*v.w | |
); | |
} | |
// untested | |
//implicitly assumes w to be 1 | |
inline __device__ __host__ float3 operator*(const float3& v) const | |
{ | |
return make_float3( | |
m11*v.x + m12*v.y + m13*v.z + m14*1.0f, | |
m21*v.x + m22*v.y + m23*v.z + m24*1.0f, | |
m31*v.x + m32*v.y + m33*v.z + m34*1.0f | |
); | |
} | |
inline __device__ __host__ float& operator()(int i, int j) { | |
return entries2[i][j]; | |
} | |
inline __device__ __host__ float operator()(int i, int j) const { | |
return entries2[i][j]; | |
} | |
static inline __device__ __host__ void swap(float& v0, float& v1) { | |
float tmp = v0; | |
v0 = v1; | |
v1 = tmp; | |
} | |
inline __device__ __host__ void transpose() { | |
swap(m12, m21); | |
swap(m13, m31); | |
swap(m23, m32); | |
swap(m41, m14); | |
swap(m42, m24); | |
swap(m43, m34); | |
} | |
inline __device__ __host__ float4x4 getTranspose() const { | |
float4x4 ret = *this; | |
ret.transpose(); | |
return ret; | |
} | |
inline __device__ __host__ void invert() { | |
*this = getInverse(); | |
} | |
//! return the inverse matrix; but does not change the current matrix | |
inline __device__ __host__ float4x4 getInverse() const { | |
float inv[16]; | |
inv[0] = entries[5] * entries[10] * entries[15] - | |
entries[5] * entries[11] * entries[14] - | |
entries[9] * entries[6] * entries[15] + | |
entries[9] * entries[7] * entries[14] + | |
entries[13] * entries[6] * entries[11] - | |
entries[13] * entries[7] * entries[10]; | |
inv[4] = -entries[4] * entries[10] * entries[15] + | |
entries[4] * entries[11] * entries[14] + | |
entries[8] * entries[6] * entries[15] - | |
entries[8] * entries[7] * entries[14] - | |
entries[12] * entries[6] * entries[11] + | |
entries[12] * entries[7] * entries[10]; | |
inv[8] = entries[4] * entries[9] * entries[15] - | |
entries[4] * entries[11] * entries[13] - | |
entries[8] * entries[5] * entries[15] + | |
entries[8] * entries[7] * entries[13] + | |
entries[12] * entries[5] * entries[11] - | |
entries[12] * entries[7] * entries[9]; | |
inv[12] = -entries[4] * entries[9] * entries[14] + | |
entries[4] * entries[10] * entries[13] + | |
entries[8] * entries[5] * entries[14] - | |
entries[8] * entries[6] * entries[13] - | |
entries[12] * entries[5] * entries[10] + | |
entries[12] * entries[6] * entries[9]; | |
inv[1] = -entries[1] * entries[10] * entries[15] + | |
entries[1] * entries[11] * entries[14] + | |
entries[9] * entries[2] * entries[15] - | |
entries[9] * entries[3] * entries[14] - | |
entries[13] * entries[2] * entries[11] + | |
entries[13] * entries[3] * entries[10]; | |
inv[5] = entries[0] * entries[10] * entries[15] - | |
entries[0] * entries[11] * entries[14] - | |
entries[8] * entries[2] * entries[15] + | |
entries[8] * entries[3] * entries[14] + | |
entries[12] * entries[2] * entries[11] - | |
entries[12] * entries[3] * entries[10]; | |
inv[9] = -entries[0] * entries[9] * entries[15] + | |
entries[0] * entries[11] * entries[13] + | |
entries[8] * entries[1] * entries[15] - | |
entries[8] * entries[3] * entries[13] - | |
entries[12] * entries[1] * entries[11] + | |
entries[12] * entries[3] * entries[9]; | |
inv[13] = entries[0] * entries[9] * entries[14] - | |
entries[0] * entries[10] * entries[13] - | |
entries[8] * entries[1] * entries[14] + | |
entries[8] * entries[2] * entries[13] + | |
entries[12] * entries[1] * entries[10] - | |
entries[12] * entries[2] * entries[9]; | |
inv[2] = entries[1] * entries[6] * entries[15] - | |
entries[1] * entries[7] * entries[14] - | |
entries[5] * entries[2] * entries[15] + | |
entries[5] * entries[3] * entries[14] + | |
entries[13] * entries[2] * entries[7] - | |
entries[13] * entries[3] * entries[6]; | |
inv[6] = -entries[0] * entries[6] * entries[15] + | |
entries[0] * entries[7] * entries[14] + | |
entries[4] * entries[2] * entries[15] - | |
entries[4] * entries[3] * entries[14] - | |
entries[12] * entries[2] * entries[7] + | |
entries[12] * entries[3] * entries[6]; | |
inv[10] = entries[0] * entries[5] * entries[15] - | |
entries[0] * entries[7] * entries[13] - | |
entries[4] * entries[1] * entries[15] + | |
entries[4] * entries[3] * entries[13] + | |
entries[12] * entries[1] * entries[7] - | |
entries[12] * entries[3] * entries[5]; | |
inv[14] = -entries[0] * entries[5] * entries[14] + | |
entries[0] * entries[6] * entries[13] + | |
entries[4] * entries[1] * entries[14] - | |
entries[4] * entries[2] * entries[13] - | |
entries[12] * entries[1] * entries[6] + | |
entries[12] * entries[2] * entries[5]; | |
inv[3] = -entries[1] * entries[6] * entries[11] + | |
entries[1] * entries[7] * entries[10] + | |
entries[5] * entries[2] * entries[11] - | |
entries[5] * entries[3] * entries[10] - | |
entries[9] * entries[2] * entries[7] + | |
entries[9] * entries[3] * entries[6]; | |
inv[7] = entries[0] * entries[6] * entries[11] - | |
entries[0] * entries[7] * entries[10] - | |
entries[4] * entries[2] * entries[11] + | |
entries[4] * entries[3] * entries[10] + | |
entries[8] * entries[2] * entries[7] - | |
entries[8] * entries[3] * entries[6]; | |
inv[11] = -entries[0] * entries[5] * entries[11] + | |
entries[0] * entries[7] * entries[9] + | |
entries[4] * entries[1] * entries[11] - | |
entries[4] * entries[3] * entries[9] - | |
entries[8] * entries[1] * entries[7] + | |
entries[8] * entries[3] * entries[5]; | |
inv[15] = entries[0] * entries[5] * entries[10] - | |
entries[0] * entries[6] * entries[9] - | |
entries[4] * entries[1] * entries[10] + | |
entries[4] * entries[2] * entries[9] + | |
entries[8] * entries[1] * entries[6] - | |
entries[8] * entries[2] * entries[5]; | |
float matrixDet = entries[0] * inv[0] + entries[1] * inv[4] + entries[2] * inv[8] + entries[3] * inv[12]; | |
float matrixDetr = 1.0f / matrixDet; | |
float4x4 res; | |
for (unsigned int i = 0; i < 16; i++) { | |
res.entries[i] = inv[i] * matrixDetr; | |
} | |
return res; | |
} | |
//! returns the 3x3 part of the matrix | |
inline __device__ __host__ float3x3 getFloat3x3() { | |
float3x3 ret; | |
ret.m11 = m11; ret.m12 = m12; ret.m13 = m13; | |
ret.m21 = m21; ret.m22 = m22; ret.m23 = m23; | |
ret.m31 = m31; ret.m32 = m32; ret.m33 = m33; | |
return ret; | |
} | |
//! sets the 3x3 part of the matrix (other values remain unchanged) | |
inline __device__ __host__ void setFloat3x3(const float3x3 &other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; | |
} | |
//! sets the 4x4 part of the matrix to identity | |
inline __device__ __host__ void setIdentity() | |
{ | |
m11 = 1.0f; m12 = 0.0f; m13 = 0.0f; m14 = 0.0f; | |
m21 = 0.0f; m22 = 1.0f; m23 = 0.0f; m24 = 0.0f; | |
m31 = 0.0f; m32 = 0.0f; m33 = 1.0f; m34 = 0.0f; | |
m41 = 0.0f; m42 = 0.0f; m43 = 0.0f; m44 = 1.0f; | |
} | |
//! sets every entry of the matrix to the given value
inline __device__ __host__ void setValue(float v) | |
{ | |
m11 = v; m12 = v; m13 = v; m14 = v; | |
m21 = v; m22 = v; m23 = v; m24 = v; | |
m31 = v; m32 = v; m33 = v; m34 = v; | |
m41 = v; m42 = v; m43 = v; m44 = v; | |
} | |
//! returns the 3x4 part of the matrix | |
inline __device__ __host__ float3x4 getFloat3x4() { | |
float3x4 ret; | |
ret.m11 = m11; ret.m12 = m12; ret.m13 = m13; ret.m14 = m14; | |
ret.m21 = m21; ret.m22 = m22; ret.m23 = m23; ret.m24 = m24; | |
ret.m31 = m31; ret.m32 = m32; ret.m33 = m33; ret.m34 = m34; | |
return ret; | |
} | |
//! sets the 3x4 part of the matrix (other values remain unchanged) | |
inline __device__ __host__ void setFloat3x4(const float3x4 &other) { | |
m11 = other.m11; m12 = other.m12; m13 = other.m13; m14 = other.m14; | |
m21 = other.m21; m22 = other.m22; m23 = other.m23; m24 = other.m24; | |
m31 = other.m31; m32 = other.m32; m33 = other.m33; m34 = other.m34; | |
} | |
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
union { | |
struct { | |
float m11; float m12; float m13; float m14; | |
float m21; float m22; float m23; float m24; | |
float m31; float m32; float m33; float m34; | |
float m41; float m42; float m43; float m44; | |
}; | |
float entries[16]; | |
float entries2[4][4]; | |
}; | |
}; | |
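// Illustrative check (assumed name and tolerance): multiply a matrix by its
// cofactor-expansion inverse above and compare against the identity; handy when
// debugging near-singular inputs.
inline __device__ __host__ bool exampleInverseConsistent(const float4x4& M, float eps = 1e-4f) {
    float4x4 P = M * M.getInverse();
    float4x4 I;
    I.setIdentity();
    for (unsigned int i = 0; i < 16; i++) {
        float d = P.ptr()[i] - I.ptr()[i];
        if (d < -eps || d > eps) return false;
    }
    return true;
}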
////////////////////////////// | |
// matNxM | |
////////////////////////////// | |
template<unsigned int N, unsigned int M> | |
class matNxM | |
{ | |
public: | |
////////////////////////////// | |
// Initialization | |
////////////////////////////// | |
inline __device__ __host__ matNxM() | |
{ | |
} | |
inline __device__ __host__ matNxM(float* values) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] = values[i]; | |
} | |
inline __device__ __host__ matNxM(const float* values) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] = values[i]; | |
} | |
inline __device__ __host__ matNxM(const matNxM& other) | |
{ | |
(*this) = other; | |
} | |
inline __device__ __host__ matNxM<N,M>& operator=(const matNxM<N,M>& other) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] = other.entries[i]; | |
return *this; | |
} | |
inline __device__ __host__ void setZero() | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] = 0.0f; | |
} | |
inline __device__ __host__ void setIdentity() | |
{ | |
setZero(); | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<min(N, M); i++) entries2D[i][i] = 1.0f; | |
} | |
static inline __device__ __host__ matNxM<N, M> getIdentity() | |
{ | |
matNxM<N, M> R; R.setIdentity(); | |
return R; | |
} | |
////////////////////////////// | |
// Conversion | |
////////////////////////////// | |
// declare generic constructors for compile time checking of matrix size | |
template<class B> | |
explicit inline __device__ __host__ matNxM(const B& other); | |
template<class B> | |
explicit inline __device__ __host__ matNxM(const B& other0, const B& other1); | |
// declare generic casts for compile time checking of matrix size | |
inline __device__ __host__ operator float(); | |
inline __device__ __host__ operator float2(); | |
inline __device__ __host__ operator float3(); | |
inline __device__ __host__ operator float4(); | |
inline __device__ __host__ operator float2x2(); | |
inline __device__ __host__ operator float3x3(); | |
inline __device__ __host__ operator float4x4(); | |
////////////////////////////// | |
// Matrix - Matrix Multiplication | |
////////////////////////////// | |
template<unsigned int NOther, unsigned int MOther> | |
inline __device__ __host__ matNxM<N,MOther> operator*(const matNxM<NOther,MOther>& other) const | |
{ | |
cudaAssert(M == NOther); | |
matNxM<N,MOther> res; | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<MOther; j++) | |
{ | |
float sum = 0.0f; | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int k = 0; k<M; k++) | |
{ | |
sum += (*this)(i, k)*other(k, j); | |
} | |
res(i, j) = sum; | |
} | |
} | |
return res; | |
} | |
////////////////////////////// | |
// Matrix - Inversion | |
////////////////////////////// | |
inline __device__ __host__ float det() const; | |
inline __device__ __host__ matNxM<N, M> getInverse() const; | |
////////////////////////////// | |
// Matrix - Transpose | |
////////////////////////////// | |
inline __device__ __host__ matNxM<M,N> getTranspose() const | |
{ | |
matNxM<M,N> res; | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<M; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<N; j++) | |
{ | |
res(i, j) = (*this)(j, i); | |
} | |
} | |
return res; | |
} | |
inline __device__ void printCUDA() const | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<M; j++) | |
{ | |
printf("%f ", (*this)(i, j)); | |
} | |
printf("\n"); | |
} | |
} | |
inline __device__ bool checkMINF() const | |
{ | |
#ifdef __CUDACC__ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<M; j++) | |
{ | |
if((*this)(i, j) == MINF) return true; | |
} | |
} | |
#endif | |
return false; | |
} | |
inline __device__ bool checkINF() const | |
{ | |
#ifdef __CUDACC__ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<M; j++) | |
{ | |
if((*this)(i, j) == INF) return true; | |
} | |
} | |
#endif | |
return false; | |
} | |
inline __device__ bool checkQNAN() const | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<M; j++) | |
{ | |
if((*this)(i, j) != (*this)(i, j)) return true; | |
} | |
} | |
return false; | |
} | |
////////////////////////////// | |
// Matrix - Matrix Addition | |
////////////////////////////// | |
inline __device__ __host__ matNxM<N,M> operator+(const matNxM<N,M>& other) const | |
{ | |
matNxM<N,M> res = (*this); | |
res+=other; | |
return res; | |
} | |
inline __device__ __host__ matNxM<N,M>& operator+=(const matNxM<N,M>& other) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] += other.entries[i]; | |
return (*this); | |
} | |
////////////////////////////// | |
// Matrix - Negation | |
////////////////////////////// | |
inline __device__ __host__ matNxM<N,M> operator-() const | |
{ | |
matNxM<N,M> res = (*this)*(-1.0f); | |
return res; | |
} | |
////////////////////////////// | |
// Matrix - Matrix Subtraction | |
////////////////////////////// | |
inline __device__ __host__ matNxM<N,M> operator-(const matNxM<N,M>& other) const | |
{ | |
matNxM<N,M> res = (*this); | |
res-=other; | |
return res; | |
} | |
inline __device__ __host__ matNxM<N,M>& operator-=(const matNxM<N,M>& other) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] -= other.entries[i]; | |
return (*this); | |
} | |
////////////////////////////// | |
// Matrix - Scalar Multiplication | |
////////////////////////////// | |
inline __device__ __host__ matNxM<N,M> operator*(const float t) const | |
{ | |
matNxM<N,M> res = (*this); | |
res*=t; | |
return res; | |
} | |
inline __device__ __host__ matNxM<N, M>& operator*=(const float t) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] *= t; | |
return (*this); | |
} | |
////////////////////////////// | |
// Matrix - Scalar Division | |
////////////////////////////// | |
inline __device__ __host__ matNxM<N, M> operator/(const float t) const | |
{ | |
matNxM<N, M> res = (*this); | |
res/=t; | |
return res; | |
} | |
inline __device__ __host__ matNxM<N, M>& operator/=(const float t) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<N*M; i++) entries[i] /= t; | |
return (*this); | |
} | |
////////////////////////// | |
// Element Access | |
////////////////////////// | |
inline __device__ __host__ unsigned int nRows() const
{
return N;
}
inline __device__ __host__ unsigned int nCols() const
{
return M;
}
inline __device__ __host__ float& operator()(unsigned int i, unsigned int j) | |
{ | |
cudaAssert(i<N && j<M); | |
return entries2D[i][j]; | |
} | |
inline __device__ __host__ float operator()(unsigned int i, unsigned int j) const | |
{ | |
cudaAssert(i<N && j<M); | |
return entries2D[i][j]; | |
} | |
inline __device__ __host__ float& operator()(unsigned int i) | |
{ | |
cudaAssert(i<N*M); | |
return entries[i]; | |
} | |
inline __device__ __host__ float operator()(unsigned int i) const | |
{ | |
cudaAssert(i<N*M); | |
return entries[i]; | |
} | |
template<unsigned int NOther, unsigned int MOther> | |
inline __device__ __host__ void getBlock(unsigned int xStart, unsigned int yStart, matNxM<NOther, MOther>& res) const | |
{ | |
cudaAssert(xStart+NOther <= N && yStart+MOther <= M); | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<NOther; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<MOther; j++) | |
{ | |
res(i, j) = (*this)(xStart+i, yStart+j); | |
} | |
} | |
} | |
template<unsigned int NOther, unsigned int MOther> | |
inline __device__ __host__ void setBlock(const matNxM<NOther, MOther>& input, unsigned int xStart, unsigned int yStart)
{ | |
cudaAssert(xStart+NOther <= N && yStart+MOther <= M); | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<NOther; i++) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int j = 0; j<MOther; j++) | |
{ | |
(*this)(xStart+i, yStart+j) = input(i, j); | |
} | |
} | |
} | |
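// Usage sketch (illustrative): reading and writing sub-blocks, e.g. the
// upper-left 3x3 rotation part of a 4x4 transform:
//   matNxM<4, 4> T; matNxM<3, 3> R;
//   T.getBlock(0, 0, R);   // copy the block starting at row 0, column 0 into R
//   T.setBlock(R, 0, 0);   // write R back to the same position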
inline __device__ __host__ const float* ptr() const { | |
return entries; | |
} | |
inline __device__ __host__ float* ptr() { | |
return entries; | |
} | |
// Norms (only valid for vectors, i.e. Nx1 or 1xM matrices)
inline __device__ __host__ float norm1DSquared() const | |
{ | |
cudaAssert(M==1 || N==1); | |
float sum = 0.0f; | |
for(unsigned int i = 0; i<(unsigned int)max(N, M); i++) sum += entries[i]*entries[i]; | |
return sum; | |
} | |
inline __device__ __host__ float norm1D() const | |
{ | |
return sqrtf(norm1DSquared());
} | |
private: | |
union | |
{ | |
float entries[N*M]; | |
float entries2D[N][M]; | |
}; | |
}; | |
////////////////////////////// | |
// Scalar - Matrix Multiplication | |
////////////////////////////// | |
template<unsigned int N, unsigned int M> | |
inline __device__ __host__ matNxM<N,M> operator*(const float t, const matNxM<N, M>& mat) | |
{ | |
matNxM<N,M> res = mat; | |
res*=t; | |
return res; | |
} | |
////////////////////////////// | |
// Matrix Inversion | |
////////////////////////////// | |
template<> | |
inline __device__ __host__ float matNxM<3, 3>::det() const | |
{ | |
const float& m11 = entries2D[0][0]; | |
const float& m12 = entries2D[0][1]; | |
const float& m13 = entries2D[0][2]; | |
const float& m21 = entries2D[1][0]; | |
const float& m22 = entries2D[1][1]; | |
const float& m23 = entries2D[1][2]; | |
const float& m31 = entries2D[2][0]; | |
const float& m32 = entries2D[2][1]; | |
const float& m33 = entries2D[2][2]; | |
return m11*m22*m33 + m12*m23*m31 + m13*m21*m32 - m31*m22*m13 - m32*m23*m11 - m33*m21*m12; | |
} | |
template<> | |
inline __device__ __host__ matNxM<3, 3> matNxM<3, 3>::getInverse() const | |
{ | |
matNxM<3, 3> res; | |
res.entries[0] = entries[4]*entries[8] - entries[5]*entries[7]; | |
res.entries[1] = -entries[1]*entries[8] + entries[2]*entries[7]; | |
res.entries[2] = entries[1]*entries[5] - entries[2]*entries[4]; | |
res.entries[3] = -entries[3]*entries[8] + entries[5]*entries[6]; | |
res.entries[4] = entries[0]*entries[8] - entries[2]*entries[6]; | |
res.entries[5] = -entries[0]*entries[5] + entries[2]*entries[3]; | |
res.entries[6] = entries[3]*entries[7] - entries[4]*entries[6]; | |
res.entries[7] = -entries[0]*entries[7] + entries[1]*entries[6]; | |
res.entries[8] = entries[0]*entries[4] - entries[1]*entries[3]; | |
return res*(1.0f/det()); | |
} | |
template<> | |
inline __device__ __host__ float matNxM<2, 2>::det() const | |
{ | |
return (*this)(0, 0)*(*this)(1, 1)-(*this)(1, 0)*(*this)(0, 1); | |
} | |
template<> | |
inline __device__ __host__ matNxM<2, 2> matNxM<2, 2>::getInverse() const | |
{ | |
matNxM<2, 2> res; | |
res(0, 0) = (*this)(1, 1); res(0, 1) = -(*this)(0, 1); | |
res(1, 0) = -(*this)(1, 0); res(1, 1) = (*this)(0, 0); | |
return res*(1.0f/det()); | |
} | |
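// Note: neither inverse checks for a singular matrix; if det() is zero the
// division above yields INF/NaN entries. A cheap guard (illustrative, not part
// of the original code) is to test fabsf(A.det()) > eps before inverting, or to
// verify that A * A.getInverse() is close to the identity.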
////////////////////////////// | |
// Conversion | |
////////////////////////////// | |
// To Matrix from float scalar
template<> | |
template<> | |
inline __device__ __host__ matNxM<1, 1>::matNxM(const float& other) | |
{ | |
entries[0] = other; | |
} | |
// To Matrix from floatNxN | |
template<> | |
template<> | |
inline __device__ __host__ matNxM<2, 2>::matNxM(const float2x2& other) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<4; i++) entries[i] = other.entries[i]; | |
} | |
template<> | |
template<> | |
inline __device__ __host__ matNxM<3, 3>::matNxM(const float3x3& other) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<9; i++) entries[i] = other.entries[i]; | |
} | |
template<> | |
template<> | |
inline __device__ __host__ matNxM<4, 4>::matNxM(const float4x4& other) | |
{ | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<16; i++) entries[i] = other.entries[i]; | |
} | |
// To Matrix from two float3 column vectors
template<>
template<> | |
inline __device__ __host__ matNxM<3, 2>::matNxM(const float3& col0, const float3& col1) | |
{ | |
entries2D[0][0] = col0.x; entries2D[0][1] = col1.x; | |
entries2D[1][0] = col0.y; entries2D[1][1] = col1.y; | |
entries2D[2][0] = col0.z; entries2D[2][1] = col1.z; | |
} | |
// To floatNxM from Matrix | |
template<> | |
inline __device__ __host__ matNxM<4, 4>::operator float4x4() | |
{ | |
float4x4 res; | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<16; i++) res.entries[i] = entries[i]; | |
return res; | |
} | |
template<> | |
inline __device__ __host__ matNxM<3, 3>::operator float3x3() | |
{ | |
float3x3 res; | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<9; i++) res.entries[i] = entries[i]; | |
return res; | |
} | |
template<> | |
inline __device__ __host__ matNxM<2, 2>::operator float2x2() | |
{ | |
float2x2 res; | |
__CONDITIONAL_UNROLL__ | |
for(unsigned int i = 0; i<4; i++) res.entries[i] = entries[i]; | |
return res; | |
} | |
// To Matrix from floatN | |
template<> | |
template<> | |
inline __device__ __host__ matNxM<2, 1>::matNxM(const float2& other) | |
{ | |
entries[0] = other.x; | |
entries[1] = other.y; | |
} | |
template<> | |
template<> | |
inline __device__ __host__ matNxM<3, 1>::matNxM(const float3& other) | |
{ | |
entries[0] = other.x; | |
entries[1] = other.y; | |
entries[2] = other.z; | |
} | |
template<> | |
template<> | |
inline __device__ __host__ matNxM<4, 1>::matNxM(const float4& other) | |
{ | |
entries[0] = other.x; | |
entries[1] = other.y; | |
entries[2] = other.z; | |
entries[3] = other.w; | |
} | |
// To floatN from Matrix | |
template<> | |
inline __device__ __host__ matNxM<1, 1>::operator float() | |
{ | |
return entries[0]; | |
} | |
template<> | |
inline __device__ __host__ matNxM<2, 1>::operator float2() | |
{ | |
return make_float2(entries[0], entries[1]); | |
} | |
template<> | |
inline __device__ __host__ matNxM<3, 1>::operator float3() | |
{ | |
return make_float3(entries[0], entries[1], entries[2]); | |
} | |
template<> | |
inline __device__ __host__ matNxM<4, 1>::operator float4() | |
{ | |
return make_float4(entries[0], entries[1], entries[2], entries[3]); | |
} | |
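// Illustrative round trip between CUDA vector types and column matrices
// (hypothetical usage, relying only on the constructors and conversion
// operators above):
//   float3 v = make_float3(1.0f, 2.0f, 3.0f);
//   matNxM<3, 1> m(v);      // float3 -> 3x1 column matrix
//   float3 w = (float3)m;   // 3x1 column matrix -> float3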
////////////////////////////// | |
// Typedefs | |
////////////////////////////// | |
typedef matNxM<9, 3> mat9x3; | |
typedef matNxM<3, 9> mat3x9; | |
typedef matNxM<9, 1> mat9x1; | |
typedef matNxM<1, 9> mat1x9; | |
typedef matNxM<6, 6> mat6x6; | |
typedef matNxM<6, 1> mat6x1; | |
typedef matNxM<1, 6> mat1x6; | |
typedef matNxM<3, 6> mat3x6; | |
typedef matNxM<6, 3> mat6x3; | |
typedef matNxM<4, 4> mat4x4; | |
typedef matNxM<4, 1> mat4x1; | |
typedef matNxM<1, 4> mat1x4; | |
typedef matNxM<3, 3> mat3x3; | |
typedef matNxM<2, 3> mat2x3; | |
typedef matNxM<3, 2> mat3x2; | |
typedef matNxM<2, 2> mat2x2; | |
typedef matNxM<1, 2> mat1x2; | |
typedef matNxM<2, 1> mat2x1; | |
typedef matNxM<1, 3> mat1x3; | |
typedef matNxM<3, 1> mat3x1; | |
typedef matNxM<1, 1> mat1x1; | |
typedef matNxM<16, 1> mat16x1; | |
typedef matNxM<16, 3> mat16x3; | |
typedef matNxM<3, 16> mat3x16; |
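//////////////////////////////
// Example (illustrative)
//////////////////////////////
// A minimal usage sketch, not part of the original helper set: solving the 3x3
// linear system A x = b via the explicit inverse. The function name
// "exampleSolve3x3" is hypothetical, and A is assumed to be non-singular.
inline __device__ __host__ float3 exampleSolve3x3(const mat3x3& A, const float3& b)
{
mat3x1 rhs(b);                     // wrap the right-hand side as a 3x1 column
mat3x1 x = A.getInverse() * rhs;   // x = A^{-1} * b
return (float3)x;                  // convert the 3x1 result back to a float3
}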