From 5df0bae04541671da56dcb911198439abbcd3312 Mon Sep 17 00:00:00 2001 From: Phillip Stephens Date: Sun, 19 Apr 2015 16:15:32 -0700 Subject: [PATCH] * Update MathLib --- CPlane.hpp | 21 ++++---- CProjection.cpp | 2 - CProjection.hpp | 126 +++++++++++++++++++++++++++++++++++++----------- CVector3f.cpp | 26 +--------- CVector3f.hpp | 20 ++++++-- 5 files changed, 130 insertions(+), 65 deletions(-) diff --git a/CPlane.hpp b/CPlane.hpp index 084669a..c9570c6 100644 --- a/CPlane.hpp +++ b/CPlane.hpp @@ -9,14 +9,7 @@ class ZE_ALIGN(16) CPlane public: ZE_DECLARE_ALIGNED_ALLOCATOR(); - CPlane() - { -#if __SSE__ - mVec128 = _mm_xor_ps(mVec128, mVec128); -#else - a = 0.0f; b = 0.0f; c = 0.0f; d = 0.0f; -#endif - } + inline CPlane() {} CPlane(float a, float b, float c, float d) : a(a), b(b), c(c), d(d) {} CPlane(const CVector3f& point, float displacement) { @@ -28,7 +21,16 @@ public: d = displacement; } -protected: + inline void normalize() + { + float nd = d; + float mag = vec.length(); + assert(mag != 0.0f); + mag = 1.0 / mag; + vec *= mag; + d = nd * mag; + } + union { struct @@ -36,6 +38,7 @@ protected: float a, b, c, d; }; float p[4]; + CVector3f vec; #ifdef __SSE__ __m128 mVec128; #endif diff --git a/CProjection.cpp b/CProjection.cpp index cde161c..6349289 100644 --- a/CProjection.cpp +++ b/CProjection.cpp @@ -70,5 +70,3 @@ void CProjection::_updateCachedMatrix() throw std::runtime_error("attempted to cache invalid projection type"); } - - diff --git a/CProjection.hpp b/CProjection.hpp index 6a59122..4c441ec 100644 --- a/CProjection.hpp +++ b/CProjection.hpp @@ -7,38 +7,110 @@ #define _USE_MATH_DEFINES 1 #include -typedef union +union TMatrix4f { float m[4][4]; #if __SSE__ __m128 mVec128[4]; #endif -} TMatrix4f; -static inline void copyMatrix4f(TMatrix4f& dest, const TMatrix4f& src) -{ + inline TMatrix4f transposed() const + { + TMatrix4f ret; #if __SSE__ - dest.mVec128[0] = src.mVec128[0]; - dest.mVec128[1] = src.mVec128[1]; - dest.mVec128[2] = src.mVec128[2]; -
dest.mVec128[3] = src.mVec128[3]; + __m128 T0 = _mm_unpacklo_ps(mVec128[0], mVec128[1]); + __m128 T2 = _mm_unpacklo_ps(mVec128[2], mVec128[3]); + __m128 T1 = _mm_unpackhi_ps(mVec128[0], mVec128[1]); + __m128 T3 = _mm_unpackhi_ps(mVec128[2], mVec128[3]); + ret.mVec128[0] = _mm_movelh_ps(T0, T2); + ret.mVec128[1] = _mm_movehl_ps(T2, T0); + ret.mVec128[2] = _mm_movelh_ps(T1, T3); + ret.mVec128[3] = _mm_movehl_ps(T3, T1); #else - dest.m[0][0] = src.m[0][0]; - dest.m[0][1] = src.m[0][1]; - dest.m[0][2] = src.m[0][2]; - dest.m[0][3] = src.m[0][3]; - dest.m[1][0] = src.m[1][0]; - dest.m[1][1] = src.m[1][1]; - dest.m[1][2] = src.m[1][2]; - dest.m[1][3] = src.m[1][3]; - dest.m[2][0] = src.m[2][0]; - dest.m[2][1] = src.m[2][1]; - dest.m[2][2] = src.m[2][2]; - dest.m[2][3] = src.m[2][3]; - dest.m[3][0] = src.m[3][0]; - dest.m[3][1] = src.m[3][1]; - dest.m[3][2] = src.m[3][2]; - dest.m[3][3] = src.m[3][3]; + ret.m[0][0] = m[0][0]; + ret.m[1][0] = m[0][1]; + ret.m[2][0] = m[0][2]; + ret.m[3][0] = m[0][3]; + + ret.m[0][1] = m[1][0]; + ret.m[1][1] = m[1][1]; + ret.m[2][1] = m[1][2]; + ret.m[3][1] = m[1][3]; + + ret.m[0][2] = m[2][0]; + ret.m[1][2] = m[2][1]; + ret.m[2][2] = m[2][2]; + ret.m[3][2] = m[2][3]; + + ret.m[0][3] = m[3][0]; + ret.m[1][3] = m[3][1]; + ret.m[2][3] = m[3][2]; + ret.m[3][3] = m[3][3]; #endif + return ret; + } + inline TMatrix4f& operator=(const TMatrix4f& other) + { +#if __SSE__ + mVec128[0] = other.mVec128[0]; + mVec128[1] = other.mVec128[1]; + mVec128[2] = other.mVec128[2]; + mVec128[3] = other.mVec128[3]; +#else + m[0][0] = other.m[0][0]; + m[0][1] = other.m[0][1]; + m[0][2] = other.m[0][2]; + m[0][3] = other.m[0][3]; + m[1][0] = other.m[1][0]; + m[1][1] = other.m[1][1]; + m[1][2] = other.m[1][2]; + m[1][3] = other.m[1][3]; + m[2][0] = other.m[2][0]; + m[2][1] = other.m[2][1]; + m[2][2] = other.m[2][2]; + m[2][3] = other.m[2][3]; + m[3][0] = other.m[3][0]; + m[3][1] = other.m[3][1]; + m[3][2] = other.m[3][2]; + m[3][3] = other.m[3][3]; +#endif + return
*this; + } +}; +static inline TMatrix4f operator*(const TMatrix4f& lhs, const TMatrix4f& rhs) +{ + TMatrix4f ret; +#if __SSE__ + unsigned i; + for (i=0 ; i<4 ; ++i) { + ret.mVec128[i] = + _mm_add_ps(_mm_add_ps(_mm_add_ps( + _mm_mul_ps(lhs.mVec128[0], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(0, 0, 0, 0))), + _mm_mul_ps(lhs.mVec128[1], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(1, 1, 1, 1)))), + _mm_mul_ps(lhs.mVec128[2], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(2, 2, 2, 2)))), + _mm_mul_ps(lhs.mVec128[3], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(3, 3, 3, 3)))); + } +#else + ret.m[0][0] = lhs.m[0][0]*rhs.m[0][0] + lhs.m[1][0]*rhs.m[0][1] + lhs.m[2][0]*rhs.m[0][2] + lhs.m[3][0]*rhs.m[0][3]; + ret.m[1][0] = lhs.m[0][0]*rhs.m[1][0] + lhs.m[1][0]*rhs.m[1][1] + lhs.m[2][0]*rhs.m[1][2] + lhs.m[3][0]*rhs.m[1][3]; + ret.m[2][0] = lhs.m[0][0]*rhs.m[2][0] + lhs.m[1][0]*rhs.m[2][1] + lhs.m[2][0]*rhs.m[2][2] + lhs.m[3][0]*rhs.m[2][3]; + ret.m[3][0] = lhs.m[0][0]*rhs.m[3][0] + lhs.m[1][0]*rhs.m[3][1] + lhs.m[2][0]*rhs.m[3][2] + lhs.m[3][0]*rhs.m[3][3]; + + ret.m[0][1] = lhs.m[0][1]*rhs.m[0][0] + lhs.m[1][1]*rhs.m[0][1] + lhs.m[2][1]*rhs.m[0][2] + lhs.m[3][1]*rhs.m[0][3]; + ret.m[1][1] = lhs.m[0][1]*rhs.m[1][0] + lhs.m[1][1]*rhs.m[1][1] + lhs.m[2][1]*rhs.m[1][2] + lhs.m[3][1]*rhs.m[1][3]; + ret.m[2][1] = lhs.m[0][1]*rhs.m[2][0] + lhs.m[1][1]*rhs.m[2][1] + lhs.m[2][1]*rhs.m[2][2] + lhs.m[3][1]*rhs.m[2][3]; + ret.m[3][1] = lhs.m[0][1]*rhs.m[3][0] + lhs.m[1][1]*rhs.m[3][1] + lhs.m[2][1]*rhs.m[3][2] + lhs.m[3][1]*rhs.m[3][3]; + + ret.m[0][2] = lhs.m[0][2]*rhs.m[0][0] + lhs.m[1][2]*rhs.m[0][1] + lhs.m[2][2]*rhs.m[0][2] + lhs.m[3][2]*rhs.m[0][3]; + ret.m[1][2] = lhs.m[0][2]*rhs.m[1][0] + lhs.m[1][2]*rhs.m[1][1] + lhs.m[2][2]*rhs.m[1][2] + lhs.m[3][2]*rhs.m[1][3]; + ret.m[2][2] = lhs.m[0][2]*rhs.m[2][0] + lhs.m[1][2]*rhs.m[2][1] + lhs.m[2][2]*rhs.m[2][2] + lhs.m[3][2]*rhs.m[2][3]; + ret.m[3][2] =
lhs.m[0][2]*rhs.m[3][0] + lhs.m[1][2]*rhs.m[3][1] + lhs.m[2][2]*rhs.m[3][2] + lhs.m[3][2]*rhs.m[3][3]; + + ret.m[0][3] = lhs.m[0][3]*rhs.m[0][0] + lhs.m[1][3]*rhs.m[0][1] + lhs.m[2][3]*rhs.m[0][2] + lhs.m[3][3]*rhs.m[0][3]; + ret.m[1][3] = lhs.m[0][3]*rhs.m[1][0] + lhs.m[1][3]*rhs.m[1][1] + lhs.m[2][3]*rhs.m[1][2] + lhs.m[3][3]*rhs.m[1][3]; + ret.m[2][3] = lhs.m[0][3]*rhs.m[2][0] + lhs.m[1][3]*rhs.m[2][1] + lhs.m[2][3]*rhs.m[2][2] + lhs.m[3][3]*rhs.m[2][3]; + ret.m[3][3] = lhs.m[0][3]*rhs.m[3][0] + lhs.m[1][3]*rhs.m[3][1] + lhs.m[2][3]*rhs.m[3][2] + lhs.m[3][3]*rhs.m[3][3]; +#endif + return ret; } extern const TMatrix4f kIdentityMtx4; @@ -73,7 +145,7 @@ public: { m_projType = PROJ_ORTHO; m_ortho = SProjOrtho(); - copyMatrix4f(m_mtx, kIdentityMtx4); + m_mtx = kIdentityMtx4; } CProjection(const CProjection& other) {*this = other;} CProjection(const SProjOrtho& ortho) {setOrtho(ortho);} @@ -85,7 +157,7 @@ public: { m_projType = other.m_projType; m_ortho = other.m_ortho; - copyMatrix4f(m_mtx, other.m_mtx); + m_mtx = other.m_mtx; } return *this; } @@ -109,7 +181,7 @@ public: return m_persp; } - inline const TMatrix4f& getCachedMatrix() {return m_mtx;} + inline const TMatrix4f& getCachedMatrix() const {return m_mtx;} protected: diff --git a/CVector3f.cpp b/CVector3f.cpp index 42efdfd..5869dea 100644 --- a/CVector3f.cpp +++ b/CVector3f.cpp @@ -4,32 +4,10 @@ #include #include "Math.hpp" -const CVector3f CVector3f::skOne = CVector3f(1); +const CVector3f CVector3f::skOne = CVector3f(1.0); +const CVector3f CVector3f::skNegOne = CVector3f(-1.0); const CVector3f CVector3f::skZero; -void CVector3f::normalize() -{ - float mag = length(); - assert(mag != 0.0); - - x /= mag; - y /= mag; - z /= mag; -} - -CVector3f CVector3f::normalized() const -{ - CVector3f ret; - float mag = length(); - assert(mag != 0.0); - - ret.x = x/mag; - ret.y = y/mag; - ret.z = z/mag; - - return ret; -} - float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) { float mag1 = a.length(); diff
--git a/CVector3f.hpp b/CVector3f.hpp index 975f233..df3497a 100644 --- a/CVector3f.hpp +++ b/CVector3f.hpp @@ -4,6 +4,7 @@ #include "Global.hpp" #include #include +#include typedef union { @@ -18,7 +19,7 @@ class ZE_ALIGN(16) CVector3f public: ZE_DECLARE_ALIGNED_ALLOCATOR(); - CVector3f() {zeroOut();} + inline CVector3f() {zeroOut();} #if __SSE__ CVector3f(const __m128& mVec128) : mVec128(mVec128) {v[3] = 0.0f;} #endif @@ -148,8 +149,20 @@ public: #endif return *this; } - void normalize(); - CVector3f normalized() const; + inline void normalize() + { + float mag = length(); + assert(mag != 0.0); + mag = 1.0 / mag; + *this *= mag; + } + inline CVector3f normalized() const + { + float mag = length(); + assert(mag != 0.0); + mag = 1.0 / mag; + return *this * mag; + } inline CVector3f cross(const CVector3f& rhs) const { return CVector3f(y * rhs.z - z * rhs.y, z * rhs.x - x * rhs.z, x * rhs.y - y * rhs.x); @@ -236,6 +249,7 @@ public: }; static const CVector3f skOne; + static const CVector3f skNegOne; static const CVector3f skZero; };