* Update MathLib

2025-12-20 10:25:24 +00:00 · 2015-04-19 16:15:32 -07:00
parent c8fa20ddbf
commit 5df0bae045
5 changed files with 130 additions and 65 deletions
--- a/CPlane.hpp
+++ b/CPlane.hpp
@@ -9,14 +9,7 @@ class ZE_ALIGN(16) CPlane
 public:
    ZE_DECLARE_ALIGNED_ALLOCATOR();

-    CPlane()
-    {
-#if __SSE__
-        mVec128 = _mm_xor_ps(mVec128, mVec128);
-#else
-        a = 0.0f; b = 0.0f; c = 0.0f; d = 0.0f;
-#endif
-    }
+    inline CPlane() {} // Empty body: unlike the constructor it replaces, this does not explicitly zero a, b, c, d.
    CPlane(float a, float b, float c, float d) : a(a), b(b), c(c), d(d) {}
    CPlane(const CVector3f& point, float displacement)
    {
@@ -28,7 +21,16 @@ public:
        d = displacement;
    }
    
-protected:
+    inline void normalize() // Scales vec and d by 1 / vec.length(); asserts that the length is non-zero.
+    {
+        const float origD = d; // read before vec is scaled: d shares the union with vec, so scaling may touch it (TODO confirm)
+        const float len = vec.length();
+        assert(len != 0.0f);
+        const float invLen = 1.0 / len;
+        vec *= invLen;
+        d = origD * invLen;
+    }
+    
    union
    {
        struct
@@ -36,6 +38,7 @@ protected:
            float a, b, c, d;
        };
        float p[4];
+        CVector3f vec;
 #ifdef __SSE__
        __m128 mVec128;
 #endif
--- a/CProjection.cpp
+++ b/CProjection.cpp
@@ -70,5 +70,3 @@ void CProjection::_updateCachedMatrix()
        throw std::runtime_error("attempted to cache invalid projection type");
 }

-
-
--- a/CProjection.hpp
+++ b/CProjection.hpp
@@ -7,38 +7,110 @@
 #define _USE_MATH_DEFINES 1
 #include <math.h>

-typedef union
+union TMatrix4f
 {
    float m[4][4];
 #if __SSE__
    __m128 mVec128[4];
 #endif
-} TMatrix4f;
-static inline void copyMatrix4f(TMatrix4f& dest, const TMatrix4f& src)
+    inline TMatrix4f transposed() const // Returns a copy with ret.m[j][i] == m[i][j]; *this is not modified.
+    {
+        TMatrix4f ret;
+#if __SSE__
+        __m128 T0 = _mm_unpacklo_ps(mVec128[0], mVec128[1]); // interleaves elements 0,1 of vectors 0 and 1
+        __m128 T2 = _mm_unpacklo_ps(mVec128[2], mVec128[3]); // interleaves elements 0,1 of vectors 2 and 3
+        __m128 T1 = _mm_unpackhi_ps(mVec128[0], mVec128[1]); // interleaves elements 2,3 of vectors 0 and 1
+        __m128 T3 = _mm_unpackhi_ps(mVec128[2], mVec128[3]); // interleaves elements 2,3 of vectors 2 and 3
+        ret.mVec128[0] = _mm_movelh_ps(T0, T2); // element 0 of each source vector
+        ret.mVec128[1] = _mm_movehl_ps(T2, T0); // element 1 of each source vector
+        ret.mVec128[2] = _mm_movelh_ps(T1, T3); // element 2 of each source vector
+        ret.mVec128[3] = _mm_movehl_ps(T3, T1); // element 3 of each source vector
+#else
+        ret.m[0][0] = m[0][0];
+        ret.m[1][0] = m[0][1];
+        ret.m[2][0] = m[0][2];
+        ret.m[3][0] = m[0][3];
+        
+        ret.m[0][1] = m[1][0];
+        ret.m[1][1] = m[1][1];
+        ret.m[2][1] = m[1][2];
+        ret.m[3][1] = m[1][3];
+        
+        ret.m[0][2] = m[2][0];
+        ret.m[1][2] = m[2][1];
+        ret.m[2][2] = m[2][2];
+        ret.m[3][2] = m[2][3];
+        
+        ret.m[0][3] = m[3][0];
+        ret.m[1][3] = m[3][1];
+        ret.m[2][3] = m[3][2];
+        ret.m[3][3] = m[3][3];
+#endif
+        return ret;
+    }
+    inline TMatrix4f& operator=(const TMatrix4f& other) // Copies all of other into *this (four __m128 values when __SSE__ is set, 16 floats otherwise) and returns *this.
    {
 #if __SSE__
-    dest.mVec128[0] = src.mVec128[0];
-    dest.mVec128[1] = src.mVec128[1];
-    dest.mVec128[2] = src.mVec128[2];
-    dest.mVec128[3] = src.mVec128[3];
+        mVec128[0] = other.mVec128[0];
+        mVec128[1] = other.mVec128[1];
+        mVec128[2] = other.mVec128[2];
+        mVec128[3] = other.mVec128[3];
 #else
-    dest.m[0][0] = src.m[0][0];
-    dest.m[0][1] = src.m[0][1];
-    dest.m[0][2] = src.m[0][2];
-    dest.m[0][3] = src.m[0][3];
-    dest.m[1][0] = src.m[1][0];
-    dest.m[1][1] = src.m[1][1];
-    dest.m[1][2] = src.m[1][2];
-    dest.m[1][3] = src.m[1][3];
-    dest.m[2][0] = src.m[2][0];
-    dest.m[2][1] = src.m[2][1];
-    dest.m[2][2] = src.m[2][2];
-    dest.m[2][3] = src.m[2][3];
-    dest.m[3][0] = src.m[3][0];
-    dest.m[3][1] = src.m[3][1];
-    dest.m[3][2] = src.m[3][2];
-    dest.m[3][3] = src.m[3][3];
+        m[0][0] = other.m[0][0];
+        m[0][1] = other.m[0][1];
+        m[0][2] = other.m[0][2];
+        m[0][3] = other.m[0][3];
+        m[1][0] = other.m[1][0];
+        m[1][1] = other.m[1][1];
+        m[1][2] = other.m[1][2];
+        m[1][3] = other.m[1][3];
+        m[2][0] = other.m[2][0];
+        m[2][1] = other.m[2][1];
+        m[2][2] = other.m[2][2];
+        m[2][3] = other.m[2][3];
+        m[3][0] = other.m[3][0];
+        m[3][1] = other.m[3][1];
+        m[3][2] = other.m[3][2];
+        m[3][3] = other.m[3][3];
 #endif
+        return *this;
+    }
+};
+static inline TMatrix4f operator*(const TMatrix4f& lhs, const TMatrix4f& rhs) // Matrix product: ret.m[i][j] = sum over k of lhs.m[k][j] * rhs.m[i][k].
+{
+    TMatrix4f ret;
+#if __SSE__
+    unsigned i;
+    for (i=0 ; i<4 ; ++i) { // each result vector i is a sum of the four lhs vectors weighted by the elements of rhs vector i
+        ret.mVec128[i] =
+        _mm_add_ps(_mm_add_ps(_mm_add_ps(
+                   _mm_mul_ps(lhs.mVec128[0], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(0, 0, 0, 0))),
+                   _mm_mul_ps(lhs.mVec128[1], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(1, 1, 1, 1)))),
+                   _mm_mul_ps(lhs.mVec128[2], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(2, 2, 2, 2)))),
+                   _mm_mul_ps(lhs.mVec128[3], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(3, 3, 3, 3))));
+    }
+#else
+    ret.m[0][0] = lhs.m[0][0]*rhs.m[0][0] + lhs.m[1][0]*rhs.m[0][1] + lhs.m[2][0]*rhs.m[0][2] + lhs.m[3][0]*rhs.m[0][3];
+    ret.m[1][0] = lhs.m[0][0]*rhs.m[1][0] + lhs.m[1][0]*rhs.m[1][1] + lhs.m[2][0]*rhs.m[1][2] + lhs.m[3][0]*rhs.m[1][3];
+    ret.m[2][0] = lhs.m[0][0]*rhs.m[2][0] + lhs.m[1][0]*rhs.m[2][1] + lhs.m[2][0]*rhs.m[2][2] + lhs.m[3][0]*rhs.m[2][3];
+    ret.m[3][0] = lhs.m[0][0]*rhs.m[3][0] + lhs.m[1][0]*rhs.m[3][1] + lhs.m[2][0]*rhs.m[3][2] + lhs.m[3][0]*rhs.m[3][3];
+    
+    ret.m[0][1] = lhs.m[0][1]*rhs.m[0][0] + lhs.m[1][1]*rhs.m[0][1] + lhs.m[2][1]*rhs.m[0][2] + lhs.m[3][1]*rhs.m[0][3];
+    ret.m[1][1] = lhs.m[0][1]*rhs.m[1][0] + lhs.m[1][1]*rhs.m[1][1] + lhs.m[2][1]*rhs.m[1][2] + lhs.m[3][1]*rhs.m[1][3];
+    ret.m[2][1] = lhs.m[0][1]*rhs.m[2][0] + lhs.m[1][1]*rhs.m[2][1] + lhs.m[2][1]*rhs.m[2][2] + lhs.m[3][1]*rhs.m[2][3];
+    ret.m[3][1] = lhs.m[0][1]*rhs.m[3][0] + lhs.m[1][1]*rhs.m[3][1] + lhs.m[2][1]*rhs.m[3][2] + lhs.m[3][1]*rhs.m[3][3];
+    
+    ret.m[0][2] = lhs.m[0][2]*rhs.m[0][0] + lhs.m[1][2]*rhs.m[0][1] + lhs.m[2][2]*rhs.m[0][2] + lhs.m[3][2]*rhs.m[0][3];
+    ret.m[1][2] = lhs.m[0][2]*rhs.m[1][0] + lhs.m[1][2]*rhs.m[1][1] + lhs.m[2][2]*rhs.m[1][2] + lhs.m[3][2]*rhs.m[1][3];
+    ret.m[2][2] = lhs.m[0][2]*rhs.m[2][0] + lhs.m[1][2]*rhs.m[2][1] + lhs.m[2][2]*rhs.m[2][2] + lhs.m[3][2]*rhs.m[2][3];
+    ret.m[3][2] = lhs.m[0][2]*rhs.m[3][0] + lhs.m[1][2]*rhs.m[3][1] + lhs.m[2][2]*rhs.m[3][2] + lhs.m[3][2]*rhs.m[3][3];
+    
+    ret.m[0][3] = lhs.m[0][3]*rhs.m[0][0] + lhs.m[1][3]*rhs.m[0][1] + lhs.m[2][3]*rhs.m[0][2] + lhs.m[3][3]*rhs.m[0][3];
+    ret.m[1][3] = lhs.m[0][3]*rhs.m[1][0] + lhs.m[1][3]*rhs.m[1][1] + lhs.m[2][3]*rhs.m[1][2] + lhs.m[3][3]*rhs.m[1][3];
+    ret.m[2][3] = lhs.m[0][3]*rhs.m[2][0] + lhs.m[1][3]*rhs.m[2][1] + lhs.m[2][3]*rhs.m[2][2] + lhs.m[3][3]*rhs.m[2][3];
+    ret.m[3][3] = lhs.m[0][3]*rhs.m[3][0] + lhs.m[1][3]*rhs.m[3][1] + lhs.m[2][3]*rhs.m[3][2] + lhs.m[3][3]*rhs.m[3][3];
+#endif
+    return ret;
 }
 extern const TMatrix4f kIdentityMtx4;

@@ -73,7 +145,7 @@ public:
    {
        m_projType = PROJ_ORTHO;
        m_ortho = SProjOrtho();
-        copyMatrix4f(m_mtx, kIdentityMtx4);
+        m_mtx = kIdentityMtx4;
    }
    CProjection(const CProjection& other) {*this = other;}
    CProjection(const SProjOrtho& ortho) {setOrtho(ortho);}
@@ -85,7 +157,7 @@ public:
        {
            m_projType = other.m_projType;
            m_ortho = other.m_ortho;
-            copyMatrix4f(m_mtx, other.m_mtx);
+            m_mtx = other.m_mtx;
        }
        return *this;
    }
@@ -109,7 +181,7 @@ public:
        return m_persp;
    }
    
-    inline const TMatrix4f& getCachedMatrix() {return m_mtx;}
+    inline const TMatrix4f& getCachedMatrix() const {return m_mtx;} // Read-only reference to m_mtx; nothing is recomputed here.
    
 protected:

--- a/CVector3f.cpp
+++ b/CVector3f.cpp
@@ -4,32 +4,10 @@
 #include <assert.h>
 #include "Math.hpp"

-const CVector3f CVector3f::skOne = CVector3f(1);
+const CVector3f CVector3f::skOne = CVector3f(1.0);
+const CVector3f CVector3f::skNegOne = CVector3f(-1.0);
 const CVector3f CVector3f::skZero;

-void CVector3f::normalize()
-{
-    float mag = length();
-    assert(mag != 0.0);
-    
-    x /= mag;
-    y /= mag;
-    z /= mag;
-}
-
-CVector3f CVector3f::normalized() const
-{
-    CVector3f ret;
-    float mag = length();
-    assert(mag != 0.0);
-
-    ret.x = x/mag;
-    ret.y = y/mag;
-    ret.z = z/mag;
-
-    return ret;
-}
-
 float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b)
 {
    float mag1 = a.length();
--- a/CVector3f.hpp
+++ b/CVector3f.hpp
@@ -4,6 +4,7 @@
 #include "Global.hpp"
 #include <Athena/IStreamReader.hpp>
 #include <math.h>
+#include <assert.h>

 typedef union
 {
@@ -18,7 +19,7 @@ class ZE_ALIGN(16) CVector3f
 public:
    ZE_DECLARE_ALIGNED_ALLOCATOR();
    
-    CVector3f() {zeroOut();}
+    inline CVector3f() {zeroOut();}
 #if __SSE__
    CVector3f(const __m128& mVec128) : mVec128(mVec128) {v[3] = 0.0f;}
 #endif
@@ -148,8 +149,20 @@ public:
 #endif
        return *this;
    }
-    void normalize();
-    CVector3f normalized() const;
+    inline void normalize() // Scales this vector in place by 1 / length(); asserts that the length is non-zero.
+    {
+        const float len = length();
+        assert(len != 0.0);
+        const float invLen = 1.0 / len;
+        *this *= invLen;
+    }
+    inline CVector3f normalized() const // Returns a copy scaled by 1 / length(); *this is left unchanged.
+    {
+        const float len = length();
+        assert(len != 0.0);
+        const float invLen = 1.0 / len;
+        return *this * invLen;
+    }
    inline CVector3f cross(const CVector3f& rhs) const
    {
        return CVector3f(y * rhs.z - z * rhs.y, z * rhs.x - x * rhs.z, x * rhs.y - y * rhs.x);
@@ -236,6 +249,7 @@ public:
    };

    static const CVector3f skOne;
+    static const CVector3f skNegOne;
    static const CVector3f skZero;
 };