Various optimizations

2025-08-17 09:21:50 +00:00 · 2016-02-14 17:57:57 -10:00 · 2016-02-14 17:57:57 -10:00 · bd88f654a9
commit bd88f654a9
parent bbb81c96b8
6 changed files with 36 additions and 44 deletions
--- a/include/CFrustum.hpp
+++ b/include/CFrustum.hpp
@ -13,8 +13,7 @@ public:
    
    inline void updatePlanes(const CTransform& modelview, const CProjection& projection)
    {
-        CMatrix4f mv;
-        modelview.toMatrix4f(mv);
+        CMatrix4f mv = modelview.toMatrix4f();
        CMatrix4f mvp = projection.getCachedMatrix() * mv;
        CMatrix4f mvp_rm = mvp.transposed();
        
--- a/include/CMatrix3f.hpp
+++ b/include/CMatrix3f.hpp
@ -127,7 +127,32 @@ public:
    };
 };

-CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs);
+/** 3x3 product: result vector i is lhs[0]*rhs[i][0] + lhs[1]*rhs[i][1] + lhs[2]*rhs[i][2]. Declared `inline` (not `static`) so all includers share one definition. */
+inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
+{
+#if __SSE__
+    TVectorUnion resVec[3];
+    for (unsigned i=0 ; i<3 ; ++i) {
+        resVec[i].mVec128 =
+        _mm_add_ps(_mm_add_ps(
+                   _mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
+                   _mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
+                   _mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
+        resVec[i].v[3] = 0.0f; /* fourth lane of every result vector is forced to zero */
+    }
+    return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
+#else /* scalar fallback: the same three-term sums, written out per element */
+    return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
+                     lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
+                     lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
+                     lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
+                     lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
+                     lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
+                     lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
+                     lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
+                     lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
+#endif
+}

 }

--- a/include/CTransform.hpp
+++ b/include/CTransform.hpp
@ -170,19 +170,14 @@ public:

    inline CVector3f operator*(const CVector3f& other) const {return m_origin + m_basis * other;}
    
-    inline void toMatrix4f(CMatrix4f& mat) const
+    inline CMatrix4f toMatrix4f() const // builds a CMatrix4f from m_basis[0..2] and m_origin and returns it by value
    {
-#if __SSE__
-        mat.vec[0].mVec128 = m_basis[0].mVec128; mat.m[0][3] = 0.0f;
-        mat.vec[1].mVec128 = m_basis[1].mVec128; mat.m[1][3] = 0.0f;
-        mat.vec[2].mVec128 = m_basis[2].mVec128; mat.m[2][3] = 0.0f;
-        mat.vec[3].mVec128 = m_origin.mVec128; mat.m[3][3] = 1.0f;
-#else
-        mat.m[0][0] = m_basis[0][0]; mat.m[0][1] = m_basis[0][1]; mat.m[0][2] = m_basis[0][2]; mat.m[0][3] = 0.0f;
-        mat.m[1][0] = m_basis[1][0]; mat.m[1][1] = m_basis[1][1]; mat.m[1][2] = m_basis[1][2]; mat.m[1][3] = 0.0f;
-        mat.m[2][0] = m_basis[2][0]; mat.m[2][1] = m_basis[2][1]; mat.m[2][2] = m_basis[2][2]; mat.m[2][3] = 0.0f;
-        mat.m[3][0] = m_origin[0]; mat.m[3][1] = m_origin[1]; mat.m[3][2] = m_origin[2]; mat.m[3][3] = 1.0f;
-#endif
+        CMatrix4f ret(m_basis[0], m_basis[1], m_basis[2], m_origin); // arguments: the three basis vectors, then the origin
+        ret[0][3] = 0.0f; // element 3 of ret[0..2] is overwritten with 0 ...
+        ret[1][3] = 0.0f; // NOTE(review): presumably the source vectors' fourth component is not guaranteed - confirm
+        ret[2][3] = 0.0f;
+        ret[3][3] = 1.0f; // ... and element 3 of ret[3] with 1, so the four values read (0, 0, 0, 1)
+        return ret;
    }

    static inline CTransform fromColumns(const CVector3f& m0, const CVector3f& m1, const CVector3f& m2, const CVector3f& m3)
--- a/include/Math.hpp
+++ b/include/Math.hpp
@ -32,7 +32,7 @@ const bool AESNI         = false;
 * Detects CPU capabilities and returns true if SSE4.1 or SSE4.2 is available
 */
 void detectCPU();
-const CPUInfo cpuFeatures();
+const CPUInfo& cpuFeatures();
 class CVector3f;
 class CTransform;
 namespace Math
--- a/src/CMatrix3f.cpp
+++ b/src/CMatrix3f.cpp
@ -30,33 +30,6 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
    m[2][3] = 0.0f;
 }

-CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
-{
-#if __SSE__
-    unsigned i;
-    TVectorUnion resVec[3];
-    for (i=0 ; i<3 ; ++i) {
-        resVec[i].mVec128 =
-        _mm_add_ps(_mm_add_ps(
-                   _mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
-                   _mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
-                   _mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
-        resVec[i].v[3] = 0.0;
-    }
-    return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
-#else
-    return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
-                     lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
-                     lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
-                     lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
-                     lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
-                     lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
-                     lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
-                     lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
-                     lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
-#endif
-}
-
 void CMatrix3f::transpose()
 {
 #if __SSE__
--- a/src/Math.cpp
+++ b/src/Math.cpp
@ -64,7 +64,7 @@ void detectCPU()
 }


-const CPUInfo cpuFeatures() { return g_cpuFeatures; }
+const CPUInfo& cpuFeatures() { return g_cpuFeatures; } // returns a const reference to g_cpuFeatures instead of a copy

 namespace Math
 {