Use reciprocal multiplication for vector-scalar division

2025-12-10 05:57:56 +00:00 · 2018-11-02 22:44:52 -10:00
parent 0fc3e5e9ca
commit 18afb32077
3 changed files with 18 additions and 14 deletions
--- a/include/zeus/CVector2f.hpp
+++ b/include/zeus/CVector2f.hpp
@@ -198,11 +198,12 @@ public:
    }
    inline CVector2f operator/(float val) const
    {
+        float ooval = 1.f / val;
 #if __SSE__
-        TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
-        return CVector2f(_mm_div_ps(mVec128, splat.mVec128));
+        TVectorUnion splat = {{ooval, ooval, 0.0f, 0.0f}};
+        return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
 #else
-        return CVector2f(x / val, y / val);
+        return CVector2f(x * ooval, y * ooval);
 #endif
    }
    inline const CVector2f& operator+=(const CVector2f& rhs)
@@ -280,12 +281,13 @@ public:
    }
    inline const CVector2f& operator/=(float rhs)
    {
+        float oorhs = 1.f / rhs;
 #if __SSE__
-        TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
-        mVec128 = _mm_div_ps(mVec128, splat.mVec128);
+        TVectorUnion splat = {{oorhs, oorhs, 0.f, 0.0f}};
+        mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
 #else
-        x /= rhs;
-        y /= rhs;
+        x *= oorhs;
+        y *= oorhs;
 #endif
        return *this;
    }
--- a/include/zeus/CVector3f.hpp
+++ b/include/zeus/CVector3f.hpp
@@ -198,15 +198,16 @@ public:
    }
    inline CVector3f operator/(float val) const
    {
+        float ooval = 1.f / val;
 #if __SSE__ || __GEKKO_PS__
-        TVectorUnion splat = {{val, val, val, 0.0f}};
+        TVectorUnion splat = {{ooval, ooval, ooval, 0.0f}};
 #endif
 #if __SSE__
-        return CVector3f(_mm_div_ps(mVec128, splat.mVec128));
+        return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
 #elif __GEKKO_PS__
-        return CVector3f(_mm_gekko_div_ps(mVec128, splat.mVec128));
+        return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
 #else
-        return CVector3f(x / val, y / val, z / val);
+        return CVector3f(x * ooval, y * ooval, z * ooval);
 #endif
    }
    inline const CVector3f& operator+=(const CVector3f& rhs)
--- a/include/zeus/CVector4f.hpp
+++ b/include/zeus/CVector4f.hpp
@@ -232,11 +232,12 @@ public:
    }
    inline CVector4f operator/(float val) const
    {
+        float ooval = 1.f / val;
 #if __SSE__
-        TVectorUnion splat = {{val, val, val, val}};
-        return CVector4f(_mm_div_ps(mVec128, splat.mVec128));
+        TVectorUnion splat = {{ooval, ooval, ooval, ooval}};
+        return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
 #else
-        return CVector4f(x / val, y / val, z / val, w / val);
+        return CVector4f(x * ooval, y * ooval, z * ooval, w * ooval);
 #endif
    }
    inline const CVector4f& operator+=(const CVector4f& rhs)