Use reciprocal-multiplication for vector-scalar division

This commit is contained in:
Jack Andersen 2018-11-02 22:44:52 -10:00
parent 0fc3e5e9ca
commit 18afb32077
3 changed files with 18 additions and 14 deletions

View File

@@ -198,11 +198,12 @@ public:
}
/// Returns a copy of this vector with both components divided by @p val.
///
/// The division is performed as a multiplication by the reciprocal of
/// @p val: one scalar divide is done up front and each component then
/// needs only a multiply. The result can differ from a true per-component
/// divide by a rounding step.
///
/// @param val Scalar divisor. No zero check is performed; 1.f / val is
///            evaluated as-is.
/// @return A new CVector2f holding the scaled components.
inline CVector2f operator/(float val) const
{
    const float ooval = 1.f / val;
#if __SSE__
    // Only the first two lanes carry the reciprocal; the remaining two
    // lanes are multiplied by 0.0f.
    TVectorUnion splat = {{ooval, ooval, 0.0f, 0.0f}};
    return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
    return CVector2f(x * ooval, y * ooval);
#endif
}
inline const CVector2f& operator+=(const CVector2f& rhs)
@@ -280,12 +281,13 @@ public:
}
/// Divides both components of this vector by @p rhs in place.
///
/// Implemented as a multiplication by the reciprocal of @p rhs, so a
/// single scalar divide is shared by all components. The components are
/// scaled exactly once; the result can differ from a true per-component
/// divide by a rounding step.
///
/// @param rhs Scalar divisor. No zero check is performed; 1.f / rhs is
///            evaluated as-is.
/// @return A const reference to this vector after the update.
inline const CVector2f& operator/=(float rhs)
{
    const float oorhs = 1.f / rhs;
#if __SSE__
    // Only the first two lanes carry the reciprocal; the remaining two
    // lanes are multiplied by 0.0f.
    TVectorUnion splat = {{oorhs, oorhs, 0.f, 0.0f}};
    mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
    x *= oorhs;
    y *= oorhs;
#endif
    return *this;
}

View File

@@ -198,15 +198,16 @@ public:
}
/// Returns a copy of this vector with all three components divided by
/// @p val.
///
/// The division is performed as a multiplication by the reciprocal of
/// @p val: one scalar divide is done up front and each component then
/// needs only a multiply. The result can differ from a true per-component
/// divide by a rounding step.
///
/// @param val Scalar divisor. No zero check is performed; 1.f / val is
///            evaluated as-is.
/// @return A new CVector3f holding the scaled components.
inline CVector3f operator/(float val) const
{
    const float ooval = 1.f / val;
#if __SSE__ || __GEKKO_PS__
    // Shared by both vector paths: the first three lanes carry the
    // reciprocal, the fourth lane is multiplied by 0.0f.
    TVectorUnion splat = {{ooval, ooval, ooval, 0.0f}};
#endif
#if __SSE__
    return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
    return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
    return CVector3f(x * ooval, y * ooval, z * ooval);
#endif
}
inline const CVector3f& operator+=(const CVector3f& rhs)

View File

@@ -232,11 +232,12 @@ public:
}
/// Returns a copy of this vector with all four components divided by
/// @p val.
///
/// The division is performed as a multiplication by the reciprocal of
/// @p val: one scalar divide is done up front and each component then
/// needs only a multiply. The result can differ from a true per-component
/// divide by a rounding step.
///
/// @param val Scalar divisor. No zero check is performed; 1.f / val is
///            evaluated as-is.
/// @return A new CVector4f holding the scaled components.
inline CVector4f operator/(float val) const
{
    const float ooval = 1.f / val;
#if __SSE__
    // All four lanes are live, so the reciprocal is broadcast to each.
    TVectorUnion splat = {{ooval, ooval, ooval, ooval}};
    return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
    return CVector4f(x * ooval, y * ooval, z * ooval, w * ooval);
#endif
}
inline const CVector4f& operator+=(const CVector4f& rhs)