Various optimizations

This commit is contained in:
Jack Andersen 2016-02-14 17:57:57 -10:00
parent bbb81c96b8
commit bd88f654a9
6 changed files with 36 additions and 44 deletions

View File

@ -13,8 +13,7 @@ public:
inline void updatePlanes(const CTransform& modelview, const CProjection& projection)
{
CMatrix4f mv;
modelview.toMatrix4f(mv);
CMatrix4f mv = modelview.toMatrix4f();
CMatrix4f mvp = projection.getCachedMatrix() * mv;
CMatrix4f mvp_rm = mvp.transposed();

View File

@ -127,7 +127,32 @@ public:
};
};
CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs);
/**
 * Product of two CMatrix3f values.
 *
 * Declared `inline` rather than `static`: this definition lives in a header, and a
 * `static` free function there gives every including translation unit its own
 * internal-linkage copy (and an unused-function warning wherever it is not called).
 *
 * @param lhs left-hand operand
 * @param rhs right-hand operand
 * @return a new CMatrix3f holding the product
 */
inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
{
#if __SSE__
    TVectorUnion resVec[3];
    for (unsigned i = 0; i < 3; ++i)
    {
        // Result vector i = lhs[0]*rhs[i][0] + lhs[1]*rhs[i][1] + lhs[2]*rhs[i][2]
        // (assumes ze_splat_ps(v, n) broadcasts lane n of v across the register -- confirm).
        resVec[i].mVec128 =
            _mm_add_ps(_mm_add_ps(
                _mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
                _mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
                _mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
        // Clear the unused fourth lane of each result vector.
        resVec[i].v[3] = 0.0f;
    }
    return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
#else
    // Scalar fallback: nine explicit sums of products handed to the 9-float constructor.
    return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
                     lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
                     lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
                     lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
                     lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
                     lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
                     lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
                     lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
                     lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
#endif
}
}

View File

@ -170,19 +170,14 @@ public:
/** Applies this transform to a vector: m_basis * other, then adds m_origin. */
inline CVector3f operator*(const CVector3f& other) const
{
    return m_origin + m_basis * other;
}
// NOTE(review): this span of the diff shows two versions of toMatrix4f back to back:
// the out-parameter form (first signature, with the #if __SSE__ / #else body) and the
// return-by-value form (second signature, with the `ret` body).
inline void toMatrix4f(CMatrix4f& mat) const
inline CMatrix4f toMatrix4f() const
{
#if __SSE__
// Out-parameter form, SSE path: copy each basis vector and the origin as a whole
// 128-bit vector into mat.vec[0..3], then write m[i][3] = 0, 0, 0, 1.
mat.vec[0].mVec128 = m_basis[0].mVec128; mat.m[0][3] = 0.0f;
mat.vec[1].mVec128 = m_basis[1].mVec128; mat.m[1][3] = 0.0f;
mat.vec[2].mVec128 = m_basis[2].mVec128; mat.m[2][3] = 0.0f;
mat.vec[3].mVec128 = m_origin.mVec128; mat.m[3][3] = 1.0f;
#else
// Out-parameter form, scalar path: the same values written one component at a time.
mat.m[0][0] = m_basis[0][0]; mat.m[0][1] = m_basis[0][1]; mat.m[0][2] = m_basis[0][2]; mat.m[0][3] = 0.0f;
mat.m[1][0] = m_basis[1][0]; mat.m[1][1] = m_basis[1][1]; mat.m[1][2] = m_basis[1][2]; mat.m[1][3] = 0.0f;
mat.m[2][0] = m_basis[2][0]; mat.m[2][1] = m_basis[2][1]; mat.m[2][2] = m_basis[2][2]; mat.m[2][3] = 0.0f;
mat.m[3][0] = m_origin[0]; mat.m[3][1] = m_origin[1]; mat.m[3][2] = m_origin[2]; mat.m[3][3] = 1.0f;
#endif
// Return-by-value form: construct from the three basis vectors plus the origin, then
// set element [3] of each of the four vectors to 0, 0, 0, 1 before returning.
CMatrix4f ret(m_basis[0], m_basis[1], m_basis[2], m_origin);
ret[0][3] = 0.0f;
ret[1][3] = 0.0f;
ret[2][3] = 0.0f;
ret[3][3] = 1.0f;
return ret;
}
static inline CTransform fromColumns(const CVector3f& m0, const CVector3f& m1, const CVector3f& m2, const CVector3f& m3)

View File

@ -32,7 +32,7 @@ const bool AESNI = false;
* Detects CPU capabilities such as SSE4.1 or SSE4.2 availability. Returns nothing (the function is void).
*/
void detectCPU();
// Accessor for the CPUInfo feature record. Shown twice in this diff: first returning
// CPUInfo by const value, then returning it by const reference.
const CPUInfo cpuFeatures();
const CPUInfo& cpuFeatures();
class CVector3f;
class CTransform;
namespace Math

View File

@ -30,33 +30,6 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
m[2][3] = 0.0f;
}
/**
 * Product of two CMatrix3f values (out-of-line definition).
 *
 * With SSE, each of the three result vectors is accumulated from lhs's three vectors
 * scaled by the matching components of one rhs vector; otherwise nine explicit sums of
 * products are passed to the 9-float constructor.
 */
CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
{
#if __SSE__
    TVectorUnion out[3];
    for (unsigned idx = 0; idx < 3; ++idx)
    {
        out[idx].mVec128 = _mm_add_ps(
            _mm_add_ps(_mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[idx].mVec128, 0)),
                       _mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[idx].mVec128, 1))),
            _mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[idx].mVec128, 2)));
        // Zero the fourth lane of the accumulated vector.
        out[idx].v[3] = 0.0;
    }
    return CMatrix3f(out[0].mVec128, out[1].mVec128, out[2].mVec128);
#else
    return CMatrix3f(
        lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
        lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
        lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],

        lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
        lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
        lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],

        lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
        lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
        lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
#endif
}
void CMatrix3f::transpose()
{
#if __SSE__

View File

@ -64,7 +64,7 @@ void detectCPU()
}
// Returns the global g_cpuFeatures record. Shown twice in this diff: the by-value form
// copies the record on every call; the const-reference form returns a reference to the
// global itself.
const CPUInfo cpuFeatures() { return g_cpuFeatures; }
const CPUInfo& cpuFeatures() { return g_cpuFeatures; }
namespace Math
{