mirror of https://github.com/AxioDL/zeus.git
Various optimizations
This commit is contained in:
parent
bbb81c96b8
commit
bd88f654a9
|
@ -13,8 +13,7 @@ public:
|
|||
|
||||
inline void updatePlanes(const CTransform& modelview, const CProjection& projection)
|
||||
{
|
||||
CMatrix4f mv;
|
||||
modelview.toMatrix4f(mv);
|
||||
CMatrix4f mv = modelview.toMatrix4f();
|
||||
CMatrix4f mvp = projection.getCachedMatrix() * mv;
|
||||
CMatrix4f mvp_rm = mvp.transposed();
|
||||
|
||||
|
|
|
@ -127,7 +127,32 @@ public:
|
|||
};
|
||||
};
|
||||
|
||||
CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs);
|
||||
/**
 * Multiplies two 3x3 matrices and returns the product lhs * rhs.
 *
 * Result vector i is lhs[0], lhs[1] and lhs[2] scaled by components 0, 1 and 2
 * of rhs[i] respectively and summed.
 *
 * Declared `inline` instead of `static`: an inline function may be defined in
 * every translation unit that includes this definition while still resolving
 * to a single function, whereas `static` gives each including translation unit
 * its own private copy (and an unused-function warning wherever the operator
 * is never called).
 *
 * @param lhs left-hand matrix
 * @param rhs right-hand matrix
 * @return the matrix product
 */
inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
{
#if __SSE__
    TVectorUnion resVec[3];
    for (unsigned i = 0; i < 3; ++i)
    {
        // Broadcast each of the first three components of rhs[i] and use it to
        // scale the matching lhs vector; the three products are then summed.
        resVec[i].mVec128 =
            _mm_add_ps(_mm_add_ps(
                _mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
                _mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
                _mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
        // Force the fourth lane to zero.
        resVec[i].v[3] = 0.0f;
    }
    return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
#else
    // Scalar fallback: the same sums as the SSE path, written out per element.
    return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
                     lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
                     lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
                     lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
                     lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
                     lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
                     lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
                     lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
                     lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
#endif
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -170,19 +170,14 @@ public:
|
|||
|
||||
/**
 * Applies this transform to a vector.
 *
 * @param other vector to transform
 * @return m_basis * other, offset by m_origin
 */
inline CVector3f operator*(const CVector3f& other) const
{
    return m_origin + m_basis * other;
}
|
||||
|
||||
// NOTE(review): this span interleaves two versions of toMatrix4f that share one
// pair of braces: an out-parameter form that fills `mat`, and a by-value form
// that builds and returns `ret`. Presumably the out-parameter form is the one
// being removed -- confirm against the repository before relying on either.
inline void toMatrix4f(CMatrix4f& mat) const
|
||||
inline CMatrix4f toMatrix4f() const
|
||||
{
|
||||
// Out-parameter form, SSE path: copy each basis vector and the origin as a
// whole 128-bit value, then set element [3] of that vector explicitly.
#if __SSE__
|
||||
mat.vec[0].mVec128 = m_basis[0].mVec128; mat.m[0][3] = 0.0f;
|
||||
mat.vec[1].mVec128 = m_basis[1].mVec128; mat.m[1][3] = 0.0f;
|
||||
mat.vec[2].mVec128 = m_basis[2].mVec128; mat.m[2][3] = 0.0f;
|
||||
mat.vec[3].mVec128 = m_origin.mVec128; mat.m[3][3] = 1.0f;
|
||||
// Out-parameter form, scalar path: the same values copied element by element.
#else
|
||||
mat.m[0][0] = m_basis[0][0]; mat.m[0][1] = m_basis[0][1]; mat.m[0][2] = m_basis[0][2]; mat.m[0][3] = 0.0f;
|
||||
mat.m[1][0] = m_basis[1][0]; mat.m[1][1] = m_basis[1][1]; mat.m[1][2] = m_basis[1][2]; mat.m[1][3] = 0.0f;
|
||||
mat.m[2][0] = m_basis[2][0]; mat.m[2][1] = m_basis[2][1]; mat.m[2][2] = m_basis[2][2]; mat.m[2][3] = 0.0f;
|
||||
mat.m[3][0] = m_origin[0]; mat.m[3][1] = m_origin[1]; mat.m[3][2] = m_origin[2]; mat.m[3][3] = 1.0f;
|
||||
#endif
|
||||
// By-value form: construct from the three basis vectors plus the origin, then
// overwrite element [3] of each: 0 for the basis vectors, 1 for the origin.
CMatrix4f ret(m_basis[0], m_basis[1], m_basis[2], m_origin);
|
||||
ret[0][3] = 0.0f;
|
||||
ret[1][3] = 0.0f;
|
||||
ret[2][3] = 0.0f;
|
||||
ret[3][3] = 1.0f;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline CTransform fromColumns(const CVector3f& m0, const CVector3f& m1, const CVector3f& m2, const CVector3f& m3)
|
||||
|
|
|
@ -32,7 +32,7 @@ const bool AESNI = false;
|
|||
* Detects CPU capabilities such as SSE4.1 or SSE4.2 availability. Returns nothing; see cpuFeatures() for the CPUInfo accessor.
|
||||
*/
|
||||
void detectCPU();
|
||||
const CPUInfo cpuFeatures();
|
||||
const CPUInfo& cpuFeatures();
|
||||
class CVector3f;
|
||||
class CTransform;
|
||||
namespace Math
|
||||
|
|
|
@ -30,33 +30,6 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
|
|||
m[2][3] = 0.0f;
|
||||
}
|
||||
|
||||
/**
 * Matrix product of two CMatrix3f values (lhs * rhs).
 *
 * For each index c, the c-th vector of the result is
 *   lhs[0] * rhs[c][0] + lhs[1] * rhs[c][1] + lhs[2] * rhs[c][2].
 */
CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
{
#if __SSE__
    TVectorUnion out[3];
    unsigned c = 0;
    while (c < 3)
    {
        // Accumulate the three scaled lhs vectors one at a time.
        out[c].mVec128 = _mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[c].mVec128, 0));
        out[c].mVec128 = _mm_add_ps(out[c].mVec128,
                                    _mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[c].mVec128, 1)));
        out[c].mVec128 = _mm_add_ps(out[c].mVec128,
                                    _mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[c].mVec128, 2)));
        // The fourth lane is always written as zero.
        out[c].v[3] = 0.0;
        ++c;
    }
    return CMatrix3f(out[0].mVec128, out[1].mVec128, out[2].mVec128);
#else
    // Nine sums, listed in the order the CMatrix3f constructor receives them.
    const float s0 = lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2];
    const float s1 = lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2];
    const float s2 = lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2];
    const float s3 = lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2];
    const float s4 = lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2];
    const float s5 = lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2];
    const float s6 = lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2];
    const float s7 = lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2];
    const float s8 = lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2];
    return CMatrix3f(s0, s1, s2,
                     s3, s4, s5,
                     s6, s7, s8);
#endif
}
|
||||
|
||||
void CMatrix3f::transpose()
|
||||
{
|
||||
#if __SSE__
|
||||
|
|
|
@ -64,7 +64,7 @@ void detectCPU()
|
|||
}
|
||||
|
||||
|
||||
/**
 * Accessor for g_cpuFeatures, which is defined outside this function.
 *
 * @return a copy of g_cpuFeatures
 */
const CPUInfo cpuFeatures()
{
    return g_cpuFeatures;
}
|
||||
/**
 * Accessor for g_cpuFeatures, which is defined outside this function.
 *
 * @return a read-only reference to g_cpuFeatures (no copy is made)
 */
const CPUInfo& cpuFeatures()
{
    return g_cpuFeatures;
}
|
||||
|
||||
namespace Math
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue