Add alternate matrix transpose code path for pre-SSE4.1 CPUs

This commit is contained in:
Jack Andersen
2016-07-04 12:28:59 -10:00
parent 68b5c47e25
commit 3925f8509e
6 changed files with 173 additions and 42 deletions

View File

@@ -33,6 +33,11 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
void CMatrix3f::transpose()
{
#if __SSE__
if (!cpuFeatures().SSE41)
{
transposeSSE3();
return;
}
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
@@ -61,6 +66,8 @@ void CMatrix3f::transpose()
CMatrix3f CMatrix3f::transposed() const
{
#if __SSE__
if (!cpuFeatures().SSE41)
return transposedSSE3();
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);