diff --git a/src/SSELegacy.cpp b/src/SSELegacy.cpp
index 5be08e1..b0aa2e6 100644
--- a/src/SSELegacy.cpp
+++ b/src/SSELegacy.cpp
@@ -63,31 +63,45 @@ CMatrix3f CMatrix3f::transposedSSE3() const
 
+// Returns a transposed copy of this 4x4 matrix.
+//
+// When __SSE__ is defined, vec[0..3] are interleaved pairwise with
+// unpacklo/unpackhi and recombined with movelh/movehl; otherwise every
+// element is copied individually (ret.m[col][row] = m[row][col]).
 CMatrix4f CMatrix4f::transposedSSE3() const
 {
+    CMatrix4f ret;
 #if __SSE__
-    __m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
+    // T0/T1 interleave vec[0] with vec[1]; T2/T3 interleave vec[2] with vec[3].
     __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
+    __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
     __m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
-    return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
+    __m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);
+    // Pair the low/high halves of the interleaved registers into the result.
+    ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2);
+    ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0);
+    ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3);
+    ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1);
 #else
-    CMatrix3f ret(*this);
-    float tmp;
+    ret.m[0][0] = m[0][0];
+    ret.m[1][0] = m[0][1];
+    ret.m[2][0] = m[0][2];
+    ret.m[3][0] = m[0][3];
 
-    tmp = ret.m[0][1];
-    ret.m[0][1] = ret.m[1][0];
-    ret.m[1][0] = tmp;
+    ret.m[0][1] = m[1][0];
+    ret.m[1][1] = m[1][1];
+    ret.m[2][1] = m[1][2];
+    ret.m[3][1] = m[1][3];
 
-    tmp = m[0][2];
-    ret.m[0][2] = ret.m[2][0];
-    ret.m[2][0] = tmp;
+    ret.m[0][2] = m[2][0];
+    ret.m[1][2] = m[2][1];
+    ret.m[2][2] = m[2][2];
+    ret.m[3][2] = m[2][3];
 
-    tmp = m[1][2];
-    ret.m[1][2] = ret.m[2][1];
-    ret.m[2][1] = tmp;
-
-    return ret;
+    ret.m[0][3] = m[3][0];
+    ret.m[1][3] = m[3][1];
+    ret.m[2][3] = m[3][2];
+    ret.m[3][3] = m[3][3];
 #endif
+    return ret;
 }
 
 }