Remove SSELegacy.cpp and add validateCPU()

This commit is contained in:
Jack Andersen
2017-12-11 16:03:39 -10:00
parent 4134568660
commit b359ff96d0
7 changed files with 65 additions and 145 deletions

View File

@@ -33,11 +33,6 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
void CMatrix3f::transpose()
{
#if __SSE__
if (!cpuFeatures().SSE41)
{
transposeSSE3();
return;
}
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
@@ -66,8 +61,6 @@ void CMatrix3f::transpose()
CMatrix3f CMatrix3f::transposed() const
{
#if __SSE__
if (!cpuFeatures().SSE41)
return transposedSSE3();
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);

View File

@@ -9,8 +9,6 @@ CMatrix4f CMatrix4f::transposed() const
{
CMatrix4f ret;
#if __SSE__
if (!cpuFeatures().SSE41)
return transposedSSE3();
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);

View File

@@ -48,10 +48,10 @@ CQuaternion CQuaternion::operator*(const CQuaternion& q) const
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
{
return CQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
return CNUQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
}
CQuaternion CQuaternion::operator/(const CQuaternion& q) const

View File

@@ -12,7 +12,8 @@ namespace zeus
{
static bool isCPUInit = false;
static CPUInfo g_cpuFeatures;
static CPUInfo g_cpuFeatures = {};
static CPUInfo g_missingFeatures = {};
void getCpuInfo(int level, int regs[4])
{
@@ -68,6 +69,64 @@ void detectCPU()
const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; }
std::pair<bool, const CPUInfo&> validateCPU()
{
detectCPU();
bool ret = true;
#if __SSE4A__
if (!g_cpuFeatures.SSE4a)
{
*(bool*) &g_missingFeatures.SSE4a = true;
ret = false;
}
#endif
#if __SSE4_2__
if (!g_cpuFeatures.SSE42)
{
*(bool*) &g_missingFeatures.SSE42 = true;
ret = false;
}
#endif
#if __SSE4_1__
if (!g_cpuFeatures.SSE41)
{
*(bool*) &g_missingFeatures.SSE41 = true;
ret = false;
}
#endif
#if __SSSE3__
if (!g_cpuFeatures.SSSE3)
{
*(bool*) &g_missingFeatures.SSSE3 = true;
ret = false;
}
#endif
#if __SSE3__
if (!g_cpuFeatures.SSE3)
{
*(bool*) &g_missingFeatures.SSE3 = true;
ret = false;
}
#endif
#if __SSE2__
if (!g_cpuFeatures.SSE2)
{
*(bool*) &g_missingFeatures.SSE2 = true;
ret = false;
}
#endif
#if __SSE__
if (!g_cpuFeatures.SSE1)
{
*(bool*) &g_missingFeatures.SSE1 = true;
ret = false;
}
#endif
return {ret, g_missingFeatures};
}
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up)
{
CVector3f vLook, vRight, vUp;

View File

@@ -1,99 +0,0 @@
#include "zeus/CMatrix3f.hpp"
#include "zeus/CMatrix4f.hpp"
namespace zeus
{
void CMatrix3f::transposeSSE3()
{
#if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
vec[0].mVec128 = _mm_movelh_ps(T0, T2);
vec[1].mVec128 = _mm_movehl_ps(T2, T0);
vec[2].mVec128 = _mm_movelh_ps(T1, T3);
#else
float tmp;
tmp = m[0][1];
m[0][1] = m[1][0];
m[1][0] = tmp;
tmp = m[0][2];
m[0][2] = m[2][0];
m[2][0] = tmp;
tmp = m[1][2];
m[1][2] = m[2][1];
m[2][1] = tmp;
#endif
}
CMatrix3f CMatrix3f::transposedSSE3() const
{
#if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
#else
CMatrix3f ret(*this);
float tmp;
tmp = ret.m[0][1];
ret.m[0][1] = ret.m[1][0];
ret.m[1][0] = tmp;
tmp = m[0][2];
ret.m[0][2] = ret.m[2][0];
ret.m[2][0] = tmp;
tmp = m[1][2];
ret.m[1][2] = ret.m[2][1];
ret.m[2][1] = tmp;
return ret;
#endif
}
CMatrix4f CMatrix4f::transposedSSE3() const
{
CMatrix4f ret;
#if __SSE__
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);
ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2);
ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0);
ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3);
ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1);
#else
ret.m[0][0] = m[0][0];
ret.m[1][0] = m[0][1];
ret.m[2][0] = m[0][2];
ret.m[3][0] = m[0][3];
ret.m[0][1] = m[1][0];
ret.m[1][1] = m[1][1];
ret.m[2][1] = m[1][2];
ret.m[3][1] = m[1][3];
ret.m[0][2] = m[2][0];
ret.m[1][2] = m[2][1];
ret.m[2][2] = m[2][2];
ret.m[3][2] = m[2][3];
ret.m[0][3] = m[3][0];
ret.m[1][3] = m[3][1];
ret.m[2][3] = m[3][2];
ret.m[3][3] = m[3][3];
#endif
return ret;
}
}