Remove SSE4 runtime checks

This commit is contained in:
Jack Andersen 2017-12-15 15:32:06 -10:00
parent b438e30060
commit 50f47d6455
6 changed files with 52 additions and 80 deletions

View File

@ -259,16 +259,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return r * r + g * g + b * b + a * a;
#endif
@ -295,15 +292,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif

View File

@ -175,16 +175,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
}
#endif
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif

View File

@ -280,14 +280,12 @@ public:
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
}
#endif
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1];
#endif
#else
return (x * rhs.x) + (y * rhs.y);
#endif
@ -296,15 +294,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1];
#endif
#else
return x * x + y * y;
#endif

View File

@ -74,16 +74,14 @@ public:
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]);
}
#endif
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
@ -97,20 +95,16 @@ public:
double dot(const CVector3d& rhs) const
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]);
}
#endif
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif

View File

@ -280,15 +280,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
@ -298,16 +296,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif

View File

@ -312,16 +312,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
@ -330,15 +327,13 @@ public:
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
}
#endif
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z + w * w;
#endif