Remove SSE4 runtime checks

This commit is contained in:
Jack Andersen 2017-12-15 15:32:06 -10:00
parent b438e30060
commit 50f47d6455
6 changed files with 52 additions and 80 deletions

View File

@ -259,16 +259,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1); result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128); result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3]; return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else #else
return r * r + g * g + b * b + a * a; return r * r + g * g + b * b + a * a;
#endif #endif
@ -295,15 +292,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2]; return result.v[0] + result.v[1] + result.v[2];
#endif
#else #else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b); return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif #endif

View File

@ -175,16 +175,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1); result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3]; return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else #else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w); return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif #endif

View File

@ -280,14 +280,12 @@ public:
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31); result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1]; return result.v[0] + result.v[1];
#endif
#else #else
return (x * rhs.x) + (y * rhs.y); return (x * rhs.x) + (y * rhs.y);
#endif #endif
@ -296,15 +294,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31); result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128); result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1]; return result.v[0] + result.v[1];
#endif
#else #else
return x * x + y * y; return x * x + y * y;
#endif #endif

View File

@ -74,16 +74,14 @@ public:
{ {
#if __SSE__ #if __SSE__
TDblVectorUnion result; TDblVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31); result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]); return result.v[0] + (v[2] * v[2]);
} #else
#endif
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]); result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]); result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2]; return result.v[0] + result.v[1] + result.v[2];
#endif
#else #else
return x * x + y * y + z * z; return x * x + y * y + z * z;
#endif #endif
@ -97,20 +95,16 @@ public:
double dot(const CVector3d& rhs) const double dot(const CVector3d& rhs) const
{ {
#if __SSE__ #if __SSE__
TDblVectorUnion result; TDblVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31); result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]); return result.v[0] + (v[2] * rhs.v[2]);
} #else
#endif
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]); result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]); result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2]; return result.v[0] + result.v[1] + result.v[2];
#endif
#else #else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z); return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif #endif

View File

@ -280,15 +280,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2]; return result.v[0] + result.v[1] + result.v[2];
#endif
#else #else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z); return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif #endif
@ -298,16 +296,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71); result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128); result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2]; return result.v[0] + result.v[1] + result.v[2];
#endif
#else #else
return x * x + y * y + z * z; return x * x + y * y + z * z;
#endif #endif

View File

@ -312,16 +312,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1); result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3]; return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else #else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w); return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif #endif
@ -330,15 +327,13 @@ public:
{ {
#if __SSE__ #if __SSE__
TVectorUnion result; TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__ #if __SSE4_1__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1); result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0]; return result.v[0];
} #else
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128); result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2]; return result.v[0] + result.v[1] + result.v[2];
#endif
#else #else
return x * x + y * y + z * z + w * w; return x * x + y * y + z * z + w * w;
#endif #endif