mirror of https://github.com/AxioDL/zeus.git
Remove SSE4 runtime checks
This commit is contained in:
parent
b438e30060
commit
50f47d6455
|
@ -259,16 +259,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
|
||||
#endif
|
||||
#else
|
||||
return r * r + g * g + b * b + a * a;
|
||||
#endif
|
||||
|
@ -295,15 +292,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#endif
|
||||
#else
|
||||
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
|
||||
#endif
|
||||
|
|
|
@ -175,16 +175,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
|
||||
#endif
|
||||
#else
|
||||
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
|
||||
#endif
|
||||
|
|
|
@ -280,14 +280,12 @@ public:
|
|||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1];
|
||||
#endif
|
||||
#else
|
||||
return (x * rhs.x) + (y * rhs.y);
|
||||
#endif
|
||||
|
@ -296,15 +294,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1];
|
||||
#endif
|
||||
#else
|
||||
return x * x + y * y;
|
||||
#endif
|
||||
|
|
|
@ -74,16 +74,14 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TDblVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
|
||||
return result.v[0] + (v[2] * v[2]);
|
||||
}
|
||||
#endif
|
||||
#if __SSE4_1__
|
||||
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
|
||||
return result.v[0] + (v[2] * v[2]);
|
||||
#else
|
||||
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
|
||||
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#endif
|
||||
#else
|
||||
return x * x + y * y + z * z;
|
||||
#endif
|
||||
|
@ -97,20 +95,16 @@ public:
|
|||
|
||||
double dot(const CVector3d& rhs) const
|
||||
{
|
||||
|
||||
#if __SSE__
|
||||
TDblVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
|
||||
return result.v[0] + (v[2] * rhs.v[2]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __SSE4_1__
|
||||
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
|
||||
return result.v[0] + (v[2] * rhs.v[2]);
|
||||
#else
|
||||
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
|
||||
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#endif
|
||||
#else
|
||||
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
|
||||
#endif
|
||||
|
|
|
@ -280,15 +280,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#endif
|
||||
#else
|
||||
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
|
||||
#endif
|
||||
|
@ -298,16 +296,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#endif
|
||||
#else
|
||||
return x * x + y * y + z * z;
|
||||
#endif
|
||||
|
|
|
@ -312,16 +312,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
|
||||
#endif
|
||||
#else
|
||||
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
|
||||
#endif
|
||||
|
@ -330,15 +327,13 @@ public:
|
|||
{
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
}
|
||||
#endif
|
||||
#if __SSE4_1__
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
#else
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#endif
|
||||
#else
|
||||
return x * x + y * y + z * z + w * w;
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue