From 50f47d645582a5106b3be46825f654d67db17e14 Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Fri, 15 Dec 2017 15:32:06 -1000 Subject: [PATCH] Remove SSE4 runtime checks --- include/zeus/CColor.hpp | 25 ++++++++++--------------- include/zeus/CQuaternion.hpp | 9 +++------ include/zeus/CVector2f.hpp | 22 +++++++++------------- include/zeus/CVector3d.hpp | 26 ++++++++++---------------- include/zeus/CVector3f.hpp | 25 ++++++++++--------------- include/zeus/CVector4f.hpp | 25 ++++++++++--------------- 6 files changed, 52 insertions(+), 80 deletions(-) diff --git a/include/zeus/CColor.hpp b/include/zeus/CColor.hpp index 3eba2c1..41aa432 100644 --- a/include/zeus/CColor.hpp +++ b/include/zeus/CColor.hpp @@ -259,16 +259,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1); - return result.v[0]; - } -#endif - +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, mVec128); return result.v[0] + result.v[1] + result.v[2] + result.v[3]; +#endif #else return r * r + g * g + b * b + a * a; #endif @@ -295,15 +292,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); - return result.v[0]; - } -#endif +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); return result.v[0] + result.v[1] + result.v[2]; +#endif #else return (r * rhs.r) + (g * rhs.g) + (b * rhs.b); #endif diff --git a/include/zeus/CQuaternion.hpp b/include/zeus/CQuaternion.hpp index be7c3c5..24b29cd 100644 --- a/include/zeus/CQuaternion.hpp +++ b/include/zeus/CQuaternion.hpp @@ -175,16 +175,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { +#if __SSE4_1__ result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1); return result.v[0]; - } -#endif - +#else result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); return result.v[0] + result.v[1] + result.v[2] + result.v[3]; +#endif #else return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w); #endif diff --git a/include/zeus/CVector2f.hpp b/include/zeus/CVector2f.hpp index e80350f..34094f8 100644 --- a/include/zeus/CVector2f.hpp +++ b/include/zeus/CVector2f.hpp @@ -280,14 +280,12 @@ public: #if __SSE__ TVectorUnion result; #if __SSE4_1__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31); - return result.v[0]; - } -#endif + result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); return result.v[0] + result.v[1]; +#endif #else return (x * rhs.x) + (y * rhs.y); #endif @@ -296,15 +294,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31); - return result.v[0]; - } -#endif +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, mVec128); return result.v[0] + result.v[1]; +#endif #else return x * x + y * y; #endif diff --git a/include/zeus/CVector3d.hpp b/include/zeus/CVector3d.hpp index 02135fa..e3bb919 100644 --- a/include/zeus/CVector3d.hpp +++ b/include/zeus/CVector3d.hpp @@ -74,16 +74,14 @@ public: { #if __SSE__ TDblVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31); - return result.v[0] + (v[2] * v[2]); - } -#endif +#if __SSE4_1__ + result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31); + return result.v[0] + (v[2] * v[2]); +#else result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]); result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]); return result.v[0] + result.v[1] + result.v[2]; +#endif #else return x * x + y * y + z * z; #endif @@ -97,20 +95,16 @@ public: double dot(const CVector3d& rhs) const { - #if __SSE__ TDblVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31); - return result.v[0] + (v[2] * rhs.v[2]); - } -#endif - +#if __SSE4_1__ + result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31); + return result.v[0] + (v[2] * rhs.v[2]); +#else result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]); result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]); return result.v[0] + result.v[1] + result.v[2]; +#endif #else return (x * rhs.x) + (y * rhs.y) + (z * rhs.z); #endif diff --git a/include/zeus/CVector3f.hpp b/include/zeus/CVector3f.hpp index a0515ac..bae7efc 100644 --- a/include/zeus/CVector3f.hpp +++ b/include/zeus/CVector3f.hpp @@ -280,15 +280,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); - return result.v[0]; - } -#endif +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); return result.v[0] + result.v[1] + result.v[2]; +#endif #else return (x * rhs.x) + (y * rhs.y) + (z * rhs.z); #endif @@ -298,16 +296,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71); - return result.v[0]; - } -#endif - +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, mVec128); return result.v[0] + result.v[1] + result.v[2]; +#endif #else return x * x + y * y + z * z; #endif diff --git a/include/zeus/CVector4f.hpp b/include/zeus/CVector4f.hpp index 935ded0..44366d5 100644 --- a/include/zeus/CVector4f.hpp +++ b/include/zeus/CVector4f.hpp @@ -312,16 +312,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1); - return result.v[0]; - } -#endif - +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128); return result.v[0] + result.v[1] + result.v[2] + result.v[3]; +#endif #else return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w); #endif @@ -330,15 +327,13 @@ public: { #if __SSE__ TVectorUnion result; -#if __SSE4_1__ || __SSE4_2__ - if (cpuFeatures().SSE41 || cpuFeatures().SSE42) - { - result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1); - return result.v[0]; - } -#endif +#if __SSE4_1__ + result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1); + return result.v[0]; +#else result.mVec128 = _mm_mul_ps(mVec128, mVec128); return result.v[0] + result.v[1] + result.v[2]; +#endif #else return x * x + y * y + z * z + w * w; #endif