diff --git a/CMakeLists.txt b/CMakeLists.txt
index b8fbd4d..bf639b9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,40 +27,8 @@ set(SOURCES
     src/CAABox.cpp
     src/CEulerAngles.cpp)
 
-# SSELegacy.cpp compiled separately to escape the effects of link-time optimization
-if(NOT MSVC)
-set_source_files_properties(${SOURCES} PROPERTIES COMPILE_FLAGS "-msse4.1 -msse4.2")
-if(CUSTOM_FLAGS)
-  string(REPLACE "-flto=thin" "" CUSTOM_FLAGS ${CMAKE_CXX_FLAGS})
-endif(CUSTOM_FLAGS)
-if (CMAKE_OSX_SYSROOT)
-  set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
-endif()
-if (CMAKE_OSX_DEPLOYMENT_TARGET AND NOT CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "")
-  set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET} -O3 -msse3 -std=c++14")
-else()
-  set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -O3 -msse3 -std=c++14")
-endif()
-
-set(SSE_LEGACY_OBJ ${CMAKE_CURRENT_BINARY_DIR}/SSELegacy.o)
-separate_arguments(CUSTOM_FLAGS UNIX_COMMAND ${CUSTOM_FLAGS})
-add_custom_command(
-  OUTPUT ${SSE_LEGACY_OBJ}
-  COMMAND ${CMAKE_CXX_COMPILER}
-  ARGS ${CUSTOM_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/src/SSELegacy.cpp
-                       -o ${SSE_LEGACY_OBJ}
-                       -I ${CMAKE_CURRENT_SOURCE_DIR}/include
-                       -I ${ATHENA_INCLUDE_DIR}
-  MAIN_DEPENDENCY src/SSELegacy.cpp)
-
-else()
-set(SSE_LEGACY_OBJ src/SSELegacy.cpp)
-endif()
-
 add_library(zeus
     ${SOURCES}
-    ${SSE_LEGACY_OBJ}
-
     include/zeus/Math.hpp
     include/zeus/CQuaternion.hpp
     include/zeus/CMatrix3f.hpp
diff --git a/include/zeus/Math.hpp b/include/zeus/Math.hpp
index b573de2..0a1b12d 100644
--- a/include/zeus/Math.hpp
+++ b/include/zeus/Math.hpp
@@ -48,6 +48,7 @@ struct CPUInfo
  */
 void detectCPU();
 const CPUInfo& cpuFeatures();
+std::pair<bool, const CPUInfo&> validateCPU();
 class CVector3f;
 class CVector2f;
 class CTransform;
diff --git a/src/CMatrix3f.cpp b/src/CMatrix3f.cpp
index 9c19867..db588e9 100644
--- a/src/CMatrix3f.cpp
+++ b/src/CMatrix3f.cpp
@@ -33,11 +33,6 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
 void CMatrix3f::transpose()
 {
 #if __SSE__
-    if (!cpuFeatures().SSE41)
-    {
-        transposeSSE3();
-        return;
-    }
     __m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
     __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
     __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
@@ -66,8 +61,6 @@ void CMatrix3f::transpose()
 CMatrix3f CMatrix3f::transposed() const
 {
 #if __SSE__
-    if (!cpuFeatures().SSE41)
-        return transposedSSE3();
     __m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
     __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
     __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
diff --git a/src/CMatrix4f.cpp b/src/CMatrix4f.cpp
index 8f5b5d7..f7b2655 100644
--- a/src/CMatrix4f.cpp
+++ b/src/CMatrix4f.cpp
@@ -9,8 +9,6 @@ CMatrix4f CMatrix4f::transposed() const
 {
     CMatrix4f ret;
 #if __SSE__
-    if (!cpuFeatures().SSE41)
-        return transposedSSE3();
     __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
     __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
     __m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
diff --git a/src/CQuaternion.cpp b/src/CQuaternion.cpp
index a5eaef3..b8bf16a 100644
--- a/src/CQuaternion.cpp
+++ b/src/CQuaternion.cpp
@@ -48,10 +48,10 @@ CQuaternion CQuaternion::operator*(const CQuaternion& q) const
 
 CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
 {
-    return CQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
-                       y * q.z - z * q.y + w * q.x + x * q.w,
-                       z * q.x - x * q.z + w * q.y + y * q.w,
-                       x * q.y - y * q.x + w * q.z + z * q.w);
+    return CNUQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
+                         y * q.z - z * q.y + w * q.x + x * q.w,
+                         z * q.x - x * q.z + w * q.y + y * q.w,
+                         x * q.y - y * q.x + w * q.z + z * q.w);
 }
 
 CQuaternion CQuaternion::operator/(const CQuaternion& q) const
diff --git a/src/Math.cpp b/src/Math.cpp
index ff35fb3..c868176 100644
--- a/src/Math.cpp
+++ b/src/Math.cpp
@@ -12,7 +12,8 @@ namespace zeus
 {
 
 static bool isCPUInit = false;
-static CPUInfo g_cpuFeatures;
+static CPUInfo g_cpuFeatures = {};
+static CPUInfo g_missingFeatures = {};
 
 void getCpuInfo(int level, int regs[4])
 {
@@ -68,6 +69,64 @@ void detectCPU()
 
 const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; }
 
+std::pair<bool, const CPUInfo&> validateCPU()
+{
+    detectCPU();
+    bool ret = true;
+
+#if __SSE4A__
+    if (!g_cpuFeatures.SSE4a)
+    {
+        *(bool*) &g_missingFeatures.SSE4a = true;
+        ret = false;
+    }
+#endif
+#if __SSE4_2__
+    if (!g_cpuFeatures.SSE42)
+    {
+        *(bool*) &g_missingFeatures.SSE42 = true;
+        ret = false;
+    }
+#endif
+#if __SSE4_1__
+    if (!g_cpuFeatures.SSE41)
+    {
+        *(bool*) &g_missingFeatures.SSE41 = true;
+        ret = false;
+    }
+#endif
+#if __SSSE3__
+    if (!g_cpuFeatures.SSSE3)
+    {
+        *(bool*) &g_missingFeatures.SSSE3 = true;
+        ret = false;
+    }
+#endif
+#if __SSE3__
+    if (!g_cpuFeatures.SSE3)
+    {
+        *(bool*) &g_missingFeatures.SSE3 = true;
+        ret = false;
+    }
+#endif
+#if __SSE2__
+    if (!g_cpuFeatures.SSE2)
+    {
+        *(bool*) &g_missingFeatures.SSE2 = true;
+        ret = false;
+    }
+#endif
+#if __SSE__
+    if (!g_cpuFeatures.SSE1)
+    {
+        *(bool*) &g_missingFeatures.SSE1 = true;
+        ret = false;
+    }
+#endif
+
+    return {ret, g_missingFeatures};
+}
+
 CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up)
 {
     CVector3f vLook, vRight, vUp;
diff --git a/src/SSELegacy.cpp b/src/SSELegacy.cpp
deleted file mode 100644
index 1b2f413..0000000
--- a/src/SSELegacy.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-#include "zeus/CMatrix3f.hpp"
-#include "zeus/CMatrix4f.hpp"
-
-namespace zeus
-{
-
-void CMatrix3f::transposeSSE3()
-{
-#if __SSE__
-    __m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
-    __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
-    __m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
-    vec[0].mVec128 = _mm_movelh_ps(T0, T2);
-    vec[1].mVec128 = _mm_movehl_ps(T2, T0);
-    vec[2].mVec128 = _mm_movelh_ps(T1, T3);
-#else
-    float tmp;
-
-    tmp = m[0][1];
-    m[0][1] = m[1][0];
-    m[1][0] = tmp;
-
-    tmp = m[0][2];
-    m[0][2] = m[2][0];
-    m[2][0] = tmp;
-
-    tmp = m[1][2];
-    m[1][2] = m[2][1];
-    m[2][1] = tmp;
-#endif
-}
-
-CMatrix3f CMatrix3f::transposedSSE3() const
-{
-#if __SSE__
-    __m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
-    __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
-    __m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
-    return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
-#else
-    CMatrix3f ret(*this);
-    float tmp;
-
-    tmp = ret.m[0][1];
-    ret.m[0][1] = ret.m[1][0];
-    ret.m[1][0] = tmp;
-
-    tmp = m[0][2];
-    ret.m[0][2] = ret.m[2][0];
-    ret.m[2][0] = tmp;
-
-    tmp = m[1][2];
-    ret.m[1][2] = ret.m[2][1];
-    ret.m[2][1] = tmp;
-
-    return ret;
-#endif
-}
-
-CMatrix4f CMatrix4f::transposedSSE3() const
-{
-    CMatrix4f ret;
-#if __SSE__
-    __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
-    __m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
-    __m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);
-    ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2);
-    ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0);
-    ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3);
-    ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1);
-#else
-    ret.m[0][0] = m[0][0];
-    ret.m[1][0] = m[0][1];
-    ret.m[2][0] = m[0][2];
-    ret.m[3][0] = m[0][3];
-
-    ret.m[0][1] = m[1][0];
-    ret.m[1][1] = m[1][1];
-    ret.m[2][1] = m[1][2];
-    ret.m[3][1] = m[1][3];
-
-    ret.m[0][2] = m[2][0];
-    ret.m[1][2] = m[2][1];
-    ret.m[2][2] = m[2][2];
-    ret.m[3][2] = m[2][3];
-
-    ret.m[0][3] = m[3][0];
-    ret.m[1][3] = m[3][1];
-    ret.m[2][3] = m[3][2];
-    ret.m[3][3] = m[3][3];
-#endif
-    return ret;
-}
-}