diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b02181..d78e608 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,9 @@ set(SOURCES # SSELegacy.cpp compiled separately to escape the effects of link-time optimization if(NOT MSVC) set_source_files_properties(${SOURCES} PROPERTIES COMPILE_FLAGS "-msse4.1 -msse4.2 -std=c++14") +if(CMAKE_CXX_FLAGS) string(REPLACE "-flto=thin" "" CUSTOM_FLAGS ${CMAKE_CXX_FLAGS}) +endif() if (CMAKE_OSX_DEPLOYMENT_TARGET AND NOT CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "") set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET} -O3 -msse3 -std=c++14") else() diff --git a/include/zeus/CQuaternion.hpp b/include/zeus/CQuaternion.hpp index 10d549b..09e0423 100644 --- a/include/zeus/CQuaternion.hpp +++ b/include/zeus/CQuaternion.hpp @@ -58,6 +58,35 @@ public: #endif } + operator atVec4f() + { + atVec4f ret; +#if __SSE__ + ret.mVec128 = mVec128; +#else + ret.vec[0] = w; + ret.vec[1] = x; + ret.vec[2] = y; + ret.vec[3] = z; +#endif + return ret; + } + operator atVec4f() const + { + atVec4f ret; +#if __SSE__ + ret.mVec128 = mVec128; +#else + ret.vec[0] = w; + ret.vec[1] = x; + ret.vec[2] = y; + ret.vec[3] = z; +#endif + return ret; + } + +#endif + CQuaternion(const CMatrix3f& mat) { float trace = mat[0][0] + mat[1][1] + mat[2][2]; @@ -97,35 +126,7 @@ public: } } } - - operator atVec4f() - { - atVec4f ret; -#if __SSE__ - ret.mVec128 = mVec128; -#else - ret.vec[0] = w; - ret.vec[1] = x; - ret.vec[2] = y; - ret.vec[3] = z; -#endif - return ret; - } - operator atVec4f() const - { - atVec4f ret; -#if __SSE__ - ret.mVec128 = mVec128; -#else - ret.vec[0] = w; - ret.vec[1] = x; - ret.vec[2] = y; - ret.vec[3] = z; -#endif - return ret; - } - -#endif + CQuaternion(const CVector3f& vec) { fromVector3f(vec); } CQuaternion(const CVector4f& vec) { diff --git a/include/zeus/CTransform.hpp b/include/zeus/CTransform.hpp index c8ed256..3edcecf 100644 --- a/include/zeus/CTransform.hpp +++ 
b/include/zeus/CTransform.hpp @@ -5,6 +5,8 @@ #include "zeus/CMatrix3f.hpp" #include "zeus/CMatrix4f.hpp" #include "zeus/CVector3f.hpp" +#include +#include namespace zeus { diff --git a/include/zeus/CVector3d.hpp b/include/zeus/CVector3d.hpp index 1c24e58..b35e4f9 100644 --- a/include/zeus/CVector3d.hpp +++ b/include/zeus/CVector3d.hpp @@ -145,8 +145,9 @@ public: inline CVector3d operator+(const CVector3d& rhs) const { #if __SSE__ - return CVector3d({_mm_add_pd(mVec128[0], rhs.mVec128[0]), - _mm_add_pd(mVec128[1], rhs.mVec128[1])}); + const __m128d tmpVec128[2] = {_mm_add_pd(mVec128[0], rhs.mVec128[0]), + _mm_add_pd(mVec128[1], rhs.mVec128[1])}; + return CVector3d(tmpVec128); #elif __GEKKO_PS__ return CVector3d(__mm_gekko_add_pd(mVec128, rhs.mVec128)); #else @@ -156,8 +157,9 @@ public: inline CVector3d operator-(const CVector3d& rhs) const { #if __SSE__ - return CVector3d({_mm_sub_pd(mVec128[0], rhs.mVec128[0]), - _mm_sub_pd(mVec128[1], rhs.mVec128[1])}); + const __m128d tmpVec128[2] = {_mm_sub_pd(mVec128[0], rhs.mVec128[0]), + _mm_sub_pd(mVec128[1], rhs.mVec128[1])}; + return CVector3d(tmpVec128); #else return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z); #endif @@ -165,8 +167,9 @@ public: inline CVector3d operator*(const CVector3d& rhs) const { #if __SSE__ - return CVector3d({_mm_mul_pd(mVec128[0], rhs.mVec128[0]), - _mm_mul_pd(mVec128[1], rhs.mVec128[1])}); + const __m128d tmpVec128[2] = {_mm_mul_pd(mVec128[0], rhs.mVec128[0]), + _mm_mul_pd(mVec128[1], rhs.mVec128[1])}; + return CVector3d(tmpVec128); #else return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z); #endif @@ -174,8 +177,9 @@ public: inline CVector3d operator/(const CVector3d& rhs) const { #if __SSE__ - return CVector3d({_mm_div_pd(mVec128[0], rhs.mVec128[0]), - _mm_div_pd(mVec128[1], rhs.mVec128[1])}); + const __m128d tmpVec128[2] = {_mm_div_pd(mVec128[0], rhs.mVec128[0]), + _mm_div_pd(mVec128[1], rhs.mVec128[1])}; + return CVector3d(tmpVec128); #else return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z); 
#endif diff --git a/include/zeus/CVector3f.hpp b/include/zeus/CVector3f.hpp index 3f217eb..6087b3a 100644 --- a/include/zeus/CVector3f.hpp +++ b/include/zeus/CVector3f.hpp @@ -71,8 +71,6 @@ public: return ret; } - CVector3f(const CVector3d& vec); - void readBig(athena::io::IStreamReader& input) { x = input.readFloatBig(); @@ -89,6 +87,8 @@ public: } #endif + CVector3f(const CVector3d& vec); + CVector3f(float xyz) { splat(xyz); } void assign(float x, float y, float z) {