diff --git a/include/zeus/CPlane.hpp b/include/zeus/CPlane.hpp index 75fda4d..4e3b082 100644 --- a/include/zeus/CPlane.hpp +++ b/include/zeus/CPlane.hpp @@ -53,6 +53,11 @@ public: return pos.dot(vec) - d; } + /* Read-only accessor: returns a const reference to the plane's vec member. */ const CVector3f& normal() const { return vec; } + + /* Indexes the p array directly; no bounds check is performed on idx. */ inline float& operator[](size_t idx) { return p[idx]; } + inline const float& operator[](size_t idx) const { return p[idx]; } + union { struct { diff --git a/include/zeus/CVector3d.hpp b/include/zeus/CVector3d.hpp index 317517d..3c5468b 100644 --- a/include/zeus/CVector3d.hpp +++ b/include/zeus/CVector3d.hpp @@ -37,9 +37,15 @@ public: CVector3d(const CVector3f& vec) { +#if __SSE__ + /* SSE path: x and y are widened together by one convert into the low 128-bit half; z is copied and the fourth slot is zeroed by the two scalar stores that follow. NOTE(review): _mm_cvtps_pd is presumably an SSE2 intrinsic while the guard tests __SSE__ - confirm the guard is sufficient. */ mVec128[0] = _mm_cvtps_pd(vec.mVec128); + v[2] = vec[2]; + v[3] = 0.0; +#else v[0] = vec[0]; v[1] = vec[1]; v[2] = vec[2]; +#endif } CVector3d(double x, double y, double z) @@ -72,9 +78,8 @@ public: #if __SSE4_1__ || __SSE4_2__ if (cpuFeatures().SSE41 || cpuFeatures().SSE42) { - result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x71); - result.mVec128[1] = _mm_dp_pd(mVec128[1], mVec128[1], 0x71); - return result.v[0] + result.v[2]; + /* Only the low half (x, y) goes through the dot-product intrinsic; the z*z term is added in scalar code on the return line. NOTE(review): mask 0x31 presumably means "multiply both lanes, write the sum to lane 0" - confirm against the _mm_dp_pd imm8 definition. */ result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31); + return result.v[0] + (v[2] * v[2]); } #endif result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]); @@ -99,9 +104,8 @@ public: #if __SSE4_1__ || __SSE4_2__ if (cpuFeatures().SSE41 || cpuFeatures().SSE42) { - result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x71); - result.mVec128[1] = _mm_dp_pd(mVec128[1], rhs.mVec128[1], 0x71); - return result.v[0] + result.v[2]; + /* Low half (x, y) handled by the dot-product intrinsic; the z product is added in scalar code on the return line, so the second 128-bit half is never read here. */ result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31); + return result.v[0] + (v[2] * rhs.v[2]); } #endif @@ -147,6 +151,48 @@ public: #endif } + /* Component-wise sum, now a member operator. The SSE path adds both 128-bit halves; the final fallback adds x, y and z individually. */ inline CVector3d operator+(const CVector3d& rhs) const + { +#if __SSE__ + return CVector3d({_mm_add_pd(mVec128[0], rhs.mVec128[0]), + _mm_add_pd(mVec128[1], rhs.mVec128[1])}); +#elif __GEKKO_PS__ + return CVector3d(__mm_gekko_add_pd(mVec128, rhs.mVec128)); +#else + return CVector3d(x + rhs.x, y + rhs.y, z + 
rhs.z); +#endif + } + /* Component-wise difference (this minus rhs); same two-half SSE layout as operator+. */ inline CVector3d operator-(const CVector3d& rhs) const + { +#if __SSE__ + return CVector3d({_mm_sub_pd(mVec128[0], rhs.mVec128[0]), + _mm_sub_pd(mVec128[1], rhs.mVec128[1])}); +#else + return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z); +#endif + } + /* Component-wise product (not a dot or cross product). */ inline CVector3d operator*(const CVector3d& rhs) const + { +#if __SSE__ + return CVector3d({_mm_mul_pd(mVec128[0], rhs.mVec128[0]), + _mm_mul_pd(mVec128[1], rhs.mVec128[1])}); +#else + return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z); +#endif + } + /* Component-wise quotient (this divided by rhs). NOTE(review): the SSE path also divides the fourth lane of the second half; when both operands hold 0.0 there the quotient lane would be NaN - confirm nothing reads that lane. */ inline CVector3d operator/(const CVector3d& rhs) const + { +#if __SSE__ + return CVector3d({_mm_div_pd(mVec128[0], rhs.mVec128[0]), + _mm_div_pd(mVec128[1], rhs.mVec128[1])}); +#else + return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z); +#endif + } + + /* Indexes the v array directly; no bounds check is performed on idx. */ inline double& operator[](size_t idx) { return v[idx]; } + inline const double& operator[](size_t idx) const { return v[idx]; } + union { struct { @@ -171,15 +217,15 @@ static inline CVector3d operator+(double lhs, const CVector3d& rhs) #endif } -static inline CVector3d operator+(const CVector3d& lhs, const CVector3d& rhs) +static inline CVector3d operator-(double lhs, const CVector3d& rhs) { #if __SSE__ - TDblVectorUnion res; - res.mVec128[0] = _mm_add_pd(lhs.mVec128[0], rhs.mVec128[0]); - res.mVec128[1] = _mm_add_pd(lhs.mVec128[1], rhs.mVec128[1]); - return {res.mVec128}; + /* Scalar-minus-vector: the scalar is replicated into the first three slots (fourth slot 0), then rhs is subtracted from it half by half. */ TDblVectorUnion splat{lhs, lhs, lhs, 0}; + splat.mVec128[0] = _mm_sub_pd(splat.mVec128[0], rhs.mVec128[0]); + splat.mVec128[1] = _mm_sub_pd(splat.mVec128[1], rhs.mVec128[1]); + return {splat.mVec128}; #else - return {lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z}; + return {lhs - rhs.x, lhs - rhs.y, lhs - rhs.z}; #endif } @@ -195,30 +241,6 @@ static inline CVector3d operator*(double lhs, const CVector3d& rhs) #endif } -static inline CVector3d operator*(const CVector3d& lhs, const CVector3d& rhs) -{ -#if __SSE__ - TDblVectorUnion splat; - splat.mVec128[0] = _mm_mul_pd(lhs.mVec128[0], rhs.mVec128[0]); - splat.mVec128[1] = 
_mm_mul_pd(lhs.mVec128[1], rhs.mVec128[1]); - return {splat.mVec128}; -#else - return {lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z}; -#endif -} - -static inline CVector3d operator/(const CVector3d& lhs, const CVector3d& rhs) -{ -#if __SSE__ - TDblVectorUnion splat; - splat.mVec128[0] = _mm_div_pd(lhs.mVec128[0], rhs.mVec128[0]); - splat.mVec128[1] = _mm_div_pd(lhs.mVec128[1], rhs.mVec128[1]); - return {splat.mVec128}; -#else - return {lhs.x / rhs.x, lhs.y / rhs.y, lhs.z / rhs.z}; -#endif -} - static inline CVector3d operator/(double lhs, const CVector3d& rhs) { #if __SSE__ diff --git a/include/zeus/CVector3f.hpp b/include/zeus/CVector3f.hpp index bc50ae4..eb8b3c6 100644 --- a/include/zeus/CVector3f.hpp +++ b/include/zeus/CVector3f.hpp @@ -11,6 +11,7 @@ namespace zeus { +class CVector3d; class alignas(16) CVector3f { #if __atdna__ @@ -70,6 +71,8 @@ public: return ret; } + /* Converting constructor from the double-precision vector; only declared here, with the body added to src/CVector3f.cpp by this same patch. NOTE(review): it is not explicit, so CVector3d converts to CVector3f implicitly with a double-to-float narrowing - confirm that is intended. */ CVector3f(const CVector3d& vec); + void readBig(athena::io::IStreamReader& input) { x = input.readFloatBig(); @@ -280,7 +283,7 @@ public: #if __SSE4_1__ || __SSE4_2__ if (cpuFeatures().SSE41 || cpuFeatures().SSE42) { - result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1); + /* NOTE(review): mask 0x71 presumably multiplies lanes 0-2 only and writes the sum to lane 0, leaving the fourth lane out of the dot product (the old 0xF1 included it) - confirm against the _mm_dp_ps imm8 definition. */ result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71); return result.v[0]; } #endif diff --git a/src/CVector3f.cpp b/src/CVector3f.cpp index 553d9ca..4a9146f 100644 --- a/src/CVector3f.cpp +++ b/src/CVector3f.cpp @@ -1,4 +1,5 @@ #include "zeus/CVector3f.hpp" +#include "zeus/CVector3d.hpp" #include #include #include @@ -18,6 +19,17 @@ const CVector3f CVector3f::skDown(0.f, 0.f, -1.f); const CVector3f CVector3f::skRadToDegVec(180.0f / M_PIF); const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f); +CVector3f::CVector3f(const CVector3d& vec) +{ +#if __SSE__ + /* SSE path: x and y come from one packed convert of the source's low 128-bit half; z is narrowed by the scalar store on the next line. NOTE(review): the fourth float lane is never assigned explicitly here and relies on what the convert leaves in the upper lanes (presumably zero) - confirm. */ mVec128 = _mm_cvtpd_ps(vec.mVec128[0]); + v[2] = vec.v[2]; +#else + v[0] = vec.v[0]; + v[1] = vec.v[1]; + v[2] = vec.v[2]; +#endif +} float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) {