diff --git a/include/zeus/CVector2f.hpp b/include/zeus/CVector2f.hpp index e66944c..b06659b 100644 --- a/include/zeus/CVector2f.hpp +++ b/include/zeus/CVector2f.hpp @@ -188,7 +188,7 @@ public: return (diffVec.x() <= epsilon && diffVec.y() <= epsilon); } - [[nodiscard]] constexpr simd::reference operator[](size_t idx) { + [[nodiscard]] simd::reference operator[](size_t idx) { assert(idx < 2); return mSimd[idx]; } @@ -201,8 +201,8 @@ public: [[nodiscard]] constexpr float x() const { return mSimd[0]; } [[nodiscard]] constexpr float y() const { return mSimd[1]; } - [[nodiscard]] constexpr simd::reference x() { return mSimd[0]; } - [[nodiscard]] constexpr simd::reference y() { return mSimd[1]; } + [[nodiscard]] simd::reference x() { return mSimd[0]; } + [[nodiscard]] simd::reference y() { return mSimd[1]; } }; constexpr inline CVector2f skOne2f(1.f); constexpr inline CVector2f skNegOne2f(-1.f); diff --git a/include/zeus/simd/simd_sse.hpp b/include/zeus/simd/simd_sse.hpp index aa93d40..f5a2a21 100644 --- a/include/zeus/simd/simd_sse.hpp +++ b/include/zeus/simd/simd_sse.hpp @@ -30,8 +30,25 @@ class __simd_storage { public: using storage_type = __m128; storage_type __storage_{}; - [[nodiscard]] inline float __get(size_t __index) const noexcept { return __storage_[__index]; } - inline void __set(size_t __index, float __val) noexcept { __storage_[__index] = __val; } + [[nodiscard]] inline float __get(size_t __index) const noexcept { +#if _MSC_VER && !defined(__clang__) + alignas(16) std::array<float, 4> sse_data; + _mm_store_ps(sse_data.data(), __storage_); + return sse_data[__index]; +#else + return __storage_[__index]; +#endif + } + inline void __set(size_t __index, float __val) noexcept { +#if _MSC_VER && !defined(__clang__) + alignas(16) std::array<float, 4> sse_data; + _mm_store_ps(sse_data.data(), __storage_); + sse_data[__index] = __val; + __storage_ = _mm_load_ps(sse_data.data()); +#else + __storage_[__index] = __val; +#endif + } constexpr __simd_storage(float a, float b, float c, float 
d) : __storage_{a, b, c, d} {} constexpr void __set4(float a, float b, float c, float d) noexcept { __storage_ = storage_type{a, b, c, d}; } constexpr explicit __simd_storage(float rv) : __storage_{rv, rv, rv, rv} {} @@ -192,8 +209,25 @@ class __simd_storage { public: using storage_type = std::array<__m128d, 2>; storage_type __storage_{}; - [[nodiscard]] inline double __get(size_t __index) const noexcept { return __storage_[__index / 2][__index % 2]; } - inline void __set(size_t __index, double __val) noexcept { __storage_[__index / 2][__index % 2] = __val; } + [[nodiscard]] inline double __get(size_t __index) const noexcept { +#if _MSC_VER && !defined(__clang__) + alignas(16) std::array<double, 2> sse_data; + _mm_store_pd(sse_data.data(), __storage_[__index / 2]); + return sse_data[__index % 2]; +#else + return __storage_[__index / 2][__index % 2]; +#endif + } + inline void __set(size_t __index, double __val) noexcept { +#if _MSC_VER && !defined(__clang__) + alignas(16) std::array<double, 2> sse_data; + _mm_store_pd(sse_data.data(), __storage_[__index / 2]); + sse_data[__index % 2] = __val; + __storage_[__index / 2] = _mm_load_pd(sse_data.data()); +#else + __storage_[__index / 2][__index % 2] = __val; +#endif + } // Make GCC happy static constexpr storage_type __make_array(__m128d a, __m128d b) { return {a, b}; } constexpr __simd_storage(double a, double b, double c, double d) : __storage_(__make_array(__m128d{a, b}, __m128d{c, d})) {} diff --git a/src/Math.cpp b/src/Math.cpp index 81b17e0..925293d 100644 --- a/src/Math.cpp +++ b/src/Math.cpp @@ -21,7 +21,7 @@ static CPUInfo g_cpuFeatures = {}; static CPUInfo g_missingFeatures = {}; void getCpuInfo(int eax, int regs[4]) { -#if __x86_64__ +#if defined(__x86_64__) || defined(_M_X64) #if _WIN32 __cpuid(regs, eax); #else @@ -31,7 +31,7 @@ void getCpuInfo(int eax, int regs[4]) { } void getCpuInfoEx(int eax, int ecx, int regs[4]) { -#if __x86_64__ +#if defined(__x86_64__) || defined(_M_X64) #if _WIN32 __cpuidex(regs, eax, ecx); #else @@ 
-41,7 +41,7 @@ void getCpuInfoEx(int eax, int ecx, int regs[4]) { } void detectCPU() { -#if __x86_64__ +#if defined(__x86_64__) || defined(_M_X64) if (isCPUInit) return;