Fixes for MSVC arch detection & SIMD compilation

2025-07-01 02:43:39 +00:00 · 2021-02-15 21:03:44 -05:00 · 2021-02-15 21:03:44 -05:00 · b3806c03a5
commit b3806c03a5
parent 3c4bcf37d2
3 changed files with 44 additions and 10 deletions
--- a/include/zeus/CVector2f.hpp
+++ b/include/zeus/CVector2f.hpp
@@ -188,7 +188,7 @@ public:
    return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
  }

-  [[nodiscard]] constexpr simd<float>::reference operator[](size_t idx) {
+  [[nodiscard]] simd<float>::reference operator[](size_t idx) {
    assert(idx < 2);
    return mSimd[idx];
  }
@@ -201,8 +201,8 @@ public:
  [[nodiscard]] constexpr float x() const { return mSimd[0]; }
  [[nodiscard]] constexpr float y() const { return mSimd[1]; }

-  [[nodiscard]] constexpr simd<float>::reference x() { return mSimd[0]; }
-  [[nodiscard]] constexpr simd<float>::reference y() { return mSimd[1]; }
+  [[nodiscard]] simd<float>::reference x() { return mSimd[0]; }
+  [[nodiscard]] simd<float>::reference y() { return mSimd[1]; }
 };
 constexpr inline CVector2f skOne2f(1.f);
 constexpr inline CVector2f skNegOne2f(-1.f);
--- a/include/zeus/simd/simd_sse.hpp
+++ b/include/zeus/simd/simd_sse.hpp
@@ -30,8 +30,25 @@ class __simd_storage<float, m128_abi> {
 public:
  using storage_type = __m128;
  storage_type __storage_{};
-  [[nodiscard]] inline float __get(size_t __index) const noexcept { return __storage_[__index]; }
-  inline void __set(size_t __index, float __val) noexcept { __storage_[__index] = __val; }
+  [[nodiscard]] inline float __get(size_t __index) const noexcept {
+#if _MSC_VER && !defined(__clang__)
+    alignas(16) std::array<float, 4> sse_data;
+    _mm_store_ps(sse_data.data(), __storage_);
+    return sse_data[__index];
+#else
+    return __storage_[__index];
+#endif
+  }
+  inline void __set(size_t __index, float __val) noexcept {
+#if _MSC_VER && !defined(__clang__)
+    alignas(16) std::array<float, 4> sse_data;
+    _mm_store_ps(sse_data.data(), __storage_);
+    sse_data[__index] = __val;
+    __storage_ = _mm_load_ps(sse_data.data());
+#else
+    __storage_[__index] = __val;
+#endif
+  }
  constexpr __simd_storage(float a, float b, float c, float d) : __storage_{a, b, c, d} {}
  constexpr void __set4(float a, float b, float c, float d) noexcept { __storage_ = storage_type{a, b, c, d}; }
  constexpr explicit __simd_storage(float rv) : __storage_{rv, rv, rv, rv} {}
@@ -192,8 +209,25 @@ class __simd_storage<double, m128d_abi> {
 public:
  using storage_type = std::array<__m128d, 2>;
  storage_type __storage_{};
-  [[nodiscard]] inline double __get(size_t __index) const noexcept { return __storage_[__index / 2][__index % 2]; }
-  inline void __set(size_t __index, double __val) noexcept { __storage_[__index / 2][__index % 2] = __val; }
+  [[nodiscard]] inline double __get(size_t __index) const noexcept {
+#if _MSC_VER && !defined(__clang__)
+    alignas(16) std::array<double, 2> sse_data;
+    _mm_store_pd(sse_data.data(), __storage_[__index / 2]);
+    return sse_data[__index % 2];
+#else
+    return __storage_[__index / 2][__index % 2];
+#endif
+  }
+  inline void __set(size_t __index, double __val) noexcept {
+#if _MSC_VER && !defined(__clang__)
+    alignas(16) std::array<double, 2> sse_data;
+    _mm_store_pd(sse_data.data(), __storage_[__index / 2]);
+    sse_data[__index % 2] = __val;
+    __storage_[__index / 2] = _mm_load_pd(sse_data.data());
+#else
+    __storage_[__index / 2][__index % 2] = __val;
+#endif
+  }
  // Make GCC happy
  static constexpr storage_type __make_array(__m128d a, __m128d b) { return {a, b}; }
  constexpr __simd_storage(double a, double b, double c, double d) : __storage_(__make_array(__m128d{a, b}, __m128d{c, d})) {}
--- a/src/Math.cpp
+++ b/src/Math.cpp
@@ -21,7 +21,7 @@ static CPUInfo g_cpuFeatures = {};
 static CPUInfo g_missingFeatures = {};

 void getCpuInfo(int eax, int regs[4]) {
-#if __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
  #if _WIN32
  __cpuid(regs, eax);
 #else
@@ -31,7 +31,7 @@ void getCpuInfo(int eax, int regs[4]) {
 }

 void getCpuInfoEx(int eax, int ecx, int regs[4]) {
-#if __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
  #if _WIN32
  __cpuidex(regs, eax, ecx);
 #else
@@ -41,7 +41,7 @@ void getCpuInfoEx(int eax, int ecx, int regs[4]) {
 }

 void detectCPU() {
-#if __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
  if (isCPUInit)
    return;