diff --git a/include/athena/simd/parallelism_v2_simd.hpp b/include/athena/simd/parallelism_v2_simd.hpp index 5004a05..526624f 100644 --- a/include/athena/simd/parallelism_v2_simd.hpp +++ b/include/athena/simd/parallelism_v2_simd.hpp @@ -1274,9 +1274,10 @@ public: __s_.__broadcast(v); } #endif - simd(_Tp __rv) { __s_.__broadcast(__rv); } - simd(_Tp a, _Tp b, _Tp c = {}, _Tp d = {}) { __s_.__set4(a, b, c, d); } + constexpr simd(_Tp __rv) : __s_(__rv) {} + + constexpr simd(_Tp a, _Tp b, _Tp c = {}, _Tp d = {}) : __s_(a, b, c, d) {} // generator constructor template & other) const noexcept { alignas(32) std::array sse_data; diff --git a/include/athena/simd/simd_sse.hpp b/include/athena/simd/simd_sse.hpp index 5aa7e44..c5bc25f 100644 --- a/include/athena/simd/simd_sse.hpp +++ b/include/athena/simd/simd_sse.hpp @@ -41,7 +41,9 @@ public: sse_data[__index] = __val; __storage_ = _mm_load_ps(sse_data.data()); } + constexpr __simd_storage(float a, float b, float c, float d) : __storage_{a, b, c, d} {} void __set4(float a, float b, float c, float d) noexcept { __storage_ = _mm_set_ps(d, c, b, a); } + constexpr __simd_storage(float rv) : __storage_{rv, rv, rv, rv} {} void __broadcast(float __val) noexcept { __storage_ = _mm_set1_ps(__val); } float __dot2(const __simd_storage& other) const noexcept { #if __SSE4_1__ @@ -219,10 +221,12 @@ public: sse_data[__index % 2] = __val; __storage_[__index / 2] = _mm_load_pd(sse_data.data()); } + constexpr __simd_storage(double a, double b, double c, double d) : __storage_{__m128d{a, b}, __m128d{c, d}} {} void __set4(double a, double b, double c, double d) noexcept { __storage_[0] = _mm_set_pd(b, a); __storage_[1] = _mm_set_pd(d, c); } + constexpr __simd_storage(double rv) : __storage_{__m128d{rv, rv}, __m128d{rv, rv}} {} void __broadcast(double __val) noexcept { for (int i = 0; i < 2; ++i) __storage_[i] = _mm_set1_pd(__val);