From e9ec10a3823a3c734e70c99b4aa50ae2a7e7e81f Mon Sep 17 00:00:00 2001
From: Luke Street
Date: Wed, 3 Aug 2022 18:15:45 -0400
Subject: [PATCH] Add simd_none implementation

Add include/zeus/simd/simd_none.hpp, which defines unary minus, the
arithmetic operators (+ - * /), their compound-assignment forms and the
six comparison operators element by element for the 4-lane array-backed
simd types, and include it from the #else branch of simd.hpp that
follows the __ARM_NEON case.

Supporting changes:
- __simd_storage gains a broadcast constructor and a four-value
  constructor.
- The zeus_native declarations in that #else branch move from
  namespace simd_abi to zeus::_simd::simd_abi.
- CMatrix3f::transpose()/transposed() call operator float() explicitly
  on the right-hand element when swapping.
---
Reviewer note (ignored by git am): this patch was rebuilt from a copy in
which line breaks were collapsed and angle-bracket spans starting with a
letter were stripped. The template argument lists (<typename T>,
<float>, <double>, <float, m128_abi>, <double, m128d_abi>) and all
leading indentation are reconstructed, not verbatim; the author e-mail
on the From: line is missing. Verify against upstream commit e9ec10a
before applying.

 include/zeus/simd/parallelism_v2_simd.hpp |   3 +
 include/zeus/simd/simd.hpp                |   5 +-
 include/zeus/simd/simd_none.hpp           | 224 ++++++++++++++++++++++
 src/CMatrix3f.cpp                         |  12 +-
 4 files changed, 236 insertions(+), 8 deletions(-)
 create mode 100644 include/zeus/simd/simd_none.hpp

diff --git a/include/zeus/simd/parallelism_v2_simd.hpp b/include/zeus/simd/parallelism_v2_simd.hpp
index d19d438..e946a29 100644
--- a/include/zeus/simd/parallelism_v2_simd.hpp
+++ b/include/zeus/simd/parallelism_v2_simd.hpp
@@ -1477,6 +1477,9 @@ private:
   friend class simd_mask;
 
 public:
+  constexpr __simd_storage(_Tp __rv) : __storage_{__rv, __rv, __rv, __rv} {}
+  constexpr __simd_storage(_Tp a, _Tp b, _Tp c, _Tp d) : __storage_{a, b, c, d} {}
+
   constexpr _Tp __get(size_t __index) const noexcept { return __storage_[__index]; };
   constexpr void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; }
   constexpr std::enable_if_t<__num_element >= 4> __set4(float a, float b, float c, float d) noexcept {
diff --git a/include/zeus/simd/simd.hpp b/include/zeus/simd/simd.hpp
index a503b39..8570cc0 100644
--- a/include/zeus/simd/simd.hpp
+++ b/include/zeus/simd/simd.hpp
@@ -18,7 +18,7 @@ using namespace std;
 #elif __ARM_NEON
 #include "simd_neon.hpp"
 #else
-namespace simd_abi {
+namespace zeus::_simd::simd_abi {
 template <typename T>
 struct zeus_native {};
 template <>
@@ -29,7 +29,8 @@ template <>
 struct zeus_native<double> {
   using type = fixed_size<4>;
 };
-} // namespace simd_abi
+} // namespace zeus::_simd::simd_abi
+#include "simd_none.hpp"
 #endif
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
diff --git a/include/zeus/simd/simd_none.hpp b/include/zeus/simd/simd_none.hpp
new file mode 100644
index 0000000..7b48a27
--- /dev/null
+++ b/include/zeus/simd/simd_none.hpp
@@ -0,0 +1,224 @@
+#pragma once
+#ifndef _ZEUS_SIMD_INCLUDED
+#error simd_none.hpp must not be included directly. Include simd.hpp instead.
+#endif
+namespace zeus::_simd {
+using m128_abi = __simd_abi<_StorageKind::_Array, 4>;
+using m128d_abi = __simd_abi<_StorageKind::_Array, 4>;
+
+// m128 ABI
+template <>
+inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
+  return {-__s_.__storage_[0], -__s_.__storage_[1], -__s_.__storage_[2], -__s_.__storage_[3]};
+}
+
+inline simd<float, m128_abi> operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  return {a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]};
+}
+
+inline simd<float, m128_abi> operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  return {a[0] - b[0], a[1] - b[1], a[2] - b[2], a[3] - b[3]};
+}
+
+inline simd<float, m128_abi> operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  return {a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3]};
+}
+
+inline simd<float, m128_abi> operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  return {a[0] / b[0], a[1] / b[1], a[2] / b[2], a[3] / b[3]};
+}
+
+inline simd<float, m128_abi>& operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  return a;
+}
+
+inline simd<float, m128_abi>& operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  a[0] -= b[0];
+  a[1] -= b[1];
+  a[2] -= b[2];
+  a[3] -= b[3];
+  return a;
+}
+
+inline simd<float, m128_abi>& operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  a[0] *= b[0];
+  a[1] *= b[1];
+  a[2] *= b[2];
+  a[3] *= b[3];
+  return a;
+}
+
+inline simd<float, m128_abi>& operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  a[0] /= b[0];
+  a[1] /= b[1];
+  a[2] /= b[2];
+  a[3] /= b[3];
+  return a;
+}
+
+inline simd<float, m128_abi>::mask_type operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  simd<float, m128_abi>::mask_type ret;
+  ret[0] = a[0] == b[0];
+  ret[1] = a[1] == b[1];
+  ret[2] = a[2] == b[2];
+  ret[3] = a[3] == b[3];
+  return ret;
+}
+
+inline simd<float, m128_abi>::mask_type operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  simd<float, m128_abi>::mask_type ret;
+  ret[0] = a[0] != b[0];
+  ret[1] = a[1] != b[1];
+  ret[2] = a[2] != b[2];
+  ret[3] = a[3] != b[3];
+  return ret;
+}
+
+inline simd<float, m128_abi>::mask_type operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  simd<float, m128_abi>::mask_type ret;
+  ret[0] = a[0] >= b[0];
+  ret[1] = a[1] >= b[1];
+  ret[2] = a[2] >= b[2];
+  ret[3] = a[3] >= b[3];
+  return ret;
+}
+
+inline simd<float, m128_abi>::mask_type operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  simd<float, m128_abi>::mask_type ret;
+  ret[0] = a[0] <= b[0];
+  ret[1] = a[1] <= b[1];
+  ret[2] = a[2] <= b[2];
+  ret[3] = a[3] <= b[3];
+  return ret;
+}
+
+inline simd<float, m128_abi>::mask_type operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  simd<float, m128_abi>::mask_type ret;
+  ret[0] = a[0] > b[0];
+  ret[1] = a[1] > b[1];
+  ret[2] = a[2] > b[2];
+  ret[3] = a[3] > b[3];
+  return ret;
+}
+
+inline simd<float, m128_abi>::mask_type operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
+  simd<float, m128_abi>::mask_type ret;
+  ret[0] = a[0] < b[0];
+  ret[1] = a[1] < b[1];
+  ret[2] = a[2] < b[2];
+  ret[3] = a[3] < b[3];
+  return ret;
+}
+
+// m128d ABI
+template <>
+inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
+  return {-__s_.__storage_[0], -__s_.__storage_[1], -__s_.__storage_[2], -__s_.__storage_[3]};
+}
+
+inline simd<double, m128d_abi> operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  return {a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]};
+}
+
+inline simd<double, m128d_abi> operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  return {a[0] - b[0], a[1] - b[1], a[2] - b[2], a[3] - b[3]};
+}
+
+inline simd<double, m128d_abi> operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  return {a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3]};
+}
+
+inline simd<double, m128d_abi> operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  return {a[0] / b[0], a[1] / b[1], a[2] / b[2], a[3] / b[3]};
+}
+
+inline simd<double, m128d_abi>& operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  return a;
+}
+
+inline simd<double, m128d_abi>& operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  a[0] -= b[0];
+  a[1] -= b[1];
+  a[2] -= b[2];
+  a[3] -= b[3];
+  return a;
+}
+
+inline simd<double, m128d_abi>& operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  a[0] *= b[0];
+  a[1] *= b[1];
+  a[2] *= b[2];
+  a[3] *= b[3];
+  return a;
+}
+
+inline simd<double, m128d_abi>& operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  a[0] /= b[0];
+  a[1] /= b[1];
+  a[2] /= b[2];
+  a[3] /= b[3];
+  return a;
+}
+
+inline simd<double, m128d_abi>::mask_type operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  simd<double, m128d_abi>::mask_type ret;
+  ret[0] = a[0] == b[0];
+  ret[1] = a[1] == b[1];
+  ret[2] = a[2] == b[2];
+  ret[3] = a[3] == b[3];
+  return ret;
+}
+
+inline simd<double, m128d_abi>::mask_type operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  simd<double, m128d_abi>::mask_type ret;
+  ret[0] = a[0] != b[0];
+  ret[1] = a[1] != b[1];
+  ret[2] = a[2] != b[2];
+  ret[3] = a[3] != b[3];
+  return ret;
+}
+
+inline simd<double, m128d_abi>::mask_type operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  simd<double, m128d_abi>::mask_type ret;
+  ret[0] = a[0] >= b[0];
+  ret[1] = a[1] >= b[1];
+  ret[2] = a[2] >= b[2];
+  ret[3] = a[3] >= b[3];
+  return ret;
+}
+
+inline simd<double, m128d_abi>::mask_type operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  simd<double, m128d_abi>::mask_type ret;
+  ret[0] = a[0] <= b[0];
+  ret[1] = a[1] <= b[1];
+  ret[2] = a[2] <= b[2];
+  ret[3] = a[3] <= b[3];
+  return ret;
+}
+
+inline simd<double, m128d_abi>::mask_type operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  simd<double, m128d_abi>::mask_type ret;
+  ret[0] = a[0] > b[0];
+  ret[1] = a[1] > b[1];
+  ret[2] = a[2] > b[2];
+  ret[3] = a[3] > b[3];
+  return ret;
+}
+
+inline simd<double, m128d_abi>::mask_type operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
+  simd<double, m128d_abi>::mask_type ret;
+  ret[0] = a[0] < b[0];
+  ret[1] = a[1] < b[1];
+  ret[2] = a[2] < b[2];
+  ret[3] = a[3] < b[3];
+  return ret;
+}
+} // namespace zeus::_simd
diff --git a/src/CMatrix3f.cpp b/src/CMatrix3f.cpp
index 823cd30..441d84c 100644
--- a/src/CMatrix3f.cpp
+++ b/src/CMatrix3f.cpp
@@ -46,15 +46,15 @@ void CMatrix3f::transpose() {
   float tmp;
 
   tmp = m[0][1];
-  m[0][1] = m[1][0];
+  m[0][1] = m[1][0].operator float();
   m[1][0] = tmp;
 
   tmp = m[0][2];
-  m[0][2] = m[2][0];
+  m[0][2] = m[2][0].operator float();
   m[2][0] = tmp;
 
   tmp = m[1][2];
-  m[1][2] = m[2][1];
+  m[1][2] = m[2][1].operator float();
   m[2][1] = tmp;
 #endif
 }
@@ -80,15 +80,15 @@ CMatrix3f CMatrix3f::transposed() const {
   float tmp;
 
   tmp = ret.m[0][1];
-  ret.m[0][1] = ret.m[1][0];
+  ret.m[0][1] = ret.m[1][0].operator float();
   ret.m[1][0] = tmp;
 
   tmp = m[0][2];
-  ret.m[0][2] = ret.m[2][0];
+  ret.m[0][2] = ret.m[2][0].operator float();
   ret.m[2][0] = tmp;
 
   tmp = m[1][2];
-  ret.m[1][2] = ret.m[2][1];
+  ret.m[1][2] = ret.m[2][1].operator float();
   ret.m[2][1] = tmp;
 
   return ret;