mirror of
https://github.com/AxioDL/zeus.git
synced 2025-12-08 21:17:49 +00:00
Compare commits
5 Commits
hsh
...
bb9b4c83af
| Author | SHA1 | Date | |
|---|---|---|---|
|
bb9b4c83af
|
|||
|
9130bf977e
|
|||
| b3806c03a5 | |||
| 3c4bcf37d2 | |||
| 9ea070c2d7 |
@@ -131,7 +131,7 @@ public:
|
|||||||
|
|
||||||
const CVector2f& operator/=(float rhs) {
|
const CVector2f& operator/=(float rhs) {
|
||||||
float oorhs = 1.f / rhs;
|
float oorhs = 1.f / rhs;
|
||||||
mSimd /= simd<float>(oorhs);
|
mSimd *= simd<float>(oorhs);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,7 +155,7 @@ public:
|
|||||||
|
|
||||||
[[nodiscard]] constexpr float magSquared() const { return mSimd.dot2(mSimd); }
|
[[nodiscard]] constexpr float magSquared() const { return mSimd.dot2(mSimd); }
|
||||||
|
|
||||||
[[nodiscard]] constexpr float magnitude() const { return std::sqrt(magSquared()); }
|
[[nodiscard]] float magnitude() const { return std::sqrt(magSquared()); }
|
||||||
|
|
||||||
constexpr void zeroOut() { mSimd = 0.f; }
|
constexpr void zeroOut() { mSimd = 0.f; }
|
||||||
|
|
||||||
@@ -173,9 +173,9 @@ public:
|
|||||||
|
|
||||||
[[nodiscard]] static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
|
[[nodiscard]] static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
|
||||||
|
|
||||||
[[nodiscard]] constexpr bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
|
[[nodiscard]] bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
|
||||||
|
|
||||||
[[nodiscard]] constexpr bool canBeNormalized() const {
|
[[nodiscard]] bool canBeNormalized() const {
|
||||||
if (std::isinf(x()) || std::isinf(y()))
|
if (std::isinf(x()) || std::isinf(y()))
|
||||||
return false;
|
return false;
|
||||||
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
|
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
|
||||||
@@ -188,7 +188,7 @@ public:
|
|||||||
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
|
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] constexpr simd<float>::reference operator[](size_t idx) {
|
[[nodiscard]] simd<float>::reference operator[](size_t idx) {
|
||||||
assert(idx < 2);
|
assert(idx < 2);
|
||||||
return mSimd[idx];
|
return mSimd[idx];
|
||||||
}
|
}
|
||||||
@@ -201,8 +201,8 @@ public:
|
|||||||
[[nodiscard]] constexpr float x() const { return mSimd[0]; }
|
[[nodiscard]] constexpr float x() const { return mSimd[0]; }
|
||||||
[[nodiscard]] constexpr float y() const { return mSimd[1]; }
|
[[nodiscard]] constexpr float y() const { return mSimd[1]; }
|
||||||
|
|
||||||
[[nodiscard]] constexpr simd<float>::reference x() { return mSimd[0]; }
|
[[nodiscard]] simd<float>::reference x() { return mSimd[0]; }
|
||||||
[[nodiscard]] constexpr simd<float>::reference y() { return mSimd[1]; }
|
[[nodiscard]] simd<float>::reference y() { return mSimd[1]; }
|
||||||
};
|
};
|
||||||
constexpr inline CVector2f skOne2f(1.f);
|
constexpr inline CVector2f skOne2f(1.f);
|
||||||
constexpr inline CVector2f skNegOne2f(-1.f);
|
constexpr inline CVector2f skNegOne2f(-1.f);
|
||||||
|
|||||||
@@ -139,6 +139,7 @@ public:
|
|||||||
[[nodiscard]] bool isNotInf() const { return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z())); }
|
[[nodiscard]] bool isNotInf() const { return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z())); }
|
||||||
|
|
||||||
[[nodiscard]] bool isMagnitudeSafe() const { return isNotInf() && magSquared() >= 9.9999994e-29; }
|
[[nodiscard]] bool isMagnitudeSafe() const { return isNotInf() && magSquared() >= 9.9999994e-29; }
|
||||||
|
[[nodiscard]] bool isNaN() const { return std::isnan(x()) || std::isnan(y()) || std::isnan(z()); }
|
||||||
|
|
||||||
void zeroOut() { mSimd.broadcast(0.f); }
|
void zeroOut() { mSimd.broadcast(0.f); }
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class __simd_storage<float, m128_abi> {
|
|||||||
public:
|
public:
|
||||||
using storage_type = float32x4_t;
|
using storage_type = float32x4_t;
|
||||||
storage_type __storage_{};
|
storage_type __storage_{};
|
||||||
[[nodiscard]] constexpr float __get(size_t __index) const noexcept { return __storage_[__index]; }
|
[[nodiscard]] float __get(size_t __index) const noexcept { return __storage_[__index]; }
|
||||||
inline void __set(size_t __index, float __val) noexcept { __storage_[__index] = __val; }
|
inline void __set(size_t __index, float __val) noexcept { __storage_[__index] = __val; }
|
||||||
constexpr __simd_storage(float a, float b, float c, float d) : __storage_{a, b, c, d} {}
|
constexpr __simd_storage(float a, float b, float c, float d) : __storage_{a, b, c, d} {}
|
||||||
constexpr void __set4(float a, float b, float c, float d) noexcept { __storage_ = storage_type{a, b, c, d}; }
|
constexpr void __set4(float a, float b, float c, float d) noexcept { __storage_ = storage_type{a, b, c, d}; }
|
||||||
@@ -35,12 +35,7 @@ public:
|
|||||||
}
|
}
|
||||||
template <int x, int y, int z, int w>
|
template <int x, int y, int z, int w>
|
||||||
[[nodiscard]] inline __simd_storage __shuffle() const noexcept {
|
[[nodiscard]] inline __simd_storage __shuffle() const noexcept {
|
||||||
storage_type ret;
|
return __simd_storage{__storage_[x], __storage_[y], __storage_[z], __storage_[w]};
|
||||||
ret = vmovq_n_f32(vgetq_lane_f32(__storage_, x));
|
|
||||||
ret = vsetq_lane_f32(vgetq_lane_f32(__storage_, y), ret, 1);
|
|
||||||
ret = vsetq_lane_f32(vgetq_lane_f32(__storage_, z), ret, 2);
|
|
||||||
ret = vsetq_lane_f32(vgetq_lane_f32(__storage_, w), ret, 3);
|
|
||||||
return __simd_storage(ret);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
|
inline void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
|
||||||
@@ -71,8 +66,7 @@ public:
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
|
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
|
||||||
return vreinterpretq_f32_s32(
|
return vnegq_f32(__s_.__storage_);
|
||||||
veorq_s32(vreinterpretq_s32_f32(__s_.__storage_), vreinterpretq_s32_f32(vdupq_n_f32(-0.f))));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline simd<float, m128_abi> operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
|
inline simd<float, m128_abi> operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
|
||||||
@@ -172,7 +166,7 @@ public:
|
|||||||
[[nodiscard]] inline double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
|
[[nodiscard]] inline double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
|
||||||
const vector_type mul1 = vmulq_f64(__storage_.val[0], other.__storage_.val[0]);
|
const vector_type mul1 = vmulq_f64(__storage_.val[0], other.__storage_.val[0]);
|
||||||
const vector_type mul2 = vmulq_f64(__storage_.val[1], other.__storage_.val[1]);
|
const vector_type mul2 = vmulq_f64(__storage_.val[1], other.__storage_.val[1]);
|
||||||
return vaddvq_f64(vcombine_f64(vcreate_f64(vaddvq_f64(mul1)), vget_low_f64(mul2)));
|
return vaddvq_f64(vcombine_f64(vdup_n_f64(vaddvq_f64(mul1)), vget_low_f64(mul2)));
|
||||||
}
|
}
|
||||||
[[nodiscard]] inline double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
|
[[nodiscard]] inline double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
|
||||||
const vector_type mul1 = vmulq_f64(__storage_.val[0], other.__storage_.val[0]);
|
const vector_type mul1 = vmulq_f64(__storage_.val[0], other.__storage_.val[0]);
|
||||||
@@ -215,8 +209,7 @@ template <>
|
|||||||
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
|
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
|
||||||
simd<double, m128d_abi> ret;
|
simd<double, m128d_abi> ret;
|
||||||
for (int i = 0; i < 2; ++i)
|
for (int i = 0; i < 2; ++i)
|
||||||
ret.__s_.__storage_.val[i] = vreinterpretq_f64_s64(
|
ret.__s_.__storage_.val[i] = vnegq_f64(__s_.__storage_.val[i]);
|
||||||
veorq_s64(vreinterpretq_s64_f64(__s_.__storage_.val[i]), vreinterpretq_s64_f64(vdupq_n_f64(-0.0))));
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -30,8 +30,25 @@ class __simd_storage<float, m128_abi> {
|
|||||||
public:
|
public:
|
||||||
using storage_type = __m128;
|
using storage_type = __m128;
|
||||||
storage_type __storage_{};
|
storage_type __storage_{};
|
||||||
[[nodiscard]] inline float __get(size_t __index) const noexcept { return __storage_[__index]; }
|
[[nodiscard]] inline float __get(size_t __index) const noexcept {
|
||||||
inline void __set(size_t __index, float __val) noexcept { __storage_[__index] = __val; }
|
#if _MSC_VER && !defined(__clang__)
|
||||||
|
alignas(16) std::array<float, 4> sse_data;
|
||||||
|
_mm_store_ps(sse_data.data(), __storage_);
|
||||||
|
return sse_data[__index];
|
||||||
|
#else
|
||||||
|
return __storage_[__index];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
inline void __set(size_t __index, float __val) noexcept {
|
||||||
|
#if _MSC_VER && !defined(__clang__)
|
||||||
|
alignas(16) std::array<float, 4> sse_data;
|
||||||
|
_mm_store_ps(sse_data.data(), __storage_);
|
||||||
|
sse_data[__index] = __val;
|
||||||
|
__storage_ = _mm_load_ps(sse_data.data());
|
||||||
|
#else
|
||||||
|
__storage_[__index] = __val;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
constexpr __simd_storage(float a, float b, float c, float d) : __storage_{a, b, c, d} {}
|
constexpr __simd_storage(float a, float b, float c, float d) : __storage_{a, b, c, d} {}
|
||||||
constexpr void __set4(float a, float b, float c, float d) noexcept { __storage_ = storage_type{a, b, c, d}; }
|
constexpr void __set4(float a, float b, float c, float d) noexcept { __storage_ = storage_type{a, b, c, d}; }
|
||||||
constexpr explicit __simd_storage(float rv) : __storage_{rv, rv, rv, rv} {}
|
constexpr explicit __simd_storage(float rv) : __storage_{rv, rv, rv, rv} {}
|
||||||
@@ -192,8 +209,25 @@ class __simd_storage<double, m128d_abi> {
|
|||||||
public:
|
public:
|
||||||
using storage_type = std::array<__m128d, 2>;
|
using storage_type = std::array<__m128d, 2>;
|
||||||
storage_type __storage_{};
|
storage_type __storage_{};
|
||||||
[[nodiscard]] inline double __get(size_t __index) const noexcept { return __storage_[__index / 2][__index % 2]; }
|
[[nodiscard]] inline double __get(size_t __index) const noexcept {
|
||||||
inline void __set(size_t __index, double __val) noexcept { __storage_[__index / 2][__index % 2] = __val; }
|
#if _MSC_VER && !defined(__clang__)
|
||||||
|
alignas(16) std::array<double, 2> sse_data;
|
||||||
|
_mm_store_pd(sse_data.data(), __storage_[__index / 2]);
|
||||||
|
return sse_data[__index % 2];
|
||||||
|
#else
|
||||||
|
return __storage_[__index / 2][__index % 2];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
inline void __set(size_t __index, double __val) noexcept {
|
||||||
|
#if _MSC_VER && !defined(__clang__)
|
||||||
|
alignas(16) std::array<double, 2> sse_data;
|
||||||
|
_mm_store_pd(sse_data.data(), __storage_[__index / 2]);
|
||||||
|
sse_data[__index % 2] = __val;
|
||||||
|
__storage_[__index / 2] = _mm_load_pd(sse_data.data());
|
||||||
|
#else
|
||||||
|
__storage_[__index / 2][__index % 2] = __val;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
// Make GCC happy
|
// Make GCC happy
|
||||||
static constexpr storage_type __make_array(__m128d a, __m128d b) { return {a, b}; }
|
static constexpr storage_type __make_array(__m128d a, __m128d b) { return {a, b}; }
|
||||||
constexpr __simd_storage(double a, double b, double c, double d) : __storage_(__make_array(__m128d{a, b}, __m128d{c, d})) {}
|
constexpr __simd_storage(double a, double b, double c, double d) : __storage_(__make_array(__m128d{a, b}, __m128d{c, d})) {}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ static CPUInfo g_cpuFeatures = {};
|
|||||||
static CPUInfo g_missingFeatures = {};
|
static CPUInfo g_missingFeatures = {};
|
||||||
|
|
||||||
void getCpuInfo(int eax, int regs[4]) {
|
void getCpuInfo(int eax, int regs[4]) {
|
||||||
#if __x86_64__
|
#if defined(__x86_64__) || defined(_M_X64)
|
||||||
#if _WIN32
|
#if _WIN32
|
||||||
__cpuid(regs, eax);
|
__cpuid(regs, eax);
|
||||||
#else
|
#else
|
||||||
@@ -31,7 +31,7 @@ void getCpuInfo(int eax, int regs[4]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void getCpuInfoEx(int eax, int ecx, int regs[4]) {
|
void getCpuInfoEx(int eax, int ecx, int regs[4]) {
|
||||||
#if __x86_64__
|
#if defined(__x86_64__) || defined(_M_X64)
|
||||||
#if _WIN32
|
#if _WIN32
|
||||||
__cpuidex(regs, eax, ecx);
|
__cpuidex(regs, eax, ecx);
|
||||||
#else
|
#else
|
||||||
@@ -41,7 +41,7 @@ void getCpuInfoEx(int eax, int ecx, int regs[4]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void detectCPU() {
|
void detectCPU() {
|
||||||
#if __x86_64__
|
#if defined(__x86_64__) || defined(_M_X64)
|
||||||
if (isCPUInit)
|
if (isCPUInit)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user