More reimplementations

2025-10-11 20:38:59 +00:00 · 2015-11-02 10:44:46 -08:00 · 2015-11-02 10:44:46 -08:00 · 40ca0c3219
commit 40ca0c3219
parent 9965f5846d
13 changed files with 208 additions and 68 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -7,7 +7,7 @@ endif()
 include_directories(include ${ATHENA_INCLUDE_DIR})

 if(NOT WIN32)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -std=c++14")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2 -std=c++14")
 endif()

 add_library(Math
@ -48,7 +48,8 @@ add_library(Math
    include/COBBox.hpp
    include/CLine.hpp
    include/CSphere.hpp
-    include/CUnitVector.hpp)
+    include/CUnitVector.hpp
+    include/CMRay.hpp)

 add_subdirectory(test)

--- a/include/CAABox.hpp
+++ b/include/CAABox.hpp
@ -6,6 +6,7 @@
 #include "CTransform.hpp"
 #include "CPlane.hpp"
 #include "CLine.hpp"
+#include "CSphere.hpp"
 #include "Math.hpp"
 #if ZE_ATHENA_TYPES
 #include <Athena/IStreamReader.hpp>
@ -62,16 +63,37 @@ public:
    }
 #if ZE_ATHENA_TYPES
    CAABox(Athena::io::IStreamReader& in) {readBoundingBox(in);}
-#endif
    
    inline void readBoundingBox(Athena::io::IStreamReader& in)
    {
-        m_min[0] = in.readFloat();
-        m_min[1] = in.readFloat();
-        m_min[2] = in.readFloat();
-        m_max[0] = in.readFloat();
-        m_max[1] = in.readFloat();
-        m_max[2] = in.readFloat();
+        m_min = CVector3f(in);
+        m_max = CVector3f(in);
+    }
+#endif
+
+    float distanceFromPointSquared(const CVector3f& other) const
+    {
+        float dist = 0;
+        for (int i = 0; i < 3; i++)
+        {
+            if (other[i] < m_min[i])
+            {
+                const float tmp = (m_min[i] - other[i]);
+                dist += tmp * tmp;
+            }
+            else if (other[i] > m_max[i])
+            {
+                const float tmp = (other[i] - m_max[i]);
+                dist += tmp * tmp;
+            }
+        }
+
+        return dist;
+    }
+
+    float distanceFromPoint(const CVector3f &other) const
+    {
+        return Math::sqrtF(distanceFromPointSquared(other));
    }
    
    inline bool intersects(const CAABox& other) const
@ -84,6 +106,10 @@ public:
        bool z2 = (m_min[2] > other.m_max[2]);
        return x1 && x2 && y1 && y2 && z1 && z2;
    }
+    bool intersects(const CSphere& other) const
+    {
+        return distanceFromPointSquared(other.position) <= other.radius * other.radius;
+    }

    inline bool inside(const CAABox& other) const
    {
@ -290,6 +316,7 @@ public:
        negZ.m_min = m_min;
    }

+
    inline bool invalid() {return (m_max.x < m_min.x || m_max.y < m_min.y || m_max.z < m_min.z);}
 };

--- a/include/CAxisAngle.hpp
+++ b/include/CAxisAngle.hpp
@ -12,8 +12,8 @@ struct alignas(16) CAxisAngle : CVector3f
    ZE_DECLARE_ALIGNED_ALLOCATOR();
    
    CAxisAngle() = default;
-    CAxisAngle(const CUnitVector3f& axis, float angle)
-        : CVector3f(axis * angle)
+    CAxisAngle(const CUnitVector3f& axis, float distance)
+        : CVector3f(distance * axis)
    {}

    CAxisAngle(const CVector3f& axisAngle)
--- a/include/CLine.hpp
+++ b/include/CLine.hpp
@ -17,8 +17,7 @@ public:
        start = a;
        if (ab.x != 0.0f || ab.y != 0.0f || ab.z != 0.0f)
            normal = ab;
-        else
-            normal = CVector3f::skZero;
+
        end = b;
    }

--- a/include/CSphere.hpp
+++ b/include/CSphere.hpp
@ -10,18 +10,20 @@ class alignas(16) CSphere
 public:
    ZE_DECLARE_ALIGNED_ALLOCATOR();

-    CSphere(const CVector3f& position, float radius) { vec = position; r = radius; }
-    inline CVector3f getSurfaceNormal(const CVector3f& coord) { return (vec - coord).normalized(); }
+    CSphere(const CVector3f& position, float radius)
+        : position(position), radius(radius) { }

-    union
+    inline CVector3f getSurfaceNormal(const CVector3f& coord)
+    { return (position - coord).normalized(); }
+
+    inline bool intersects(const CSphere& other)
    {
-        struct { float x, y, z, r; };
-        float s[4];
-        CVector3f vec;
-#if __SSE__
-        __m128 mVec128;
-#endif
-    };
+        float dist = (position - other.position).magnitude();
+        return dist < (radius + other.radius);
+    }
+
+    CVector3f position;
+    float radius;
 };
 }

--- a/include/CVector2f.hpp
+++ b/include/CVector2f.hpp
@ -222,12 +222,15 @@ class alignas(16) CVector2f
    }
    inline float dot(const CVector2f& rhs) const
    {
-#if __SSE4_1__
+#if __SSE__
        TVectorUnion result;
+#if __SSE4_1__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
            result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
            return result.v[0];
-#elif __SSE__
-        TVectorUnion result;
+        }
+#endif
        result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
        return result.v[0] + result.v[1];
 #else
@ -236,12 +239,15 @@ class alignas(16) CVector2f
    }
    inline float magSquared() const
    {
-#if __SSE4_1__
+#if __SSE__
        TVectorUnion result;
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
            result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
            return result.v[0];
-#elif __SSE__
-        TVectorUnion result;
+        }
+#endif
        result.mVec128 = _mm_mul_ps(mVec128, mVec128);
        return result.v[0] + result.v[1];
 #else
--- a/include/CVector3d.hpp
+++ b/include/CVector3d.hpp
@ -60,15 +60,16 @@ public:

    double magSquared() const
    {
-/*
-#if __SSE4_1__
-        TDblVectorUnion result;
-        result.mVec128 = _mm_dp_pd(mVec128, mVec128, 0x71);
-        return result.v[0];
-#elif __SSE__
-*/
 #if __SSE__
        TDblVectorUnion result;
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
+            result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x71);
+            result.mVec128[1] = _mm_dp_pd(mVec128[1], mVec128[1], 0x71);
+            return result.v[0] + result.v[2];
+        }
+#endif
        result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
        result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
        return result.v[0] + result.v[1] + result.v[2];
@ -83,15 +84,18 @@ public:

    double dot(const CVector3d& rhs) const
    {
-/*
-#if __SSE4_1__
-        TDblVectorUnion result;
-        result.mVec128 = _mm_dp_pd(mVec128, mVec128, 0x71);
-        return result.v[0];
-#elif __SSE__
-*/
+
 #if __SSE__
        TDblVectorUnion result;
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
+            result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x71);
+            result.mVec128[1] = _mm_dp_pd(mVec128[1], rhs.mVec128[1], 0x71);
+            return result.v[0] + result.v[2];
+        }
+#endif
+
        result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
        result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
        return result.v[0] + result.v[1] + result.v[2];
--- a/include/CVector3f.hpp
+++ b/include/CVector3f.hpp
@ -203,12 +203,15 @@ public:

    inline float dot(const CVector3f& rhs) const
    {
-#if __SSE4_1__
+#if __SSE__
        TVectorUnion result;
-        result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
+            result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
            return result.v[0];
-#elif __SSE__
-        TVectorUnion result;
+        }
+#endif
        result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
        return result.v[0] + result.v[1] + result.v[2];
 #else
@ -217,12 +220,16 @@ public:
    }
    inline float magSquared() const
    {
-#if __SSE4_1__
+#if __SSE__
        TVectorUnion result;
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
            result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
            return result.v[0];
-#elif __SSE__
-        TVectorUnion result;
+        }
+#endif
+
        result.mVec128 = _mm_mul_ps(mVec128, mVec128);
        return result.v[0] + result.v[1] + result.v[2];
 #else
--- a/include/CVector4f.hpp
+++ b/include/CVector4f.hpp
@ -240,12 +240,16 @@ class alignas(16) CVector4f

    inline float dot(const CVector4f& rhs) const
    {
-#if __SSE4_1__
+#if __SSE__
        TVectorUnion result;
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
            result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
            return result.v[0];
-#elif __SSE__
-        TVectorUnion result;
+        }
+#endif
+
        result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
        return result.v[0] + result.v[1] + result.v[2] + result.v[3];
 #else
@ -254,12 +258,15 @@ class alignas(16) CVector4f
    }
    inline float magSquared() const
    {
-#if __SSE4_1__
+#if __SSE__
        TVectorUnion result;
-        result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
+#if __SSE4_1__ || __SSE4_2__
+        if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
+        {
+            result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
            return result.v[0];
-#elif __SSE__
-        TVectorUnion result;
+        }
+#endif
        result.mVec128 = _mm_mul_ps(mVec128, mVec128);
        return result.v[0] + result.v[1] + result.v[2];
 #else
--- a/include/Math.hpp
+++ b/include/Math.hpp
@ -12,6 +12,25 @@

 namespace Zeus
 {
+struct CPUInfo
+{
+const char cpuBrand [32] = {0};
+const char cpuVendor[32] = {0};
+const bool isIntel       = false;
+const bool SSE1          = false;
+const bool SSE2          = false;
+const bool SSE3          = false;
+const bool SSSE3         = false;
+const bool SSE41         = false;
+const bool SSE42         = false;
+const bool SSE4a         = false;
+const bool AESNI         = false;
+};
+/**
+ * Detects CPU capabilities once and caches them; query the results (e.g. SSE4.1/SSE4.2) via cpuFeatures()
+ */
+void detectCPU();
+const CPUInfo cpuFeatures();
 class CVector3f;
 class CTransform;
 namespace Math
--- a/src/Math.cpp
+++ b/src/Math.cpp
@ -1,9 +1,67 @@
 #include "Math.hpp"
 #include "CTransform.hpp"
 #include "CVector3f.hpp"
+#include <cpuid.h>

 namespace Zeus
 {
+
+static CPUInfo g_cpuFeatures;
+
+void getCpuInfo(int level,
+              unsigned int* eax,
+              unsigned int* ebx,
+              unsigned int* ecx,
+              unsigned int* edx)
+{
+#if !GEKKO
+#if _WIN32
+    unsigned int regs[4];
+    __cpuid(regs, level);
+    *eax = regs[0];
+    *ebx = regs[1];
+    *ecx = regs[2];
+    *edx = regs[3];
+#else
+    __cpuid(level, *eax, *ebx, *ecx, *edx);
+#endif
+#endif
+}
+
+void detectCPU()
+{
+#if !GEKKO
+    static bool isInit = false;
+    if (isInit)
+        return;
+
+    unsigned int eax, ebx, ecx, edx;
+    getCpuInfo(0, &eax, &ebx, &ecx, &edx);
+    *reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = ebx;
+    *reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = edx;
+    *reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = ecx;
+    getCpuInfo(0x80000000, &eax, &ebx, &ecx, &edx);
+    *reinterpret_cast<int*>((char*)g_cpuFeatures.cpuBrand) = ebx;
+    *reinterpret_cast<int*>((char*)g_cpuFeatures.cpuBrand + 4) = edx;
+    *reinterpret_cast<int*>((char*)g_cpuFeatures.cpuBrand + 8) = ecx;
+    getCpuInfo(1, &eax, &ebx, &ecx, &edx);
+
+    memset((bool*)&g_cpuFeatures.AESNI, ((ecx & 0x02000000) != 0), 1);
+    memset((bool*)&g_cpuFeatures.SSE1,  ((edx & 0x02000000) != 0), 1);
+    memset((bool*)&g_cpuFeatures.SSE2,  ((edx & 0x04000000) != 0), 1);
+    memset((bool*)&g_cpuFeatures.SSE3,  ((ecx & 0x00000001) != 0), 1);
+    memset((bool*)&g_cpuFeatures.SSSE3, ((ecx & 0x00000200) != 0), 1);
+    memset((bool*)&g_cpuFeatures.SSE41, ((ecx & 0x00080000) != 0), 1);
+    memset((bool*)&g_cpuFeatures.SSE42, ((ecx & 0x00100000) != 0), 1);
+
+
+    isInit = true;
+#endif
+}
+
+
+const CPUInfo cpuFeatures() { return g_cpuFeatures; }
+
 namespace Math
 {
 const CVector3f kUpVec(0.0, 0.0, 1.0);
@ -249,4 +307,5 @@ CVector3f radToDeg(const CVector3f& rad) {return rad * kRadToDegVec;}
 CVector3f degToRad(const CVector3f& deg) {return deg * kDegToRadVec;}

 }
+
 }
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -7,4 +7,4 @@ add_executable(zeustest
    main.cpp)

 target_link_libraries(zeustest
-    Math)
+    Math GL)
--- a/test/main.cpp
+++ b/test/main.cpp
@ -13,6 +13,7 @@ union Color

 int main()
 {
+    Zeus::detectCPU();
    assert(!CAABox({100, 100, 100}, {100, 100, 100}).invalid());
    assert(CAABox().invalid());
    CVector3f vec{320, 632162.f, 800.f};
@ -37,6 +38,11 @@ int main()
    assert(test3.inside(test));
    assert(!test4.inside(test));

+    CAABox aabb({-1}, {1});
+    CSphere s1({0}, 1);
+    CSphere s2({1, 0, 0}, 1);
+    CSphere s3({3, 0, 0}, 1);
+
    std::cout << Math::min(1, 3) << std::endl;
    std::cout << Math::min(2, 1) << std::endl;
    std::cout << Math::max(1, 3) << std::endl;
@ -46,6 +52,9 @@ int main()
    std::cout << Math::powF(6.66663489, 2) << std::endl;
    std::cout << Math::invSqrtF(1) << std::endl;
    std::cout << Math::floorPowerOfTwo(256) << std::endl;
+    std::cout << " Test 1 " << ( aabb.intersects(s1) ? "succeeded" : "failed" ) << std::endl;
+    std::cout << " Test 2 " << ( aabb.intersects(s2) ? "succeeded" : "failed" ) << std::endl;
+    std::cout << " Test 3 " << ( aabb.intersects(s3) ? "succeeded" : "failed" ) << std::endl;
    CLine line({-89.120926, 59.328712, 3.265882}, CUnitVector3f({-90.120926, 59.328712, 3.265882}));

    CColor ctest1;