Complete CMath reimplementation

2025-08-19 18:31:31 +00:00 · 2015-10-12 01:46:45 -07:00 · 2015-10-12 01:46:45 -07:00 · e849476a7a
commit e849476a7a
parent 5e2b997266
3 changed files with 212 additions and 56 deletions
--- a/include/Math.hpp
+++ b/include/Math.hpp
@ -2,7 +2,7 @@
 #define MATH_HPP

 #define _USE_MATH_DEFINES 1
-#include <math.h>
+#include <cmath>
 #include "CVector3f.hpp"
 #include "CTransform.hpp"

@ -11,7 +11,12 @@ namespace Zeus
 namespace Math
 {
    template<typename T>
-    inline T clamp(T min, T val, T max) {return std::max(min, std::min(max, val));}
+    inline T min(T a, T b) { return a < b ? a : b; }
+    template<typename T>
+    inline T max(T a, T b) { return a > b ? a : b; }
+
+    template<typename T>
+    inline T clamp(T a, T val, T b) {return max<T>(a, min<T>(b, val));}
    inline float radToDeg(float rad) {return rad * 180.f / M_PI;}
    inline float degToRad(float deg) {return deg * M_PI / 180;}

@ -20,63 +25,44 @@ namespace Math
    inline CVector3f radToDeg(CVector3f rad) {return rad * kRadToDegVec;}
    inline CVector3f degToRad(CVector3f deg) {return deg * kDegToRadVec;}

-    // Since round(double) doesn't exist in some <cmath> implementations
-    // we'll define our own
-    inline float round(double val) { return (val < 0.0 ? ceil(val - 0.5) : floor(val + 0.5)); }
-
    extern const CVector3f kUpVec;
    CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up=kUpVec);
    inline CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary)
    { return bary.x * p0 + bary.y * p1 + bary.z * p2; }

    CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
-    float getCatmullRomSplinePoint(float p0, float p1,
-                                   float p2, float p3, float t);
-    CVector3f getCatmullRomSplinePoint(const CVector3f& p0, const CVector3f& p1,
-                                       const CVector3f& p2, const CVector3f& p3, float t);
+    float getCatmullRomSplinePoint(float a, float b,
+                                   float c, float d, float t);
+    CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b,
+                                       const CVector3f& c, const CVector3f& d, float t);

-    inline float slowCosineR(float val) { return float(cos(val));  }
-    inline float slowSineR(float val)   { return float(sin(val));  }
-    inline float arcSineR(float val)    { return float(asin(val)); }
-    inline float arcTangentR(float val) { return float(atan(val)); }
-    inline float fastArcCosR(float val)
+    inline float slowCosineR(float val)  { return float(cos(val));  }
+    inline float slowSineR(float val)    { return float(sin(val));  }
+    inline float slowTangentR(float val) { return float(tan(val)); }
+    inline float arcSineR(float val)     { return float(asin(val)); }
+    inline float arcTangentR(float val)  { return float(atan(val)); }
+    inline float arcCosineR(float val)      { return float(acos(val)); }
+    inline float powF(float a, float b)  { return float(exp(b * log(a))); }
+    inline float floorF(float val)       { return float(floor(val)); }
+    inline float ceilingF(float val)
    {
-        double f2 = fabs(val);
-        if (f2 <= 0.925000011920929)
-            return float(acos(val));
-
-        float f4 = val * val;
-        float f5 = 1.5707964f;
-        float f0 = -0.99822718f;
-        float f3 = -0.20586604f;
-        f5 = (val * f5) + f0;
-        f2 = 0.11425424f;
-        float f1 = val * f4;
-        f0 = -0.29697824f;
-        f5 = (f1 * f5) + f3;
-        f1 = (f1 * f4);
-        f5 = (f1 * f5) + f2;
-        f1 = (f1 * f4);
-        f5 = (f1 * f5) + f0;
-        return f5;
+        float tmp = floorF(val);
+        return (tmp == val ? tmp : tmp + 1.0);
    }

-    inline int floorPowerOfTwo(int x)
-    {
-        if (x == 0)
-            return 0;
-        /*
-         * we want to ensure that we always get the previous power,
-         * but if we have values like 256, we'll always get the same value,
-         * x-1 ensures that we always get the previous power.
-         */
-        x = (x - 1) | (x >> 1);
-        x = x | (x >> 2);
-        x = x | (x >> 4);
-        x = x | (x >> 8);
-        x = x | (x >> 16);
-        return x - (x >> 1);
-    }
+    // Since round(double) doesn't exist in some <cmath> implementations
+    // we'll define our own
+    inline double round(double val) { return (val < 0.0 ? ceilingF(val - 0.5) : ceilingF(val + 0.5)); }
+    inline double powD(float a, float b) { return exp(a * log(b)); }
+
+    double sqrtD(double val);
+    inline double invSqrtD(double val)  { return 1.0 / sqrtD(val); }
+    inline float invSqrtF(float val)    { return float(1.0 / sqrtD(val)); }
+    inline float sqrtF(float val)       { return float(sqrtD(val)); }
+    float fastArcCosR(float val);
+    float fastCosR(float val);
+    float fastSinR(float val);
+    int floorPowerOfTwo(int x);
 }
 }

--- a/src/Math.cpp
+++ b/src/Math.cpp
@ -1,5 +1,4 @@
 #include "Math.hpp"
-#include <x86intrin.h>

 namespace Zeus
 {
@ -35,14 +34,178 @@ CVector3f getBezierPoint(const CVector3f& p0, const CVector3f& p1, const CVector
    return ret;
 }

-float getCatmullRomSplinePoint(float p0, float p1, float p2, float p3, float t)
+double sqrtD(double val)
 {
-    return 0.0f;
+    if (val <= 0.0)
+    {
+        // Dunnno what retro is doing here,
+        // but this shouldn't come up anyway.
+        if (val != 0.0)
+            return 1.0 / (float)0x7FFFFFFF;
+        if (val == 0.0)
+            return 1.0 / (float)0x7F800000;
+    }
+    double q;
+#if __SSE__
+    __m128d splat { val };
+    q = _mm_sqrt_pd(splat)[0];
+#else
+    // le sigh, let's use Carmack's inverse square -.-
+    union
+    {
+        double v;
+        int i;
+    } p;
+
+    double x = val * 0.5F;
+    p.v = val;
+    p.i = 0x5fe6eb50c7b537a9 - (p.i >> 1);
+    p.v *= (1.5f - (x * p.v * p.v));
+    p.v *= (1.5f - (x * p.v * p.v));
+    q = p.v;
+#endif
+
+    static const double half = 0.5;
+    static const double three = 3.0;
+    double sq = q * q;
+    q = half * q;
+    sq = -((val * three) - sq);
+    q = q * sq;
+    sq = q * q;
+    q = q * q;
+    sq = -((val * three) - sq);
+    q = q * sq;
+    sq = q * q;
+    q = half * q;
+    sq = -((val * three) - sq);
+    q = q * sq;
+    sq = q * q;
+    q = half * q;
+    sq = -((val * three) - sq);
+    sq = q * sq;
+    q = val * sq;
+    return q;
 }

-CVector3f getCatmullRomSplinePoint(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& p3, float t)
+float fastArcCosR(float val)
 {
-    return CVector3f::skZero;
+    /* If we're not at a low enough value,
+     * the approximation below won't provide any benefit,
+     * and we simply fall back to the standard implementation
+     */
+    if (fabs(val) >= 0.925000011920929)
+        return float(acos(val));
+
+    /* Fast Arc Cosine approximation using Taylor Polynomials
+     * while this implementation is fast, it's also not as accurate.
+     * This is a straight reimplementation of Retro's CMath::FastArcCosR
+     * and as a result of the polynomials, it returns the inverse value,
+     * I'm not certain if this was intended originally, but we'll leave it
+     * in order to be as accurate as possible.
+     */
+    double mag = (val * val);
+    double a = ((val * 1.5707964f) + -0.99822718f);
+    double b = (val * mag);
+    a = ((b * a) + -0.20586604f);
+    b *= mag;
+    a = ((b * a) + 0.1142542f);
+    b *= mag;
+    return ((b * a) + -0.2969782f);
+}
+
+int floorPowerOfTwo(int x)
+{
+    if (x == 0)
+        return 0;
+    /*
+     * we want to ensure that we always get the previous power,
+     * but if we have values like 256, we'll always get the same value,
+     * x-1 ensures that we always get the previous power.
+     */
+    x = (x - 1) | (x >> 1);
+    x = x | (x >> 2);
+    x = x | (x >> 4);
+    x = x | (x >> 8);
+    x = x | (x >> 16);
+    return x - (x >> 1);
+}
+
+float fastCosR(float val)
+{
+    if (fabs(val) > M_PI)
+    {
+        float rVal = float(uint32_t(val));
+        val = -((rVal * val) - 6.2831855);
+        if (val <= M_PI && val < -M_PI)
+            val += 6.2831855;
+        else
+            val -= 6.2831855;
+    }
+
+    float sq = val * val;
+    float b = sq * sq;
+    val = sq + -0.4999803;
+    val = (b * val) + 0.041620344;
+    b = b * sq;
+    val = (b * val) + -0.0013636103;
+    b = b * sq;
+    val = (b * val) + 0.000020169435;
+    return val;
+}
+
+float fastSinR(float val)
+{
+    if (fabs(val) > M_PI)
+    {
+        float rVal = float(uint32_t(val));
+        val = -((rVal * val) - 6.2831855);
+        if (val <= M_PI && val < -M_PI)
+            val += 6.2831855;
+        else
+            val -= 6.2831855;
+    }
+
+    float sq = val * val;
+    float ret = val * 0.99980587;
+    val = val * sq;
+    ret = (val * ret) + -0.16621658;
+    val = val * sq;
+    ret = (val * ret) + 0.0080871079;
+    val = val * sq;
+    ret = (val * ret) + -0.00015297699;
+    return ret;
+}
+
+float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
+{
+    if (t <= 0.0f)
+        return b;
+    if (t >= 1.0)
+        return c;
+
+    const float t2 = t  * t;
+    const float t3 = t2 * t;
+
+    return (a * (-0.5f * t3 + t2 - 0.5f * t) +
+            b * ( 1.5f * t3 + -2.5f * t2 + 1.0f) +
+            c * (-1.5f * t3 +  2.0f * t2 + 0.5f * t) +
+            d * ( 0.5f * t3 -  0.5f * t2));
+}
+
+CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
+{
+    if (t <= 0.0f)
+        return b;
+    if (t >= 1.0)
+        return c;
+
+    const float t2 = t  * t;
+    const float t3 = t2 * t;
+
+    return (a * (-0.5f * t3 + t2 - 0.5f * t) +
+            b * ( 1.5f * t3 + -2.5f * t2 + 1.0f) +
+            c * (-1.5f * t3 +  2.0f * t2 + 0.5f * t) +
+            d * ( 0.5f * t3 -  0.5f * t2));
 }

 }
--- a/test/main.cpp
+++ b/test/main.cpp
@ -31,8 +31,15 @@ int main()
    assert(test3.inside(test));
    assert(!test4.inside(test));

-   std::cout << std::setprecision(16) << (double)Math::fastArcCosR(1.802073) << std::endl;
-   std::cout << Math::floorPowerOfTwo(256) << std::endl;
+    std::cout << Math::min(1, 3) << std::endl;
+    std::cout << Math::min(2, 1) << std::endl;
+    std::cout << Math::max(1, 3) << std::endl;
+    std::cout << Math::max(2, 1) << std::endl;
+    std::cout << Math::clamp(-50,  100, 50) << std::endl;
+    std::cout << Math::clamp(-50, -100, 50) << std::endl;
+    std::cout << Math::powF(6.66663489, 2) << std::endl;
+    std::cout << Math::invSqrtF(1) << std::endl;
+    std::cout << Math::floorPowerOfTwo(256) << std::endl;

    return 0;
 }