Complete CMath reimplementation

2025-12-11 06:27:47 +00:00 · 2015-10-12 01:46:45 -07:00
parent 5e2b997266
commit e849476a7a
3 changed files with 212 additions and 56 deletions
--- a/include/Math.hpp
+++ b/include/Math.hpp
@@ -2,7 +2,7 @@
 #define MATH_HPP
 #define _USE_MATH_DEFINES 1
-#include <math.h>
+#include <cmath>
 #include "CVector3f.hpp"
 #include "CTransform.hpp"
@@ -11,7 +11,12 @@ namespace Zeus
 namespace Math
 {
    template<typename T>
-    inline T clamp(T min, T val, T max) {return std::max(min, std::min(max, val));}
+    inline T min(T a, T b) { return a < b ? a : b; }
    template<typename T>
    inline T max(T a, T b) { return a > b ? a : b; }
    template<typename T>
    inline T clamp(T a, T val, T b) {return max<T>(a, min<T>(b, val));}
    inline float radToDeg(float rad) {return rad * 180.f / M_PI;}
    inline float degToRad(float deg) {return deg * M_PI / 180;}
@@ -20,63 +25,44 @@ namespace Math
    inline CVector3f radToDeg(CVector3f rad) {return rad * kRadToDegVec;}
    inline CVector3f degToRad(CVector3f deg) {return deg * kDegToRadVec;}
    // Since round(double) doesn't exist in some <cmath> implementations
    // we'll define our own
    inline float round(double val) { return (val < 0.0 ? ceil(val - 0.5) : floor(val + 0.5)); }
    extern const CVector3f kUpVec;
    CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up=kUpVec);
    inline CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary)
    { return bary.x * p0 + bary.y * p1 + bary.z * p2; }
    CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
-    float getCatmullRomSplinePoint(float p0, float p1,
+    float getCatmullRomSplinePoint(float a, float b,
-                                   float p2, float p3, float t);
+                                   float c, float d, float t);
-    CVector3f getCatmullRomSplinePoint(const CVector3f& p0, const CVector3f& p1,
+    CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b,
-                                       const CVector3f& p2, const CVector3f& p3, float t);
+                                       const CVector3f& c, const CVector3f& d, float t);
-    inline float slowCosineR(float val) { return float(cos(val));  }
+    inline float slowCosineR(float val)  { return float(cos(val));  }
-    inline float slowSineR(float val)   { return float(sin(val));  }
+    inline float slowSineR(float val)    { return float(sin(val));  }
-    inline float arcSineR(float val)    { return float(asin(val)); }
+    inline float slowTangentR(float val) { return float(tan(val)); }
-    inline float arcTangentR(float val) { return float(atan(val)); }
+    inline float arcSineR(float val)     { return float(asin(val)); }
-    inline float fastArcCosR(float val)
+    inline float arcTangentR(float val)  { return float(atan(val)); }
    inline float arcCosineR(float val)      { return float(acos(val)); }
    inline float powF(float a, float b)  { return float(exp(b * log(a))); }
    inline float floorF(float val)       { return float(floor(val)); }
    inline float ceilingF(float val)
    {
-        double f2 = fabs(val);
+        float tmp = floorF(val);
-        if (f2 <= 0.925000011920929)
+        return (tmp == val ? tmp : tmp + 1.0);
            return float(acos(val));
        float f4 = val * val;
        float f5 = 1.5707964f;
        float f0 = -0.99822718f;
        float f3 = -0.20586604f;
        f5 = (val * f5) + f0;
        f2 = 0.11425424f;
        float f1 = val * f4;
        f0 = -0.29697824f;
        f5 = (f1 * f5) + f3;
        f1 = (f1 * f4);
        f5 = (f1 * f5) + f2;
        f1 = (f1 * f4);
        f5 = (f1 * f5) + f0;
        return f5;
    }
-    inline int floorPowerOfTwo(int x)
+    // Since round(double) doesn't exist in some <cmath> implementations
-    {
+    // we'll define our own
-        if (x == 0)
+    inline double round(double val) { return (val < 0.0 ? ceilingF(val - 0.5) : ceilingF(val + 0.5)); }
-            return 0;
+    inline double powD(float a, float b) { return exp(a * log(b)); }
-        /*
+
-         * we want to ensure that we always get the previous power,
+    double sqrtD(double val);
-         * but if we have values like 256, we'll always get the same value,
+    inline double invSqrtD(double val)  { return 1.0 / sqrtD(val); }
-         * x-1 ensures that we always get the previous power.
+    inline float invSqrtF(float val)    { return float(1.0 / sqrtD(val)); }
-         */
+    inline float sqrtF(float val)       { return float(sqrtD(val)); }
-        x = (x - 1) | (x >> 1);
+    float fastArcCosR(float val);
-        x = x | (x >> 2);
+    float fastCosR(float val);
-        x = x | (x >> 4);
+    float fastSinR(float val);
-        x = x | (x >> 8);
+    int floorPowerOfTwo(int x);
        x = x | (x >> 16);
        return x - (x >> 1);
    }
 }
 }
--- a/src/Math.cpp
+++ b/src/Math.cpp
@@ -1,5 +1,4 @@
 #include "Math.hpp"
 #include <x86intrin.h>
 namespace Zeus
 {
@@ -35,14 +34,178 @@ CVector3f getBezierPoint(const CVector3f& p0, const CVector3f& p1, const CVector
    return ret;
 }
-float getCatmullRomSplinePoint(float p0, float p1, float p2, float p3, float t)
+double sqrtD(double val)
 {
-    return 0.0f;
+    if (val <= 0.0)
    {
        // Dunnno what retro is doing here,
        // but this shouldn't come up anyway.
        if (val != 0.0)
            return 1.0 / (float)0x7FFFFFFF;
        if (val == 0.0)
            return 1.0 / (float)0x7F800000;
    }
    double q;
 #if __SSE__
    __m128d splat { val };
    q = _mm_sqrt_pd(splat)[0];
 #else
    // le sigh, let's use Carmack's inverse square -.-
    union
    {
        double v;
        int i;
    } p;
    double x = val * 0.5F;
    p.v = val;
    p.i = 0x5fe6eb50c7b537a9 - (p.i >> 1);
    p.v *= (1.5f - (x * p.v * p.v));
    p.v *= (1.5f - (x * p.v * p.v));
    q = p.v;
 #endif
    static const double half = 0.5;
    static const double three = 3.0;
    double sq = q * q;
    q = half * q;
    sq = -((val * three) - sq);
    q = q * sq;
    sq = q * q;
    q = q * q;
    sq = -((val * three) - sq);
    q = q * sq;
    sq = q * q;
    q = half * q;
    sq = -((val * three) - sq);
    q = q * sq;
    sq = q * q;
    q = half * q;
    sq = -((val * three) - sq);
    sq = q * sq;
    q = val * sq;
    return q;
 }
-CVector3f getCatmullRomSplinePoint(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& p3, float t)
+float fastArcCosR(float val)
 {
-    return CVector3f::skZero;
+    /* If we're not at a low enough value,
     * the approximation below won't provide any benefit,
     * and we simply fall back to the standard implementation
     */
    if (fabs(val) >= 0.925000011920929)
        return float(acos(val));
    /* Fast Arc Cosine approximation using Taylor Polynomials
     * while this implementation is fast, it's also not as accurate.
     * This is a straight reimplementation of Retro's CMath::FastArcCosR
     * and as a result of the polynomials, it returns the inverse value,
     * I'm not certain if this was intended originally, but we'll leave it
     * in order to be as accurate as possible.
     */
    double mag = (val * val);
    double a = ((val * 1.5707964f) + -0.99822718f);
    double b = (val * mag);
    a = ((b * a) + -0.20586604f);
    b *= mag;
    a = ((b * a) + 0.1142542f);
    b *= mag;
    return ((b * a) + -0.2969782f);
 }
 int floorPowerOfTwo(int x)
 {
    if (x == 0)
        return 0;
    /*
     * we want to ensure that we always get the previous power,
     * but if we have values like 256, we'll always get the same value,
     * x-1 ensures that we always get the previous power.
     */
    x = (x - 1) | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
    return x - (x >> 1);
 }
 float fastCosR(float val)
 {
    if (fabs(val) > M_PI)
    {
        float rVal = float(uint32_t(val));
        val = -((rVal * val) - 6.2831855);
        if (val <= M_PI && val < -M_PI)
            val += 6.2831855;
        else
            val -= 6.2831855;
    }
    float sq = val * val;
    float b = sq * sq;
    val = sq + -0.4999803;
    val = (b * val) + 0.041620344;
    b = b * sq;
    val = (b * val) + -0.0013636103;
    b = b * sq;
    val = (b * val) + 0.000020169435;
    return val;
 }
 float fastSinR(float val)
 {
    if (fabs(val) > M_PI)
    {
        float rVal = float(uint32_t(val));
        val = -((rVal * val) - 6.2831855);
        if (val <= M_PI && val < -M_PI)
            val += 6.2831855;
        else
            val -= 6.2831855;
    }
    float sq = val * val;
    float ret = val * 0.99980587;
    val = val * sq;
    ret = (val * ret) + -0.16621658;
    val = val * sq;
    ret = (val * ret) + 0.0080871079;
    val = val * sq;
    ret = (val * ret) + -0.00015297699;
    return ret;
 }
 float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
 {
    if (t <= 0.0f)
        return b;
    if (t >= 1.0)
        return c;
    const float t2 = t  * t;
    const float t3 = t2 * t;
    return (a * (-0.5f * t3 + t2 - 0.5f * t) +
            b * ( 1.5f * t3 + -2.5f * t2 + 1.0f) +
            c * (-1.5f * t3 +  2.0f * t2 + 0.5f * t) +
            d * ( 0.5f * t3 -  0.5f * t2));
 }
 CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
 {
    if (t <= 0.0f)
        return b;
    if (t >= 1.0)
        return c;
    const float t2 = t  * t;
    const float t3 = t2 * t;
    return (a * (-0.5f * t3 + t2 - 0.5f * t) +
            b * ( 1.5f * t3 + -2.5f * t2 + 1.0f) +
            c * (-1.5f * t3 +  2.0f * t2 + 0.5f * t) +
            d * ( 0.5f * t3 -  0.5f * t2));
 }
 }
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -31,8 +31,15 @@ int main()
    assert(test3.inside(test));
    assert(!test4.inside(test));
-   std::cout << std::setprecision(16) << (double)Math::fastArcCosR(1.802073) << std::endl;
+    std::cout << Math::min(1, 3) << std::endl;
-   std::cout << Math::floorPowerOfTwo(256) << std::endl;
+    std::cout << Math::min(2, 1) << std::endl;
    std::cout << Math::max(1, 3) << std::endl;
    std::cout << Math::max(2, 1) << std::endl;
    std::cout << Math::clamp(-50,  100, 50) << std::endl;
    std::cout << Math::clamp(-50, -100, 50) << std::endl;
    std::cout << Math::powF(6.66663489, 2) << std::endl;
    std::cout << Math::invSqrtF(1) << std::endl;
    std::cout << Math::floorPowerOfTwo(256) << std::endl;
    return 0;
 }