Complete CMath reimplementation

This commit is contained in:
Phillip Stephens 2015-10-12 01:46:45 -07:00
parent 5e2b997266
commit e849476a7a
3 changed files with 212 additions and 56 deletions

View File

@ -2,7 +2,7 @@
#define MATH_HPP
#define _USE_MATH_DEFINES 1
#include <math.h>
#include <cmath>
#include "CVector3f.hpp"
#include "CTransform.hpp"
@ -11,7 +11,12 @@ namespace Zeus
namespace Math
{
template<typename T>
inline T clamp(T min, T val, T max) {return std::max(min, std::min(max, val));}
inline T min(T a, T b) { return a < b ? a : b; }
template<typename T>
inline T max(T a, T b) { return a > b ? a : b; }
template<typename T>
inline T clamp(T a, T val, T b) {return max<T>(a, min<T>(b, val));}
inline float radToDeg(float rad) {return rad * 180.f / M_PI;}
inline float degToRad(float deg) {return deg * M_PI / 180;}
@ -20,63 +25,44 @@ namespace Math
inline CVector3f radToDeg(CVector3f rad) {return rad * kRadToDegVec;}
inline CVector3f degToRad(CVector3f deg) {return deg * kDegToRadVec;}
// Since round(double) doesn't exist in some <cmath> implementations
// we'll define our own
inline float round(double val) { return (val < 0.0 ? ceil(val - 0.5) : floor(val + 0.5)); }
extern const CVector3f kUpVec;
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up=kUpVec);
inline CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary)
{ return bary.x * p0 + bary.y * p1 + bary.z * p2; }
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
float getCatmullRomSplinePoint(float p0, float p1,
float p2, float p3, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& p0, const CVector3f& p1,
const CVector3f& p2, const CVector3f& p3, float t);
float getCatmullRomSplinePoint(float a, float b,
float c, float d, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b,
const CVector3f& c, const CVector3f& d, float t);
inline float slowCosineR(float val) { return float(cos(val)); }
inline float slowSineR(float val) { return float(sin(val)); }
inline float arcSineR(float val) { return float(asin(val)); }
inline float arcTangentR(float val) { return float(atan(val)); }
inline float fastArcCosR(float val)
inline float slowCosineR(float val) { return float(cos(val)); }
inline float slowSineR(float val) { return float(sin(val)); }
inline float slowTangentR(float val) { return float(tan(val)); }
inline float arcSineR(float val) { return float(asin(val)); }
inline float arcTangentR(float val) { return float(atan(val)); }
inline float arcCosineR(float val) { return float(acos(val)); }
inline float powF(float a, float b) { return float(exp(b * log(a))); }
inline float floorF(float val) { return float(floor(val)); }
inline float ceilingF(float val)
{
double f2 = fabs(val);
if (f2 <= 0.925000011920929)
return float(acos(val));
float f4 = val * val;
float f5 = 1.5707964f;
float f0 = -0.99822718f;
float f3 = -0.20586604f;
f5 = (val * f5) + f0;
f2 = 0.11425424f;
float f1 = val * f4;
f0 = -0.29697824f;
f5 = (f1 * f5) + f3;
f1 = (f1 * f4);
f5 = (f1 * f5) + f2;
f1 = (f1 * f4);
f5 = (f1 * f5) + f0;
return f5;
float tmp = floorF(val);
return (tmp == val ? tmp : tmp + 1.0);
}
inline int floorPowerOfTwo(int x)
{
if (x == 0)
return 0;
/*
* we want to ensure that we always get the previous power,
* but if we have values like 256, we'll always get the same value,
* x-1 ensures that we always get the previous power.
*/
x = (x - 1) | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x - (x >> 1);
}
// Since round(double) doesn't exist in some <cmath> implementations
// we'll define our own
inline double round(double val) { return (val < 0.0 ? ceilingF(val - 0.5) : ceilingF(val + 0.5)); }
inline double powD(float a, float b) { return exp(a * log(b)); }
double sqrtD(double val);
inline double invSqrtD(double val) { return 1.0 / sqrtD(val); }
inline float invSqrtF(float val) { return float(1.0 / sqrtD(val)); }
inline float sqrtF(float val) { return float(sqrtD(val)); }
float fastArcCosR(float val);
float fastCosR(float val);
float fastSinR(float val);
int floorPowerOfTwo(int x);
}
}

View File

@ -1,5 +1,4 @@
#include "Math.hpp"
#include <x86intrin.h>
namespace Zeus
{
@ -35,14 +34,178 @@ CVector3f getBezierPoint(const CVector3f& p0, const CVector3f& p1, const CVector
return ret;
}
float getCatmullRomSplinePoint(float p0, float p1, float p2, float p3, float t)
double sqrtD(double val)
{
return 0.0f;
if (val <= 0.0)
{
// Dunnno what retro is doing here,
// but this shouldn't come up anyway.
if (val != 0.0)
return 1.0 / (float)0x7FFFFFFF;
if (val == 0.0)
return 1.0 / (float)0x7F800000;
}
double q;
#if __SSE__
__m128d splat { val };
q = _mm_sqrt_pd(splat)[0];
#else
// le sigh, let's use Carmack's inverse square -.-
union
{
double v;
int i;
} p;
double x = val * 0.5F;
p.v = val;
p.i = 0x5fe6eb50c7b537a9 - (p.i >> 1);
p.v *= (1.5f - (x * p.v * p.v));
p.v *= (1.5f - (x * p.v * p.v));
q = p.v;
#endif
static const double half = 0.5;
static const double three = 3.0;
double sq = q * q;
q = half * q;
sq = -((val * three) - sq);
q = q * sq;
sq = q * q;
q = q * q;
sq = -((val * three) - sq);
q = q * sq;
sq = q * q;
q = half * q;
sq = -((val * three) - sq);
q = q * sq;
sq = q * q;
q = half * q;
sq = -((val * three) - sq);
sq = q * sq;
q = val * sq;
return q;
}
CVector3f getCatmullRomSplinePoint(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& p3, float t)
float fastArcCosR(float val)
{
return CVector3f::skZero;
/* If we're not at a low enough value,
* the approximation below won't provide any benefit,
* and we simply fall back to the standard implementation
*/
if (fabs(val) >= 0.925000011920929)
return float(acos(val));
/* Fast Arc Cosine approximation using Taylor Polynomials
* while this implementation is fast, it's also not as accurate.
* This is a straight reimplementation of Retro's CMath::FastArcCosR
* and as a result of the polynomials, it returns the inverse value,
* I'm not certain if this was intended originally, but we'll leave it
* in order to be as accurate as possible.
*/
double mag = (val * val);
double a = ((val * 1.5707964f) + -0.99822718f);
double b = (val * mag);
a = ((b * a) + -0.20586604f);
b *= mag;
a = ((b * a) + 0.1142542f);
b *= mag;
return ((b * a) + -0.2969782f);
}
int floorPowerOfTwo(int x)
{
if (x == 0)
return 0;
/*
* we want to ensure that we always get the previous power,
* but if we have values like 256, we'll always get the same value,
* x-1 ensures that we always get the previous power.
*/
x = (x - 1) | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x - (x >> 1);
}
float fastCosR(float val)
{
if (fabs(val) > M_PI)
{
float rVal = float(uint32_t(val));
val = -((rVal * val) - 6.2831855);
if (val <= M_PI && val < -M_PI)
val += 6.2831855;
else
val -= 6.2831855;
}
float sq = val * val;
float b = sq * sq;
val = sq + -0.4999803;
val = (b * val) + 0.041620344;
b = b * sq;
val = (b * val) + -0.0013636103;
b = b * sq;
val = (b * val) + 0.000020169435;
return val;
}
float fastSinR(float val)
{
if (fabs(val) > M_PI)
{
float rVal = float(uint32_t(val));
val = -((rVal * val) - 6.2831855);
if (val <= M_PI && val < -M_PI)
val += 6.2831855;
else
val -= 6.2831855;
}
float sq = val * val;
float ret = val * 0.99980587;
val = val * sq;
ret = (val * ret) + -0.16621658;
val = val * sq;
ret = (val * ret) + 0.0080871079;
val = val * sq;
ret = (val * ret) + -0.00015297699;
return ret;
}
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
{
if (t <= 0.0f)
return b;
if (t >= 1.0)
return c;
const float t2 = t * t;
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) +
b * ( 1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * ( 0.5f * t3 - 0.5f * t2));
}
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
{
if (t <= 0.0f)
return b;
if (t >= 1.0)
return c;
const float t2 = t * t;
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) +
b * ( 1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * ( 0.5f * t3 - 0.5f * t2));
}
}

View File

@ -31,8 +31,15 @@ int main()
assert(test3.inside(test));
assert(!test4.inside(test));
std::cout << std::setprecision(16) << (double)Math::fastArcCosR(1.802073) << std::endl;
std::cout << Math::floorPowerOfTwo(256) << std::endl;
std::cout << Math::min(1, 3) << std::endl;
std::cout << Math::min(2, 1) << std::endl;
std::cout << Math::max(1, 3) << std::endl;
std::cout << Math::max(2, 1) << std::endl;
std::cout << Math::clamp(-50, 100, 50) << std::endl;
std::cout << Math::clamp(-50, -100, 50) << std::endl;
std::cout << Math::powF(6.66663489, 2) << std::endl;
std::cout << Math::invSqrtF(1) << std::endl;
std::cout << Math::floorPowerOfTwo(256) << std::endl;
return 0;
}