mirror of https://github.com/AxioDL/zeus.git
More reimplementations
This commit is contained in:
parent
9965f5846d
commit
40ca0c3219
|
@ -7,7 +7,7 @@ endif()
|
|||
include_directories(include ${ATHENA_INCLUDE_DIR})
|
||||
|
||||
if(NOT WIN32)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -std=c++14")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2 -std=c++14")
|
||||
endif()
|
||||
|
||||
add_library(Math
|
||||
|
@ -48,7 +48,8 @@ add_library(Math
|
|||
include/COBBox.hpp
|
||||
include/CLine.hpp
|
||||
include/CSphere.hpp
|
||||
include/CUnitVector.hpp)
|
||||
include/CUnitVector.hpp
|
||||
include/CMRay.hpp)
|
||||
|
||||
add_subdirectory(test)
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "CTransform.hpp"
|
||||
#include "CPlane.hpp"
|
||||
#include "CLine.hpp"
|
||||
#include "CSphere.hpp"
|
||||
#include "Math.hpp"
|
||||
#if ZE_ATHENA_TYPES
|
||||
#include <Athena/IStreamReader.hpp>
|
||||
|
@ -62,16 +63,37 @@ public:
|
|||
}
|
||||
#if ZE_ATHENA_TYPES
|
||||
CAABox(Athena::io::IStreamReader& in) {readBoundingBox(in);}
|
||||
#endif
|
||||
|
||||
inline void readBoundingBox(Athena::io::IStreamReader& in)
|
||||
{
|
||||
m_min[0] = in.readFloat();
|
||||
m_min[1] = in.readFloat();
|
||||
m_min[2] = in.readFloat();
|
||||
m_max[0] = in.readFloat();
|
||||
m_max[1] = in.readFloat();
|
||||
m_max[2] = in.readFloat();
|
||||
m_min = CVector3f(in);
|
||||
m_max = CVector3f(in);
|
||||
}
|
||||
#endif
|
||||
|
||||
float distanceFromPointSquared(const CVector3f& other) const
|
||||
{
|
||||
float dist = 0;
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
if (other[i] < m_min[i])
|
||||
{
|
||||
const float tmp = (m_min[i] - other[i]);
|
||||
dist += tmp * tmp;
|
||||
}
|
||||
else if (other[i] > m_max[i])
|
||||
{
|
||||
const float tmp = (other[i] - m_max[i]);
|
||||
dist += tmp * tmp;
|
||||
}
|
||||
}
|
||||
|
||||
return dist;
|
||||
}
|
||||
|
||||
float distanceFromPoint(const CVector3f &other) const
|
||||
{
|
||||
return Math::sqrtF(distanceFromPointSquared(other));
|
||||
}
|
||||
|
||||
inline bool intersects(const CAABox& other) const
|
||||
|
@ -84,6 +106,10 @@ public:
|
|||
bool z2 = (m_min[2] > other.m_max[2]);
|
||||
return x1 && x2 && y1 && y2 && z1 && z2;
|
||||
}
|
||||
bool intersects(const CSphere& other) const
|
||||
{
|
||||
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
|
||||
}
|
||||
|
||||
inline bool inside(const CAABox& other) const
|
||||
{
|
||||
|
@ -290,6 +316,7 @@ public:
|
|||
negZ.m_min = m_min;
|
||||
}
|
||||
|
||||
|
||||
inline bool invalid() {return (m_max.x < m_min.x || m_max.y < m_min.y || m_max.z < m_min.z);}
|
||||
};
|
||||
|
||||
|
|
|
@ -12,8 +12,8 @@ struct alignas(16) CAxisAngle : CVector3f
|
|||
ZE_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
CAxisAngle() = default;
|
||||
CAxisAngle(const CUnitVector3f& axis, float angle)
|
||||
: CVector3f(axis * angle)
|
||||
CAxisAngle(const CUnitVector3f& axis, float distance)
|
||||
: CVector3f(distance * axis)
|
||||
{}
|
||||
|
||||
CAxisAngle(const CVector3f& axisAngle)
|
||||
|
|
|
@ -17,8 +17,7 @@ public:
|
|||
start = a;
|
||||
if (ab.x != 0.0f || ab.y != 0.0f || ab.z != 0.0f)
|
||||
normal = ab;
|
||||
else
|
||||
normal = CVector3f::skZero;
|
||||
|
||||
end = b;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,18 +10,20 @@ class alignas(16) CSphere
|
|||
public:
|
||||
ZE_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
CSphere(const CVector3f& position, float radius) { vec = position; r = radius; }
|
||||
inline CVector3f getSurfaceNormal(const CVector3f& coord) { return (vec - coord).normalized(); }
|
||||
CSphere(const CVector3f& position, float radius)
|
||||
: position(position), radius(radius) { }
|
||||
|
||||
union
|
||||
inline CVector3f getSurfaceNormal(const CVector3f& coord)
|
||||
{ return (position - coord).normalized(); }
|
||||
|
||||
inline bool intersects(const CSphere& other)
|
||||
{
|
||||
struct { float x, y, z, r; };
|
||||
float s[4];
|
||||
CVector3f vec;
|
||||
#if __SSE__
|
||||
__m128 mVec128;
|
||||
#endif
|
||||
};
|
||||
float dist = (position - other.position).magnitude();
|
||||
return dist < (radius + other.radius);
|
||||
}
|
||||
|
||||
CVector3f position;
|
||||
float radius;
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -222,12 +222,15 @@ class alignas(16) CVector2f
|
|||
}
|
||||
inline float dot(const CVector2f& rhs) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
TVectorUnion result;
|
||||
}
|
||||
#endif
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1];
|
||||
#else
|
||||
|
@ -236,12 +239,15 @@ class alignas(16) CVector2f
|
|||
}
|
||||
inline float magSquared() const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
TVectorUnion result;
|
||||
}
|
||||
#endif
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1];
|
||||
#else
|
||||
|
|
|
@ -60,15 +60,16 @@ public:
|
|||
|
||||
double magSquared() const
|
||||
{
|
||||
/*
|
||||
#if __SSE4_1__
|
||||
TDblVectorUnion result;
|
||||
result.mVec128 = _mm_dp_pd(mVec128, mVec128, 0x71);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
*/
|
||||
#if __SSE__
|
||||
TDblVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x71);
|
||||
result.mVec128[1] = _mm_dp_pd(mVec128[1], mVec128[1], 0x71);
|
||||
return result.v[0] + result.v[2];
|
||||
}
|
||||
#endif
|
||||
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
|
||||
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
|
@ -83,15 +84,18 @@ public:
|
|||
|
||||
double dot(const CVector3d& rhs) const
|
||||
{
|
||||
/*
|
||||
#if __SSE4_1__
|
||||
TDblVectorUnion result;
|
||||
result.mVec128 = _mm_dp_pd(mVec128, mVec128, 0x71);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
*/
|
||||
|
||||
#if __SSE__
|
||||
TDblVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x71);
|
||||
result.mVec128[1] = _mm_dp_pd(mVec128[1], rhs.mVec128[1], 0x71);
|
||||
return result.v[0] + result.v[2];
|
||||
}
|
||||
#endif
|
||||
|
||||
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
|
||||
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
|
|
|
@ -203,12 +203,15 @@ public:
|
|||
|
||||
inline float dot(const CVector3f& rhs) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
TVectorUnion result;
|
||||
}
|
||||
#endif
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#else
|
||||
|
@ -217,12 +220,16 @@ public:
|
|||
}
|
||||
inline float magSquared() const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
TVectorUnion result;
|
||||
}
|
||||
#endif
|
||||
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#else
|
||||
|
|
|
@ -240,12 +240,16 @@ class alignas(16) CVector4f
|
|||
|
||||
inline float dot(const CVector4f& rhs) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
TVectorUnion result;
|
||||
}
|
||||
#endif
|
||||
|
||||
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
|
||||
#else
|
||||
|
@ -254,12 +258,15 @@ class alignas(16) CVector4f
|
|||
}
|
||||
inline float magSquared() const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
#if __SSE__
|
||||
TVectorUnion result;
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
|
||||
#if __SSE4_1__ || __SSE4_2__
|
||||
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
|
||||
{
|
||||
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
|
||||
return result.v[0];
|
||||
#elif __SSE__
|
||||
TVectorUnion result;
|
||||
}
|
||||
#endif
|
||||
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
|
||||
return result.v[0] + result.v[1] + result.v[2];
|
||||
#else
|
||||
|
|
|
@ -12,6 +12,25 @@
|
|||
|
||||
namespace Zeus
|
||||
{
|
||||
/**
 * CPU identification strings and instruction-set feature flags.
 *
 * Every field starts out zeroed / false and is filled in afterwards by
 * detectCPU(). The members are intentionally NOT const: writing to a
 * const-declared member after construction is undefined behaviour (the
 * compiler may assume the default value never changes), and const members
 * also make the struct non-assignable.
 */
struct CPUInfo
{
    char cpuBrand [32] = {0};   /* NUL-terminated brand text */
    char cpuVendor[32] = {0};   /* NUL-terminated vendor id */
    bool isIntel = false;
    bool SSE1    = false;
    bool SSE2    = false;
    bool SSE3    = false;
    bool SSSE3   = false;
    bool SSE41   = false;
    bool SSE42   = false;
    bool SSE4a   = false;
    bool AESNI   = false;
};
|
||||
/**
|
||||
* Detects CPU capabilities once and caches them; read the result with cpuFeatures()
|
||||
*/
|
||||
void detectCPU();
|
||||
const CPUInfo cpuFeatures();
|
||||
class CVector3f;
|
||||
class CTransform;
|
||||
namespace Math
|
||||
|
|
59
src/Math.cpp
59
src/Math.cpp
|
@ -1,9 +1,67 @@
|
|||
#include "Math.hpp"
|
||||
#include "CTransform.hpp"
|
||||
#include "CVector3f.hpp"
|
||||
#include <cpuid.h>
|
||||
|
||||
namespace Zeus
|
||||
{
|
||||
|
||||
static CPUInfo g_cpuFeatures;
|
||||
|
||||
void getCpuInfo(int level,
|
||||
unsigned int* eax,
|
||||
unsigned int* ebx,
|
||||
unsigned int* ecx,
|
||||
unsigned int* edx)
|
||||
{
|
||||
#if !GEKKO
|
||||
#if _WIN32
|
||||
unsigned int regs[4];
|
||||
__cpuid(regs, level);
|
||||
*eax = regs[0];
|
||||
*ebx = regs[1];
|
||||
*ecx = regs[2];
|
||||
*edx = regs[3];
|
||||
#else
|
||||
__cpuid(level, *eax, *ebx, *ecx, *edx);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void detectCPU()
|
||||
{
|
||||
#if !GEKKO
|
||||
static bool isInit = false;
|
||||
if (isInit)
|
||||
return;
|
||||
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
getCpuInfo(0, &eax, &ebx, &ecx, &edx);
|
||||
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = ebx;
|
||||
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = edx;
|
||||
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = ecx;
|
||||
getCpuInfo(0x80000000, &eax, &ebx, &ecx, &edx);
|
||||
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuBrand) = ebx;
|
||||
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuBrand + 4) = edx;
|
||||
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuBrand + 8) = ecx;
|
||||
getCpuInfo(1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
memset((bool*)&g_cpuFeatures.AESNI, ((ecx & 0x02000000) != 0), 1);
|
||||
memset((bool*)&g_cpuFeatures.SSE1, ((edx & 0x02000000) != 0), 1);
|
||||
memset((bool*)&g_cpuFeatures.SSE2, ((edx & 0x04000000) != 0), 1);
|
||||
memset((bool*)&g_cpuFeatures.SSE3, ((ecx & 0x00000001) != 0), 1);
|
||||
memset((bool*)&g_cpuFeatures.SSSE3, ((ecx & 0x00000200) != 0), 1);
|
||||
memset((bool*)&g_cpuFeatures.SSE41, ((ecx & 0x00080000) != 0), 1);
|
||||
memset((bool*)&g_cpuFeatures.SSE42, ((ecx & 0x00100000) != 0), 1);
|
||||
|
||||
|
||||
isInit = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**
 * Returns a copy of the file-level g_cpuFeatures record (the one
 * detectCPU() fills in).
 */
const CPUInfo cpuFeatures()
{
    const CPUInfo& detected = g_cpuFeatures;
    return detected;
}
|
||||
|
||||
namespace Math
|
||||
{
|
||||
const CVector3f kUpVec(0.0, 0.0, 1.0);
|
||||
|
@ -249,4 +307,5 @@ CVector3f radToDeg(const CVector3f& rad) {return rad * kRadToDegVec;}
|
|||
CVector3f degToRad(const CVector3f& deg) {return deg * kDegToRadVec;}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,4 +7,4 @@ add_executable(zeustest
|
|||
main.cpp)
|
||||
|
||||
target_link_libraries(zeustest
|
||||
Math)
|
||||
Math GL)
|
||||
|
|
|
@ -13,6 +13,7 @@ union Color
|
|||
|
||||
int main()
|
||||
{
|
||||
Zeus::detectCPU();
|
||||
assert(!CAABox({100, 100, 100}, {100, 100, 100}).invalid());
|
||||
assert(CAABox().invalid());
|
||||
CVector3f vec{320, 632162.f, 800.f};
|
||||
|
@ -37,6 +38,11 @@ int main()
|
|||
assert(test3.inside(test));
|
||||
assert(!test4.inside(test));
|
||||
|
||||
CAABox aabb({-1}, {1});
|
||||
CSphere s1({0}, 1);
|
||||
CSphere s2({1, 0, 0}, 1);
|
||||
CSphere s3({3, 0, 0}, 1);
|
||||
|
||||
std::cout << Math::min(1, 3) << std::endl;
|
||||
std::cout << Math::min(2, 1) << std::endl;
|
||||
std::cout << Math::max(1, 3) << std::endl;
|
||||
|
@ -46,6 +52,9 @@ int main()
|
|||
std::cout << Math::powF(6.66663489, 2) << std::endl;
|
||||
std::cout << Math::invSqrtF(1) << std::endl;
|
||||
std::cout << Math::floorPowerOfTwo(256) << std::endl;
|
||||
std::cout << " Test 1 " << ( aabb.intersects(s1) ? "succeeded" : "failed" ) << std::endl;
|
||||
std::cout << " Test 2 " << ( aabb.intersects(s2) ? "succeeded" : "failed" ) << std::endl;
|
||||
std::cout << " Test 3 " << ( aabb.intersects(s3) ? "succeeded" : "failed" ) << std::endl;
|
||||
CLine line({-89.120926, 59.328712, 3.265882}, CUnitVector3f({-90.120926, 59.328712, 3.265882}));
|
||||
|
||||
CColor ctest1;
|
||||
|
|
Loading…
Reference in New Issue