More reimplementations

This commit is contained in:
Phillip Stephens 2015-11-02 10:44:46 -08:00
parent 9965f5846d
commit 40ca0c3219
13 changed files with 208 additions and 68 deletions

View File

@ -7,7 +7,7 @@ endif()
include_directories(include ${ATHENA_INCLUDE_DIR})
if(NOT WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -std=c++14")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2 -std=c++14")
endif()
add_library(Math
@ -48,7 +48,8 @@ add_library(Math
include/COBBox.hpp
include/CLine.hpp
include/CSphere.hpp
include/CUnitVector.hpp)
include/CUnitVector.hpp
include/CMRay.hpp)
add_subdirectory(test)

View File

@ -6,6 +6,7 @@
#include "CTransform.hpp"
#include "CPlane.hpp"
#include "CLine.hpp"
#include "CSphere.hpp"
#include "Math.hpp"
#if ZE_ATHENA_TYPES
#include <Athena/IStreamReader.hpp>
@ -62,16 +63,37 @@ public:
}
#if ZE_ATHENA_TYPES
CAABox(Athena::io::IStreamReader& in) {readBoundingBox(in);}
#endif
inline void readBoundingBox(Athena::io::IStreamReader& in)
{
m_min[0] = in.readFloat();
m_min[1] = in.readFloat();
m_min[2] = in.readFloat();
m_max[0] = in.readFloat();
m_max[1] = in.readFloat();
m_max[2] = in.readFloat();
m_min = CVector3f(in);
m_max = CVector3f(in);
}
#endif
/**
 * @brief Squared distance from a point to this box.
 *
 * For each of the three axes, the amount by which the point lies below
 * m_min or above m_max is squared and accumulated. Axes on which the point
 * lies within [m_min, m_max] contribute nothing, so a point inside the box
 * yields 0.
 *
 * @param other Point to measure from.
 * @return Sum of the squared per-axis overshoots.
 */
float distanceFromPointSquared(const CVector3f& other) const
{
    float accum = 0;
    for (int axis = 0; axis < 3; ++axis)
    {
        float overshoot;
        if (other[axis] < m_min[axis])
            overshoot = m_min[axis] - other[axis];   // point is below the box on this axis
        else if (other[axis] > m_max[axis])
            overshoot = other[axis] - m_max[axis];   // point is above the box on this axis
        else
            continue;                                // within the slab: no contribution
        accum += overshoot * overshoot;
    }
    return accum;
}
/**
 * @brief Distance from a point to this box.
 * @param other Point to measure from.
 * @return Math::sqrtF applied to distanceFromPointSquared(other).
 */
float distanceFromPoint(const CVector3f &other) const
{
    const float squared = distanceFromPointSquared(other);
    return Math::sqrtF(squared);
}
inline bool intersects(const CAABox& other) const
@ -84,6 +106,10 @@ public:
bool z2 = (m_min[2] > other.m_max[2]);
return x1 && x2 && y1 && y2 && z1 && z2;
}
/**
 * @brief Tests whether a sphere overlaps this box.
 *
 * Compares the squared distance from the sphere's centre to the box against
 * the squared radius, so no square root is needed. A sphere that exactly
 * touches the box counts as intersecting (the comparison is <=).
 *
 * @param other Sphere to test.
 * @return true when the centre is no farther from the box than the radius.
 */
bool intersects(const CSphere& other) const
{
    const float radiusSquared = other.radius * other.radius;
    return distanceFromPointSquared(other.position) <= radiusSquared;
}
inline bool inside(const CAABox& other) const
{
@ -290,6 +316,7 @@ public:
negZ.m_min = m_min;
}
inline bool invalid() {return (m_max.x < m_min.x || m_max.y < m_min.y || m_max.z < m_min.z);}
};

View File

@ -12,8 +12,8 @@ struct alignas(16) CAxisAngle : CVector3f
ZE_DECLARE_ALIGNED_ALLOCATOR();
CAxisAngle() = default;
CAxisAngle(const CUnitVector3f& axis, float angle)
: CVector3f(axis * angle)
CAxisAngle(const CUnitVector3f& axis, float distance)
: CVector3f(distance * axis)
{}
CAxisAngle(const CVector3f& axisAngle)

View File

@ -17,8 +17,7 @@ public:
start = a;
if (ab.x != 0.0f || ab.y != 0.0f || ab.z != 0.0f)
normal = ab;
else
normal = CVector3f::skZero;
end = b;
}

View File

@ -10,18 +10,20 @@ class alignas(16) CSphere
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CSphere(const CVector3f& position, float radius) { vec = position; r = radius; }
inline CVector3f getSurfaceNormal(const CVector3f& coord) { return (vec - coord).normalized(); }
CSphere(const CVector3f& position, float radius)
: position(position), radius(radius) { }
union
inline CVector3f getSurfaceNormal(const CVector3f& coord)
{ return (position - coord).normalized(); }
inline bool intersects(const CSphere& other)
{
struct { float x, y, z, r; };
float s[4];
CVector3f vec;
#if __SSE__
__m128 mVec128;
#endif
};
float dist = (position - other.position).magnitude();
return dist < (radius + other.radius);
}
CVector3f position;
float radius;
};
}

View File

@ -222,12 +222,15 @@ class alignas(16) CVector2f
}
inline float dot(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
TVectorUnion result;
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
#elif __SSE__
TVectorUnion result;
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
}
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1];
#else
@ -236,12 +239,15 @@ class alignas(16) CVector2f
}
inline float magSquared() const
{
#if __SSE4_1__
TVectorUnion result;
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
#elif __SSE__
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
}
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1];
#else

View File

@ -60,15 +60,16 @@ public:
double magSquared() const
{
/*
#if __SSE4_1__
TDblVectorUnion result;
result.mVec128 = _mm_dp_pd(mVec128, mVec128, 0x71);
return result.v[0];
#elif __SSE__
*/
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x71);
result.mVec128[1] = _mm_dp_pd(mVec128[1], mVec128[1], 0x71);
return result.v[0] + result.v[2];
}
#endif
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
@ -83,15 +84,18 @@ public:
double dot(const CVector3d& rhs) const
{
/*
#if __SSE4_1__
TDblVectorUnion result;
result.mVec128 = _mm_dp_pd(mVec128, mVec128, 0x71);
return result.v[0];
#elif __SSE__
*/
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x71);
result.mVec128[1] = _mm_dp_pd(mVec128[1], rhs.mVec128[1], 0x71);
return result.v[0] + result.v[2];
}
#endif
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];

View File

@ -203,12 +203,15 @@ public:
inline float dot(const CVector3f& rhs) const
{
#if __SSE4_1__
TVectorUnion result;
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#elif __SSE__
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
}
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#else
@ -217,12 +220,16 @@ public:
}
inline float magSquared() const
{
#if __SSE4_1__
TVectorUnion result;
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#elif __SSE__
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
}
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#else

View File

@ -240,12 +240,16 @@ class alignas(16) CVector4f
inline float dot(const CVector4f& rhs) const
{
#if __SSE4_1__
TVectorUnion result;
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#elif __SSE__
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
}
#endif
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#else
@ -254,12 +258,15 @@ class alignas(16) CVector4f
}
inline float magSquared() const
{
#if __SSE4_1__
TVectorUnion result;
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#elif __SSE__
#if __SSE__
TVectorUnion result;
#if __SSE4_1__ || __SSE4_2__
if (cpuFeatures().SSE41 || cpuFeatures().SSE42)
{
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
}
#endif
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#else

View File

@ -12,6 +12,25 @@
namespace Zeus
{
/**
 * Identification strings and SIMD feature flags of the host CPU.
 *
 * Every member defaults to "empty"/"not available". The members are
 * deliberately NOT const: the record is filled in at runtime after
 * construction, and writing to const-qualified members through casts is
 * undefined behaviour (the compiler is entitled to assume they keep their
 * default values). Read-only access for callers is provided by handing the
 * record out as a const object instead.
 */
struct CPUInfo
{
    /* NUL-terminated processor brand string; the CPUID brand string is up to 48 bytes */
    char cpuBrand [48] = {0};
    /* NUL-terminated vendor identification string (12 characters from CPUID leaf 0) */
    char cpuVendor[32] = {0};
    bool isIntel = false;
    bool SSE1 = false;
    bool SSE2 = false;
    bool SSE3 = false;
    bool SSSE3 = false;
    bool SSE41 = false;
    bool SSE42 = false;
    bool SSE4a = false;
    bool AESNI = false;
};
/**
 * Detects the host CPU's capabilities and caches them.
 * Returns nothing; query the cached result with cpuFeatures().
 */
void detectCPU();
const CPUInfo cpuFeatures();
class CVector3f;
class CTransform;
namespace Math

View File

@ -1,9 +1,67 @@
#include "Math.hpp"
#include "CTransform.hpp"
#include "CVector3f.hpp"
#include <cpuid.h>
#include <cstring>
namespace Zeus
{
static CPUInfo g_cpuFeatures;
/**
 * Executes CPUID for the given leaf and stores the four result registers.
 *
 * All four outputs are zeroed before the query, so callers always observe
 * defined values; on builds where CPUID is compiled out (GEKKO) they simply
 * stay zero, i.e. "no features reported".
 *
 * @param level CPUID leaf to query.
 * @param eax   Receives EAX; must not be null.
 * @param ebx   Receives EBX; must not be null.
 * @param ecx   Receives ECX; must not be null.
 * @param edx   Receives EDX; must not be null.
 */
void getCpuInfo(int level,
                unsigned int* eax,
                unsigned int* ebx,
                unsigned int* ecx,
                unsigned int* edx)
{
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
#if !GEKKO
#if _WIN32
    /* The two-argument __cpuid intrinsic takes int[4], not unsigned int[4] */
    int regs[4] = {0, 0, 0, 0};
    __cpuid(regs, level);
    *eax = static_cast<unsigned int>(regs[0]);
    *ebx = static_cast<unsigned int>(regs[1]);
    *ecx = static_cast<unsigned int>(regs[2]);
    *edx = static_cast<unsigned int>(regs[3]);
#else
    __cpuid(level, *eax, *ebx, *ecx, *edx);
#endif
#else
    (void)level;
#endif
}
/**
 * Queries CPUID once and caches the result in g_cpuFeatures: vendor string,
 * brand string, the isIntel flag and the SIMD/AES feature flags.
 *
 * Later calls return immediately. The function does nothing on GEKKO builds.
 * The init guard is a plain static bool, so the first call is not protected
 * against concurrent callers.
 */
void detectCPU()
{
#if !GEKKO
    static bool isInit = false;
    if (isInit)
        return;

    /* The record's members may be const-qualified in CPUInfo's declaration, so
     * every write goes through a const-stripping cast (as the previous
     * implementation did). NOTE: if the members really are const this is
     * formally undefined behaviour; the casts are no-ops once they are not. */
    char* const vendor = const_cast<char*>(g_cpuFeatures.cpuVendor);
    char* const brand = const_cast<char*>(g_cpuFeatures.cpuBrand);
    const auto setFlag = [](const bool& flag, bool value) { const_cast<bool&>(flag) = value; };

    unsigned int eax = 0;
    unsigned int ebx = 0;
    unsigned int ecx = 0;
    unsigned int edx = 0;

    /* Leaf 0: EAX = highest basic leaf, vendor string in EBX, EDX, ECX order */
    getCpuInfo(0, &eax, &ebx, &ecx, &edx);
    const unsigned int maxBasicLeaf = eax;
    const unsigned int vendorRegs[3] = {ebx, edx, ecx};
    static_assert(sizeof(g_cpuFeatures.cpuVendor) > sizeof(vendorRegs),
                  "cpuVendor must hold the 12-character vendor string plus a terminator");
    std::memcpy(vendor, vendorRegs, sizeof(vendorRegs));
    vendor[sizeof(vendorRegs)] = '\0';
    setFlag(g_cpuFeatures.isIntel, std::strcmp(vendor, "GenuineIntel") == 0);

    /* Leaf 0x80000000: EAX = highest extended leaf */
    getCpuInfo(static_cast<int>(0x80000000u), &eax, &ebx, &ecx, &edx);
    const unsigned int maxExtendedLeaf = eax;

    /* Leaf 0x80000001: ECX bit 6 reports SSE4a */
    if (maxExtendedLeaf >= 0x80000001u)
    {
        getCpuInfo(static_cast<int>(0x80000001u), &eax, &ebx, &ecx, &edx);
        setFlag(g_cpuFeatures.SSE4a, (ecx & 0x00000040) != 0);
    }

    /* Leaves 0x80000002..0x80000004: brand string, 16 bytes per leaf in
     * EAX, EBX, ECX, EDX order. Truncated to whatever cpuBrand can hold. */
    if (maxExtendedLeaf >= 0x80000004u)
    {
        unsigned int brandRegs[12] = {0};
        for (unsigned int i = 0; i < 3; ++i)
        {
            getCpuInfo(static_cast<int>(0x80000002u + i),
                       &brandRegs[i * 4 + 0], &brandRegs[i * 4 + 1],
                       &brandRegs[i * 4 + 2], &brandRegs[i * 4 + 3]);
        }
        const std::size_t capacity = sizeof(g_cpuFeatures.cpuBrand) - 1;
        const std::size_t count = sizeof(brandRegs) < capacity ? sizeof(brandRegs) : capacity;
        std::memcpy(brand, brandRegs, count);
        brand[count] = '\0';
    }

    /* Leaf 1: feature bits in ECX/EDX */
    if (maxBasicLeaf >= 1)
    {
        getCpuInfo(1, &eax, &ebx, &ecx, &edx);
        setFlag(g_cpuFeatures.AESNI, (ecx & 0x02000000) != 0);
        setFlag(g_cpuFeatures.SSE1,  (edx & 0x02000000) != 0);
        setFlag(g_cpuFeatures.SSE2,  (edx & 0x04000000) != 0);
        setFlag(g_cpuFeatures.SSE3,  (ecx & 0x00000001) != 0);
        setFlag(g_cpuFeatures.SSSE3, (ecx & 0x00000200) != 0);
        setFlag(g_cpuFeatures.SSE41, (ecx & 0x00080000) != 0);
        setFlag(g_cpuFeatures.SSE42, (ecx & 0x00100000) != 0);
    }

    isInit = true;
#endif
}
/**
 * Returns a copy of the cached CPU feature record (g_cpuFeatures).
 * The record only carries detected values after detectCPU() has filled it in.
 */
const CPUInfo cpuFeatures()
{
    return g_cpuFeatures;
}
namespace Math
{
const CVector3f kUpVec(0.0, 0.0, 1.0);
@ -249,4 +307,5 @@ CVector3f radToDeg(const CVector3f& rad) {return rad * kRadToDegVec;}
CVector3f degToRad(const CVector3f& deg) {return deg * kDegToRadVec;}
}
}

View File

@ -7,4 +7,4 @@ add_executable(zeustest
main.cpp)
target_link_libraries(zeustest
Math)
Math GL)

View File

@ -13,6 +13,7 @@ union Color
int main()
{
Zeus::detectCPU();
assert(!CAABox({100, 100, 100}, {100, 100, 100}).invalid());
assert(CAABox().invalid());
CVector3f vec{320, 632162.f, 800.f};
@ -37,6 +38,11 @@ int main()
assert(test3.inside(test));
assert(!test4.inside(test));
CAABox aabb({-1}, {1});
CSphere s1({0}, 1);
CSphere s2({1, 0, 0}, 1);
CSphere s3({3, 0, 0}, 1);
std::cout << Math::min(1, 3) << std::endl;
std::cout << Math::min(2, 1) << std::endl;
std::cout << Math::max(1, 3) << std::endl;
@ -46,6 +52,9 @@ int main()
std::cout << Math::powF(6.66663489, 2) << std::endl;
std::cout << Math::invSqrtF(1) << std::endl;
std::cout << Math::floorPowerOfTwo(256) << std::endl;
std::cout << " Test 1 " << ( aabb.intersects(s1) ? "succeeded" : "failed" ) << std::endl;
std::cout << " Test 2 " << ( aabb.intersects(s2) ? "succeeded" : "failed" ) << std::endl;
std::cout << " Test 3 " << ( aabb.intersects(s3) ? "succeeded" : "failed" ) << std::endl;
CLine line({-89.120926, 59.328712, 3.265882}, CUnitVector3f({-90.120926, 59.328712, 3.265882}));
CColor ctest1;