Add AVX intrinsics for CVector3d

This commit is contained in:
Jack Andersen 2017-12-18 17:02:59 -10:00
parent b29b181570
commit 692dc1adfb
6 changed files with 91 additions and 19 deletions

View File

@ -333,7 +333,12 @@ public:
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
inline bool canBeNormalized() const { return !isNormalized(); }
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON;
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }

View File

@ -1,6 +1,7 @@
#ifndef CVECTOR3D_HPP
#define CVECTOR3D_HPP
#include <athena/Types.hpp>
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
@ -8,12 +9,19 @@
namespace zeus
{
class alignas(16) CVector3d
class alignas(32) CVector3d
{
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
ZE_DECLARE_ALIGNED_ALLOCATOR32();
CVector3d() { zeroOut(); }
#if __SSE__
#if __AVX__
CVector3d(const __m256d& mVec256)
{
this->mVec256 = mVec256;
v[3] = 0.0;
}
#elif __SSE__
CVector3d(const __m128d mVec128[2])
{
this->mVec128[0] = mVec128[0];
@ -24,7 +32,9 @@ public:
#if ZE_ATHENA_TYPES
CVector3d(const atVec3d& vec)
{
#if __SSE__
#if __AVX__
mVec256 = vec.mVec256;
#elif __SSE__
mVec128[0] = vec.mVec128[0];
mVec128[1] = vec.mVec128[1];
#else
@ -37,20 +47,25 @@ public:
CVector3d(const CVector3f& vec)
{
#if __SSE__
#if __AVX__
mVec256 = _mm256_cvtps_pd(vec.mVec128);
#elif __SSE__
mVec128[0] = _mm_cvtps_pd(vec.mVec128);
v[2] = vec[2];
v[3] = 0.0;
#else
v[0] = vec[0];
v[1] = vec[1];
v[2] = vec[2];
v[3] = 0.0;
#endif
}
CVector3d(double x, double y, double z)
{
#if __SSE__
#if __AVX__
TDblVectorUnion splat{x, y, z, 0.0};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat{x, y, z, 0.0};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
@ -58,12 +73,17 @@ public:
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0;
#endif
}
CVector3f asCVector3f()
{
#if __AVX__
return CVector3f(_mm256_cvtpd_ps(mVec256));
#else
return CVector3f(float(x), float(y), float(z));
#endif
}
double magSquared() const
@ -115,7 +135,10 @@ public:
void splat(double xyz)
{
#if __SSE__
#if __AVX__
TDblVectorUnion splat = {xyz, xyz, xyz, 0.0};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat = {xyz, xyz, xyz, 0.0};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
@ -134,7 +157,9 @@ public:
inline CVector3d operator+(const CVector3d& rhs) const
{
#if __SSE__
#if __AVX__
return _mm256_add_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_add_pd(mVec128[0], rhs.mVec128[0]),
_mm_add_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
@ -144,7 +169,9 @@ public:
}
inline CVector3d operator-(const CVector3d& rhs) const
{
#if __SSE__
#if __AVX__
return _mm256_sub_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_sub_pd(mVec128[0], rhs.mVec128[0]),
_mm_sub_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
@ -154,7 +181,9 @@ public:
}
inline CVector3d operator*(const CVector3d& rhs) const
{
#if __SSE__
#if __AVX__
return _mm256_mul_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_mul_pd(mVec128[0], rhs.mVec128[0]),
_mm_mul_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
@ -164,7 +193,9 @@ public:
}
inline CVector3d operator/(const CVector3d& rhs) const
{
#if __SSE__
#if __AVX__
return _mm256_div_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_div_pd(mVec128[0], rhs.mVec128[0]),
_mm_div_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
@ -182,6 +213,9 @@ public:
double x, y, z;
};
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
@ -192,7 +226,10 @@ public:
static inline CVector3d operator+(double lhs, const CVector3d& rhs)
{
#if __SSE__
#if __AVX__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
return _mm256_add_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
splat.mVec128[0] = _mm_add_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_add_pd(splat.mVec128[1], rhs.mVec128[1]);
@ -204,7 +241,10 @@ static inline CVector3d operator+(double lhs, const CVector3d& rhs)
static inline CVector3d operator-(double lhs, const CVector3d& rhs)
{
#if __SSE__
#if __AVX__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
return _mm256_sub_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
splat.mVec128[0] = _mm_sub_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_sub_pd(splat.mVec128[1], rhs.mVec128[1]);
@ -216,7 +256,10 @@ static inline CVector3d operator-(double lhs, const CVector3d& rhs)
static inline CVector3d operator*(double lhs, const CVector3d& rhs)
{
#if __SSE__
#if __AVX__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
return _mm256_mul_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
splat.mVec128[0] = _mm_mul_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_mul_pd(splat.mVec128[1], rhs.mVec128[1]);
@ -228,7 +271,10 @@ static inline CVector3d operator*(double lhs, const CVector3d& rhs)
static inline CVector3d operator/(double lhs, const CVector3d& rhs)
{
#if __SSE__
#if __AVX__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
return _mm256_div_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{lhs, lhs, lhs, 0};
splat.mVec128[0] = _mm_div_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_div_pd(splat.mVec128[1], rhs.mVec128[1]);

View File

@ -338,7 +338,9 @@ public:
inline bool canBeNormalized() const
{
return (x < FLT_EPSILON || y < FLT_EPSILON || z < FLT_EPSILON);
if (std::isinf(x) || std::isinf(y) || std::isinf(z))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON;
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }

View File

@ -363,7 +363,12 @@ public:
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
inline bool canBeNormalized() const { return !isNormalized(); }
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON || std::fabs(w) >= FLT_EPSILON;
}
inline bool isEqu(const CVector4f& other, float epsilon = 1.1920929e-7f)
{

View File

@ -29,8 +29,19 @@
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#else
#define ZE_DECLARE_ALIGNED_ALLOCATOR() void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() void __unused__()
#endif
#if __SSE__

View File

@ -12,6 +12,9 @@ typedef union {
typedef union {
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif