SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:16:50 -10:00
parent d881e58f62
commit e8dfecbb6e
49 changed files with 6047 additions and 4721 deletions

View File

@ -1,5 +1,5 @@
--- ---
IndentWidth: 4 IndentWidth: 2
ColumnLimit: 128 ColumnLimit: 128
UseTab: Never UseTab: Never
--- ---

View File

@ -40,7 +40,6 @@ add_library(zeus
include/zeus/CColor.hpp include/zeus/CColor.hpp
include/zeus/Global.hpp include/zeus/Global.hpp
include/zeus/zeus.hpp include/zeus/zeus.hpp
include/zeus/TVectorUnion.hpp
include/zeus/CVector2i.hpp include/zeus/CVector2i.hpp
include/zeus/CVector2f.hpp include/zeus/CVector2f.hpp
include/zeus/CVector3f.hpp include/zeus/CVector3f.hpp
@ -56,7 +55,11 @@ add_library(zeus
include/zeus/CSphere.hpp include/zeus/CSphere.hpp
include/zeus/CUnitVector.hpp include/zeus/CUnitVector.hpp
include/zeus/CMRay.hpp include/zeus/CMRay.hpp
include/zeus/CEulerAngles.hpp) include/zeus/CEulerAngles.hpp
include/zeus/simd/simd.hpp
include/zeus/simd/simd_sse.hpp
include/zeus/simd/simd_avx.hpp
include/zeus/simd/parallelism_v2_simd.hpp)
add_subdirectory(test) add_subdirectory(test)

View File

@ -6,19 +6,17 @@
#include "zeus/CLineSeg.hpp" #include "zeus/CLineSeg.hpp"
#include "zeus/CSphere.hpp" #include "zeus/CSphere.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include <athena/IStreamReader.hpp>
#endif #endif
namespace zeus namespace zeus {
{ class CAABox {
class alignas(16) CAABox
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); enum class EBoxEdgeId {
enum class EBoxEdgeId
{
Z0, Z0,
X0, X0,
Z1, Z1,
@ -33,8 +31,7 @@ public:
Y3 Y3
}; };
enum class EBoxFaceID enum class EBoxFaceID {
{
}; };
static const CAABox skInvertedBox; static const CAABox skInvertedBox;
@ -51,17 +48,17 @@ public:
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {} CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ) CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ) : min(minX, minY, minZ), max(maxX, maxY, maxZ) {
{
} }
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
inline void readBoundingBoxBig(athena::io::IStreamReader& in)
{ void readBoundingBoxBig(athena::io::IStreamReader& in) {
min.readBig(in); min.readBig(in);
max.readBig(in); max.readBig(in);
} }
static inline CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in)
{ static CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in) {
CAABox ret; CAABox ret;
ret.readBoundingBoxBig(in); ret.readBoundingBoxBig(in);
return ret; return ret;
@ -69,18 +66,13 @@ public:
#endif #endif
float distanceFromPointSquared(const CVector3f& other) const float distanceFromPointSquared(const CVector3f& other) const {
{
float dist = 0; float dist = 0;
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++) {
{ if (other[i] < min[i]) {
if (other[i] < min[i])
{
const float tmp = (min[i] - other[i]); const float tmp = (min[i] - other[i]);
dist += tmp * tmp; dist += tmp * tmp;
} } else if (other[i] > max[i]) {
else if (other[i] > max[i])
{
const float tmp = (other[i] - max[i]); const float tmp = (other[i] - max[i]);
dist += tmp * tmp; dist += tmp * tmp;
} }
@ -91,8 +83,7 @@ public:
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); } float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
inline bool intersects(const CAABox& other) const bool intersects(const CAABox& other) const {
{
bool x1 = (max[0] >= other.min[0]); bool x1 = (max[0] >= other.min[0]);
bool x2 = (min[0] <= other.max[0]); bool x2 = (min[0] <= other.max[0]);
bool y1 = (max[1] >= other.min[1]); bool y1 = (max[1] >= other.min[1]);
@ -102,41 +93,30 @@ public:
return x1 && x2 && y1 && y2 && z1 && z2; return x1 && x2 && y1 && y2 && z1 && z2;
} }
bool intersects(const CSphere& other) const bool intersects(const CSphere& other) const {
{
return distanceFromPointSquared(other.position) <= other.radius * other.radius; return distanceFromPointSquared(other.position) <= other.radius * other.radius;
} }
float intersectionRadius(const CSphere& other) const float intersectionRadius(const CSphere& other) const {
{
float dist = distanceFromPoint(other.position); float dist = distanceFromPoint(other.position);
return (dist < other.radius) ? dist : -1.f; return (dist < other.radius) ? dist : -1.f;
} }
inline CAABox booleanIntersection(const CAABox& other) const CAABox booleanIntersection(const CAABox& other) const {
{
CVector3f minVec = CVector3f::skZero; CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero; CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i) for (int i = 0; i < 3; ++i) {
{ if (min[i] <= other.min[i] && max[i] >= other.max[i]) {
if (min[i] <= other.min[i] && max[i] >= other.max[i])
{
minVec[i] = other.min[i]; minVec[i] = other.min[i];
maxVec[i] = other.max[i]; maxVec[i] = other.max[i];
} } else if (other.min[i] <= min[i] && other.max[i] >= max[i]) {
else if (other.min[i] <= min[i] && other.max[i] >= max[i])
{
minVec[i] = min[i]; minVec[i] = min[i];
maxVec[i] = max[i]; maxVec[i] = max[i];
} } else if (other.min[i] <= min[i] && other.max[i] >= min[i]) {
else if (other.min[i] <= min[i] && other.max[i] >= min[i])
{
minVec[i] = min[i]; minVec[i] = min[i];
maxVec[i] = other.max[i]; maxVec[i] = other.max[i];
} } else if (other.min[i] <= max[i] && other.max[i] >= max[i]) {
else if (other.min[i] <= max[i] && other.max[i] >= max[i])
{
minVec[i] = other.min[i]; minVec[i] = other.min[i];
maxVec[i] = max[i]; maxVec[i] = max[i];
} }
@ -145,75 +125,73 @@ public:
return {minVec, maxVec}; return {minVec, maxVec};
} }
inline bool inside(const CAABox& other) const bool inside(const CAABox& other) const {
{
bool x = min[0] >= other.min[0] && max[0] <= other.max[0]; bool x = min[0] >= other.min[0] && max[0] <= other.max[0];
bool y = min[1] >= other.min[1] && max[1] <= other.max[1]; bool y = min[1] >= other.min[1] && max[1] <= other.max[1];
bool z = min[2] >= other.min[2] && max[2] <= other.max[2]; bool z = min[2] >= other.min[2] && max[2] <= other.max[2];
return x && y && z; return x && y && z;
} }
inline bool insidePlane(const CPlane& plane) const bool insidePlane(const CPlane& plane) const {
{
CVector3f vmax; CVector3f vmax;
/* X axis */ /* X axis */
if (plane.a >= 0) if (plane.x() >= 0.f)
vmax[0] = max[0]; vmax[0] = max[0];
else else
vmax[0] = min[0]; vmax[0] = min[0];
/* Y axis */ /* Y axis */
if (plane.b >= 0) if (plane.y() >= 0.f)
vmax[1] = max[1]; vmax[1] = max[1];
else else
vmax[1] = min[1]; vmax[1] = min[1];
/* Z axis */ /* Z axis */
if (plane.c >= 0) if (plane.z() >= 0.f)
vmax[2] = max[2]; vmax[2] = max[2];
else else
vmax[2] = min[2]; vmax[2] = min[2];
return plane.vec.dot(vmax) + plane.d >= 0.f; return plane.normal().dot(vmax) + plane.d() >= 0.f;
} }
CVector3f center() const { return (min + max) * 0.5f; } CVector3f center() const { return (min + max) * 0.5f; }
CVector3f extents() const { return (max - min) * 0.5f; } CVector3f extents() const { return (max - min) * 0.5f; }
float volume() const { return (max.x - min.x) * (max.y - min.y) * (max.z - min.z); } float volume() const {
auto delta = max - min;
return delta.x() * delta.y() * delta.z();
}
inline CLineSeg getEdge(EBoxEdgeId id) const CLineSeg getEdge(EBoxEdgeId id) const {
{ switch (id) {
switch (id)
{
case EBoxEdgeId::Z0: case EBoxEdgeId::Z0:
default: default:
return CLineSeg({min.x, min.y, max.z}, {min.x, min.y, min.z}); return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), min.y(), min.z()});
case EBoxEdgeId::X0: case EBoxEdgeId::X0:
return CLineSeg({min.x, min.y, min.z}, {max.x, min.y, min.z}); return CLineSeg({min.x(), min.y(), min.z()}, {max.x(), min.y(), min.z()});
case EBoxEdgeId::Z1: case EBoxEdgeId::Z1:
return CLineSeg({max.x, min.y, min.z}, {max.x, min.y, max.z}); return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), min.y(), max.z()});
case EBoxEdgeId::X1: case EBoxEdgeId::X1:
return CLineSeg({max.x, min.y, max.z}, {min.x, min.y, max.z}); return CLineSeg({max.x(), min.y(), max.z()}, {min.x(), min.y(), max.z()});
case EBoxEdgeId::Z2: case EBoxEdgeId::Z2:
return CLineSeg({max.x, max.y, max.z}, {max.x, max.y, min.z}); return CLineSeg({max.x(), max.y(), max.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::X2: case EBoxEdgeId::X2:
return CLineSeg({max.x, max.y, min.z}, {min.x, max.y, min.z}); return CLineSeg({max.x(), max.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Z3: case EBoxEdgeId::Z3:
return CLineSeg({min.x, max.y, min.z}, {min.x, max.y, max.z}); return CLineSeg({min.x(), max.y(), min.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::X3: case EBoxEdgeId::X3:
return CLineSeg({min.x, max.y, max.z}, {max.x, max.y, max.z}); return CLineSeg({min.x(), max.y(), max.z()}, {max.x(), max.y(), max.z()});
case EBoxEdgeId::Y0: case EBoxEdgeId::Y0:
return CLineSeg({min.x, min.y, max.z}, {min.x, max.y, max.z}); return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::Y1: case EBoxEdgeId::Y1:
return CLineSeg({min.x, min.y, min.z}, {min.x, max.y, min.z}); return CLineSeg({min.x(), min.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Y2: case EBoxEdgeId::Y2:
return CLineSeg({max.x, min.y, min.z}, {max.x, max.y, min.z}); return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::Y3: case EBoxEdgeId::Y3:
return CLineSeg({max.x, min.y, max.z}, {max.x, max.y, max.z}); return CLineSeg({max.x(), min.y(), max.z()}, {max.x(), max.y(), max.z()});
} }
} }
inline CAABox getTransformedAABox(const CTransform& xfrm) const CAABox getTransformedAABox(const CTransform& xfrm) const {
{
CAABox box; CAABox box;
CVector3f point = xfrm * getPoint(0); CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point); box.accumulateBounds(point);
@ -234,97 +212,81 @@ public:
return box; return box;
} }
inline void accumulateBounds(const CVector3f& point) void accumulateBounds(const CVector3f& point) {
{ if (min.x() > point.x())
if (min.x > point.x) min.x() = point.x();
min.x = point.x; if (min.y() > point.y())
if (min.y > point.y) min.y() = point.y();
min.y = point.y; if (min.z() > point.z())
if (min.z > point.z) min.z() = point.z();
min.z = point.z; if (max.x() < point.x())
if (max.x < point.x) max.x() = point.x();
max.x = point.x; if (max.y() < point.y())
if (max.y < point.y) max.y() = point.y();
max.y = point.y; if (max.z() < point.z())
if (max.z < point.z) max.z() = point.z();
max.z = point.z;
} }
inline void accumulateBounds(const CAABox& other) void accumulateBounds(const CAABox& other) {
{
accumulateBounds(other.min); accumulateBounds(other.min);
accumulateBounds(other.max); accumulateBounds(other.max);
} }
inline bool pointInside(const CVector3f& other) const bool pointInside(const CVector3f& other) const {
{ return (min.x() <= other.x() && other.x() <= max.x() &&
return (min.x <= other.x && other.x <= max.x && min.y() <= other.y() && other.y() <= max.y() &&
min.y <= other.y && other.y <= max.y && min.z() <= other.z() && other.z() <= max.z());
min.z <= other.z && other.z <= max.z);
} }
inline CVector3f closestPointAlongVector(const CVector3f& other) const CVector3f closestPointAlongVector(const CVector3f& other) const {
{ return {(other.x() >= 0.f ? min.x() : max.x()),
return {(other.x >= 0.f ? min.x : max.x), (other.y() >= 0.f ? min.y() : max.y()),
(other.y >= 0.f ? min.y : max.y), (other.z() >= 0.f ? min.z() : max.z())};
(other.z >= 0.f ? min.z : max.z)};
} }
inline CVector3f furthestPointAlongVector(const CVector3f& other) const CVector3f furthestPointAlongVector(const CVector3f& other) const {
{ return {(other.x() >= 0.f ? max.x() : min.x()),
return {(other.x >= 0.f ? max.x : min.x), (other.y() >= 0.f ? max.y() : min.y()),
(other.y >= 0.f ? max.y : min.y), (other.z() >= 0.f ? max.z() : min.z())};
(other.z >= 0.f ? max.z : min.z)};
} }
inline float distanceBetween(const CAABox& other) float distanceBetween(const CAABox& other) {
{
int intersects = 0; int intersects = 0;
if (max.x >= other.min.x && min.x <= other.max.x) if (max.x() >= other.min.x() && min.x() <= other.max.x())
intersects |= 0x1; intersects |= 0x1;
if (max.y >= other.min.y && min.y <= other.max.y) if (max.y() >= other.min.y() && min.y() <= other.max.y())
intersects |= 0x2; intersects |= 0x2;
if (max.z >= other.min.z && min.z <= other.max.z) if (max.z() >= other.min.z() && min.z() <= other.max.z())
intersects |= 0x4; intersects |= 0x4;
float minX, maxX; float minX, maxX;
if (max.x < other.min.x) if (max.x() < other.min.x()) {
{ minX = max.x();
minX = max.x; maxX = other.min.x();
maxX = other.min.x; } else {
} minX = min.x();
else maxX = other.max.x();
{
minX = min.x;
maxX = other.max.x;
} }
float minY, maxY; float minY, maxY;
if (max.y < other.min.y) if (max.y() < other.min.y()) {
{ minY = max.y();
minY = max.y; maxY = other.min.y();
maxY = other.min.y; } else {
} minY = min.y();
else maxY = other.max.y();
{
minY = min.y;
maxY = other.max.y;
} }
float minZ, maxZ; float minZ, maxZ;
if (max.z < other.min.z) if (max.z() < other.min.z()) {
{ minZ = max.z();
minZ = max.z; maxZ = other.min.z();
maxZ = other.min.z; } else {
} minZ = min.z();
else maxZ = other.max.z();
{
minZ = min.z;
maxZ = other.max.z;
} }
switch (intersects) switch (intersects) {
{
case 0: case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude(); return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1: case 1:
@ -345,72 +307,65 @@ public:
} }
} }
inline CVector3f getPoint(const int point) const CVector3f getPoint(const int point) const {
{
const CVector3f* vecs = &min; const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x, vecs[(point & 2) != 0].y, vecs[(point & 4) != 0].z); return CVector3f(vecs[(point & 1) != 0].x(), vecs[(point & 2) != 0].y(), vecs[(point & 4) != 0].z());
} }
inline CVector3f clampToBox(const CVector3f& vec) CVector3f clampToBox(const CVector3f& vec) const {
{
CVector3f ret = vec; CVector3f ret = vec;
clamp(min.x, ret.x, max.x); ret.x() = clamp(min.x(), float(ret.x()), max.x());
clamp(min.y, ret.y, max.y); ret.y() = clamp(min.y(), float(ret.y()), max.y());
clamp(min.z, ret.z, max.z); ret.z() = clamp(min.z(), float(ret.z()), max.z());
return ret; return ret;
} }
inline void splitX(CAABox& negX, CAABox& posX) const void splitX(CAABox& negX, CAABox& posX) const {
{ float midX = (max.x() - min.x()) * .5f + min.x();
float midX = (max.x - min.x) * .5f + min.x;
posX.max = max; posX.max = max;
posX.min = min; posX.min = min;
posX.min.x = midX; posX.min.x() = midX;
negX.max = max; negX.max = max;
negX.max.x = midX; negX.max.x() = midX;
negX.min = min; negX.min = min;
} }
inline void splitY(CAABox& negY, CAABox& posY) const void splitY(CAABox& negY, CAABox& posY) const {
{ float midY = (max.y() - min.y()) * .5f + min.y();
float midY = (max.y - min.y) * .5f + min.y;
posY.max = max; posY.max = max;
posY.min = min; posY.min = min;
posY.min.y = midY; posY.min.y() = midY;
negY.max = max; negY.max = max;
negY.max.y = midY; negY.max.y() = midY;
negY.min = min; negY.min = min;
} }
inline void splitZ(CAABox& negZ, CAABox& posZ) const void splitZ(CAABox& negZ, CAABox& posZ) const {
{ float midZ = (max.z() - min.z()) * .5f + min.z();
float midZ = (max.z - min.z) * .5f + min.z;
posZ.max = max; posZ.max = max;
posZ.min = min; posZ.min = min;
posZ.min.z = midZ; posZ.min.z() = midZ;
negZ.max = max; negZ.max = max;
negZ.max.z = midZ; negZ.max.z() = midZ;
negZ.min = min; negZ.min = min;
} }
inline bool invalid() { return (max.x < min.x || max.y < min.y || max.z < min.z); } bool invalid() { return (max.x() < min.x() || max.y() < min.y() || max.z() < min.z()); }
inline float operator[](size_t idx) const float operator[](size_t idx) const {
{
assert(idx < 6); assert(idx < 6);
if (idx < 3) if (idx < 3)
return min[idx]; return min[idx];
else else
return max[idx-3]; return max[idx - 3];
} }
}; };
inline bool operator==(const CAABox& left, const CAABox& right) inline bool operator==(const CAABox& left, const CAABox& right) {
{
return (left.min == right.min && left.max == right.max); return (left.min == right.min && left.max == right.max);
} }
inline bool operator!=(const CAABox& left, const CAABox& right)
{ inline bool operator!=(const CAABox& left, const CAABox& right) {
return (left.min != right.min || left.max != right.max); return (left.min != right.min || left.max != right.max);
} }
} }

View File

@ -4,21 +4,14 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "CUnitVector.hpp" #include "CUnitVector.hpp"
namespace zeus namespace zeus {
{ struct CAxisAngle : CVector3f {
struct alignas(16) CAxisAngle : CVector3f
{
ZE_DECLARE_ALIGNED_ALLOCATOR();
CAxisAngle() = default; CAxisAngle() = default;
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {} CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {} CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {} CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
float angle() const { return magnitude(); } float angle() const { return magnitude(); }
const CVector3f& getVector() const { return *this; } const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity; static const CAxisAngle sIdentity;
}; };
} }

View File

@ -2,11 +2,15 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "TVectorUnion.hpp" #include "CVector4f.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/FileReader.hpp>
#include <athena/FileWriter.hpp> #include "athena/FileReader.hpp"
#include "athena/FileWriter.hpp"
#endif #endif
#include <iostream> #include <iostream>
#include <cassert> #include <cassert>
@ -20,15 +24,13 @@
#define COLOR(rgba) rgba #define COLOR(rgba) rgba
#endif #endif
namespace zeus namespace zeus {
{
typedef uint8_t Comp8; typedef uint8_t Comp8;
typedef uint32_t Comp32; typedef uint32_t Comp32;
constexpr float OneOver255 = 1.f / 255.f; constexpr float OneOver255 = 1.f / 255.f;
typedef union { typedef union {
struct struct {
{
Comp8 r, g, b, a; Comp8 r, g, b, a;
}; };
Comp32 rgba; Comp32 rgba;
@ -36,11 +38,9 @@ typedef union {
class CVector4f; class CVector4f;
class alignas(16) CColor class CColor {
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); simd<float> mSimd;
static const CColor skRed; static const CColor skRed;
static const CColor skBlack; static const CColor skBlack;
static const CColor skBlue; static const CColor skBlue;
@ -52,285 +52,210 @@ public:
static const CColor skWhite; static const CColor skWhite;
static const CColor skClear; static const CColor skClear;
#if __SSE__ CColor() : mSimd(1.f) {}
CColor(const __m128& mVec128) : mVec128(mVec128) {}
#endif
CColor() : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
CColor(float rgb, float a = 1.0) { splat(rgb, a); } CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor(float r, float g, float b, float a = 1.0f)
{ CColor(float r, float g, float b, float a = 1.0f) : mSimd(r, g, b, a) {}
v[0] = r;
v[1] = g;
v[2] = b;
v[3] = a;
}
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CColor(const atVec4f& vec)
#if __SSE__ || __GEKKO_PS__ CColor(const atVec4f& vec) : mSimd(vec.simd) {}
: mVec128(vec.mVec128)
{
}
#else
{
r = vec.vec[0], g = vec.vec[1], b = vec.vec[2], a = vec.vec[3];
}
#endif
#endif #endif
CColor(Comp32 rgba) { fromRGBA32(rgba); } CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); } CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other); CColor(const CVector4f& other) : mSimd(other.mSimd) {}
CColor& operator=(const CVector4f& other);
template <typename T>
CColor(const simd<T>& s) : mSimd(s) {}
CColor& operator=(const CVector4f& other) {
mSimd = other.mSimd;
return *this;
}
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
static inline CColor ReadRGBABig(athena::io::IStreamReader& reader) static CColor ReadRGBABig(athena::io::IStreamReader& reader) {
{
CColor ret; CColor ret;
ret.readRGBABig(reader); ret.readRGBABig(reader);
return ret; return ret;
} }
inline void readRGBABig(athena::io::IStreamReader& reader) void readRGBABig(athena::io::IStreamReader& reader) {
{ simd_floats f;
r = reader.readFloatBig(); f[0] = reader.readFloatBig();
g = reader.readFloatBig(); f[1] = reader.readFloatBig();
b = reader.readFloatBig(); f[2] = reader.readFloatBig();
a = reader.readFloatBig(); f[3] = reader.readFloatBig();
mSimd.copy_from(f);
} }
inline void readBGRABig(athena::io::IStreamReader& reader)
{ void readBGRABig(athena::io::IStreamReader& reader) {
b = reader.readFloatBig(); simd_floats f;
g = reader.readFloatBig(); f[2] = reader.readFloatBig();
r = reader.readFloatBig(); f[1] = reader.readFloatBig();
a = reader.readFloatBig(); f[0] = reader.readFloatBig();
f[3] = reader.readFloatBig();
mSimd.copy_from(f);
} }
inline void writeRGBABig(athena::io::IStreamWriter& writer) const
{ void writeRGBABig(athena::io::IStreamWriter& writer) const {
writer.writeFloatBig(r); simd_floats f(mSimd);
writer.writeFloatBig(g); writer.writeFloatBig(f[0]);
writer.writeFloatBig(b); writer.writeFloatBig(f[1]);
writer.writeFloatBig(a); writer.writeFloatBig(f[2]);
writer.writeFloatBig(f[3]);
} }
inline void writeBGRABig(athena::io::IStreamWriter& writer) const
{ void writeBGRABig(athena::io::IStreamWriter& writer) const {
writer.writeFloatBig(b); simd_floats f(mSimd);
writer.writeFloatBig(g); writer.writeFloatBig(f[2]);
writer.writeFloatBig(r); writer.writeFloatBig(f[1]);
writer.writeFloatBig(a); writer.writeFloatBig(f[0]);
writer.writeFloatBig(f[3]);
} }
inline void writeRGBA8(athena::io::IStreamWriter& writer) const
{ void writeRGBA8(athena::io::IStreamWriter& writer) const {
writer.writeUByte(this->r * 255); simd_floats f(mSimd);
writer.writeUByte(this->g * 255); writer.writeUByte(atUint8(f[0] * 255));
writer.writeUByte(this->b * 255); writer.writeUByte(atUint8(f[1] * 255));
writer.writeUByte(this->a * 255); writer.writeUByte(atUint8(f[2] * 255));
writer.writeUByte(atUint8(f[3] * 255));
} }
#endif #endif
inline bool operator==(const CColor& rhs) const { return (r == rhs.r && g == rhs.g && b == rhs.b && a == rhs.a); } bool operator==(const CColor& rhs) const {
inline bool operator!=(const CColor& rhs) const { return !(*this == rhs); } return (r() == rhs.r() && g() == rhs.g() && b() == rhs.b() && a() == rhs.a());
inline CColor operator+(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CColor(r + rhs.r, g + rhs.g, b + rhs.b, a + rhs.a);
#endif
} }
inline CColor operator-(const CColor& rhs) const
{ bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
#if __SSE__
return CColor(_mm_sub_ps(mVec128, rhs.mVec128)); CColor operator+(const CColor& rhs) const {
#else return mSimd + rhs.mSimd;
return CColor(r - rhs.r, g - rhs.g, b - rhs.b, a - rhs.a);
#endif
} }
inline CColor operator*(const CColor& rhs) const
{ CColor operator-(const CColor& rhs) const {
#if __SSE__ return mSimd - rhs.mSimd;
return CColor(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CColor(r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a);
#endif
} }
inline CColor operator/(const CColor& rhs) const
{ CColor operator*(const CColor& rhs) const {
#if __SSE__ return mSimd * rhs.mSimd;
return CColor(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CColor(r / rhs.r, g / rhs.g, b / rhs.b, a / rhs.a);
#endif
} }
inline CColor operator+(float val) const
{ CColor operator/(const CColor& rhs) const {
#if __SSE__ return mSimd / rhs.mSimd;
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_add_ps(mVec128, splat.mVec128));
#else
return CColor(r + val, g + val, b + val, a + val);
#endif
} }
inline CColor operator-(float val) const
{ CColor operator+(float val) const {
#if __SSE__ return mSimd + simd<float>(val);
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CColor(r - val, g - val, b - val, a - val);
#endif
} }
inline CColor operator*(float val) const
{ CColor operator-(float val) const {
#if __SSE__ return mSimd - simd<float>(val);
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CColor(r * val, g * val, b * val, a * val);
#endif
} }
inline CColor operator/(float val) const
{ CColor operator*(float val) const {
#if __SSE__ return mSimd * simd<float>(val);
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_div_ps(mVec128, splat.mVec128));
#else
return CColor(r / val, g / val, b / val, a / val);
#endif
} }
inline const CColor& operator+=(const CColor& rhs)
{ CColor operator/(float val) const {
#if __SSE__ return mSimd / simd<float>(val);
mVec128 = _mm_add_ps(mVec128, rhs.mVec128); }
#else
r += rhs.r; const CColor& operator+=(const CColor& rhs) {
g += rhs.g; mSimd += rhs.mSimd;
b += rhs.b;
a += rhs.a;
#endif
return *this; return *this;
} }
inline const CColor& operator-=(const CColor& rhs)
{ const CColor& operator-=(const CColor& rhs) {
#if __SSE__ mSimd -= rhs.mSimd;
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
r -= rhs.r;
g -= rhs.g;
b -= rhs.b;
a -= rhs.a;
#endif
return *this; return *this;
} }
inline const CColor& operator*=(const CColor& rhs)
{ const CColor& operator*=(const CColor& rhs) {
#if __SSE__ mSimd *= rhs.mSimd;
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
r *= rhs.r;
g *= rhs.g;
b *= rhs.b;
a *= rhs.a;
#endif
return *this; return *this;
} }
inline const CColor& operator/=(const CColor& rhs)
{ const CColor& operator/=(const CColor& rhs) {
#if __SSE__ mSimd /= rhs.mSimd;
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
r /= rhs.r;
g /= rhs.g;
b /= rhs.b;
a /= rhs.a;
#endif
return *this; return *this;
} }
inline void normalize()
{ const CColor& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
const CColor& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
const CColor& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
const CColor& operator/=(float rhs) {
mSimd /= simd<float>(rhs);
return *this;
}
void normalize() {
float mag = magnitude(); float mag = magnitude();
mag = 1.f / mag; mag = 1.f / mag;
*this *= mag; *this *= mag;
} }
inline CColor normalized() const
{ CColor normalized() const {
float mag = magnitude(); float mag = magnitude();
mag = 1.f / mag; mag = 1.f / mag;
return *this * mag; return *this * mag;
} }
inline float magSquared() const float magSquared() const {
{ return mSimd.dot4(mSimd);
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return r * r + g * g + b * b + a * a;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
static inline CColor lerp(const CColor& a, const CColor& b, float t) { return (a + (b - a) * t); }
static inline CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
inline float& operator[](const size_t& idx) { assert(idx < 4); return (&r)[idx]; }
inline const float& operator[](const size_t& idx) const { assert(idx < 4); return (&r)[idx]; }
inline void splat(float rgb, float a)
{
#if __SSE__
TVectorUnion splat = {{rgb, rgb, rgb, a}};
mVec128 = splat.mVec128;
#else
v[0] = rgb;
v[1] = rgb;
v[2] = rgb;
v[3] = a;
#endif
} }
inline float rgbDot(const CColor& rhs) const float magnitude() const { return std::sqrt(magSquared()); }
{
#if __SSE__ static CColor lerp(const CColor& a, const CColor& b, float t) {
TVectorUnion result; return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif
} }
union { static CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
struct
{
float r, g, b, a;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
void fromRGBA8(Comp8 r, Comp8 g, Comp8 b, Comp8 a) simd<float>::reference operator[](const size_t& idx) {
{ assert(idx < 4);
this->r = r * OneOver255; return mSimd[idx];
this->g = g * OneOver255;
this->b = b * OneOver255;
this->a = a * OneOver255;
} }
void fromRGBA32(Comp32 rgba) float operator[](const size_t& idx) const {
{ assert(idx < 4);
return mSimd[idx];
}
void splat(float rgb, float a) {
mSimd = simd<float>(rgb);
mSimd[3] = a;
}
float rgbDot(const CColor& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
void fromRGBA8(const Comp8 ri, const Comp8 gi, const Comp8 bi, const Comp8 ai) {
mSimd = simd<float>(ri * OneOver255, gi * OneOver255, bi * OneOver255, ai * OneOver255);
}
void fromRGBA32(Comp32 rgba) {
static RGBA32 tmp; static RGBA32 tmp;
tmp.rgba = COLOR(rgba); tmp.rgba = COLOR(rgba);
fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a); fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
@ -343,12 +268,11 @@ public:
* \param b * \param b
* \param a * \param a
*/ */
void toRGBA8(Comp8& r, Comp8& g, Comp8& b, Comp8& a) void toRGBA8(Comp8& ro, Comp8& go, Comp8& bo, Comp8& ao) const {
{ ro = Comp8(r() * 255);
r = this->r * 255; go = Comp8(g() * 255);
g = this->g * 255; bo = Comp8(b() * 255);
b = this->b * 255; ao = Comp8(a() * 255);
a = this->a * 255;
} }
/** /**
@ -371,59 +295,44 @@ public:
void fromHSL(float h, float s, float l, float _a = 1.0); void fromHSL(float h, float s, float l, float _a = 1.0);
void toHSL(float& h, float& s, float& l); void toHSL(float& h, float& s, float& l) const;
CColor toGrayscale() { return {std::sqrt((r * r + g * g + b * b) / 3), a}; } CColor toGrayscale() const { return {std::sqrt((r() * r() + g() * g() + b() * b()) / 3), a()}; }
/** /**
* @brief Clamps to GPU-safe RGBA values [0,1] * @brief Clamps to GPU-safe RGBA values [0,1]
*/ */
void Clamp() void Clamp() {
{ r() = std::min(1.f, std::max(0.f, float(r())));
this->r = std::min(1.f, std::max(0.f, this->r)); g() = std::min(1.f, std::max(0.f, float(g())));
this->g = std::min(1.f, std::max(0.f, this->g)); b() = std::min(1.f, std::max(0.f, float(b())));
this->b = std::min(1.f, std::max(0.f, this->b)); a() = std::min(1.f, std::max(0.f, float(a())));
this->a = std::min(1.f, std::max(0.f, this->a));
} }
float r() const { return mSimd[0]; }
float g() const { return mSimd[1]; }
float b() const { return mSimd[2]; }
float a() const { return mSimd[3]; }
simd<float>::reference r() { return mSimd[0]; }
simd<float>::reference g() { return mSimd[1]; }
simd<float>::reference b() { return mSimd[2]; }
simd<float>::reference a() { return mSimd[3]; }
}; };
static inline CColor operator+(float lhs, const CColor& rhs) static inline CColor operator+(float lhs, const CColor& rhs) {
{ return simd<float>(lhs) + rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs + rhs.r, lhs + rhs.g, lhs + rhs.b, lhs + rhs.a);
#endif
} }
static inline CColor operator-(float lhs, const CColor& rhs) static inline CColor operator-(float lhs, const CColor& rhs) {
{ return simd<float>(lhs) - rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs - rhs.r, lhs - rhs.g, lhs - rhs.b, lhs - rhs.a);
#endif
} }
/** Scalar * color: every channel of rhs (alpha included) is scaled by lhs. */
static inline CColor operator*(float lhs, const CColor& rhs) {
  return simd<float>(lhs) * rhs.mSimd;
}
/**
 * Scalar / color: lhs is broadcast to all four lanes and divided by each channel of rhs.
 * No zero check is performed on the channels of rhs.
 */
static inline CColor operator/(float lhs, const CColor& rhs) {
  return simd<float>(lhs) / rhs.mSimd;
}
} }

View File

@ -2,12 +2,10 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{
class CQuaternion; class CQuaternion;
class CEulerAngles : public CVector3f class CEulerAngles : public CVector3f {
{
public: public:
// Stores the three components through assign() (presumably inherited from CVector3f — confirm).
CEulerAngles(float x, float y, float z) { assign(x, y, z); }
CEulerAngles(const CQuaternion& quat); CEulerAngles(const CQuaternion& quat);

View File

@ -4,17 +4,14 @@
#include "zeus/CAABox.hpp" #include "zeus/CAABox.hpp"
#include "zeus/CProjection.hpp" #include "zeus/CProjection.hpp"
namespace zeus namespace zeus {
{ class CFrustum {
class CFrustum
{
CPlane planes[6]; CPlane planes[6];
bool valid = false; bool valid = false;
public: public:
void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection); void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection);
void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection); void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection);
bool aabbFrustumTest(const CAABox& aabb) const; bool aabbFrustumTest(const CAABox& aabb) const;
bool sphereFrustumTest(const CSphere& sphere) const; bool sphereFrustumTest(const CSphere& sphere) const;
bool pointFrustumTest(const CVector3f& point) const; bool pointFrustumTest(const CVector3f& point) const;

View File

@ -3,12 +3,11 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CLine {
class CLine
{
public: public:
CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {} CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {}
CVector3f origin; CVector3f origin;
CVector3f dir; CVector3f dir;
}; };

View File

@ -3,15 +3,12 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CLineSeg {
class CLineSeg
{
public: public:
CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end) CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end) {
{
CVector3f tmp = (end - start).normalized(); CVector3f tmp = (end - start).normalized();
if (tmp.x != 0 || tmp.y != 0 || tmp.z != 0) if (tmp.x() != 0.f || tmp.y() != 0.f || tmp.z() != 0.f)
xc_dir = tmp.normalized(); xc_dir = tmp.normalized();
else else
xc_dir = CVector3f::skZero; xc_dir = CVector3f::skZero;

View File

@ -1,28 +1,24 @@
#pragma once #pragma once
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{ struct CMRay {
struct CMRay
{
CMRay(const CVector3f& start, const CVector3f& dirin, float len) CMRay(const CVector3f& start, const CVector3f& dirin, float len)
: start(start), length(len), invLength(1.f / len), dir(dirin) : start(start), length(len), invLength(1.f / len), dir(dirin) {
{
end = start + (len * dirin); end = start + (len * dirin);
delta = end - start; delta = end - start;
} }
CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen) CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen)
: start(start), end(end), length(len), invLength(invLen) : start(start), end(end), length(len), invLength(invLen) {
{
delta = end - start; delta = end - start;
dir = invLen * delta; dir = invLen * delta;
} }
/**
 * Returns this ray with both end points mapped through xfrm.inverse().
 * length and invLength are reused unchanged, so the result is only consistent when
 * the transform carries no scale (as the name states).
 */
CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const {
  const CTransform inv = xfrm.inverse();
  return CMRay(inv * start, inv * end, length, invLength);
}

View File

@ -6,79 +6,68 @@
#include <cstring> #include <cstring>
/* Column-major matrix class */ /* Column-major matrix class */
namespace zeus namespace zeus {
{
class CQuaternion; class CQuaternion;
class alignas(16) CMatrix3f
{
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix3f(bool zero = false) class CMatrix3f {
{ public:
memset(m, 0, sizeof(m));
if (!zero) explicit CMatrix3f(bool zero = false) {
{ m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
m[2] = simd<float>(0.f);
if (!zero) {
m[0][0] = 1.0; m[0][0] = 1.0;
m[1][1] = 1.0; m[1][1] = 1.0;
m[2][2] = 1.0; m[2][2] = 1.0;
} }
} }
CMatrix3f(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22)
{ CMatrix3f(float m00, float m01, float m02,
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02; float m10, float m11, float m12,
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12; float m20, float m21, float m22)
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22; : m{{m00, m10, m20},
} {m01, m11, m21},
CMatrix3f(const CVector3f& scaleVec) {m02, m12, m22}} {}
{
memset(m, 0, sizeof(m)); CMatrix3f(const CVector3f& scaleVec) {
m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
m[2] = simd<float>(0.f);
m[0][0] = scaleVec[0]; m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1]; m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2]; m[2][2] = scaleVec[2];
} }
CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {} CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {}
CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2)
{ CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2) {
vec[0] = r0; m[0] = r0;
vec[1] = r1; m[1] = r1;
vec[2] = r2; m[2] = r2;
} }
CMatrix3f(const CMatrix3f& other)
{ CMatrix3f(const CMatrix3f& other) {
vec[0] = other.vec[0]; m[0] = other.m[0];
vec[1] = other.vec[1]; m[1] = other.m[1];
vec[2] = other.vec[2]; m[2] = other.m[2];
} }
#if __SSE__
CMatrix3f(const __m128& r0, const __m128& r1, const __m128& r2) CMatrix3f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2) {
{ m[0].mSimd = r0;
vec[0].mVec128 = r0; m[1].mSimd = r1;
vec[1].mVec128 = r1; m[2].mSimd = r2;
vec[2].mVec128 = r2;
} }
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2)
{ CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2) {
#if __SSE__ m[0].mSimd = r0.simd;
vec[0].mVec128 = r0.mVec128; m[1].mSimd = r1.simd;
vec[1].mVec128 = r1.mVec128; m[2].mSimd = r2.simd;
vec[2].mVec128 = r2.mVec128;
#else
vec[0].x = r0.vec[0];
vec[0].y = r0.vec[1];
vec[0].z = r0.vec[2];
vec[1].x = r1.vec[0];
vec[1].y = r1.vec[1];
vec[1].z = r1.vec[2];
vec[2].x = r2.vec[0];
vec[2].y = r2.vec[1];
vec[2].z = r2.vec[2];
#endif
} }
void readBig(athena::io::IStreamReader& input)
{ void readBig(athena::io::IStreamReader& input) {
m[0][0] = input.readFloatBig(); m[0][0] = input.readFloatBig();
m[1][0] = input.readFloatBig(); m[1][0] = input.readFloatBig();
m[2][0] = input.readFloatBig(); m[2][0] = input.readFloatBig();
@ -90,170 +79,112 @@ public:
m[2][2] = input.readFloatBig(); m[2][2] = input.readFloatBig();
} }
/** Convenience factory: default-constructs a matrix, fills it with readBig(input) and returns it. */
static CMatrix3f ReadBig(athena::io::IStreamReader& input) {
  CMatrix3f ret;
  ret.readBig(input);
  return ret;
}
#endif

// Defined out of line.
CMatrix3f(const CVector3f& axis, float angle);

// Defined out of line.
CMatrix3f(const CQuaternion& quat);

/** Copy assignment: copies the three stored vectors of other and returns *this. */
CMatrix3f& operator=(const CMatrix3f& other) {
  m[0] = other.m[0];
  m[1] = other.m[1];
  m[2] = other.m[2];
  return *this;
}
/**
 * Matrix * vector: the sum of the columns m[0], m[1], m[2], each weighted by
 * component 0, 1, 2 of other respectively.
 */
CVector3f operator*(const CVector3f& other) const {
  // shuffle<k, k, k, k>() broadcasts component k of other across the register.
  return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
         m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
         m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>();
}
/** Mutable access to stored vector i; i must be below 3 (checked by assert only). */
CVector3f& operator[](size_t i) {
  assert(i < 3);
  return m[i];
}

/** Read-only access to stored vector i; i must be below 3 (checked by assert only). */
const CVector3f& operator[](size_t i) const {
  assert(i < 3);
  return m[i];
}
/**
 * Returns a copy whose three vectors are rebuilt from m[0] and m[1]:
 * ret[0] is m[0] normalized, ret[2] is the normalized cross product of ret[0] and m[1],
 * and ret[1] is the cross product of ret[2] and ret[0]. m[2] is not consulted.
 */
CMatrix3f orthonormalized() const {
  CMatrix3f ret;
  ret[0] = m[0].normalized();
  ret[2] = ret[0].cross(m[1]);
  ret[2].normalize();
  ret[1] = ret[2].cross(ret[0]);
  return ret;
}
inline bool operator==(const CMatrix3f& other) const bool operator==(const CMatrix3f& other) const {
{ return m[0] == other.m[0] && m[1] == other.m[1] && m[2] == other.m[2];
return vec[0] == other.vec[0] && vec[1] == other.vec[1] && vec[2] == other.vec[2];
} }
static const CMatrix3f skIdentityMatrix3f;

// transpose(), transposed() and inverted() are defined out of line.
void transpose();

CMatrix3f transposed() const;

/** In-place inversion: replaces this matrix with the result of inverted(). */
void invert() { *this = inverted(); }

CMatrix3f inverted() const;
void addScaledMatrix(const CMatrix3f& other, float scale) void addScaledMatrix(const CMatrix3f& other, float scale) {
{
CVector3f scaleVec(scale); CVector3f scaleVec(scale);
vec[0] += other.vec[0] * scaleVec; m[0] += other.m[0] * scaleVec;
vec[1] += other.vec[1] * scaleVec; m[1] += other.m[1] * scaleVec;
vec[2] += other.vec[2] * scaleVec; m[2] += other.m[2] * scaleVec;
} }
/**
 * Rotation about the X axis by theta radians (theta is fed to std::sin / std::cos).
 * The three registers become m[0], m[1], m[2]; the fourth lane of each is zero.
 */
static CMatrix3f RotateX(float theta) {
  float sinT = std::sin(theta);
  float cosT = std::cos(theta);
  return CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
                   simd<float>{0.f, cosT, sinT, 0.f},
                   simd<float>{0.f, -sinT, cosT, 0.f});
}
/**
 * Rotation about the Y axis by theta radians (theta is fed to std::sin / std::cos).
 * The three registers become m[0], m[1], m[2]; the fourth lane of each is zero.
 */
static CMatrix3f RotateY(float theta) {
  float sinT = std::sin(theta);
  float cosT = std::cos(theta);
  return CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
                   simd<float>{0.f, 1.f, 0.f, 0.f},
                   simd<float>{sinT, 0.f, cosT, 0.f});
}
/**
 * Rotation about the Z axis by theta radians (theta is fed to std::sin / std::cos).
 * The three registers become m[0], m[1], m[2]; the fourth lane of each is zero.
 */
static CMatrix3f RotateZ(float theta) {
  float sinT = std::sin(theta);
  float cosT = std::cos(theta);
  return CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
                   simd<float>{-sinT, cosT, 0.f, 0.f},
                   simd<float>{0.f, 0.f, 1.f, 0.f});
}
float determinant() const float determinant() const {
{
return return
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) + m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) +
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) + m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
} }
// Storage: three CVector3f columns (the class is column-major), indexed as m[column][row].
CVector3f m[3];
}; };
/**
 * Matrix product lhs * rhs. Column i of the result is lhs applied to column i of rhs,
 * i.e. the columns of lhs weighted by components 0, 1, 2 of rhs[i].
 */
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs) {
  simd<float> v[3];
  for (int i = 0; i < 3; ++i)
    v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
           lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
           lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>();
  return CMatrix3f(v[0], v[1], v[2]);
}
} }

View File

@ -1,176 +1,116 @@
#pragma once #pragma once
#include "zeus/CMatrix3f.hpp" #include "zeus/CMatrix3f.hpp"
#include "zeus/CVector4f.hpp" #include "zeus/CVector4f.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CMatrix4f {
class alignas(16) CMatrix4f
{
public: public:
static const CMatrix4f skIdentityMatrix4f; static const CMatrix4f skIdentityMatrix4f;
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix4f(bool zero = false)
{
memset(m, 0, sizeof(m));
if (!zero) explicit CMatrix4f(bool zero = false) {
{ if (!zero) {
m[0][0] = 1.0; m[0][0] = 1.0;
m[1][1] = 1.0; m[1][1] = 1.0;
m[2][2] = 1.0; m[2][2] = 1.0;
m[3][3] = 1.0; m[3][3] = 1.0;
} }
} }
CMatrix4f(float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20, float m21,
float m22, float m23, float m30, float m31, float m32, float m33) CMatrix4f(float m00, float m01, float m02, float m03,
{ float m10, float m11, float m12, float m13,
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02, m[3][0] = m03; float m20, float m21, float m22, float m23,
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12, m[3][1] = m13; float m30, float m31, float m32, float m33)
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22, m[3][2] = m23; : m{{m00, m10, m20, m30},
m[0][3] = m30, m[1][3] = m31, m[2][3] = m32, m[3][3] = m33; {m01, m11, m21, m31},
} {m02, m12, m22, m32},
CMatrix4f(const CVector3f& scaleVec) {m03, m13, m23, m33}} {}
{
memset(m, 0, sizeof(m)); CMatrix4f(const CVector3f& scaleVec) {
m[0][0] = scaleVec[0]; m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1]; m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2]; m[2][2] = scaleVec[2];
m[3][3] = 1.0f; m[3][3] = 1.0f;
} }
CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3)
{ CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3) {
vec[0] = r0; m[0] = r0;
vec[1] = r1; m[1] = r1;
vec[2] = r2; m[2] = r2;
vec[3] = r3; m[3] = r3;
} }
CMatrix4f(const CMatrix4f& other)
{ CMatrix4f(const CMatrix4f& other) {
vec[0] = other.vec[0]; m[0] = other.m[0];
vec[1] = other.vec[1]; m[1] = other.m[1];
vec[2] = other.vec[2]; m[2] = other.m[2];
vec[3] = other.vec[3]; m[3] = other.m[3];
} }
#if __SSE__
CMatrix4f(const __m128& r0, const __m128& r1, const __m128& r2, const __m128& r3) CMatrix4f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2, const simd<float>& r3) {
{ m[0].mSimd = r0;
vec[0].mVec128 = r0; m[1].mSimd = r1;
vec[1].mVec128 = r1; m[2].mSimd = r2;
vec[2].mVec128 = r2; m[3].mSimd = r3;
vec[3].mVec128 = r3;
} }
#endif
CMatrix4f(const CMatrix3f& other) CMatrix4f(const CMatrix3f& other) {
{ m[0] = other.m[0];
memset(m, 0, sizeof(m)); m[1] = other.m[1];
vec[0] = other.vec[0]; m[2] = other.m[2];
vec[1] = other.vec[1]; m[3] = CVector4f(0.f, 0.f, 0.f, 1.0f);
vec[2] = other.vec[2];
vec[3] = CVector4f(0, 0, 0, 1.0f);
} }
inline CMatrix4f& operator=(const CMatrix4f& other)
{ CMatrix4f& operator=(const CMatrix4f& other) {
vec[0] = other.vec[0]; m[0] = other.m[0];
vec[1] = other.vec[1]; m[1] = other.m[1];
vec[2] = other.vec[2]; m[2] = other.m[2];
vec[3] = other.vec[3]; m[3] = other.m[3];
return *this; return *this;
} }
inline CVector4f operator*(const CVector4f& other) const
{
#if __SSE__
TVectorUnion res;
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
_mm_add_ps(_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)),
_mm_mul_ps(vec[3].mVec128, ze_splat_ps(other.mVec128, 3))));
return CVector4f(res.mVec128); CVector4f operator*(const CVector4f& other) const {
#else return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
return CVector4f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2] + m[3][0] * other.v[3], m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2] + m[3][1] * other.v[3], m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>() +
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2] + m[3][2] * other.v[3], m[3].mSimd * other.mSimd.shuffle<3, 3, 3, 3>();
m[0][3] * other.v[0] + m[1][3] * other.v[1] + m[2][3] * other.v[2] + m[3][3] * other.v[3]);
#endif
} }
inline CVector4f& operator[](int i) CVector4f& operator[](size_t i) {
{ assert(i < 4);
assert(0 <= i && i < 4); return m[i];
return vec[i];
} }
inline const CVector4f& operator[](int i) const const CVector4f& operator[](size_t i) const {
{ assert(i < 4);
assert(0 <= i && i < 4); return m[i];
return vec[i];
} }
CMatrix4f transposed() const; CMatrix4f transposed() const;
CMatrix4f transposedSSE3() const;
inline CVector3f multiplyOneOverW(const CVector3f& point) const CVector3f multiplyOneOverW(const CVector3f& point) const {
{
CVector4f xfVec = *this * point; CVector4f xfVec = *this * point;
return xfVec.toVec3f() / xfVec.w; return xfVec.toVec3f() / xfVec.w();
} }
inline CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const {
{
CVector4f xfVec = *this * point; CVector4f xfVec = *this * point;
wOut = xfVec.w; wOut = xfVec.w();
return xfVec.toVec3f() / xfVec.w; return xfVec.toVec3f() / xfVec.w();
} }
union { CVector4f m[4];
float m[4][4];
struct
{
CVector4f vec[4];
};
};
}; };
/**
 * Matrix product lhs * rhs. Column i of the result is lhs applied to column i of rhs,
 * i.e. the columns of lhs weighted by components 0..3 of rhs[i].
 */
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs) {
  simd<float> v[4];
  for (int i = 0; i < 4; ++i)
    v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
           lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
           lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>() +
           lhs.m[3].mSimd * rhs[i].mSimd.shuffle<3, 3, 3, 3>();
  return CMatrix4f(v[0], v[1], v[2], v[3]);
}
} }

View File

@ -5,20 +5,17 @@
#include "zeus/CAABox.hpp" #include "zeus/CAABox.hpp"
#include "zeus/CMRay.hpp" #include "zeus/CMRay.hpp"
namespace zeus namespace zeus {
{ class COBBox {
class alignas(16) COBBox
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
/** Reads the box from in: first the transform via read34RowMajor, then the extents via readBig. */
void readBig(athena::io::IStreamReader& in) {
  transform.read34RowMajor(in);
  extents.readBig(in);
}
static COBBox ReadBig(athena::io::IStreamReader& in)
{ static COBBox ReadBig(athena::io::IStreamReader& in) {
COBBox out; COBBox out;
out.readBig(in); out.readBig(in);
return out; return out;
@ -29,7 +26,7 @@ public:
CTransform transform; CTransform transform;
CVector3f extents; CVector3f extents;
COBBox() = default;

/** Builds a box from an axis-aligned box: extents from aabb.extents(), transform origin at aabb.center(). */
COBBox(const CAABox& aabb) : extents(aabb.extents()) { transform.origin = aabb.center(); }
@ -37,8 +34,7 @@ public:
CAABox calculateAABox(const CTransform& worldXf = CTransform()) const; CAABox calculateAABox(const CTransform& worldXf = CTransform()) const;
static COBBox FromAABox(const CAABox& box, const CTransform& xf) static COBBox FromAABox(const CAABox& box, const CTransform& xf) {
{
const CVector3f extents = box.max - box.center(); const CVector3f extents = box.max - box.center();
const CTransform newXf = CTransform::Translate(box.center()) * xf; const CTransform newXf = CTransform::Translate(box.center()) * xf;
return COBBox(newXf, extents); return COBBox(newXf, extents);
@ -46,8 +42,7 @@ public:
bool OBBIntersectsBox(const COBBox& other) const; bool OBBIntersectsBox(const COBBox& other) const;
bool AABoxIntersectsBox(const CAABox& other) bool AABoxIntersectsBox(const CAABox& other) {
{
return OBBIntersectsBox(FromAABox(other, CTransform::Identity())); return OBBIntersectsBox(FromAABox(other, CTransform::Identity()));
} }
}; };

View File

@ -4,72 +4,67 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{ class CPlane {
class alignas(16) CPlane
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CPlane() : mSimd(1.0, 0.f, 0.f, 0.f) {}
inline CPlane() : a(1.f), b(0.f), c(0.f), d(0.f) {} CPlane(float a, float b, float c, float d) : mSimd(a, b, c, d) {}
CPlane(float a, float b, float c, float d) : a(a), b(b), c(c), d(d) {}
CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c) CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c) {
{ mSimd = (b - a).cross(c - a).normalized().mSimd;
vec = (b - a).cross(c - a).normalized(); mSimd[3] = a.dot(normal());
d = a.dot(vec);
} }
CPlane(const CVector3f& point, float displacement) CPlane(const CVector3f& point, float displacement) {
{ mSimd = point.mSimd;
#if __SSE__ mSimd[3] = displacement;
mVec128 = point.mVec128;
#else
a = point[0];
b = point[1];
c = point[2];
#endif
d = displacement;
} }
float clipLineSegment(const CVector3f& a, const CVector3f& b) float clipLineSegment(const CVector3f& a, const CVector3f& b) {
{ float mag = (b - a).dot(normal());
float mag = (b-a).dot(vec); float dis = (-(y() - d())) / mag;
float dis = (-(vec.y - d)) / mag;
return clamp(0.0f, dis, 1.0f); return clamp(0.0f, dis, 1.0f);
} }
inline void normalize() void normalize() {
{ float nd = d();
float nd = d; auto norm = normal();
float mag = vec.magnitude(); float mag = norm.magnitude();
mag = 1.f / mag; mag = 1.f / mag;
vec = vec * mag; mSimd = (norm * mag).mSimd;
d = nd * mag; mSimd[3] = nd * mag;
} }
float pointToPlaneDist(const CVector3f& pos) const float pointToPlaneDist(const CVector3f& pos) const {
{ return pos.dot(normal()) - d();
return pos.dot(vec) - d;
} }
bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const; bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const;
const CVector3f& normal() const { return vec; } CVector3f normal() const { return mSimd; }
inline float& operator[](size_t idx) { assert(idx < 4); return p[idx]; } zeus::simd<float>::reference operator[](size_t idx) {
inline const float& operator[](size_t idx) const { assert(idx < 4); return p[idx]; } assert(idx < 4);
return mSimd[idx];
}
union { float operator[](size_t idx) const {
struct assert(idx < 4);
{ return mSimd[idx];
float a, b, c, d; }
};
float p[4]; float x() const { return mSimd[0]; }
CVector3f vec; float y() const { return mSimd[1]; }
#ifdef __SSE__ float z() const { return mSimd[2]; }
__m128 mVec128; float d() const { return mSimd[3]; }
#endif
}; simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference d() { return mSimd[3]; }
zeus::simd<float> mSimd;
}; };
} }

View File

@ -6,56 +6,51 @@
#include <cstdio> #include <cstdio>
#include <cmath> #include <cmath>
namespace zeus namespace zeus {
/** Kind of projection held by a CProjection. */
enum class EProjType {
  None = 0,
  Orthographic = 1,
  Perspective = 2
};
/** Parameters of an orthographic projection: the four side planes plus near and far. */
class SProjOrtho {
public:
  float top, bottom, left, right, znear, zfar;

  /** Defaults are top 1, bottom -1, left -1, right 1, near 1, far -1. */
  explicit SProjOrtho(float p_top = 1.0f, float p_bottom = -1.0f, float p_left = -1.0f, float p_right = 1.0f,
                      float p_near = 1.0f, float p_far = -1.0f)
  : top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far) {
  }
};
struct SProjPersp
{ struct SProjPersp {
float fov, aspect, znear, zfar; float fov, aspect, znear, zfar;
SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f) SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) : fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) {
{
} }
}; };
extern const SProjOrtho kOrthoIdentity; extern const SProjOrtho kOrthoIdentity;
class alignas(16) CProjection class CProjection {
{
void _updateCachedMatrix(); void _updateCachedMatrix();
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CProjection() {
CProjection()
{
m_projType = EProjType::Orthographic; m_projType = EProjType::Orthographic;
m_ortho = SProjOrtho(); m_ortho = SProjOrtho();
m_mtx = CMatrix4f::skIdentityMatrix4f; m_mtx = CMatrix4f::skIdentityMatrix4f;
} }
/** Copy constructor, implemented through operator=. */
CProjection(const CProjection& other) { *this = other; }

/** Builds an orthographic projection via setOrtho(ortho). */
CProjection(const SProjOrtho& ortho) { setOrtho(ortho); }

/** Builds a perspective projection via setPersp(persp). */
CProjection(const SProjPersp& persp) { setPersp(persp); }
inline CProjection& operator=(const CProjection& other) CProjection& operator=(const CProjection& other) {
{ if (this != &other) {
if (this != &other)
{
m_projType = other.m_projType; m_projType = other.m_projType;
m_ortho = other.m_ortho; m_ortho = other.m_ortho;
m_mtx = other.m_mtx; m_mtx = other.m_mtx;
@ -63,40 +58,41 @@ public:
return *this; return *this;
} }
inline void setOrtho(const SProjOrtho& ortho) void setOrtho(const SProjOrtho& ortho) {
{
m_projType = EProjType::Orthographic; m_projType = EProjType::Orthographic;
m_ortho = ortho; m_ortho = ortho;
_updateCachedMatrix(); _updateCachedMatrix();
} }
inline void setPersp(const SProjPersp& persp)
{ void setPersp(const SProjPersp& persp) {
m_projType = EProjType::Perspective; m_projType = EProjType::Perspective;
m_persp = persp; m_persp = persp;
_updateCachedMatrix(); _updateCachedMatrix();
} }
/** Returns which kind of projection is currently stored. */
EProjType getType() const { return m_projType; }
/**
 * Returns the orthographic parameters.
 * In builds without NDEBUG, calling this on a non-orthographic projection prints a
 * message to stderr and aborts; with NDEBUG defined the check is compiled out.
 */
const SProjOrtho& getOrtho() const {
#ifndef NDEBUG
  if (m_projType != EProjType::Orthographic) {
    std::fprintf(stderr, "attempted to access orthographic structure of non-ortho projection");
    std::abort();
  }
#endif
  return m_ortho;
}
/**
 * Returns the perspective parameters.
 * In builds without NDEBUG, calling this on a non-perspective projection prints a
 * message to stderr and aborts; with NDEBUG defined the check is compiled out.
 */
const SProjPersp& getPersp() const {
#ifndef NDEBUG
  if (m_projType != EProjType::Perspective) {
    std::fprintf(stderr, "attempted to access perspective structure of non-persp projection");
    std::abort();
  }
#endif
  return m_persp;
}
/** Returns the matrix last stored in m_mtx (set by the constructors and assignment visible here). */
const CMatrix4f& getCachedMatrix() const { return m_mtx; }
protected: protected:
/* Projection type */ /* Projection type */

View File

@ -8,15 +8,16 @@
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "zeus/CRelAngle.hpp" #include "zeus/CRelAngle.hpp"
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include <athena/IStreamReader.hpp>
#endif #endif
namespace zeus namespace zeus {
{
static inline float normalize_angle(float angle) static float normalize_angle(float angle) {
{
if (angle > M_PIF) if (angle > M_PIF)
angle -= 2.f * M_PIF; angle -= 2.f * M_PIF;
else if (angle < -M_PIF) else if (angle < -M_PIF)
@ -28,91 +29,95 @@ static inline float normalize_angle(float angle)
class CNUQuaternion; class CNUQuaternion;
/** Unit quaternion, used for all quaternion arithmetic */ /** Unit quaternion, used for all quaternion arithmetic */
class alignas(16) CQuaternion class CQuaternion {
{
#if __atdna__ && ZE_ATHENA_TYPES
float clangVec __attribute__((__vector_size__(16)));
#endif
public: public:
/**
 * Constructors and, under ZE_ATHENA_TYPES, athena interop.
 * Component order throughout is (w, x, y, z) = lanes/indices (0, 1, 2, 3); the default value is (1, 0, 0, 0).
 * - (xi, yi, zi): delegates to fromVector3f().
 * - (wi, vec): post-refactor text shuffles vec's lanes with <0, 0, 1, 2> (x, y, z land in lanes 1..3)
 *   and then overwrites lane 0 with wi.
 * - readBig(): reads four floats with readFloatBig() in the order w, x, y, z.
 * - atVec4f conversion operators reinterpret the object (pre-refactor: its `v` array) as an atVec4f.
 *   NOTE(review): this relies on the two types sharing a layout — confirm against atVec4f's definition.
 * NOTE(review): the templated simd<T> constructor is not explicit, so simd values convert implicitly.
 */
ZE_DECLARE_ALIGNED_ALLOCATOR(); CQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {}
CQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); } CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : w(wi), x(vec.x), y(vec.y), z(vec.z) {}
#if ZE_ATHENA_TYPES CQuaternion(float wi, const CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
inline void readBig(athena::io::IStreamReader& input) mSimd[0] = wi;
{
w = input.readFloatBig();
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
}
CQuaternion(const atVec4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec.vec[1];
y = vec.vec[2];
z = vec.vec[3];
w = vec.vec[0];
#endif
} }
operator atVec4f&() template <typename T>
{ CQuaternion(const simd<T>& s) : mSimd(s) {}
return *reinterpret_cast<atVec4f*>(v);
#if ZE_ATHENA_TYPES
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
} }
operator const atVec4f&() const
{ CQuaternion(const atVec4f& vec) : mSimd(vec.simd) {}
return *reinterpret_cast<const atVec4f*>(v);
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
}
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
} }
#endif #endif
CQuaternion(const CMatrix3f& mat); CQuaternion(const CMatrix3f& mat);
/**
 * Vector-based constructors.
 * - (CVector3f): delegates to fromVector3f().
 * - (CVector4f): takes the four components in order, index 0 -> w and 1..3 -> x, y, z.
 * - (vecA, vecB): normalizes both inputs, uses 1 + dot(vecAN, vecBN) as the real part and
 *   cross(vecAN, vecBN) as the imaginary part, then normalizes the result.
 *   NOTE(review): for exactly opposite inputs both parts are zero, so normalized() divides by a
 *   zero magnitude — confirm callers never pass antiparallel vectors.
 */
CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
CQuaternion(const CVector4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec[1];
y = vec[2];
z = vec[3];
w = vec[0];
#endif
}
CQuaternion(const CVector3f& vecA, const CVector3f& vecB) CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
{
CQuaternion(const CVector4f& vec) : mSimd(vec.mSimd) {}
CQuaternion(const CVector3f& vecA, const CVector3f& vecB) {
CVector3f vecAN = vecA.normalized(); CVector3f vecAN = vecA.normalized();
CVector3f vecBN = vecB.normalized(); CVector3f vecBN = vecB.normalized();
CVector3f w = vecAN.cross(vecBN); CVector3f w = vecAN.cross(vecBN);
*this = CQuaternion(1.f + vecAN.dot(vecBN), w.x, w.y, w.z).normalized(); *this = CQuaternion(1.f + vecAN.dot(vecBN), w).normalized();
} }
void fromVector3f(const CVector3f& vec); void fromVector3f(const CVector3f& vec);
CQuaternion& operator=(const CQuaternion& q); CQuaternion& operator=(const CQuaternion& q);
CQuaternion operator+(const CQuaternion& q) const; CQuaternion operator+(const CQuaternion& q) const;
CQuaternion operator-(const CQuaternion& q) const; CQuaternion operator-(const CQuaternion& q) const;
CQuaternion operator*(const CQuaternion& q) const; CQuaternion operator*(const CQuaternion& q) const;
CQuaternion operator/(const CQuaternion& q) const; CQuaternion operator/(const CQuaternion& q) const;
CQuaternion operator*(float scale) const; CQuaternion operator*(float scale) const;
CQuaternion operator/(float scale) const; CQuaternion operator/(float scale) const;
CQuaternion operator-() const; CQuaternion operator-() const;
const CQuaternion& operator+=(const CQuaternion& q); const CQuaternion& operator+=(const CQuaternion& q);
const CQuaternion& operator-=(const CQuaternion& q); const CQuaternion& operator-=(const CQuaternion& q);
const CQuaternion& operator*=(const CQuaternion& q); const CQuaternion& operator*=(const CQuaternion& q);
const CQuaternion& operator*=(float scale); const CQuaternion& operator*=(float scale);
const CQuaternion& operator/=(float scale); const CQuaternion& operator/=(float scale);
/** Returns the quaternion's length: the square root of magSquared(). */
float magnitude() const { return std::sqrt(magSquared()); } float magnitude() const { return std::sqrt(magSquared()); }
/** Returns w*w + x*x + y*y + z*z; the post-refactor text computes it as a 4-lane dot of mSimd with itself. */
float magSquared() const { return w * w + x * x + y * y + z * z; }
float magSquared() const { return mSimd.dot4(mSimd); }
/** Divides this quaternion in place by magnitude(). No zero-magnitude guard is present here. */
void normalize() { *this /= magnitude(); } void normalize() { *this /= magnitude(); }
/** Returns a copy divided by magnitude(); this object is left unchanged. No zero-magnitude guard is present here. */
CQuaternion normalized() const { return *this / magnitude(); } CQuaternion normalized() const { return *this / magnitude(); }
void invert(); void invert();
CQuaternion inverse() const; CQuaternion inverse() const;
/** /**
@ -121,27 +126,26 @@ public:
* @param angle The magnitude of the rotation in radians * @param angle The magnitude of the rotation in radians
* @return * @return
*/ */
/* Real part is cos(angle / 2); imaginary part is axis scaled by sin(angle / 2). */
static inline CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) static CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
{
return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f)); return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f));
} }
/** Multiplies this quaternion in place (*=) by fromAxisAngle({1, 0, 0}, angle). */
void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); } void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); }
/** Multiplies this quaternion in place (*=) by fromAxisAngle({0, 1, 0}, angle). */
void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); } void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); }
/** Multiplies this quaternion in place (*=) by fromAxisAngle({0, 0, 1}, angle). */
void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); } void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); }
/**
 * Computes (rotation * v) * rotation.inverse() and returns the x, y, z components (lanes 1..3)
 * of the product as a CVector3f.
 * NOTE(review): `rotation * v` takes a CAxisAngle operand; which conversion/overload applies is
 * not visible here — confirm.
 */
static inline CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v) static CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v) {
{
CQuaternion q = rotation * v; CQuaternion q = rotation * v;
q *= rotation.inverse(); q *= rotation.inverse();
return {q.x, q.y, q.z}; return {q.mSimd.shuffle<1, 2, 3, 3>()};
} }
static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng); static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng);
/** Rotates v: wraps it as a quaternion with real part 0, forms (*this) * r * inverse(), and returns the imaginary part. */
CVector3f transform(const CVector3f& v) const CVector3f transform(const CVector3f& v) const {
{
CQuaternion r(0.f, v); CQuaternion r(0.f, v);
return (*this * r * inverse()).getImaginary(); return (*this * r * inverse()).getImaginary();
} }
@ -150,57 +154,75 @@ public:
CQuaternion exp() const; CQuaternion exp() const;
/** Returns a CTransform constructed from CMatrix3f(*this); no origin is passed. */
inline CTransform toTransform() const { return CTransform(CMatrix3f(*this)); } CTransform toTransform() const { return CTransform(CMatrix3f(*this)); }
/**
 * toTransform(origin): returns CTransform(CMatrix3f(*this), origin).
 * dot(rhs): four-component dot product x*x' + y*y' + z*z' + w*w'. The pre-refactor text selects an
 * SSE4.1 _mm_dp_ps path, an SSE multiply-and-sum path, or the scalar expression; the post-refactor
 * text calls mSimd.dot4(rhs.mSimd).
 */
inline CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
inline float dot(const CQuaternion& rhs) const CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
{
#if __SSE__ float dot(const CQuaternion& rhs) const {
TVectorUnion result; return mSimd.dot4(rhs.mSimd);
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
} }
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t); static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t); static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t); static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t); static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1); static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1);
static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1, static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle); const zeus::CRelAngle& angle);
/** Returns atan2(2 * (x*y + w*z), w*w + x*x - y*y - z*z). In the post-refactor text f[0..3] hold (w, x, y, z). */
inline float roll() const { return std::atan2(2.f * (x * y + w * z), w * w + x * x - y * y - z * z); } float roll() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[1] * f[2] + f[0] * f[3]), f[0] * f[0] + f[1] * f[1] - f[2] * f[2] - f[3] * f[3]);
}
/** Returns atan2(2 * (y*z + w*x), w*w - x*x - y*y + z*z). In the post-refactor text f[0..3] hold (w, x, y, z). */
inline float pitch() const { return std::atan2(2.f * (y * z + w * x), w * w - x * x - y * y + z * z); } float pitch() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[2] * f[3] + f[0] * f[1]), f[0] * f[0] - f[1] * f[1] - f[2] * f[2] + f[3] * f[3]);
}
/**
 * Returns asin(-2 * (x*z - w*y)). In the post-refactor text f[0..3] hold (w, x, y, z).
 * NOTE(review): the asin argument is not clamped to [-1, 1] here.
 */
inline float yaw() const { return std::asin(-2.f * (x * z - w * y)); } float yaw() const {
simd_floats f(mSimd);
return std::asin(-2.f * (f[1] * f[3] - f[0] * f[2]));
}
CQuaternion buildEquivalent() const; CQuaternion buildEquivalent() const;
/** Returns the (x, y, z) part as a CVector3f; the post-refactor text selects lanes 1, 2, 3 via shuffle<1, 2, 3, 3>. */
zeus::CVector3f getImaginary() const { return {x, y, z}; } zeus::CVector3f getImaginary() const { return mSimd.shuffle<1, 2, 3, 3>(); }
/** Overwrites x, y and z with the components of `i`; w is not modified. */
void setImaginary(const zeus::CVector3f& i) { x = i.x; y = i.y; z = i.z; }
void setImaginary(const zeus::CVector3f& i) {
x() = i.x();
y() = i.y();
z() = i.z();
}
CRelAngle angleFrom(const zeus::CQuaternion& other); CRelAngle angleFrom(const zeus::CQuaternion& other);
/**
 * Element access. Index / lane order is 0 = w, 1 = x, 2 = y, 3 = z.
 * operator[] asserts idx < 4. In the post-refactor text the non-const forms return
 * simd<float>::reference and the const forms return float by value; the pre-refactor text exposes
 * w, x, y, z as plain members of a union with __m128 mVec128 and float v[4].
 */
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; } simd<float>::reference operator[](size_t idx) {
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; } assert(idx < 4);
return mSimd[idx];
}
union float operator[](size_t idx) const {
{ assert(idx < 4);
__m128 mVec128; return mSimd[idx];
struct }
{
float w, x, y, z; float w() const { return mSimd[0]; }
}; float x() const { return mSimd[1]; }
float v[4]; float y() const { return mSimd[2]; }
}; float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
static const CQuaternion skNoRotation; static const CQuaternion skNoRotation;
@ -210,60 +232,79 @@ public:
/** Non-unit quaternion, no guarantee that it's normalized. /** Non-unit quaternion, no guarantee that it's normalized.
* Converting to CQuaternion will perform normalize operation. * Converting to CQuaternion will perform normalize operation.
*/ */
class alignas(16) CNUQuaternion class CNUQuaternion {
{
public: public:
/* Constructors. Component order is (w, x, y, z) = lanes (0, 1, 2, 3); the default value is (1, 0, 0, 0). */
CNUQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {} CNUQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CNUQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CNUQuaternion(float win, const zeus::CVector3f& vec) { w = win; x = vec.x; y = vec.y; z = vec.z; } CNUQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CNUQuaternion(const CQuaternion& other) { w = other.w; x = other.x; y = other.y; z = other.z; }
/* Post-refactor (win, vec): shuffle<0, 0, 1, 2> moves vec's x, y, z into lanes 1..3, then lane 0 is set to win. */
CNUQuaternion(float win, const zeus::CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
w() = win;
}
CNUQuaternion(const CQuaternion& other) : mSimd(other.mSimd) {}
/* Matrix constructor goes through CQuaternion(mtx) and copies the result. */
CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {} CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {}
/* fromAxisAngle: delegates to CQuaternion::fromAxisAngle and wraps the result. */
static inline CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{ CNUQuaternion(const simd<float>& s) : mSimd(s) {}
static CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle)); return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle));
} }
/* magnitude() is sqrt(magSquared()); magSquared() is w*w + x*x + y*y + z*z (post-refactor: dot4 of mSimd with itself). */
float magnitude() const { return std::sqrt(magSquared()); } float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return w * w + x * x + y * y + z * z; }
/* normalize() scales all four components in place by 1 / magnitude(). No zero-magnitude guard is present. */
void normalize() float magSquared() const { return mSimd.dot4(mSimd); }
{
void normalize() {
float magDiv = 1.f / magnitude(); float magDiv = 1.f / magnitude();
w *= magDiv; mSimd *= magDiv;
x *= magDiv;
y *= magDiv;
z *= magDiv;
} }
/* normalized() returns the scaled copy and leaves this object unchanged. No zero-magnitude guard is present. */
CNUQuaternion normalized() const
{ CNUQuaternion normalized() const {
float magDiv = 1.f / magnitude(); float magDiv = 1.f / magnitude();
return { w * magDiv, x * magDiv, y * magDiv, z * magDiv }; return mSimd * simd<float>(magDiv);
} }
CNUQuaternion operator*(const CNUQuaternion& q) const; CNUQuaternion operator*(const CNUQuaternion& q) const;
CNUQuaternion operator*(float f) const; CNUQuaternion operator*(float f) const;
const CNUQuaternion& operator+=(const CNUQuaternion& q); const CNUQuaternion& operator+=(const CNUQuaternion& q);
/* Element access: operator[] asserts idx < 4; indices 0..3 map to w, x, y, z. */
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; } zeus::simd<float>::reference operator[](size_t idx) {
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; } assert(idx < 4);
return mSimd[idx];
}
union float operator[](size_t idx) const {
{ assert(idx < 4);
__m128 mVec128; return mSimd[idx];
struct }
{
float w, x, y, z; float w() const { return mSimd[0]; }
}; float x() const { return mSimd[1]; }
}; float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
}; };
/** Normalizes `q` with CNUQuaternion::normalized() and returns the four components as a CQuaternion. */
inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q) inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q) {
{
auto norm = q.normalized(); auto norm = q.normalized();
return { norm.w, norm.x, norm.y, norm.z }; return norm.mSimd;
} }
CQuaternion operator+(float lhs, const CQuaternion& rhs); CQuaternion operator+(float lhs, const CQuaternion& rhs);
CQuaternion operator-(float lhs, const CQuaternion& rhs); CQuaternion operator-(float lhs, const CQuaternion& rhs);
CQuaternion operator*(float lhs, const CQuaternion& rhs); CQuaternion operator*(float lhs, const CQuaternion& rhs);
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs); CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs);
} }

View File

@ -1,28 +1,26 @@
#pragma once #pragma once
#include "zeus/CVector2f.hpp" #include "zeus/CVector2f.hpp"
namespace zeus namespace zeus {
{ class CRectangle {
class CRectangle
{
public: public:
/** Constructors: the default leaves position and size default-constructed; the four-float form sets position = (x, y) and size = (w, h). */
CRectangle() {} CRectangle() {}
CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {} CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {}
/** Returns true when `point` lies within [position, position + size] on both axes; points exactly on an edge count as inside. */
inline bool contains(const CVector2f& point) const bool contains(const CVector2f& point) const {
{ if (point.x() < position.x() || point.x() > position.x() + size.x())
if (point.x < position.x || point.x > position.x + size.x)
return false; return false;
if (point.y < position.y || point.y > position.y + size.y) if (point.y() < position.y() || point.y() > position.y() + size.y())
return false; return false;
return true; return true;
} }
/**
 * Returns false only when one rectangle starts strictly beyond the far edge of the other on the
 * x or y axis; rectangles that merely touch along an edge are reported as intersecting.
 */
inline bool intersects(const CRectangle& rect) const bool intersects(const CRectangle& rect) const {
{ return !(position.x() > rect.position.x() + rect.size.x() || rect.position.x() > position.x() + size.x() ||
return !(position.x > rect.position.x + rect.size.x || rect.position.x > position.x + size.x || position.y() > rect.position.y() + rect.size.y() || rect.position.y() > position.y() + size.y());
position.y > rect.position.y + rect.size.y || rect.position.y > position.y + size.y);
} }
CVector2f position; CVector2f position;

View File

@ -4,17 +4,14 @@
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cmath> #include <cmath>
namespace zeus namespace zeus {
{
/** /**
* @brief The CRelAngle class represents relative angle in radians * @brief The CRelAngle class represents relative angle in radians
*/ */
struct CRelAngle struct CRelAngle {
{
float angle = 0.f; float angle = 0.f;
static float MakeRelativeAngle(float angle) static float MakeRelativeAngle(float angle) {
{
float absAngle = std::fabs(angle); float absAngle = std::fabs(angle);
if (absAngle == 2.f * M_PIF) if (absAngle == 2.f * M_PIF)
return std::copysign(absAngle, angle); return std::copysign(absAngle, angle);
@ -23,32 +20,76 @@ struct CRelAngle
} }
CRelAngle() = default; CRelAngle() = default;
/** Stores MakeRelativeAngle(angle). The constructor is not explicit, so a float converts to CRelAngle implicitly. */
CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {} CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {}
/**
 * Assignment operators. Assigning a float passes it through MakeRelativeAngle();
 * assigning another CRelAngle copies its stored angle unchanged. Both return *this.
 */
CRelAngle& operator=(float ang) { angle = MakeRelativeAngle(ang); return *this; }
CRelAngle& operator=(const CRelAngle& ang) { angle = ang.angle; return *this; } CRelAngle& operator=(float ang) {
angle = MakeRelativeAngle(ang);
return *this;
}
CRelAngle& operator=(const CRelAngle& ang) {
angle = ang.angle;
return *this;
}
/** Returns the stored angle converted with radToDeg(). */
float asDegrees() const { return radToDeg(angle); } float asDegrees() const { return radToDeg(angle); }
/** Returns the stored angle as-is (radians, per the struct's brief). */
float asRadians() const { return angle; } float asRadians() const { return angle; }
/**
 * Returns std::acos applied to the stored value.
 * NOTE(review): acos is only defined on [-1, 1], while the stored value is an angle in radians —
 * confirm the intended use of this accessor.
 */
float arcCosine() const { return std::acos(angle); } float arcCosine() const { return std::acos(angle); }
/** Builds a CRelAngle from degrees: converts with degToRad(), then stores MakeRelativeAngle() of the result. */
static CRelAngle FromDegrees(float angle) static CRelAngle FromDegrees(float angle) {
{
CRelAngle ret; CRelAngle ret;
ret.angle = MakeRelativeAngle(degToRad(angle)); ret.angle = MakeRelativeAngle(degToRad(angle));
return ret; return ret;
} }
/** Implicit conversion to float: yields the stored angle. */
operator float() const { return angle; } operator float() const { return angle; }
/** Builds a CRelAngle through the float constructor, which applies MakeRelativeAngle(). */
static CRelAngle FromRadians(float angle) { return CRelAngle(angle); } static CRelAngle FromRadians(float angle) { return CRelAngle(angle); }
/** Orders two CRelAngle values by their stored angle. */
bool operator <(const CRelAngle& other) const { return angle < other.angle; } bool operator<(const CRelAngle& other) const { return angle < other.angle; }
/**
 * Compound assignment operators (+=, -=, *=, /=), each with a CRelAngle and a float overload.
 * Every overload applies the arithmetic to the stored angle, passes the result through
 * MakeRelativeAngle(), stores it, and returns *this.
 * NOTE(review): the /= overloads do not check for a zero divisor.
 */
CRelAngle& operator +=(const CRelAngle& other) { angle = MakeRelativeAngle(angle + other.angle); return *this; }
CRelAngle& operator +=(float r) { angle = MakeRelativeAngle(angle + r); return *this; } CRelAngle& operator+=(const CRelAngle& other) {
CRelAngle& operator -=(const CRelAngle& other) { angle = MakeRelativeAngle(angle - other.angle); return *this; } angle = MakeRelativeAngle(angle + other.angle);
CRelAngle& operator -=(float r) { angle = MakeRelativeAngle(angle - r); return *this; } return *this;
CRelAngle& operator *=(const CRelAngle& other) { angle = MakeRelativeAngle(angle * other.angle); return *this; } }
CRelAngle& operator *=(float r) { angle = MakeRelativeAngle(angle * r); return *this; }
CRelAngle& operator /=(const CRelAngle& other) { angle = MakeRelativeAngle(angle / other.angle); return *this; } CRelAngle& operator+=(float r) {
CRelAngle& operator /=(float r) { angle = MakeRelativeAngle(angle / r); return *this; } angle = MakeRelativeAngle(angle + r);
return *this;
}
CRelAngle& operator-=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle - other.angle);
return *this;
}
CRelAngle& operator-=(float r) {
angle = MakeRelativeAngle(angle - r);
return *this;
}
CRelAngle& operator*=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle * other.angle);
return *this;
}
CRelAngle& operator*=(float r) {
angle = MakeRelativeAngle(angle * r);
return *this;
}
CRelAngle& operator/=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle / other.angle);
return *this;
}
CRelAngle& operator/=(float r) {
angle = MakeRelativeAngle(angle / r);
return *this;
}
}; };
} }

View File

@ -2,19 +2,14 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CSphere {
class alignas(16) CSphere
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
/** Stores the given center position and radius. */
CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {} CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {}
/** Returns the normalized direction from the sphere's position to `coord`. `coord` equal to position gives a zero vector to normalize. */
inline CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); } CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); }
/**
 * Returns true when the distance between the two centers is strictly less than the sum of the
 * radii; spheres that exactly touch are not reported as intersecting.
 * NOTE(review): this member is not const-qualified although nothing in its body modifies the sphere.
 */
inline bool intersects(const CSphere& other) bool intersects(const CSphere& other) {
{
float dist = (position - other.position).magnitude(); float dist = (position - other.position).magnitude();
return dist < (radius + other.radius); return dist < (radius + other.radius);
} }

View File

@ -8,102 +8,107 @@
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
namespace zeus namespace zeus {
{ class CTransform {
class alignas(16) CTransform
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
/**
 * Default constructor: builds basis with CMatrix3f(false) and default-constructs origin.
 * NOTE(review): the meaning of the bool argument is defined by CMatrix3f, not visible here.
 */
CTransform() : basis(false) {} CTransform() : basis(false) {}
/**
 * - (basis, offset): stores both; offset defaults to CVector3f::skZero.
 * - (const atVec4f* mtx), ZE_ATHENA_TYPES only: builds basis from mtx[0..2] and takes origin from
 *   component index 3 of each of those three vectors.
 * - read34RowMajor(r), ZE_ATHENA_TYPES only: reads three vec4 values with readVec4fBig(), builds
 *   basis from them and transposes it, then takes origin from component index 3 of each value.
 * NOTE(review): the (basis, offset) constructor is not explicit, so a CMatrix3f converts implicitly.
 */
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero) : basis(basis), origin(offset) {}
#if ZE_ATHENA_TYPES
CTransform(const atVec4f* mtx) : basis(mtx[0], mtx[1], mtx[2]), origin(mtx[0].vec[3], mtx[1].vec[3], mtx[2].vec[3]) {}
void read34RowMajor(athena::io::IStreamReader& r) CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero)
{ : basis(basis), origin(offset) {}
#if ZE_ATHENA_TYPES
CTransform(const atVec4f* mtx)
: basis(mtx[0], mtx[1], mtx[2])
, origin(mtx[0].simd[3], mtx[1].simd[3], mtx[2].simd[3]) {}
void read34RowMajor(athena::io::IStreamReader& r) {
atVec4f r0 = r.readVec4fBig(); atVec4f r0 = r.readVec4fBig();
atVec4f r1 = r.readVec4fBig(); atVec4f r1 = r.readVec4fBig();
atVec4f r2 = r.readVec4fBig(); atVec4f r2 = r.readVec4fBig();
basis = CMatrix3f(r0, r1, r2); basis = CMatrix3f(r0, r1, r2);
basis.transpose(); basis.transpose();
origin = CVector3f(r0.vec[3], r1.vec[3], r2.vec[3]); origin = CVector3f(r0.simd[3], r1.simd[3], r2.simd[3]);
} }
#endif #endif
/* Column constructor */ /* Column constructor */
/* c0, c1, c2 are passed to the CMatrix3f constructor for basis; c3 becomes origin. */
CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3) CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3)
: basis(c0, c1, c2), origin(c3) {} : basis(c0, c1, c2), origin(c3) {}
/** Returns a transform built from CMatrix3f::skIdentityMatrix3f with the default (skZero) offset. */
static inline CTransform Identity() { return CTransform(CMatrix3f::skIdentityMatrix3f); } static CTransform Identity() {
return CTransform(CMatrix3f::skIdentityMatrix3f);
}
/** Two transforms are equal when both origin and basis compare equal (using their own operator==). */
inline bool operator ==(const CTransform& other) const bool operator==(const CTransform& other) const {
{
return origin == other.origin && basis == other.basis; return origin == other.origin && basis == other.basis;
} }
/** Composes two transforms: result basis = basis * rhs.basis, result origin = origin + basis * rhs.origin. */
inline CTransform operator*(const CTransform& rhs) const CTransform operator*(const CTransform& rhs) const {
{
return CTransform(basis * rhs.basis, origin + (basis * rhs.origin)); return CTransform(basis * rhs.basis, origin + (basis * rhs.origin));
} }
/** Returns the inverse transform: basis' = basis.inverted(), origin' = basis' * (-origin). */
inline CTransform inverse() const CTransform inverse() const {
{
CMatrix3f inv = basis.inverted(); CMatrix3f inv = basis.inverted();
return CTransform(inv, inv * -origin); return CTransform(inv, inv * -origin);
} }
/** Returns a transform with basis skIdentityMatrix3f and origin `position`. */
static inline CTransform Translate(const CVector3f& position) { return {CMatrix3f::skIdentityMatrix3f, position}; } static CTransform Translate(const CVector3f& position) {
return {CMatrix3f::skIdentityMatrix3f, position};
}
/** Convenience overload: forwards {x, y, z} to Translate(const CVector3f&). */
static inline CTransform Translate(float x, float y, float z) { return Translate({x, y, z}); } static CTransform Translate(float x, float y, float z) {
return Translate({x, y, z});
}
/**
 * Returns a copy with the same basis and origin + other.
 * NOTE(review): not const-qualified although nothing in its body modifies this transform.
 */
inline CTransform operator+(const CVector3f& other) { return CTransform(basis, origin + other); } CTransform operator+(const CVector3f& other) {
return CTransform(basis, origin + other);
}
/** Adds `other` to origin in place and returns *this; basis is untouched. */
inline CTransform& operator+=(const CVector3f& other) CTransform& operator+=(const CVector3f& other) {
{
origin += other; origin += other;
return *this; return *this;
} }
/**
 * Returns a copy with the same basis and origin - other.
 * NOTE(review): not const-qualified although nothing in its body modifies this transform.
 */
inline CTransform operator-(const CVector3f& other) { return CTransform(basis, origin - other); } CTransform operator-(const CVector3f& other) {
return CTransform(basis, origin - other);
}
/** Subtracts `other` from origin in place and returns *this; basis is untouched. */
inline CTransform& operator-=(const CVector3f& other) CTransform& operator-=(const CVector3f& other) {
{
origin -= other; origin -= other;
return *this; return *this;
} }
/**
 * rotate(vec): returns basis * vec (origin is not applied).
 * RotateX/RotateY/RotateZ(theta): build a transform (default zero offset) whose CMatrix3f is
 * constructed from three 4-float vectors, with s = sin(theta), c = cos(theta):
 *   RotateX: (1, 0, 0, 0), (0, c, s, 0), (0, -s, c, 0)
 *   RotateY: (c, 0, -s, 0), (0, 1, 0, 0), (s, 0, c, 0)
 *   RotateZ: (c, s, 0, 0), (-s, c, 0, 0), (0, 0, 1, 0)
 * NOTE(review): whether those three vectors are rows or columns is decided by the CMatrix3f
 * constructor, which is not visible here; theta is passed straight to std::sin/std::cos.
 */
inline zeus::CVector3f rotate(const CVector3f& vec) const { return basis * vec; } zeus::CVector3f rotate(const CVector3f& vec) const {
return basis * vec;
static inline CTransform RotateX(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, cosT, sinT, 0.f}},
TVectorUnion{{0.f, -sinT, cosT, 0.f}}));
} }
static inline CTransform RotateY(float theta) static CTransform RotateX(float theta) {
{
float sinT = std::sin(theta); float sinT = std::sin(theta);
float cosT = std::cos(theta); float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}}, return CTransform(CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}}, simd<float>{0.f, cosT, sinT, 0.f},
TVectorUnion{{sinT, 0.f, cosT, 0.f}})); simd<float>{0.f, -sinT, cosT, 0.f}));
} }
static inline CTransform RotateZ(float theta) static CTransform RotateY(float theta) {
{
float sinT = std::sin(theta); float sinT = std::sin(theta);
float cosT = std::cos(theta); float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}}, return CTransform(CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
TVectorUnion{{-sinT, cosT, 0.f, 0.f}}, simd<float>{0.f, 1.f, 0.f, 0.f},
TVectorUnion{{0.f, 0.f, 1.f, 0.f}})); simd<float>{sinT, 0.f, cosT, 0.f}));
} }
inline void rotateLocalX(float theta) static CTransform RotateZ(float theta) {
{ float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f}));
}
void rotateLocalX(float theta) {
float sinT = std::sin(theta); float sinT = std::sin(theta);
float cosT = std::cos(theta); float cosT = std::cos(theta);
@ -118,8 +123,7 @@ public:
basis[2] -= b1; basis[2] -= b1;
} }
inline void rotateLocalY(float theta) void rotateLocalY(float theta) {
{
float sinT = std::sin(theta); float sinT = std::sin(theta);
float cosT = std::cos(theta); float cosT = std::cos(theta);
@ -134,8 +138,7 @@ public:
basis[0] -= b2; basis[0] -= b2;
} }
inline void rotateLocalZ(float theta) void rotateLocalZ(float theta) {
{
float sinT = std::sin(theta); float sinT = std::sin(theta);
float cosT = std::cos(theta); float cosT = std::cos(theta);
@ -150,66 +153,67 @@ public:
basis[1] -= b0; basis[1] -= b0;
} }
/** Returns (basis[0] . in, basis[1] . in, basis[2] . in): `in` dotted with each of the three basis vectors. */
inline CVector3f transposeRotate(const CVector3f& in) const CVector3f transposeRotate(const CVector3f& in) const {
{
return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in)); return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in));
} }
/** Replaces this transform with (*this) * S, where S is a CTransform built from CMatrix3f(CVector3f(factor, factor, factor)). */
inline void scaleBy(float factor) void scaleBy(float factor) {
{
CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor))); CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor)));
*this = *this * xfrm; *this = *this * xfrm;
} }
/** Returns a transform (default zero offset) whose matrix is built from (factor.x, 0, 0, 0), (0, factor.y, 0, 0), (0, 0, factor.z, 0). */
static inline CTransform Scale(const CVector3f& factor) static CTransform Scale(const CVector3f& factor) {
{ return CTransform(CMatrix3f(simd<float>{factor.x(), 0.f, 0.f, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{factor.x, 0.f, 0.f, 0.f}}, simd<float>{0.f, factor.y(), 0.f, 0.f},
TVectorUnion{{0.f, factor.y, 0.f, 0.f}}, simd<float>{0.f, 0.f, factor.z(), 0.f}));
TVectorUnion{{0.f, 0.f, factor.z, 0.f}}));
} }
/** Returns a transform (default zero offset) whose matrix is built from (x, 0, 0, 0), (0, y, 0, 0), (0, 0, z, 0). */
static inline CTransform Scale(float x, float y, float z) static CTransform Scale(float x, float y, float z) {
{ return CTransform(CMatrix3f(simd<float>{x, 0.f, 0.f, 0.f},
return CTransform( simd<float>{0.f, y, 0.f, 0.f},
CMatrix3f(TVectorUnion{{x, 0.f, 0.f, 0.f}}, simd<float>{0.f, 0.f, z, 0.f}));
TVectorUnion{{0.f, y, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, z, 0.f}}));
} }
/** Uniform scale: returns a transform (default zero offset) whose matrix has `factor` in each diagonal position and zeros elsewhere. */
static inline CTransform Scale(float factor) static CTransform Scale(float factor) {
{ return CTransform(CMatrix3f(simd<float>{factor, 0.f, 0.f, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{factor, 0.f, 0.f, 0.f}}, simd<float>{0.f, factor, 0.f, 0.f},
TVectorUnion{{0.f, factor, 0.f, 0.f}}, simd<float>{0.f, 0.f, factor, 0.f}));
TVectorUnion{{0.f, 0.f, factor, 0.f}}));
} }
/** Returns a default-constructed transform whose basis is set to basis * xfrm.basis; neither origin contributes to the result. */
inline CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const {
{
CTransform ret; CTransform ret;
ret.basis = basis * xfrm.basis; ret.basis = basis * xfrm.basis;
return ret; return ret;
} }
/** Returns a copy of this transform with origin.zeroOut() applied; basis is copied unchanged. */
inline CTransform getRotation() const CTransform getRotation() const {
{
CTransform ret = *this; CTransform ret = *this;
ret.origin.zeroOut(); ret.origin.zeroOut();
return ret; return ret;
} }
/** setRotation(mat) replaces basis with `mat`; setRotation(xfrm) forwards xfrm.basis to it. origin is left unchanged. */
void setRotation(const CMatrix3f& mat) { basis = mat; }
void setRotation(const CTransform& xfrm) { setRotation(xfrm.basis); } void setRotation(const CMatrix3f& mat) {
basis = mat;
}
void setRotation(const CTransform& xfrm) {
setRotation(xfrm.basis);
}
/** /**
* @brief buildMatrix3f Returns the stored matrix * @brief buildMatrix3f Returns the stored matrix
* buildMatrix3f is here for compliance with Retro's Math API * buildMatrix3f is here for compliance with Retro's Math API
* @return The Matrix (Neo, you are the one) * @return The Matrix (Neo, you are the one)
*/ */
/* Returns a const reference to the stored basis; nothing is computed. */
inline const CMatrix3f& buildMatrix3f() const { return basis; } const CMatrix3f& buildMatrix3f() const {
return basis;
}
/** Transforms a point: returns origin + basis * other. */
inline CVector3f operator*(const CVector3f& other) const { return origin + basis * other; } CVector3f operator*(const CVector3f& other) const {
return origin + basis * other;
}
inline CMatrix4f toMatrix4f() const CMatrix4f toMatrix4f() const {
{
CMatrix4f ret(basis[0], basis[1], basis[2], origin); CMatrix4f ret(basis[0], basis[1], basis[2], origin);
ret[0][3] = 0.0f; ret[0][3] = 0.0f;
ret[1][3] = 0.0f; ret[1][3] = 0.0f;
@ -218,31 +222,26 @@ public:
return ret; return ret;
} }
/** Returns the basis vector at index 2 (basis.m[2]; pre-refactor basis.vec[2]) as a CVector3f. */
inline CVector3f upVector() const CVector3f upVector() const {
{ return basis.m[2];
return basis.vec[2];
} }
/** Returns the basis vector at index 1 (basis.m[1]; pre-refactor basis.vec[1]) as a CVector3f. */
inline CVector3f frontVector() const CVector3f frontVector() const {
{ return basis.m[1];
return basis.vec[1];
} }
/** Returns the basis vector at index 0 (basis.m[0]; pre-refactor basis.vec[0]) as a CVector3f. */
inline CVector3f rightVector() const CVector3f rightVector() const {
{ return basis.m[0];
return basis.vec[0];
} }
/**
 * Rebuilds the basis in place: normalizes basis[0], sets basis[2] to the normalized cross product
 * basis[0] x basis[1], then sets basis[1] to basis[2] x basis[0]. Statement order matters, since
 * each step reads vectors updated by the previous one.
 */
inline void orthonormalize() void orthonormalize() {
{
basis[0].normalize(); basis[0].normalize();
basis[2] = basis[0].cross(basis[1]); basis[2] = basis[0].cross(basis[1]);
basis[2].normalize(); basis[2].normalize();
basis[1] = basis[2].cross(basis[0]); basis[1] = basis[2].cross(basis[0]);
} }
void printMatrix() const void printMatrix() const {
{
printf("%f %f %f %f\n" printf("%f %f %f %f\n"
"%f %f %f %f\n" "%f %f %f %f\n"
"%f %f %f %f\n" "%f %f %f %f\n"
@ -253,10 +252,9 @@ public:
0.f, 0.f, 0.f, 1.f); 0.f, 0.f, 0.f, 1.f);
} }
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) {
{
uint32_t i; uint32_t i;
if (uVec.y < uVec.x || uVec.z < uVec.y || uVec.z < uVec.x) if (uVec.y() < uVec.x() || uVec.z() < uVec.y() || uVec.z() < uVec.x())
i = 2; i = 2;
else else
i = 1; i = 1;
@ -271,10 +269,16 @@ public:
CVector3f origin; CVector3f origin;
}; };
/** Returns a transform (default zero offset) whose basis is CMatrix3f(scale). */
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) { return CTransform(CMatrix3f(scale)); } static inline CTransform CTransformFromScaleVector(const CVector3f& scale) {
return CTransform(CMatrix3f(scale));
}
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec); CTransform CTransformFromEditorEuler(const CVector3f& eulerVec);
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin); CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin);
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle); CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle);
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp); CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp);
} }

View File

@ -2,21 +2,17 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CUnitVector3f : public CVector3f {
class alignas(16) CUnitVector3f : public CVector3f
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CUnitVector3f() : CVector3f(0.f, 1.f, 0.f) {}
CUnitVector3f() : CVector3f(0, 1, 0) {} CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z) {
CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z)
{
if (doNormalize && canBeNormalized()) if (doNormalize && canBeNormalized())
normalize(); normalize();
} }
CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec)
{ CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec) {
if (doNormalize && canBeNormalized()) if (doNormalize && canBeNormalized())
normalize(); normalize();
} }

View File

@ -2,431 +2,260 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cassert> #include <cassert>
namespace zeus namespace zeus {
{ class CVector2f {
class alignas(16) CVector2f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(8)));
#endif
public: public:
// ZE_DECLARE_ALIGNED_ALLOCATOR(); simd<float> mSimd;
union { CVector2f() : mSimd(0.f) {}
struct
{ template <typename T>
float x, y; CVector2f(const simd<T>& s) : mSimd(s) {}
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
inline CVector2f() { zeroOut(); }
#if __SSE__
CVector2f(const __m128& mVec128) : mVec128(mVec128)
{
v[2] = 0.0f;
v[3] = 0.0f;
}
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector2f(const atVec2f& vec)
#if __SSE__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], v[2] = 0.0f, v[3] = 0.0f;
}
#endif
operator atVec2f&() CVector2f(const atVec2f& vec) : mSimd(vec.simd) {}
{
return *reinterpret_cast<atVec2f*>(v); operator atVec2f&() {
} return *reinterpret_cast<atVec2f*>(this);
operator const atVec2f&() const
{
return *reinterpret_cast<const atVec2f*>(v);
} }
void readBig(athena::io::IStreamReader& input) operator const atVec2f&() const {
{ return *reinterpret_cast<const atVec2f*>(this);
x = input.readFloatBig();
y = input.readFloatBig();
v[2] = 0.0f;
v[3] = 0.0f;
} }
static CVector2f ReadBig(athena::io::IStreamReader& input) void readBig(athena::io::IStreamReader& input) {
{ mSimd[0] = input.readFloatBig();
mSimd[1] = input.readFloatBig();
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
static CVector2f ReadBig(athena::io::IStreamReader& input) {
CVector2f ret; CVector2f ret;
ret.readBig(input); ret.readBig(input);
return ret; return ret;
} }
#endif #endif
explicit CVector2f(float xy) { splat(xy); } explicit CVector2f(float xy) { splat(xy); }
void assign(float x, float y)
{ void assign(float x, float y) {
v[0] = x; mSimd[0] = x;
v[1] = y; mSimd[1] = y;
v[2] = 0.0f; mSimd[2] = 0.0f;
v[3] = 0.0f; mSimd[3] = 0.0f;
} }
CVector2f(float x, float y) { assign(x, y); } CVector2f(float x, float y) { assign(x, y); }
inline bool operator==(const CVector2f& rhs) const { return (x == rhs.x && y == rhs.y); } bool operator==(const CVector2f& rhs) const {
inline bool operator!=(const CVector2f& rhs) const { return !(*this == rhs); } return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1];
inline bool operator<(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x < rhs.x || y < rhs.y);
#endif
}
inline bool operator<=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x <= rhs.x || y <= rhs.y);
#endif
}
inline bool operator>(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x > rhs.x || y > rhs.y);
#endif
}
inline bool operator>=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x >= rhs.x || y >= rhs.y);
#endif
} }
inline CVector2f operator+(const CVector2f& rhs) const bool operator!=(const CVector2f& rhs) const {
{ return mSimd[0] != rhs.mSimd[0] || mSimd[1] != rhs.mSimd[1];
#if __SSE__
return CVector2f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x + rhs.x, y + rhs.y);
#endif
} }
inline CVector2f operator-(const CVector2f& rhs) const
{ bool operator<(const CVector2f& rhs) const {
#if __SSE__ return mSimd[0] < rhs.mSimd[0] && mSimd[1] < rhs.mSimd[1];
return CVector2f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x - rhs.x, y - rhs.y);
#endif
} }
inline CVector2f operator-() const
{ bool operator<=(const CVector2f& rhs) const {
#if __SSE__ return mSimd[0] <= rhs.mSimd[0] && mSimd[1] <= rhs.mSimd[1];
return CVector2f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector2f(-x, -y);
#endif
} }
inline CVector2f operator*(const CVector2f& rhs) const
{ bool operator>(const CVector2f& rhs) const {
#if __SSE__ return mSimd[0] > rhs.mSimd[0] && mSimd[1] > rhs.mSimd[1];
return CVector2f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x * rhs.x, y * rhs.y);
#endif
} }
inline CVector2f operator/(const CVector2f& rhs) const
{ bool operator>=(const CVector2f& rhs) const {
#if __SSE__ return mSimd[0] >= rhs.mSimd[0] && mSimd[1] >= rhs.mSimd[1];
return CVector2f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x / rhs.x, y / rhs.y);
#endif
} }
inline CVector2f operator+(float val) const
{ CVector2f operator+(const CVector2f& rhs) const {
#if __SSE__ return mSimd + rhs.mSimd;
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector2f(x + val, y + val);
#endif
} }
inline CVector2f operator-(float val) const
{ CVector2f operator-(const CVector2f& rhs) const {
#if __SSE__ return mSimd - rhs.mSimd;
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector2f(x - val, y - val);
#endif
} }
inline CVector2f operator*(float val) const
{ CVector2f operator-() const {
#if __SSE__ return -mSimd;
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * val, y * val);
#endif
} }
inline CVector2f operator/(float val) const
{ CVector2f operator*(const CVector2f& rhs) const {
return mSimd * rhs.mSimd;
}
CVector2f operator/(const CVector2f& rhs) const {
return mSimd / rhs.mSimd;
}
CVector2f operator+(float val) const {
return mSimd + simd<float>(val);
}
CVector2f operator-(float val) const {
return mSimd - simd<float>(val);
}
CVector2f operator*(float val) const {
return mSimd * simd<float>(val);
}
CVector2f operator/(float val) const {
float ooval = 1.f / val; float ooval = 1.f / val;
#if __SSE__ return mSimd * simd<float>(ooval);
TVectorUnion splat = {{ooval, ooval, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * ooval, y * ooval);
#endif
} }
inline const CVector2f& operator+=(const CVector2f& rhs)
{ const CVector2f& operator+=(const CVector2f& rhs) {
#if __SSE__ mSimd += rhs.mSimd;
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
#endif
return *this; return *this;
} }
inline const CVector2f& operator-=(const CVector2f& rhs)
{ const CVector2f& operator-=(const CVector2f& rhs) {
#if __SSE__ mSimd -= rhs.mSimd;
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
#endif
return *this; return *this;
} }
inline const CVector2f& operator*=(const CVector2f& rhs)
{ const CVector2f& operator*=(const CVector2f& rhs) {
#if __SSE__ mSimd *= rhs.mSimd;
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
#endif
return *this; return *this;
} }
inline const CVector2f& operator/=(const CVector2f& rhs)
{ const CVector2f& operator/=(const CVector2f& rhs) {
#if __SSE__ mSimd /= rhs.mSimd;
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
#endif
return *this; return *this;
} }
inline const CVector2f& operator+=(float rhs)
{ const CVector2f& operator+=(float rhs) {
#if __SSE__ mSimd += simd<float>(rhs);
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_add_ps(mVec128, splat.mVec128);
#else
x += rhs;
y += rhs;
#endif
return *this; return *this;
} }
inline const CVector2f& operator-=(float rhs)
{ const CVector2f& operator-=(float rhs) {
#if __SSE__ mSimd -= simd<float>(rhs);
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_sub_ps(mVec128, splat.mVec128);
#else
x -= rhs;
y -= rhs;
#endif
return *this; return *this;
} }
inline const CVector2f& operator*=(float rhs)
{ const CVector2f& operator*=(float rhs) {
#if __SSE__ mSimd *= simd<float>(rhs);
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= rhs;
y *= rhs;
#endif
return *this; return *this;
} }
inline const CVector2f& operator/=(float rhs)
{ const CVector2f& operator/=(float rhs) {
float oorhs = 1.f / rhs; float oorhs = 1.f / rhs;
#if __SSE__ mSimd /= simd<float>(oorhs);
TVectorUnion splat = {{oorhs, oorhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= oorhs;
y *= oorhs;
#endif
return *this; return *this;
} }
inline void normalize()
{ void normalize() {
float mag = magnitude(); float mag = magnitude();
mag = 1.f / mag; mag = 1.f / mag;
*this *= CVector2f(mag); *this *= CVector2f(mag);
} }
inline CVector2f normalized() const CVector2f normalized() const {
{
float mag = magnitude(); float mag = magnitude();
mag = 1.f / mag; mag = 1.f / mag;
return *this * mag; return *this * mag;
} }
inline CVector2f perpendicularVector() const { return {-y, x}; } CVector2f perpendicularVector() const { return {-y(), x()}; }
inline float cross(const CVector2f& rhs) const { return (x * rhs.y) - (y * rhs.x); } float cross(const CVector2f& rhs) const { return (x() * rhs.y()) - (y() * rhs.x()); }
inline float dot(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1];
#endif
#else
return (x * rhs.x) + (y * rhs.y);
#endif
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1];
#endif
#else
return x * x + y * y;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
inline void zeroOut() float dot(const CVector2f& rhs) const {
{ return mSimd.dot2(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot2(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
void zeroOut() {
*this = CVector2f::skZero; *this = CVector2f::skZero;
} }
inline void splat(float xy) void splat(float xy) {
{ mSimd = zeus::simd<float>(xy);
#if __SSE__
TVectorUnion splat = {{xy, xy, 0.0f, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xy;
v[1] = xy;
v[2] = 0.0f;
v[3] = 0.0f;
#endif
} }
static float getAngleDiff(const CVector2f& a, const CVector2f& b); static float getAngleDiff(const CVector2f& a, const CVector2f& b);
static inline CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) { return (a + (b - a) * t); } static CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) {
static inline CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) { return lerp(a, b, t).normalized(); } return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
static CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) {
return lerp(a, b, t).normalized();
}
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t); static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; } bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
inline bool canBeNormalized() const bool canBeNormalized() const {
{ if (std::isinf(x()) || std::isinf(y()))
if (std::isinf(x) || std::isinf(y))
return false; return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON; return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
} }
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; } bool isZero() const {
return magSquared() <= FLT_EPSILON;
}
inline bool isEqu(const CVector2f& other, float epsilon = 1.1920929e-7f) bool isEqu(const CVector2f& other, float epsilon = FLT_EPSILON) {
{
const CVector2f diffVec = other - *this; const CVector2f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon); return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
} }
inline float& operator[](size_t idx) { assert(idx < 2); return (&x)[idx]; } zeus::simd<float>::reference operator[](size_t idx) {
inline const float& operator[](size_t idx) const { assert(idx < 2); return (&x)[idx]; } assert(idx < 2);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 2);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
static const CVector2f skOne; static const CVector2f skOne;
static const CVector2f skNegOne; static const CVector2f skNegOne;
static const CVector2f skZero; static const CVector2f skZero;
}; };
/// Scalar-on-the-left addition: wraps lhs in a simd<float> and adds rhs lane-wise.
static inline CVector2f operator+(float lhs, const CVector2f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return scalar + rhs.mSimd;
}
/// Scalar-on-the-left subtraction: wraps lhs in a simd<float> and subtracts rhs lane-wise.
static inline CVector2f operator-(float lhs, const CVector2f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return scalar - rhs.mSimd;
}
/// Scalar-on-the-left multiplication: wraps lhs in a simd<float> and multiplies by rhs lane-wise.
static inline CVector2f operator*(float lhs, const CVector2f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return scalar * rhs.mSimd;
}
/// Scalar-on-the-left division: wraps lhs in a simd<float> and divides it by rhs lane-wise.
static inline CVector2f operator/(float lhs, const CVector2f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return scalar / rhs.mSimd;
}
} }

View File

@ -5,54 +5,55 @@
#include "CVector2f.hpp" #include "CVector2f.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include <athena/IStreamReader.hpp>
#endif #endif
namespace zeus namespace zeus {
{
class CVector2i class CVector2i {
{
public: public:
union { union {
struct struct {
{
int x, y; int x, y;
}; };
int v[2]; int v[2];
}; };
CVector2i() = default; CVector2i() = default;
CVector2i(int xin, int yin) : x(xin), y(yin) {} CVector2i(int xin, int yin) : x(xin), y(yin) {}
CVector2i(const CVector2f& vec) : x(int(vec.x)), y(int(vec.y)) {}
CVector2i(const CVector2f& vec) : x(int(vec.x())), y(int(vec.y())) {}
CVector2f toVec2f() const { return CVector2f(x, y); } CVector2f toVec2f() const { return CVector2f(x, y); }
inline CVector2i operator+(const CVector2i& val) const CVector2i operator+(const CVector2i& val) const {
{
return CVector2i(x + val.x, y + val.y); return CVector2i(x + val.x, y + val.y);
} }
inline CVector2i operator-(const CVector2i& val) const
{ CVector2i operator-(const CVector2i& val) const {
return CVector2i(x - val.x, y - val.y); return CVector2i(x - val.x, y - val.y);
} }
inline CVector2i operator*(const CVector2i& val) const
{ CVector2i operator*(const CVector2i& val) const {
return CVector2i(x * val.x, y * val.y); return CVector2i(x * val.x, y * val.y);
} }
inline CVector2i operator/(const CVector2i& val) const
{ CVector2i operator/(const CVector2i& val) const {
return CVector2i(x / val.x, y / val.y); return CVector2i(x / val.x, y / val.y);
} }
inline bool operator==(const CVector2i& other) const
{ bool operator==(const CVector2i& other) const {
return x == other.x && y == other.y; return x == other.x && y == other.y;
} }
inline bool operator!=(const CVector2i& other) const
{ bool operator!=(const CVector2i& other) const {
return x != other.x || y != other.y; return x != other.x || y != other.y;
} }
inline CVector2i operator*(int val) const
{ CVector2i operator*(int val) const {
return CVector2i(x * val, y * val); return CVector2i(x * val, y * val);
} }
}; };

View File

@ -1,288 +1,118 @@
#pragma once #pragma once
#include <athena/Types.hpp> #include "athena/Types.hpp"
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{
class alignas(32) CVector3d class CVector3d {
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR32(); zeus::simd<double> mSimd;
CVector3d() { zeroOut(); } CVector3d() : mSimd(0.0) {}
template <typename T>
CVector3d(const simd<T>& s) : mSimd(s) {}
#if __AVX__
CVector3d(const __m256d& mVec256)
{
this->mVec256 = mVec256;
v[3] = 0.0;
}
#elif __SSE__
CVector3d(const __m128d mVec128[2])
{
this->mVec128[0] = mVec128[0];
this->mVec128[1] = mVec128[1];
v[3] = 0.0;
}
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector3d(const atVec3d& vec) CVector3d(const atVec3d& vec) : mSimd(vec.simd) {}
{
#if __AVX__
mVec256 = vec.mVec256;
#elif __SSE__
mVec128[0] = vec.mVec128[0];
mVec128[1] = vec.mVec128[1];
#else
x = v[0], y = v[1], z = v[2], v[3] = 0.0f;
#endif
}
#endif #endif
explicit CVector3d(double xyz) { splat(xyz); } explicit CVector3d(double xyz) : mSimd(xyz) {}
CVector3d(const CVector3f& vec) CVector3d(const CVector3f& vec) : mSimd(vec.mSimd) {}
{
#if __AVX__ CVector3d(double x, double y, double z) : mSimd(x, y, z) {}
mVec256 = _mm256_cvtps_pd(vec.mVec128);
#elif __SSE__ CVector3f asCVector3f() {
mVec128[0] = _mm_cvtps_pd(vec.mVec128); return mSimd;
v[2] = vec[2];
#else
v[0] = vec[0];
v[1] = vec[1];
v[2] = vec[2];
v[3] = 0.0;
#endif
} }
CVector3d(double x, double y, double z) double magSquared() const {
{ return mSimd.dot3(mSimd);
#if __AVX__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0;
#endif
} }
CVector3f asCVector3f() double magnitude() const {
{ return sqrt(magSquared());
#if __AVX__
return CVector3f(_mm256_cvtpd_ps(mVec256));
#else
return CVector3f(float(x), float(y), float(z));
#endif
} }
double magSquared() const CVector3d cross(const CVector3d& rhs) const {
{ return {y() * rhs.z() - z() * rhs.y(),
#if __SSE__ z() * rhs.x() - x() * rhs.z(),
TDblVectorUnion result; x() * rhs.y() - y() * rhs.x()};
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
} }
double magnitude() const { return sqrt(magSquared()); } double dot(const CVector3d& rhs) const {
inline CVector3d cross(const CVector3d& rhs) const return mSimd.dot3(rhs.mSimd);
{
return {y * rhs.z - z * rhs.y,
z * rhs.x - x * rhs.z,
x * rhs.y - y * rhs.x};
} }
double dot(const CVector3d& rhs) const CVector3d asNormalized() {
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
}
CVector3d asNormalized()
{
double mag = magnitude(); double mag = magnitude();
mag = 1.0 / mag; mag = 1.0 / mag;
return {x * mag, y * mag, z * mag}; return mSimd * zeus::simd<double>(mag);
} }
void splat(double xyz) void splat(double xyz) {
{ mSimd = zeus::simd<double>(xyz);
#if __AVX__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0;
#endif
} }
void zeroOut() void zeroOut() {
{
*this = skZero; *this = skZero;
} }
inline CVector3d operator+(const CVector3d& rhs) const CVector3d operator+(const CVector3d& rhs) const {
{ return mSimd + rhs.mSimd;
#if __AVX__
return _mm256_add_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_add_pd(mVec128[0], rhs.mVec128[0]),
_mm_add_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
}
inline CVector3d operator-(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_sub_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_sub_pd(mVec128[0], rhs.mVec128[0]),
_mm_sub_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
}
inline CVector3d operator*(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_mul_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_mul_pd(mVec128[0], rhs.mVec128[0]),
_mm_mul_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
}
inline CVector3d operator/(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_div_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_div_pd(mVec128[0], rhs.mVec128[0]),
_mm_div_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
} }
inline double& operator[](size_t idx) { assert(idx < 3); return v[idx]; } CVector3d operator-(const CVector3d& rhs) const {
inline const double& operator[](size_t idx) const { assert(idx < 3); return v[idx]; } return mSimd - rhs.mSimd;
}
union { CVector3d operator*(const CVector3d& rhs) const {
struct return mSimd * rhs.mSimd;
{ }
double x, y, z;
}; CVector3d operator/(const CVector3d& rhs) const {
double v[4]; return mSimd / rhs.mSimd;
#if __AVX__ }
__m256d mVec256;
#endif zeus::simd<double>::reference operator[](size_t idx) {
#if __SSE__ assert(idx < 3);
__m128d mVec128[2]; return mSimd[idx];
#endif }
};
double operator[](size_t idx) const {
assert(idx < 3);
return mSimd[idx];
}
double x() const { return mSimd[0]; }
double y() const { return mSimd[1]; }
double z() const { return mSimd[2]; }
simd<double>::reference x() { return mSimd[0]; }
simd<double>::reference y() { return mSimd[1]; }
simd<double>::reference z() { return mSimd[2]; }
static const CVector3d skZero; static const CVector3d skZero;
}; };
/// Scalar-on-the-left addition: wraps lhs in a simd<double> and adds rhs lane-wise.
static inline CVector3d operator+(double lhs, const CVector3d& rhs) {
  const zeus::simd<double> scalar(lhs);
  return scalar + rhs.mSimd;
}
/// Scalar-on-the-left subtraction: wraps lhs in a simd<double> and subtracts rhs lane-wise.
static inline CVector3d operator-(double lhs, const CVector3d& rhs) {
  const zeus::simd<double> scalar(lhs);
  return scalar - rhs.mSimd;
}
/// Scalar-on-the-left multiplication: wraps lhs in a simd<double> and multiplies by rhs lane-wise.
static inline CVector3d operator*(double lhs, const CVector3d& rhs) {
  const zeus::simd<double> scalar(lhs);
  return scalar * rhs.mSimd;
}

/// Scalar-on-the-left division: wraps lhs in a simd<double> and divides it by rhs lane-wise.
static inline CVector3d operator/(double lhs, const CVector3d& rhs) {
  const zeus::simd<double> scalar(lhs);
  return scalar / rhs.mSimd;
}
} }

View File

@ -3,362 +3,210 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "zeus/CVector2f.hpp" #include "zeus/CVector2f.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include "athena/IStreamReader.hpp"
#endif #endif
namespace zeus namespace zeus {
{
class CVector3d; class CVector3d;
class alignas(16) CVector3f
{ class CVector3f {
#if __atdna__
float clangVec __attribute__((__vector_size__(12)));
#endif
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); zeus::simd<float> mSimd;
CVector3f() : mSimd(0.f) {}
union { template <typename T>
struct CVector3f(const simd<T>& s) : mSimd(s) {}
{
float x, y, z;
};
float v[4];
#if __SSE__
__m128 mVec128;
#elif __GEKKO_PS__
ps128_t mVec128;
#endif
};
inline CVector3f() { zeroOut(); }
#if __SSE__ || __GEKKO_PS__
CVector3f(const __m128& mVec128) : mVec128(mVec128) { v[3] = 0.0f; }
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector3f(const atVec3f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], v[3] = 0.0f;
}
#endif
operator atVec3f&() CVector3f(const atVec3f& vec) : mSimd(vec.simd) {}
{
return *reinterpret_cast<atVec3f*>(v); operator atVec3f&() {
} return *reinterpret_cast<atVec3f*>(this);
operator const atVec3f&() const
{
return *reinterpret_cast<const atVec3f*>(v);
} }
void readBig(athena::io::IStreamReader& input) operator const atVec3f&() const {
{ return *reinterpret_cast<const atVec3f*>(this);
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
v[3] = 0.0f;
} }
static CVector3f ReadBig(athena::io::IStreamReader& input) void readBig(athena::io::IStreamReader& input) {
{ simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = 0.0f;
mSimd.copy_from(f);
}
static CVector3f ReadBig(athena::io::IStreamReader& input) {
CVector3f ret; CVector3f ret;
ret.readBig(input); ret.readBig(input);
return ret; return ret;
} }
#endif #endif
CVector3f(const CVector3d& vec); CVector3f(const CVector3d& vec);
explicit CVector3f(float xyz) { splat(xyz); } explicit CVector3f(float xyz) : mSimd(xyz) {}
void assign(float x, float y, float z)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0f;
}
CVector3f(float x, float y, float z) { assign(x, y, z); }
CVector3f(const float* floats) void assign(float x, float y, float z) {
{ mSimd = zeus::simd<float>(x, y, z);
#if __SSE__
mVec128 = _mm_loadu_ps(floats);
#else
x = floats[0];
y = floats[1];
z = floats[2];
#endif
v[3] = 0.0f;
} }
CVector3f(const CVector2f& other) CVector3f(float x, float y, float z) : mSimd(x, y, z) {}
{
x = other.x; CVector3f(const float* floats) : mSimd(floats[0], floats[1], floats[2]) {}
y = other.y;
z = 0.0f; CVector3f(const CVector2f& other) {
v[3] = 0.0f; mSimd = other.mSimd;
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
} }
inline CVector2f toVec2f() const CVector2f toVec2f() const {
{ return CVector2f(mSimd);
#if __SSE__
return CVector2f(mVec128);
#else
return CVector2f(x, y);
#endif
} }
inline bool operator==(const CVector3f& rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z); } bool operator==(const CVector3f& rhs) const {
inline bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); } return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1] && mSimd[2] == rhs.mSimd[2];
inline CVector3f operator+(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_add_ps(mVec128, rhs.mVec128));
#elif __GEKKO_PS__
return CVector3f(__mm_gekko_add_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
} }
inline CVector3f operator-(const CVector3f& rhs) const
{ bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); }
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, rhs.mVec128)); CVector3f operator+(const CVector3f& rhs) const {
#else return mSimd + rhs.mSimd;
return CVector3f(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
} }
inline CVector3f operator-() const
{ CVector3f operator-(const CVector3f& rhs) const {
#if __SSE__ return mSimd - rhs.mSimd;
return CVector3f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_neg_ps(mVec128));
#else
return CVector3f(-x, -y, -z);
#endif
} }
inline CVector3f operator*(const CVector3f& rhs) const
{ CVector3f operator-() const {
#if __SSE__ return -mSimd;
return CVector3f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
} }
inline CVector3f operator/(const CVector3f& rhs) const
{ CVector3f operator*(const CVector3f& rhs) const {
#if __SSE__ return mSimd * rhs.mSimd;
return CVector3f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
} }
inline CVector3f operator+(float val) const
{ CVector3f operator/(const CVector3f& rhs) const {
#if __SSE__ return mSimd / rhs.mSimd;
TVectorUnion splat = {{val, val, val, 0.0f}};
return CVector3f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector3f(x + val, y + val, z + val);
#endif
} }
inline CVector3f operator-(float val) const
{ CVector3f operator+(float val) const {
#if __SSE__ || __GEKKO_PS__ return mSimd + zeus::simd<float>(val);
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_sub_ps(mVec128, splat.mVec128));
#else
return CVector3f(x - val, y - val, z - val);
#endif
} }
inline CVector3f operator*(float val) const
{ CVector3f operator-(float val) const {
#if __SSE__ || __GEKKO_PS__ return mSimd - zeus::simd<float>(val);
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * val, y * val, z * val);
#endif
} }
inline CVector3f operator/(float val) const
{ CVector3f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
/**
 * Divides by a scalar by multiplying with its reciprocal: 1/val is computed
 * once and then applied through a single SIMD multiply.
 */
CVector3f operator/(float val) const {
  const float reciprocal = 1.f / val;
  const zeus::simd<float> scalar(reciprocal);
  return CVector3f(mSimd * scalar);
}
/** Adds rhs into this vector in place and returns a const reference to *this. */
const CVector3f& operator+=(const CVector3f& rhs) {
  mSimd = mSimd + rhs.mSimd;
  return *this;
}
inline void normalize() const CVector3f& operator-=(const CVector3f& rhs) {
{ mSimd -= rhs.mSimd;
return *this;
}
/** Multiplies this vector by rhs in place and returns a const reference to *this. */
const CVector3f& operator*=(const CVector3f& rhs) {
  mSimd = mSimd * rhs.mSimd;
  return *this;
}
/** Divides this vector by rhs in place and returns a const reference to *this. */
const CVector3f& operator/=(const CVector3f& rhs) {
  mSimd = mSimd / rhs.mSimd;
  return *this;
}
void normalize() {
float mag = 1.f / magnitude(); float mag = 1.f / magnitude();
*this *= CVector3f(mag); *this *= CVector3f(mag);
} }
/**
 * Returns a copy of this vector scaled by 1 / magnitude().
 * No guard against a zero magnitude is applied here.
 */
CVector3f normalized() const {
  const float invMag = 1.f / magnitude();
  return operator*(invMag);
}
/** Returns the cross product of this vector with rhs, computed component by component. */
CVector3f cross(const CVector3f& rhs) const {
  const float ax = x(), ay = y(), az = z();
  const float bx = rhs.x(), by = rhs.y(), bz = rhs.z();
  return CVector3f(ay * bz - az * by,
                   az * bx - ax * bz,
                   ax * by - ay * bx);
}
inline float dot(const CVector3f& rhs) const float dot(const CVector3f& rhs) const {
{ return mSimd.dot3(rhs.mSimd);
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
} }
inline float magSquared() const float magSquared() const {
{ return mSimd.dot3(mSimd);
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
} }
/** Returns the length of the vector: the square root of magSquared(). */
float magnitude() const {
  const float lengthSq = magSquared();
  return std::sqrt(lengthSq);
}
inline bool isMagnitudeSafe() const bool isNotInf() const {
{ return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z()));
}
/**
 * Returns true when no component is infinite and the squared length is at
 * least 9.9999994e-29.
 */
bool isMagnitudeSafe() const {
  if (!isNotInf())
    return false;
  return magSquared() >= 9.9999994e-29;
}
inline void zeroOut() void zeroOut() {
{
*this = CVector3f::skZero; *this = CVector3f::skZero;
} }
inline void splat(float xyz) void splat(float xyz) {
{ mSimd = zeus::simd<float>(xyz);
#if __SSE__
TVectorUnion splat = {{xyz, xyz, xyz, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0f;
#endif
} }
static float getAngleDiff(const CVector3f& a, const CVector3f& b); static float getAngleDiff(const CVector3f& a, const CVector3f& b);
/**
 * Linear interpolation between a and b, evaluated as (1 - t) * a + b * t.
 * t is not clamped.
 */
static CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) {
  const zeus::simd<float> weightA(1.f - t);
  const zeus::simd<float> weightB(t);
  return CVector3f(weightA * a.mSimd + b.mSimd * weightB);
}
/** Normalized lerp: interpolates with lerp() and returns the normalized result. */
static CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) {
  const CVector3f blended = lerp(a, b, t);
  return blended.normalized();
}
inline void scaleToLength(float newLength) static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
{
/** Returns true when the squared length is within 0.01 of 1. */
bool isNormalized() const {
  const float deviation = std::fabs(1.f - magSquared());
  return deviation < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON || std::fabs(z()) >= FLT_EPSILON;
}
/** Returns true when the squared length does not exceed FLT_EPSILON. */
bool isZero() const {
  const float lengthSq = magSquared();
  return lengthSq <= FLT_EPSILON;
}
void scaleToLength(float newLength) {
float length = magSquared(); float length = magSquared();
if (length < 1.1920929e-7f) if (length < FLT_EPSILON) {
{ mSimd[0] = newLength, mSimd[1] = 0.f, mSimd[2] = 0.f;
x = newLength, y = 0.f, z = 0.f;
return; return;
} }
@ -367,21 +215,34 @@ public:
*this *= CVector3f(scalar); *this *= CVector3f(scalar);
} }
/** Returns a copy of this vector after scaleToLength(newLength) has been applied to it. */
CVector3f scaledToLength(float newLength) const {
  CVector3f rescaled(*this);
  rescaled.scaleToLength(newLength);
  return rescaled;
}
inline bool isEqu(const CVector3f& other, float epsilon = 1.1920929e-7f) bool isEqu(const CVector3f& other, float epsilon = FLT_EPSILON) {
{
const CVector3f diffVec = other - *this; const CVector3f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon); return (diffVec.x() <= epsilon && diffVec.y() <= epsilon && diffVec.z() <= epsilon);
} }
/**
 * Mutable component access; idx must be 0, 1 or 2 (checked with assert).
 * Returns the SIMD type's reference proxy rather than a float&.
 */
zeus::simd<float>::reference operator[](size_t idx) {
  assert(idx < 3);
  return mSimd[idx];
}
/** Read-only component access by value; idx must be 0, 1 or 2 (checked with assert). */
float operator[](size_t idx) const {
  assert(idx < 3);
  const float component = mSimd[idx];
  return component;
}
/** Read-only component accessors: lanes 0, 1 and 2 of mSimd, returned by value. */
float x() const {
  return mSimd[0];
}
float y() const {
  return mSimd[1];
}
float z() const {
  return mSimd[2];
}

/** Mutable component accessors: reference proxies onto lanes 0, 1 and 2 of mSimd. */
simd<float>::reference x() {
  return mSimd[0];
}
simd<float>::reference y() {
  return mSimd[1];
}
simd<float>::reference z() {
  return mSimd[2];
}
static const CVector3f skOne; static const CVector3f skOne;
static const CVector3f skNegOne; static const CVector3f skNegOne;
@ -396,47 +257,24 @@ public:
static const CVector3f skDegToRadVec; static const CVector3f skDegToRadVec;
/** Multiplies rad component-wise by skRadToDegVec. */
static CVector3f radToDeg(const CVector3f& rad) {
  return CVector3f(rad.mSimd * skRadToDegVec.mSimd);
}
/** Multiplies deg component-wise by skDegToRadVec. */
static CVector3f degToRad(const CVector3f& deg) {
  return CVector3f(deg.mSimd * skDegToRadVec.mSimd);
}
}; };
/** Scalar-on-the-left addition: a SIMD value built from lhs plus rhs. */
static inline CVector3f operator+(float lhs, const CVector3f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector3f(scalar + rhs.mSimd);
}
/** Scalar-on-the-left subtraction: a SIMD value built from lhs minus rhs. */
static inline CVector3f operator-(float lhs, const CVector3f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector3f(scalar - rhs.mSimd);
}
/** Scalar-on-the-left multiplication: a SIMD value built from lhs times rhs. */
static inline CVector3f operator*(float lhs, const CVector3f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector3f(scalar * rhs.mSimd);
}
/** Scalar-on-the-left division: a SIMD value built from lhs divided by rhs. */
static inline CVector3f operator/(float lhs, const CVector3f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector3f(scalar / rhs.mSimd);
}
} }

View File

@ -1,420 +1,261 @@
#pragma once #pragma once
#include "Global.hpp" #include "Global.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#include "athena/IStreamReader.hpp"
#endif #endif
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cfloat> #include <cfloat>
#include <cassert> #include <cassert>
namespace zeus namespace zeus {
{
class CColor; class CColor;
class alignas(16) CVector4f
{ class CVector4f {
#if __atdna__
float clangVec __attribute__((__vector_size__(16)));
#endif
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); zeus::simd<float> mSimd;
union {
struct CVector4f() : mSimd(0.f) {}
{
float x, y, z, w; template <typename T>
}; CVector4f(const simd<T>& s) : mSimd(s) {}
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
inline CVector4f() { zeroOut(); }
#if __SSE__
CVector4f(const __m128& mVec128) : mVec128(mVec128) {}
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector4f(const atVec4f& vec)
#if __SSE__ CVector4f(const atVec4f& vec) : mSimd(vec.simd) {}
: mVec128(vec.mVec128)
{ operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
} }
#else
{ operator const atVec4f&() const {
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], w = vec.vec[3]; return *reinterpret_cast<const atVec4f*>(this);
} }
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
}
#endif #endif
operator atVec4f&() explicit CVector4f(float xyzw) : mSimd(xyzw) {}
{
return *reinterpret_cast<atVec4f*>(v); void assign(float x, float y, float z, float w) {
} mSimd = simd<float>(x, y, z, w);
operator const atVec4f&() const
{
return *reinterpret_cast<const atVec4f*>(v);
} }
void readBig(athena::io::IStreamReader& input) CVector4f(float x, float y, float z, float w) : mSimd(x, y, z, w) {}
{
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
w = input.readFloatBig();
}
#endif
explicit CVector4f(float xyzw) { splat(xyzw); }
void assign(float x, float y, float z, float w)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = w;
}
CVector4f(float x, float y, float z, float w) { assign(x, y, z, w); }
CVector4f(const CColor& other); CVector4f(const CColor& other);
CVector4f(const CVector3f& other, float wIn = 1.f) CVector4f(const CVector3f& other, float wIn = 1.f) : mSimd(other.mSimd) {
{ mSimd[3] = wIn;
#if __SSE__
mVec128 = other.mVec128;
#else
x = other.x;
y = other.y;
z = other.z;
#endif
w = wIn;
} }
static CVector4f ToClip(const zeus::CVector3f& v, float w) static CVector4f ToClip(const zeus::CVector3f& v, float w) {
{
return CVector4f(v * w, w); return CVector4f(v * w, w);
} }
inline CVector3f toVec3f() const CVector3f toVec3f() const {
{ return CVector3f(mSimd);
#if __SSE__
return CVector3f(mVec128);
#else
return CVector3f(x, y, z);
#endif
} }
CVector4f& operator=(const CColor& other); CVector4f& operator=(const CColor& other);
inline bool operator==(const CVector4f& rhs) const
{ bool operator==(const CVector4f& rhs) const {
#if __SSE__ auto eq_mask = mSimd == rhs.mSimd;
TVectorUnion vec; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
vec.mVec128 = _mm_cmpeq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w);
#endif
} }
inline bool operator!=(const CVector4f& rhs) const
{ bool operator!=(const CVector4f& rhs) const {
#if __SSE__ auto eq_mask = mSimd != rhs.mSimd;
TVectorUnion vec; return eq_mask[0] || eq_mask[1] || eq_mask[2] || eq_mask[3];
vec.mVec128 = _mm_cmpneq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return !(*this == rhs);
#endif
} }
inline bool operator<(const CVector4f& rhs) const
{ bool operator<(const CVector4f& rhs) const {
#if __SSE__ auto eq_mask = mSimd < rhs.mSimd;
TVectorUnion vec; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x < rhs.x || y < rhs.y || z < rhs.z || w < rhs.w);
#endif
} }
inline bool operator<=(const CVector4f& rhs) const
{ bool operator<=(const CVector4f& rhs) const {
#if __SSE__ auto eq_mask = mSimd <= rhs.mSimd;
TVectorUnion vec; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x <= rhs.x || y <= rhs.y || z <= rhs.z || w <= rhs.w);
#endif
} }
inline bool operator>(const CVector4f& rhs) const
{ bool operator>(const CVector4f& rhs) const {
#if __SSE__ auto eq_mask = mSimd > rhs.mSimd;
TVectorUnion vec; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x > rhs.x || y > rhs.y || z > rhs.z || w > rhs.w);
#endif
} }
inline bool operator>=(const CVector4f& rhs) const
{ bool operator>=(const CVector4f& rhs) const {
#if __SSE__ auto eq_mask = mSimd >= rhs.mSimd;
TVectorUnion vec; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x >= rhs.x || y >= rhs.y || z >= rhs.z || w >= rhs.w);
#endif
} }
inline CVector4f operator+(const CVector4f& rhs) const
{ CVector4f operator+(const CVector4f& rhs) const {
#if __SSE__ return mSimd + rhs.mSimd;
return CVector4f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
#endif
} }
inline CVector4f operator-(const CVector4f& rhs) const
{ CVector4f operator-(const CVector4f& rhs) const {
#if __SSE__ return mSimd - rhs.mSimd;
return CVector4f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
#endif
} }
inline CVector4f operator-() const
{ CVector4f operator-() const {
#if __SSE__ return -mSimd;
return CVector4f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector4f(-x, -y, -z, -w);
#endif
} }
inline CVector4f operator*(const CVector4f& rhs) const
{ CVector4f operator*(const CVector4f& rhs) const {
#if __SSE__ return mSimd * rhs.mSimd;
return CVector4f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
#endif
} }
inline CVector4f operator/(const CVector4f& rhs) const
{ CVector4f operator/(const CVector4f& rhs) const {
#if __SSE__ return mSimd / rhs.mSimd;
return CVector4f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
#endif
} }
inline CVector4f operator+(float val) const
{ CVector4f operator+(float val) const {
#if __SSE__ return mSimd + zeus::simd<float>(val);
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector4f(x + val, y + val, z + val, w + val);
#endif
} }
inline CVector4f operator-(float val) const
{ CVector4f operator-(float val) const {
#if __SSE__ return mSimd - zeus::simd<float>(val);
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector4f(x - val, y - val, z - val, w - val);
#endif
} }
inline CVector4f operator*(float val) const
{ CVector4f operator*(float val) const {
#if __SSE__ return mSimd * zeus::simd<float>(val);
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * val, y * val, z * val, w * val);
#endif
} }
inline CVector4f operator/(float val) const
{ CVector4f operator/(float val) const {
float ooval = 1.f / val; float ooval = 1.f / val;
#if __SSE__ return mSimd * zeus::simd<float>(ooval);
TVectorUnion splat = {{ooval, ooval, ooval, ooval}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * ooval, y * ooval, z * ooval, w * ooval);
#endif
} }
inline const CVector4f& operator+=(const CVector4f& rhs)
{ const CVector4f& operator+=(const CVector4f& rhs) {
#if __SSE__ mSimd += rhs.mSimd;
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
w += rhs.w;
#endif
return *this; return *this;
} }
inline const CVector4f& operator-=(const CVector4f& rhs)
{ const CVector4f& operator-=(const CVector4f& rhs) {
#if __SSE__ mSimd -= rhs.mSimd;
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
w -= rhs.w;
#endif
return *this; return *this;
} }
inline const CVector4f& operator*=(const CVector4f& rhs)
{ const CVector4f& operator*=(const CVector4f& rhs) {
#if __SSE__ mSimd *= rhs.mSimd;
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
w *= rhs.w;
#endif
return *this; return *this;
} }
inline const CVector4f& operator/=(const CVector4f& rhs)
{ const CVector4f& operator/=(const CVector4f& rhs) {
#if __SSE__ mSimd /= rhs.mSimd;
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
w /= rhs.w;
#endif
return *this; return *this;
} }
inline void normalize()
{ void normalize() {
float mag = magnitude(); float mag = magnitude();
mag = 1.f / mag; mag = 1.f / mag;
*this *= CVector4f(mag); *this *= CVector4f(mag);
} }
inline CVector4f normalized() const
{ CVector4f normalized() const {
float mag = magnitude(); float mag = magnitude();
mag = 1.f / mag; mag = 1.f / mag;
return *this * mag; return *this * mag;
} }
inline float dot(const CVector4f& rhs) const float dot(const CVector4f& rhs) const {
{ return mSimd.dot4(rhs.mSimd);
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
} }
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z + w * w;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
inline void zeroOut() float magSquared() const {
{ return mSimd.dot4(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
void zeroOut() {
*this = CVector4f::skZero; *this = CVector4f::skZero;
} }
inline void splat(float xyzw) void splat(float xyzw) {
{ mSimd = zeus::simd<float>(xyzw);
#if __SSE__
TVectorUnion splat = {{xyzw, xyzw, xyzw, xyzw}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = xyzw;
#endif
} }
static inline CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) { return (a + (b - a) * t); } static CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) {
static inline CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) { return lerp(a, b, t).normalized(); } return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; } static CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) {
return lerp(a, b, t).normalized();
}
inline bool canBeNormalized() const bool isNormalized() const {
{ return std::fabs(1.f - magSquared()) < 0.01f;
if (std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w)) }
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()) || std::isinf(w()))
return false; return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON || std::fabs(w) >= FLT_EPSILON; return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON ||
std::fabs(z()) >= FLT_EPSILON || std::fabs(w()) >= FLT_EPSILON;
} }
inline bool isEqu(const CVector4f& other, float epsilon = 1.1920929e-7f) bool isEqu(const CVector4f& other, float epsilon = FLT_EPSILON) {
{
const CVector4f diffVec = other - *this; const CVector4f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon && diffVec.w <= epsilon); return (diffVec.x() <= epsilon && diffVec.y() <= epsilon &&
diffVec.z() <= epsilon && diffVec.w() <= epsilon);
} }
inline float& operator[](size_t idx) { assert(idx < 4); return (&x)[idx]; } zeus::simd<float>::reference operator[](size_t idx) {
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&x)[idx]; } assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float w() const { return mSimd[3]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference w() { return mSimd[3]; }
static const CVector4f skOne; static const CVector4f skOne;
static const CVector4f skNegOne; static const CVector4f skNegOne;
static const CVector4f skZero; static const CVector4f skZero;
}; };
/** Scalar-on-the-left addition: a SIMD value built from lhs plus rhs. */
static CVector4f operator+(float lhs, const CVector4f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector4f(scalar + rhs.mSimd);
}
/** Scalar-on-the-left subtraction: a SIMD value built from lhs minus rhs. */
static CVector4f operator-(float lhs, const CVector4f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector4f(scalar - rhs.mSimd);
}
/** Scalar-on-the-left multiplication: a SIMD value built from lhs times rhs. */
static CVector4f operator*(float lhs, const CVector4f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector4f(scalar * rhs.mSimd);
}
/** Scalar-on-the-left division: a SIMD value built from lhs divided by rhs. */
static CVector4f operator/(float lhs, const CVector4f& rhs) {
  const zeus::simd<float> scalar(lhs);
  return CVector4f(scalar / rhs.mSimd);
}
} }

View File

@ -1,61 +1,19 @@
#pragma once #pragma once
#if _M_IX86_FP >= 1 || _M_X64 #if ZE_ATHENA_TYPES
#define __SSE__ 1 #include "athena/IStreamReader.hpp"
#endif #include "athena/simd/simd.hpp"
#if __SSE__
#include <immintrin.h>
#ifndef _MSC_VER
#include <mm_malloc.h>
#endif
#define zeAlloc(sz, align) _mm_malloc(sz, align)
#define zeFree(ptr) _mm_free(ptr)
#elif GEKKO
#include <ps_intrins.h>
#define zeAlloc(sz, align) _ps_malloc(sz, align)
#define zeFree(ptr) _ps_free(ptr)
#endif
#if __SSE__ || __GEKKO_PS__
#define ZE_DECLARE_ALIGNED_ALLOCATOR() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#else #else
#define ZE_DECLARE_ALIGNED_ALLOCATOR() void __unused__() #include "simd/simd.hpp"
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() void __unused__()
#endif #endif
namespace zeus {
#if ZE_ATHENA_TYPES
// With ZE_ATHENA_TYPES set, the zeus SIMD names are aliases of athena's types.
template <typename T>
using simd = athena::simd<T>;

using simd_floats = athena::simd_floats;

using simd_doubles = athena::simd_doubles;
#endif
} // namespace zeus
/**
 * Rotates the 32-bit pattern of x right by n bit positions.
 *
 * The rotation is done on the unsigned representation so that negative
 * inputs are not sign-extended by the right shift, and the count is reduced
 * modulo 32 so that n == 0 (or n == 32) never produces a shift by the full
 * width, which is undefined behaviour.
 *
 * @param x value whose bits are rotated (treated as a 32-bit pattern)
 * @param n rotation count; only its low five bits are used
 * @return the rotated bit pattern, converted back to int
 */
inline int rotr(int x, int n) {
  const unsigned int bits = static_cast<unsigned int>(x);
  const unsigned int count = static_cast<unsigned int>(n) & 31u;
  return static_cast<int>((bits >> count) | (bits << ((32u - count) & 31u)));
}
/**
 * Rotates the 32-bit pattern of x left by n bit positions.
 *
 * The rotation is done on the unsigned representation so that the bits
 * wrapped around from the top are not polluted by sign extension, and the
 * count is reduced modulo 32 so that n == 0 (or n == 32) never produces a
 * shift by the full width, which is undefined behaviour.
 *
 * @param x value whose bits are rotated (treated as a 32-bit pattern)
 * @param n rotation count; only its low five bits are used
 * @return the rotated bit pattern, converted back to int
 */
inline int rotl(int x, int n) {
  const unsigned int bits = static_cast<unsigned int>(x);
  const unsigned int count = static_cast<unsigned int>(n) & 31u;
  return static_cast<int>((bits << count) | (bits >> ((32u - count) & 31u)));
}

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <cfloat> #include <cfloat>
#undef min #undef min
#undef max #undef max
@ -26,8 +27,7 @@
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
namespace zeus namespace zeus {
{
#if _MSC_VER #if _MSC_VER
#if defined(_M_IX86) #if defined(_M_IX86)
@ -43,8 +43,7 @@ namespace zeus
#endif #endif
#endif #endif
struct CPUInfo struct CPUInfo {
{
const char cpuBrand[48] = {0}; const char cpuBrand[48] = {0};
const char cpuVendor[32] = {0}; const char cpuVendor[32] = {0};
#if ZEUS_ARCH_X86_64 || ZEUS_ARCH_X86 #if ZEUS_ARCH_X86_64 || ZEUS_ARCH_X86
@ -61,64 +60,83 @@ struct CPUInfo
const bool AVX2 = false; const bool AVX2 = false;
#endif #endif
}; };
/**
 * Detects CPU capabilities (for example whether SSE4.1 or SSE4.2 is available).
 * The function itself returns nothing; presumably the detected features are
 * read back through cpuFeatures() / validateCPU() - TODO confirm.
 */
void detectCPU(); void detectCPU();
const CPUInfo& cpuFeatures(); const CPUInfo& cpuFeatures();
std::pair<bool, const CPUInfo&> validateCPU(); std::pair<bool, const CPUInfo&> validateCPU();
void getCpuInfo(int eax, int regs[4]); void getCpuInfo(int eax, int regs[4]);
void getCpuInfoEx(int eax, int ecx, int regs[4]); void getCpuInfoEx(int eax, int ecx, int regs[4]);
class CVector3f; class CVector3f;
class CVector2f; class CVector2f;
class CTransform; class CTransform;
/** Returns a when a < b, otherwise b (so b is returned when the two compare equal). */
template <typename T>
inline constexpr T min(const T& a, const T& b) {
  if (a < b)
    return a;
  return b;
}
/** Returns a when a > b, otherwise b (so b is returned when the two compare equal). */
template <typename T>
inline constexpr T max(const T& a, const T& b) {
  if (a > b)
    return a;
  return b;
}
template <> CVector3f min(const CVector3f& a, const CVector3f& b);
template <> CVector3f max(const CVector3f& a, const CVector3f& b);
template <typename T> template<>
inline constexpr T clamp(const T& a, const T& val, const T& b) CVector3f min(const CVector3f& a, const CVector3f& b);
{
template<>
CVector3f max(const CVector3f& a, const CVector3f& b);
/**
 * Restricts val to the range [a, b]: first caps it at b with min<T>, then
 * raises the result to at least a with max<T>.
 */
template <typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b) {
  const T capped = min<T>(b, val);
  return max<T>(a, capped);
}
/** Converts radians to degrees (single precision) by multiplying with 180 / M_PIF. */
inline constexpr float radToDeg(float rad) {
  constexpr float kDegreesPerRadian = 180.f / M_PIF;
  return rad * kDegreesPerRadian;
}
/** Converts degrees to radians (single precision) by multiplying with M_PIF / 180. */
inline constexpr float degToRad(float deg) {
  constexpr float kRadiansPerDegree = M_PIF / 180.f;
  return deg * kRadiansPerDegree;
}
/** Converts radians to degrees (double precision) by multiplying with 180 / M_PI. */
inline constexpr double radToDeg(double rad) {
  constexpr double kDegreesPerRadian = 180.0 / M_PI;
  return rad * kDegreesPerRadian;
}
/** Converts degrees to radians (double precision) by multiplying with M_PI / 180. */
inline constexpr double degToRad(double deg) {
  constexpr double kRadiansPerDegree = M_PI / 180.0;
  return deg * kRadiansPerDegree;
}
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary); CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary);
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t); CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t); float getCatmullRomSplinePoint(float a, float b, float c, float d, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t); float t);
// Since round(double) doesn't exist in some <cmath> implementations
// we'll define our own.
// Rounds to the nearest integer, with halfway cases going away from zero:
// negative inputs use ceil(val - 0.5), non-negative inputs use floor(val + 0.5).
// (Using ceil for the non-negative branch would turn 0.2 into 1.0.)
inline double round(double val) { return (val < 0.0 ? std::ceil(val - 0.5) : std::floor(val + 0.5)); }
/** Computes a raised to the power b as exp(b * log(a)); the float result is widened to double. */
inline double powD(float a, float b) {
  const float exponent = b * std::log(a);
  return std::exp(exponent);
}
/** Returns the reciprocal square root 1 / sqrt(val) in double precision. */
inline double invSqrtD(double val) {
  const double root = std::sqrt(val);
  return 1.0 / root;
}
/** Returns 1 / sqrt(val); the division is done in double and the result narrowed to float. */
inline float invSqrtF(float val) {
  const double reciprocal = 1.0 / std::sqrt(val);
  return static_cast<float>(reciprocal);
}
int floorPowerOfTwo(int x); int floorPowerOfTwo(int x);
int ceilingPowerOfTwo(int x); int ceilingPowerOfTwo(int x);
template <typename U> template<typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x) typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x) {
{
#if __GNUC__ >= 4 #if __GNUC__ >= 4
return __builtin_popcountll(x); return __builtin_popcountll(x);
#else #else
@ -134,23 +152,21 @@ typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, i
#endif #endif
} }
template <typename E> template<typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) {
{
return PopCount(static_cast<typename std::underlying_type<E>::type>(e)); return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
} }
bool close_enough(const CVector3f &a, const CVector3f &b, float epsilon = 0.000099999997f); bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon = 0.000099999997f);
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f); bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f);
inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752) inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752) {
{
return std::fabs(a - b) < epsilon; return std::fabs(a - b) < epsilon;
} }
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) {
{
return std::fabs(a - b) < epsilon; return std::fabs(a - b) < epsilon;
} }
} }

View File

@ -1,22 +0,0 @@
#pragma once
namespace zeus {
// Four floats, optionally overlaid with one SSE register when __SSE__ is set.
union TVectorUnion {
  float v[4];
#if __SSE__
  __m128 mVec128;
#endif
};

// Four doubles, optionally overlaid with one AVX register (when __AVX__ is
// set) and/or a pair of SSE registers (when __SSE__ is set).
union TDblVectorUnion {
  double v[4];
#if __AVX__
  __m256d mVec256;
#endif
#if __SSE__
  __m128d mVec128[2];
#endif
};
} // namespace zeus

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
#pragma once
#define _ZEUS_SIMD_INCLUDED
// Opens zeus::_simd and pulls every std name into it, so code placed in this
// namespace afterwards can refer to standard-library names unqualified.
namespace zeus::_simd { using namespace std; }
#include "parallelism_v2_simd.hpp"
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
#endif
#if __AVX__
#include "simd_avx.hpp"
#elif __SSE__
#include "simd_sse.hpp"
#else
// Fallback ABI selection used when neither the AVX nor the SSE backend is
// included. It has to live in zeus::_simd::simd_abi because the zeus::simd
// alias looks the trait up as _simd::simd_abi::zeus_native<T>; declaring it
// at global scope left that lookup unresolved on this path.
// NOTE(review): assumes fixed_size is declared in zeus::_simd::simd_abi by
// parallelism_v2_simd.hpp -- confirm.
namespace zeus::_simd::simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = fixed_size<4>; };
template<> struct zeus_native<double> { using type = fixed_size<4>; };
} // namespace zeus::_simd::simd_abi
#endif
namespace zeus {
// simd<T>: the _simd::simd vector for element type T, using whichever ABI
// simd_abi::zeus_native<T> selects for the current build.
template<typename T> using simd = _simd::simd<T,
typename _simd::simd_abi::zeus_native<T>::type>;
// simd_values<T>: the _simd::simd_data buffer type paired with simd<T>.
template<typename T>
using simd_values = _simd::simd_data<simd<T>>;
// Convenience names for the float and double buffers.
using simd_floats = simd_values<float>;
using simd_doubles = simd_values<double>;
}

View File

@ -0,0 +1,188 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_avx.hpp must not be included directly. Include simd.hpp instead.
#endif
#include "simd_sse.hpp"
#include <immintrin.h>
namespace zeus::_simd {
// Storage backend that keeps four doubles in a single __m256d (AVX) register.
template<>
class __simd_storage<double, m256d_abi> {
public:
  using storage_type = __m256d;
  storage_type __storage_;

  // Reads lane __index by spilling the register to an aligned scratch array.
  double __get(size_t __index) const noexcept {
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), __storage_);
    return lanes[__index];
  }

  // Replaces lane __index with __val; the other lanes are written back as-is.
  void __set(size_t __index, double __val) noexcept {
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), __storage_);
    lanes[__index] = __val;
    __storage_ = _mm256_load_pd(lanes.data());
  }

  // Fills lanes 0..3 with a, b, c, d in that order.
  void __set4(double a, double b, double c, double d) noexcept { __storage_ = _mm256_setr_pd(a, b, c, d); }

  // Copies __val into every lane.
  void __broadcast(double __val) noexcept { __storage_ = _mm256_set1_pd(__val); }

  // Dot product over lanes 0 and 1.
  double __dot2(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const storage_type scaled = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> products;
    _mm256_store_pd(products.data(), scaled);
    return products[0] + products[1];
  }

  // Dot product over lanes 0, 1 and 2.
  double __dot3(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const storage_type scaled = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> products;
    _mm256_store_pd(products.data(), scaled);
    return products[0] + products[1] + products[2];
  }

  // Dot product over all four lanes.
  double __dot4(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const storage_type scaled = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> products;
    _mm256_store_pd(products.data(), scaled);
    return products[0] + products[1] + products[2] + products[3];
  }

  // Aligned load of four doubles from __buffer.
  void __copy_from(const simd_data<simd<double, m256d_abi>>& __buffer) noexcept {
    __storage_ = _mm256_load_pd(__buffer.data());
  }

  // Aligned store of four doubles into __buffer.
  void __copy_to(simd_data<simd<double, m256d_abi>>& __buffer) const noexcept {
    _mm256_store_pd(__buffer.data(), __storage_);
  }

  __simd_storage() = default;

  // Widens the four floats of an __m128 storage to doubles.
  explicit __simd_storage(const __simd_storage<float, m128_abi>& other)
  : __storage_(_mm256_cvtps_pd(other.__storage_)) {}

  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  const storage_type& __native() const { return __storage_; }
};
// Lane mask for the __m256d backend: reuses the double storage and treats
// each 64-bit lane as a boolean (any set bit reads back as true).
template<>
class __simd_mask_storage<double, m256d_abi> : public __simd_storage<double, m256d_abi> {
public:
  // True when lane __index holds a non-zero bit pattern.
  bool __get(size_t __index) const noexcept {
    alignas(32) uint64_t bits[4];
    _mm256_store_pd(reinterpret_cast<double*>(bits), __storage_);
    return bits[__index] != 0;
  }

  // Sets lane __index to all-ones (true) or all-zeros (false).
  void __set(size_t __index, bool __val) noexcept {
    alignas(32) uint64_t bits[4];
    double* const raw = reinterpret_cast<double*>(bits);
    _mm256_store_pd(raw, __storage_);
    bits[__index] = __val ? UINT64_MAX : 0;
    __storage_ = _mm256_load_pd(raw);
  }
};
// Lane-wise negation: XOR with -0.0 flips only the sign bit of each double.
template <>
inline simd<double, m256d_abi> simd<double, m256d_abi>::operator-() const {
  const __m256d sign_bits = _mm256_set1_pd(-0.0);
  return _mm256_xor_pd(__s_.__storage_, sign_bits);
}
inline simd<double, m256d_abi>
operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>&
operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>::mask_type
operator==(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_EQ_OQ);
return ret;
}
// Lane-wise inequality. Uses the unordered predicate so a lane containing NaN
// compares as "not equal", matching scalar C++ `!=` and the SSE backend's
// _mm_cmpneq_* intrinsics; the ordered predicate (_CMP_NEQ_OQ) would report
// false for NaN lanes.
inline simd<double, m256d_abi>::mask_type
operator!=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type ret;
  ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_NEQ_UQ);
  return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GE_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator<=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LE_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GT_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator<(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LT_OQ);
return ret;
}
// Narrows the four doubles of an __m256d storage to four floats.
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m256d_abi>& other)
: __storage_(_mm256_cvtpd_ps(other.__storage_)) {}
namespace simd_abi {
// With this header included, double vectors use the __m256d-backed ABI.
template<> struct zeus_native<double> { using type = m256d_abi; };
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -0,0 +1,455 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_sse.hpp must not be included directly. Include simd.hpp instead.
#endif
#include <xmmintrin.h>
#if __SSE4_1__
#include <smmintrin.h>
#endif
namespace zeus::_simd {
// ABI tag for four floats stored in one __m128. The storage kind is the
// first enumerator value past _StorageKind::_VecExt.
using m128_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 1), 4>;
// ABI tag for four doubles stored as a pair of __m128d registers.
using m128d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 2), 4>;
#ifdef __AVX__
// ABI tag for four doubles stored in one __m256d; only declared when AVX is enabled.
using m256d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 3), 4>;
#endif
template <>
class __simd_storage<double, m128d_abi>;
#ifdef __AVX__
template <>
class __simd_storage<double, m256d_abi>;
#endif
// Storage backend that keeps four floats in a single __m128 (SSE) register.
template <>
class __simd_storage<float, m128_abi> {
public:
  using storage_type = __m128;
  storage_type __storage_;

  // Reads lane __index by spilling the register to an aligned scratch array.
  float __get(size_t __index) const noexcept {
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), __storage_);
    return lanes[__index];
  }

  // Replaces lane __index with __val; the other lanes are written back as-is.
  void __set(size_t __index, float __val) noexcept {
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), __storage_);
    lanes[__index] = __val;
    __storage_ = _mm_load_ps(lanes.data());
  }

  // Fills lanes 0..3 with a, b, c, d in that order.
  void __set4(float a, float b, float c, float d) noexcept { __storage_ = _mm_setr_ps(a, b, c, d); }

  // Copies __val into every lane.
  void __broadcast(float __val) noexcept { __storage_ = _mm_set1_ps(__val); }

  // Dot product over lanes 0 and 1.
  float __dot2(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    // High nibble 0x3 selects lanes 0-1 as inputs; the sum is read from lane 0.
    return _mm_cvtss_f32(_mm_dp_ps(__storage_, other.__storage_, 0x3F));
#else
    alignas(16) std::array<float, 4> products;
    _mm_store_ps(products.data(), _mm_mul_ps(__storage_, other.__storage_));
    return products[0] + products[1];
#endif
  }

  // Dot product over lanes 0, 1 and 2.
  float __dot3(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    // High nibble 0x7 selects lanes 0-2 as inputs.
    return _mm_cvtss_f32(_mm_dp_ps(__storage_, other.__storage_, 0x7F));
#else
    alignas(16) std::array<float, 4> products;
    _mm_store_ps(products.data(), _mm_mul_ps(__storage_, other.__storage_));
    return products[0] + products[1] + products[2];
#endif
  }

  // Dot product over all four lanes.
  float __dot4(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    return _mm_cvtss_f32(_mm_dp_ps(__storage_, other.__storage_, 0xFF));
#else
    alignas(16) std::array<float, 4> products;
    _mm_store_ps(products.data(), _mm_mul_ps(__storage_, other.__storage_));
    return products[0] + products[1] + products[2] + products[3];
#endif
  }

  // Returns a copy whose lanes 0..3 are taken from source lanes x, y, z, w.
  template<int x, int y, int z, int w>
  __simd_storage __shuffle() const noexcept {
    __simd_storage shuffled;
    shuffled.__storage_ = _mm_shuffle_ps(__storage_, __storage_, _MM_SHUFFLE(w, z, y, x));
    return shuffled;
  }

  // Aligned load of four floats from __buffer.
  void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
    __storage_ = _mm_load_ps(__buffer.data());
  }

  // Aligned store of four floats into __buffer.
  void __copy_to(simd_data<simd<float, m128_abi>>& __buffer) const noexcept {
    _mm_store_ps(__buffer.data(), __storage_);
  }

  __simd_storage() = default;

  // Narrowing conversions from the double backends; defined out of line once
  // those storage types are complete.
  explicit __simd_storage(const __simd_storage<double, m128d_abi>& other);
#ifdef __AVX__
  explicit __simd_storage(const __simd_storage<double, m256d_abi>& other);
#endif

  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  const storage_type& __native() const { return __storage_; }
};
// Lane mask for the __m128 backend: reuses the float storage and treats each
// 32-bit lane as a boolean (any set bit reads back as true).
template <>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi> {
public:
  // True when lane __index holds a non-zero bit pattern.
  bool __get(size_t __index) const noexcept {
    alignas(16) uint32_t bits[4];
    _mm_store_ps(reinterpret_cast<float*>(bits), __storage_);
    return bits[__index] != 0;
  }

  // Sets lane __index to all-ones (true) or all-zeros (false).
  void __set(size_t __index, bool __val) noexcept {
    alignas(16) uint32_t bits[4];
    float* const raw = reinterpret_cast<float*>(bits);
    _mm_store_ps(raw, __storage_);
    bits[__index] = __val ? UINT32_MAX : 0;
    __storage_ = _mm_load_ps(raw);
  }
};
// Lane-wise negation: XOR with -0.f flips only the sign bit of each float.
template <>
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
  const __m128 sign_bits = _mm_set1_ps(-0.f);
  return _mm_xor_ps(__s_.__storage_, sign_bits);
}
inline simd<float, m128_abi>
operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>&
operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>::mask_type
operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpeq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpneq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpge_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmple_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpgt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmplt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
// Storage backend that keeps four doubles in two __m128d registers:
// element 0 holds lanes 0-1 and element 1 holds lanes 2-3.
template <>
class __simd_storage<double, m128d_abi> {
public:
  using storage_type = std::array<__m128d, 2>;
  storage_type __storage_;

  // Reads lane __index from whichever register half contains it.
  double __get(size_t __index) const noexcept {
    alignas(16) std::array<double, 2> pair;
    _mm_store_pd(pair.data(), __storage_[__index / 2]);
    return pair[__index % 2];
  }

  // Replaces lane __index with __val, leaving its neighbour untouched.
  void __set(size_t __index, double __val) noexcept {
    __m128d& half = __storage_[__index / 2];
    alignas(16) std::array<double, 2> pair;
    _mm_store_pd(pair.data(), half);
    pair[__index % 2] = __val;
    half = _mm_load_pd(pair.data());
  }

  // Fills lanes 0..3 with a, b, c, d in that order.
  void __set4(double a, double b, double c, double d) noexcept {
    __storage_[0] = _mm_setr_pd(a, b);
    __storage_[1] = _mm_setr_pd(c, d);
  }

  // Copies __val into every lane of both halves.
  void __broadcast(double __val) noexcept {
    __storage_[0] = _mm_set1_pd(__val);
    __storage_[1] = _mm_set1_pd(__val);
  }

  // Dot product over lanes 0 and 1 (the low register only).
  double __dot2(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    return _mm_cvtsd_f64(_mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
#else
    alignas(16) std::array<double, 2> low;
    _mm_store_pd(low.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    return low[0] + low[1];
#endif
  }

  // Dot product over lanes 0, 1 and 2: the full low register plus the first
  // lane of the high register.
  double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    const double low_sum = _mm_cvtsd_f64(_mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    alignas(16) std::array<double, 2> high;
    _mm_store_pd(high.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return low_sum + high[0];
#else
    alignas(16) std::array<double, 2> low;
    alignas(16) std::array<double, 2> high;
    _mm_store_pd(low.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    _mm_store_pd(high.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return low[0] + low[1] + high[0];
#endif
  }

  // Dot product over all four lanes.
  double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    const double low_sum = _mm_cvtsd_f64(_mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    const double high_sum = _mm_cvtsd_f64(_mm_dp_pd(__storage_[1], other.__storage_[1], 0x3F));
    return low_sum + high_sum;
#else
    alignas(16) std::array<double, 2> low;
    alignas(16) std::array<double, 2> high;
    _mm_store_pd(low.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    _mm_store_pd(high.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return low[0] + low[1] + high[0] + high[1];
#endif
  }

  // Aligned load of four doubles from __buffer, two per register.
  void __copy_from(const simd_data<simd<double, m128d_abi>>& __buffer) noexcept {
    const auto* src = __buffer.data();
    __storage_[0] = _mm_load_pd(src);
    __storage_[1] = _mm_load_pd(src + 2);
  }

  // Aligned store of four doubles into __buffer, two per register.
  void __copy_to(simd_data<simd<double, m128d_abi>>& __buffer) const noexcept {
    auto* dst = __buffer.data();
    _mm_store_pd(dst, __storage_[0]);
    _mm_store_pd(dst + 2, __storage_[1]);
  }

  __simd_storage() = default;

  // Widens four floats to doubles: lanes 0-1 convert directly, lanes 2-3 are
  // first moved into the low half of the source register.
  explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
    const __m128 upper_floats = _mm_movehl_ps(other.__storage_, other.__storage_);
    __storage_[0] = _mm_cvtps_pd(other.__storage_);
    __storage_[1] = _mm_cvtps_pd(upper_floats);
  }

  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  const storage_type& __native() const { return __storage_; }
};
// Lane mask for the two-register __m128d backend: reuses the double storage
// and treats each 64-bit lane as a boolean (any set bit reads back as true).
template <>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi>
{
public:
  // True when lane __index holds a non-zero bit pattern.
  bool __get(size_t __index) const noexcept {
    alignas(16) uint64_t sse_data[2];
    _mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
    // Only one register half (two lanes) is spilled, so the scratch array must
    // be indexed modulo 2; using __index directly overran it for lanes 2 and 3.
    return sse_data[__index % 2] != 0;
  }

  // Sets lane __index to all-ones (true) or all-zeros (false).
  void __set(size_t __index, bool __val) noexcept {
    alignas(16) uint64_t sse_data[2];
    _mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
    sse_data[__index % 2] = __val ? UINT64_MAX : 0;
    __storage_[__index / 2] = _mm_load_pd(reinterpret_cast<double*>(sse_data));
  }
};
// Lane-wise negation: XOR with -0.0 flips only the sign bit of each double,
// applied to both register halves.
template <>
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
  simd<double, m128d_abi> negated;
  negated.__s_.__storage_[0] = _mm_xor_pd(__s_.__storage_[0], _mm_set1_pd(-0.0));
  negated.__s_.__storage_[1] = _mm_xor_pd(__s_.__storage_[1], _mm_set1_pd(-0.0));
  return negated;
}
inline simd<double, m128d_abi>
operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>&
operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>::mask_type
operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpeq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpneq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpge_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmple_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpgt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmplt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
// Narrows four doubles (two __m128d halves) to four floats: each half converts
// to two floats in the low lanes, then the two results are packed together.
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m128d_abi>& other)
: __storage_(_mm_movelh_ps(_mm_cvtpd_ps(other.__storage_[0]), _mm_cvtpd_ps(other.__storage_[1]))) {}
namespace simd_abi {
// Trait mapping an element type to the ABI tag used for it in this build.
// The primary template is empty, so only the specialized types have a `type`.
template<typename T> struct zeus_native {};
// Floats use the single-__m128 ABI.
template<> struct zeus_native<float> { using type = m128_abi; };
#ifndef __AVX__
// Without AVX, doubles use the two-__m128d ABI.
template<> struct zeus_native<double> { using type = m128d_abi; };
#endif
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -3,7 +3,6 @@
namespace zeus namespace zeus
{ {
const CAABox CAABox::skInvertedBox = CAABox(); const CAABox CAABox::skInvertedBox = CAABox();
const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero); const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero);
} }

View File

@ -1,8 +1,7 @@
#include "zeus/CColor.hpp" #include "zeus/CColor.hpp"
#include "zeus/CVector4f.hpp" #include "zeus/CVector4f.hpp"
namespace zeus namespace zeus {
{
const CColor CColor::skRed(Comp32(0xFF0000FFul)); const CColor CColor::skRed(Comp32(0xFF0000FFul));
const CColor CColor::skBlack(Comp32(0x000000FFul)); const CColor CColor::skBlack(Comp32(0x000000FFul));
const CColor CColor::skBlue(Comp32(0x0000FFFFul)); const CColor CColor::skBlue(Comp32(0x0000FFFFul));
@ -14,8 +13,7 @@ const CColor CColor::skYellow(Comp32(0xFFFF00FFul));
const CColor CColor::skWhite(Comp32(0xFFFFFFFFul)); const CColor CColor::skWhite(Comp32(0xFFFFFFFFul));
const CColor CColor::skClear(Comp32(0x00000000ul)); const CColor CColor::skClear(Comp32(0x00000000ul));
float hueToRgb(float p, float q, float t) float hueToRgb(float p, float q, float t) {
{
if (t < 0.0f) if (t < 0.0f)
t += 1.0f; t += 1.0f;
if (t > 1.0f) if (t > 1.0f)
@ -29,118 +27,92 @@ float hueToRgb(float p, float q, float t)
return p; return p;
} }
CColor::CColor(const CVector4f& other) void CColor::fromHSV(float h, float s, float v, float _a) {
{ int i = int(h * 6.f);
r = other.x; float f = h * 6.f - i;
g = other.y; float p = v * (1.f - s);
b = other.z; float q = v * (1.f - f * s);
a = other.w; float t = v * (1.f - (1.f - f) * s);
} simd_floats fo;
CColor& CColor::operator=(const CVector4f& other) switch (i % 6) {
{
r = other.x;
g = other.y;
b = other.z;
a = other.w;
return *this;
}
void CColor::fromHSV(float h, float s, float v, float _a)
{
int i = int(h * 6);
float f = h * 6 - i;
float p = v * (1 - s);
float q = v * (1 - f * s);
float t = v * (1 - (1 - f) * s);
float _r, _g, _b;
switch (i % 6)
{
case 0: case 0:
_r = v, _g = t, _b = p; fo[0] = v, fo[1] = t, fo[2] = p;
break; break;
case 1: case 1:
_r = q, _g = v, _b = p; fo[0] = q, fo[1] = v, fo[2] = p;
break; break;
case 2: case 2:
_r = p, _g = v, _b = t; fo[0] = p, fo[1] = v, fo[2] = t;
break; break;
case 3: case 3:
_r = p, _g = q, _b = v; fo[0] = p, fo[1] = q, fo[2] = v;
break; break;
case 4: case 4:
_r = t, _g = p, _b = v; fo[0] = t, fo[1] = p, fo[2] = v;
break; break;
case 5: case 5:
_r = v, _g = p, _b = q; fo[0] = v, fo[1] = p, fo[2] = q;
break;
default:
break; break;
} }
r = _r; fo[3] = _a;
g = _g; mSimd.copy_from(fo);
b = _b;
a = _a;
} }
void CColor::toHSV(float& h, float& s, float& v) const void CColor::toHSV(float& h, float& s, float& v) const {
{ float min = std::min(r(), std::min(g(), b()));
float min = std::min(r, std::min(g, b)); float max = std::max(r(), std::max(g(), b()));
float max = std::max(r, std::max(g, b));
v = max; v = max;
float delta = max - min; float delta = max - min;
s = max == 0 ? 0 : delta / max; s = max == 0.f ? 0.f : delta / max;
if (max == min) if (max == min)
h = 0; h = 0.f;
else else {
{ if (max == r())
if (max == r) h = (g() - b()) / delta + (g() < b() ? 6.f : 0.f);
h = (g - b) / delta + (g < b ? 6 : 0); else if (max == g())
else if (max == g) h = (b() - r()) / delta + 2.f;
h = (b - r) / delta + 2; else if (max == b())
else if (max == b) h = (r() - g()) / delta + 4.f;
h = (r - g) / delta + 4; h /= 6.f;
h /= 6;
} }
} }
void CColor::fromHSL(float h, float s, float l, float _a) void CColor::fromHSL(float h, float s, float l, float _a) {
{ if (s == 0.0f) {
if (s == 0.0f) mSimd = simd<float>(l);
r = g = b = l; } else {
else
{
const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s; const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
const float p = 2 * l - q; const float p = 2.f * l - q;
r = hueToRgb(p, q, h + 1.f / 3); r() = hueToRgb(p, q, h + 1.f / 3.f);
g = hueToRgb(p, q, h); g() = hueToRgb(p, q, h);
b = hueToRgb(p, q, h - 1.f / 3); b() = hueToRgb(p, q, h - 1.f / 3.f);
} }
a = _a; a() = _a;
} }
void CColor::toHSL(float& h, float& s, float& l) void CColor::toHSL(float& h, float& s, float& l) const {
{ const float min = std::min(r(), std::min(g(), b()));
const float min = std::min(r, std::min(g, b)); const float max = std::max(r(), std::max(g(), b()));
const float max = std::max(r, std::max(g, b));
const float d = max - min; const float d = max - min;
if (max == min) if (max == min)
h = s = 0; h = s = 0.f;
else else {
{
s = l > 0.5f ? d / (2.f - max - min) : d / (max + min); s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
if (max == r) if (max == r())
h = (g - b) / d + (g < b ? 6.f : 0.f); h = (g() - b()) / d + (g() < b() ? 6.f : 0.f);
else if (max == g) else if (max == g())
h = (b - r) / d + 2.f; h = (b() - r()) / d + 2.f;
else if (max == b) else if (max == b())
h = (r - g) / d + 4.f; h = (r() - g()) / d + 4.f;
h /= 6; h /= 6.f;
} }
} }
} }

View File

@ -10,29 +10,29 @@ CEulerAngles::CEulerAngles(const CQuaternion& quat)
float t0 = 0.f; float t0 = 0.f;
if (quatDot > 0.f) if (quatDot > 0.f)
t0 = 2.f / quatDot; t0 = 2.f / quatDot;
double t1 = 1.0 - (t0 * quat.x * quat.x + t0 * quat.z * quat.z); double t1 = 1.0 - (t0 * quat.x() * quat.x() + t0 * quat.z() * quat.z());
double t2 = t0 * quat.y * quat.x - t0 * quat.z * quat.w; double t2 = t0 * quat.y() * quat.x() - t0 * quat.z() * quat.w();
double t3 = t1 * t1 + t2 * t2; double t3 = t1 * t1 + t2 * t2;
double t4 = 0.0; double t4 = 0.0;
if (t3 > 0.0) if (t3 > 0.0)
t4 = std::sqrt(t3); t4 = std::sqrt(t3);
double t5 = t0 * quat.z * quat.y + t0 * quat.x * quat.w; double t5 = t0 * quat.z() * quat.y() + t0 * quat.x() * quat.w();
if (std::abs(t4) > 0.00001) if (std::abs(t4) > 0.00001)
{ {
x = -std::atan2(-t5, t4); x() = -std::atan2(-t5, t4);
y = -std::atan2(t0 * quat.z * quat.x - t0 * quat.y * quat.w, y() = -std::atan2(t0 * quat.z() * quat.x() - t0 * quat.y() * quat.w(),
1.0 - (t0 * quat.x * quat.x + t0 * quat.y * quat.y)); 1.0 - (t0 * quat.x() * quat.x() + t0 * quat.y() * quat.y()));
z = -std::atan2(t2, t1); z() = -std::atan2(t2, t1);
} }
else else
{ {
x = -std::atan2(-t5, t4); x() = -std::atan2(-t5, t4);
y = -std::atan2(-(t0 * quat.z * quat.x + t0 * quat.y * quat.w), y() = -std::atan2(-(t0 * quat.z() * quat.x() + t0 * quat.y() * quat.w()),
1.0 - (t0 * quat.y * quat.y + t0 * quat.z * quat.z)); 1.0 - (t0 * quat.y() * quat.y() + t0 * quat.z() * quat.z()));
z = 0.f; z() = 0.f;
} }
} }
@ -58,15 +58,15 @@ CEulerAngles::CEulerAngles(const CTransform& xf)
if (std::fabs(f1) >= 0.00001) if (std::fabs(f1) >= 0.00001)
{ {
x = -std::atan2(-xf.basis[1][2], f1); x() = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(xf.basis[0][2], xf.basis[2][2]); y() = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z = -std::atan2(xf.basis[1][0], xf.basis[1][1]); z() = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
} }
else else
{ {
x = -std::atan2(-xf.basis[1][2], f1); x() = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(-xf.basis[2][0], xf.basis[0][0]); y() = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z = 0.f; z() = 0.f;
} }
} }

View File

@ -1,71 +1,28 @@
#include "zeus/CFrustum.hpp" #include "zeus/CFrustum.hpp"
namespace zeus namespace zeus {
{
void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection) void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection) {
{
CMatrix4f mvp = projection * viewMtx; CMatrix4f mvp = projection * viewMtx;
CMatrix4f mvp_rm = mvp.transposed(); CMatrix4f mvp_rm = mvp.transposed();
#if __SSE__
/* Left */ /* Left */
planes[0].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128); planes[0].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[0].mSimd;
/* Right */ /* Right */
planes[1].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128); planes[1].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[0].mSimd;
/* Bottom */ /* Bottom */
planes[2].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128); planes[2].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[1].mSimd;
/* Top */ /* Top */
planes[3].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128); planes[3].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[1].mSimd;
/* Near */ /* Near */
planes[4].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128); planes[4].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[2].mSimd;
/* Far */ /* Far */
planes[5].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128); planes[5].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[2].mSimd;
#else
/* Left */
planes[0].a = mvp.m[0][0] + mvp.m[3][0];
planes[0].b = mvp.m[0][1] + mvp.m[3][1];
planes[0].c = mvp.m[0][2] + mvp.m[3][2];
planes[0].d = mvp.m[0][3] + mvp.m[3][3];
/* Right */
planes[1].a = -mvp.m[0][0] + mvp.m[3][0];
planes[1].b = -mvp.m[0][1] + mvp.m[3][1];
planes[1].c = -mvp.m[0][2] + mvp.m[3][2];
planes[1].d = -mvp.m[0][3] + mvp.m[3][3];
/* Bottom */
planes[2].a = mvp.m[1][0] + mvp.m[3][0];
planes[2].b = mvp.m[1][1] + mvp.m[3][1];
planes[2].c = mvp.m[1][2] + mvp.m[3][2];
planes[2].d = mvp.m[1][3] + mvp.m[3][3];
/* Top */
planes[3].a = -mvp.m[1][0] + mvp.m[3][0];
planes[3].b = -mvp.m[1][1] + mvp.m[3][1];
planes[3].c = -mvp.m[1][2] + mvp.m[3][2];
planes[3].d = -mvp.m[1][3] + mvp.m[3][3];
/* Near */
planes[4].a = mvp.m[2][0] + mvp.m[3][0];
planes[4].b = mvp.m[2][1] + mvp.m[3][1];
planes[4].c = mvp.m[2][2] + mvp.m[3][2];
planes[4].d = mvp.m[2][3] + mvp.m[3][3];
/* Far */
planes[5].a = -mvp.m[2][0] + mvp.m[3][0];
planes[5].b = -mvp.m[2][1] + mvp.m[3][1];
planes[5].c = -mvp.m[2][2] + mvp.m[3][2];
planes[5].d = -mvp.m[2][3] + mvp.m[3][3];
#endif
planes[0].normalize(); planes[0].normalize();
planes[1].normalize(); planes[1].normalize();
@ -77,8 +34,7 @@ void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projectio
valid = true; valid = true;
} }
void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection) void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection) {
{
zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]); zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]);
zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed()); zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed());
zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin); zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin);
@ -86,50 +42,44 @@ void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& p
updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix()); updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix());
} }
bool CFrustum::aabbFrustumTest(const CAABox& aabb) const bool CFrustum::aabbFrustumTest(const CAABox& aabb) const {
{
if (!valid) if (!valid)
return true; return true;
CVector3f center = aabb.center(); CVector3f center = aabb.center();
CVector3f extents = aabb.extents(); CVector3f extents = aabb.extents();
for (uint32_t i = 0; i < 6; ++i) for (uint32_t i = 0; i < 6; ++i) {
{
const CPlane& plane = planes[i]; const CPlane& plane = planes[i];
float m = plane.vec.dot(center) + plane.d; float m = plane.normal().dot(center) + plane.d();
float n = extents.dot({std::fabs(plane.a), std::fabs(plane.b), std::fabs(plane.c)}); float n = extents.dot({std::fabs(plane.x()), std::fabs(plane.y()), std::fabs(plane.z())});
if (m + n < 0) if (m + n < 0.f)
return false; return false;
} }
return true; return true;
} }
bool CFrustum::sphereFrustumTest(const CSphere& sphere) const bool CFrustum::sphereFrustumTest(const CSphere& sphere) const {
{
if (!valid) if (!valid)
return true; return true;
for (uint32_t i = 0 ; i<6 ; ++i) for (uint32_t i = 0; i < 6; ++i) {
{ float dadot = planes[i].normal().dot(sphere.position);
float dadot = planes[i].vec.dot(sphere.position); if ((dadot + planes[i].d() + sphere.radius) < 0.f)
if ((dadot + planes[i].d + sphere.radius) < 0)
return false; return false;
} }
return true; return true;
} }
bool CFrustum::pointFrustumTest(const CVector3f& point) const bool CFrustum::pointFrustumTest(const CVector3f& point) const {
{
if (!valid) if (!valid)
return true; return true;
for (uint32_t i = 0 ; i<6 ; ++i) for (uint32_t i = 0; i < 6; ++i) {
{ float dadot = planes[i].normal().dot(point);
float dadot = planes[i].vec.dot(point); if ((dadot + planes[i].d()) < 0.f)
if ((dadot + planes[i].d) < 0)
return false; return false;
} }
return true; return true;

View File

@ -2,45 +2,48 @@
#include "zeus/CQuaternion.hpp" #include "zeus/CQuaternion.hpp"
#include "zeus/Global.hpp" #include "zeus/Global.hpp"
namespace zeus namespace zeus {
{
const CMatrix3f CMatrix3f::skIdentityMatrix3f = CMatrix3f(); const CMatrix3f CMatrix3f::skIdentityMatrix3f = CMatrix3f();
CMatrix3f::CMatrix3f(const CQuaternion& quat) CMatrix3f::CMatrix3f(const CQuaternion& quat) {
{
CQuaternion nq = quat.normalized(); CQuaternion nq = quat.normalized();
float x2 = nq.x * nq.x; float x2 = nq.x() * nq.x();
float y2 = nq.y * nq.y; float y2 = nq.y() * nq.y();
float z2 = nq.z * nq.z; float z2 = nq.z() * nq.z();
m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2; m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2;
m[1][0] = 2.0 * nq.x * nq.y - 2.0 * nq.z * nq.w; m[1][0] = 2.0 * nq.x() * nq.y() - 2.0 * nq.z() * nq.w();
m[2][0] = 2.0 * nq.x * nq.z + 2.0 * nq.y * nq.w; m[2][0] = 2.0 * nq.x() * nq.z() + 2.0 * nq.y() * nq.w();
m[0][1] = 2.0 * nq.x * nq.y + 2.0 * nq.z * nq.w; m[0][1] = 2.0 * nq.x() * nq.y() + 2.0 * nq.z() * nq.w();
m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2; m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2;
m[2][1] = 2.0 * nq.y * nq.z - 2.0 * nq.x * nq.w; m[2][1] = 2.0 * nq.y() * nq.z() - 2.0 * nq.x() * nq.w();
m[0][2] = 2.0 * nq.x * nq.z - 2.0 * nq.y * nq.w; m[0][2] = 2.0 * nq.x() * nq.z() - 2.0 * nq.y() * nq.w();
m[1][2] = 2.0 * nq.y * nq.z + 2.0 * nq.x * nq.w; m[1][2] = 2.0 * nq.y() * nq.z() + 2.0 * nq.x() * nq.w();
m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2; m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2;
m[0][3] = 0.0f;
m[1][3] = 0.0f;
m[2][3] = 0.0f;
} }
void CMatrix3f::transpose() void CMatrix3f::transpose() {
{
#if __SSE__ #if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128); __m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128); __m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero); __m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128); __m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero); __m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
vec[0].mVec128 = _mm_movelh_ps(T0, T2); m[0].mSimd = _mm_movelh_ps(T0, T2);
vec[1].mVec128 = _mm_movehl_ps(T2, T0); m[1].mSimd = _mm_movehl_ps(T2, T0);
vec[2].mVec128 = _mm_movelh_ps(T1, T3); m[2].mSimd = _mm_movelh_ps(T1, T3);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
m[0].mSimd = T0.val[0];
m[1].mSimd = T0.val[1];
m[2].mSimd = T1.val[0];
#else #else
float tmp; float tmp;
@ -58,15 +61,22 @@ void CMatrix3f::transpose()
#endif #endif
} }
CMatrix3f CMatrix3f::transposed() const CMatrix3f CMatrix3f::transposed() const {
{
#if __SSE__ #if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128); __m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128); __m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero); __m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128); __m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero); __m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3)); return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
return CMatrix3f(T0.val[0], T0.val[1], T1.val[0]);
#else #else
CMatrix3f ret(*this); CMatrix3f ret(*this);
float tmp; float tmp;
@ -87,8 +97,7 @@ CMatrix3f CMatrix3f::transposed() const
#endif #endif
} }
CMatrix3f CMatrix3f::inverted() const CMatrix3f CMatrix3f::inverted() const {
{
float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] -
m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0]; m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0];

View File

@ -9,14 +9,25 @@ CMatrix4f CMatrix4f::transposed() const
{ {
CMatrix4f ret; CMatrix4f ret;
#if __SSE__ #if __SSE__
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128); __m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128); __m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), m[3].mSimd.native());
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128); __m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128); __m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), m[3].mSimd.native());
ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2); ret.m[0].mSimd = _mm_movelh_ps(T0, T2);
ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0); ret.m[1].mSimd = _mm_movehl_ps(T2, T0);
ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3); ret.m[2].mSimd = _mm_movelh_ps(T1, T3);
ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1); ret.m[3].mSimd = _mm_movehl_ps(T3, T1);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
ret.m[0].mSimd = T0.val[0];
ret.m[1].mSimd = T0.val[1];
ret.m[2].mSimd = T1.val[0];
ret.m[3].mSimd = T1.val[1];
#else #else
ret.m[0][0] = m[0][0]; ret.m[0][0] = m[0][0];
ret.m[1][0] = m[0][1]; ret.m[1][0] = m[0][1];

View File

@ -1,15 +1,19 @@
#include "zeus/COBBox.hpp" #include "zeus/COBBox.hpp"
namespace zeus namespace zeus {
{
CAABox COBBox::calculateAABox(const CTransform& worldXf) const CAABox COBBox::calculateAABox(const CTransform& worldXf) const {
{
CAABox ret = CAABox::skInvertedBox; CAABox ret = CAABox::skInvertedBox;
CTransform trans = worldXf * transform; CTransform trans = worldXf * transform;
static const CVector3f basis[8] = {{1.f, 1.f, 1.f}, {1.f, 1.f, -1.f}, {1.f, -1.f, 1.f}, {1.f, -1.f, -1.f}, static const CVector3f basis[8] = {{1.f, 1.f, 1.f},
{-1.f, -1.f, -1.f}, {-1.f, -1.f, 1.f}, {-1.f, 1.f, -1.f}, {-1.f, 1.f, 1.f}}; {1.f, 1.f, -1.f},
{1.f, -1.f, 1.f},
{1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f},
{-1.f, -1.f, 1.f},
{-1.f, 1.f, -1.f},
{-1.f, 1.f, 1.f}};
CVector3f p = extents * basis[0]; CVector3f p = extents * basis[0];
ret.accumulateBounds(trans * p); ret.accumulateBounds(trans * p);
p = extents * basis[1]; p = extents * basis[1];
@ -30,8 +34,7 @@ CAABox COBBox::calculateAABox(const CTransform& worldXf) const
return ret; return ret;
} }
bool COBBox::OBBIntersectsBox(const COBBox& other) const bool COBBox::OBBIntersectsBox(const COBBox& other) const {
{
CVector3f v = other.transform.origin - transform.origin; CVector3f v = other.transform.origin - transform.origin;
CVector3f T = CVector3f(v.dot(transform.basis[0]), CVector3f T = CVector3f(v.dot(transform.basis[0]),
v.dot(transform.basis[1]), v.dot(transform.basis[1]),
@ -45,8 +48,7 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const
for (int k = 0; k < 3; ++k) for (int k = 0; k < 3; ++k)
R[i][k] = transform.basis[i].dot(other.transform.basis[k]); R[i][k] = transform.basis[i].dot(other.transform.basis[k]);
for (int i = 0; i < 3; ++i) for (int i = 0; i < 3; ++i) {
{
ra = extents[i]; ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) + rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) + (other.extents[1] * std::fabs(R[i][1])) +
@ -57,8 +59,7 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const
return false; return false;
} }
for (int k = 0; k < 3; ++k) for (int k = 0; k < 3; ++k) {
{
ra = (extents[0] * std::fabs(R[0][k])) + ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) + (extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k])); (extents[2] * std::fabs(R[2][k]));

View File

@ -1,14 +1,12 @@
#include "zeus/CPlane.hpp" #include "zeus/CPlane.hpp"
namespace zeus namespace zeus {
{
bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const {
{
zeus::CVector3f delta = to - from; zeus::CVector3f delta = to - from;
if (std::fabs(delta.normalized().dot(vec)) < 0.01f) if (std::fabs(delta.normalized().dot(normal())) < 0.01f)
return false; return false;
float tmp = -pointToPlaneDist(from) / delta.dot(vec); float tmp = -pointToPlaneDist(from) / delta.dot(normal());
if (tmp < -0.f || tmp > 1.0001f) if (tmp < -0.f || tmp > 1.0001f)
return false; return false;
point = delta * tmp + from; point = delta * tmp + from;

View File

@ -2,13 +2,10 @@
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cassert> #include <cassert>
namespace zeus namespace zeus {
{ void CProjection::_updateCachedMatrix() {
void CProjection::_updateCachedMatrix()
{
assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective); assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective);
if (m_projType == EProjType::Orthographic) if (m_projType == EProjType::Orthographic) {
{
float tmp; float tmp;
tmp = 1.0f / (m_ortho.right - m_ortho.left); tmp = 1.0f / (m_ortho.right - m_ortho.left);
@ -33,9 +30,7 @@ void CProjection::_updateCachedMatrix()
m_mtx.m[1][3] = 0.0f; m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = 0.0f; m_mtx.m[2][3] = 0.0f;
m_mtx.m[3][3] = 1.0f; m_mtx.m[3][3] = 1.0f;
} } else if (m_projType == EProjType::Perspective) {
else if (m_projType == EProjType::Perspective)
{
float tfov = std::tan(m_persp.fov * 0.5f); float tfov = std::tan(m_persp.fov * 0.5f);
float top = m_persp.znear * tfov; float top = m_persp.znear * tfov;
float bottom = -top; float bottom = -top;

View File

@ -1,262 +1,212 @@
#include "zeus/CQuaternion.hpp" #include "zeus/CQuaternion.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{
const CQuaternion CQuaternion::skNoRotation; const CQuaternion CQuaternion::skNoRotation;
CQuaternion::CQuaternion(const CMatrix3f& mat) CQuaternion::CQuaternion(const CMatrix3f& mat) {
{
float trace = mat[0][0] + mat[1][1] + mat[2][2]; float trace = mat[0][0] + mat[1][1] + mat[2][2];
if (trace >= 0.f) if (trace >= 0.f) {
{
float st = std::sqrt(trace + 1.0f); float st = std::sqrt(trace + 1.0f);
float s = 0.5f / st; float s = 0.5f / st;
w = 0.5f * st; w() = 0.5f * st;
x = (mat[1][2] - mat[2][1]) * s; x() = (mat[1][2] - mat[2][1]) * s;
y = (mat[2][0] - mat[0][2]) * s; y() = (mat[2][0] - mat[0][2]) * s;
z = (mat[0][1] - mat[1][0]) * s; z() = (mat[0][1] - mat[1][0]) * s;
} } else {
else
{
int idx = 0; int idx = 0;
if (mat[1][1] > mat[0][0]) if (mat[1][1] > mat[0][0]) {
{
idx = 1; idx = 1;
if (mat[2][2] > mat[1][1]) if (mat[2][2] > mat[1][1])
idx = 2; idx = 2;
} } else if (mat[2][2] > mat[0][0]) {
else if (mat[2][2] > mat[0][0])
{
idx = 2; idx = 2;
} }
switch (idx) switch (idx) {
{ case 0: {
case 0:
{
float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f); float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f);
float s = 0.5f / st; float s = 0.5f / st;
w = (mat[1][2] - mat[2][1]) * s; w() = (mat[1][2] - mat[2][1]) * s;
x = 0.5f * st; x() = 0.5f * st;
y = (mat[1][0] + mat[0][1]) * s; y() = (mat[1][0] + mat[0][1]) * s;
z = (mat[2][0] + mat[0][2]) * s; z() = (mat[2][0] + mat[0][2]) * s;
break; break;
} }
case 1: case 1: {
{
float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f); float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f);
float s = 0.5f / st; float s = 0.5f / st;
w = (mat[2][0] - mat[0][2]) * s; w() = (mat[2][0] - mat[0][2]) * s;
x = (mat[1][0] + mat[0][1]) * s; x() = (mat[1][0] + mat[0][1]) * s;
y = 0.5f * st; y() = 0.5f * st;
z = (mat[2][1] + mat[1][2]) * s; z() = (mat[2][1] + mat[1][2]) * s;
break; break;
} }
case 2: case 2: {
{
float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f); float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f);
float s = 0.5f / st; float s = 0.5f / st;
w = (mat[0][1] - mat[1][0]) * s; w() = (mat[0][1] - mat[1][0]) * s;
x = (mat[2][0] + mat[0][2]) * s; x() = (mat[2][0] + mat[0][2]) * s;
y = (mat[2][1] + mat[1][2]) * s; y() = (mat[2][1] + mat[1][2]) * s;
z = 0.5f * st; z() = 0.5f * st;
break; break;
} }
default: default:
w = 0.f; w() = 0.f;
x = 0.f; x() = 0.f;
y = 0.f; y() = 0.f;
z = 0.f; z() = 0.f;
break; break;
} }
} }
} }
void CQuaternion::fromVector3f(const CVector3f& vec) void CQuaternion::fromVector3f(const CVector3f& vec) {
{ float cosX = std::cos(0.5f * vec.x());
float cosX = std::cos(0.5f * vec.x); float cosY = std::cos(0.5f * vec.y());
float cosY = std::cos(0.5f * vec.y); float cosZ = std::cos(0.5f * vec.z());
float cosZ = std::cos(0.5f * vec.z);
float sinX = std::sin(0.5f * vec.x); float sinX = std::sin(0.5f * vec.x());
float sinY = std::sin(0.5f * vec.y); float sinY = std::sin(0.5f * vec.y());
float sinZ = std::sin(0.5f * vec.z); float sinZ = std::sin(0.5f * vec.z());
w = cosZ * cosY * cosX + sinZ * sinY * sinX; simd_floats f;
x = cosZ * cosY * sinX - sinZ * sinY * cosX; f[0] = cosZ * cosY * cosX + sinZ * sinY * sinX;
y = cosZ * sinY * cosX + sinZ * cosY * sinX; f[1] = cosZ * cosY * sinX - sinZ * sinY * cosX;
z = sinZ * cosY * cosX - cosZ * sinY * sinX; f[2] = cosZ * sinY * cosX + sinZ * cosY * sinX;
f[3] = sinZ * cosY * cosX - cosZ * sinY * sinX;
mSimd.copy_from(f);
} }
CQuaternion& CQuaternion::operator=(const CQuaternion& q) CQuaternion& CQuaternion::operator=(const CQuaternion& q) {
{ mSimd = q.mSimd;
#if __SSE__
mVec128 = q.mVec128;
#else
w = q.w;
x = q.x;
y = q.y;
z = q.z;
#endif
return *this; return *this;
} }
CQuaternion CQuaternion::operator+(const CQuaternion& q) const { return CQuaternion(w + q.w, x + q.x, y + q.y, z + q.z); } CQuaternion CQuaternion::operator+(const CQuaternion& q) const {
return mSimd + q.mSimd;
CQuaternion CQuaternion::operator-(const CQuaternion& q) const { return CQuaternion(w - q.w, x - q.x, y - q.y, z - q.z); }
CQuaternion CQuaternion::operator*(const CQuaternion& q) const
{
return CQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
} }
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const CQuaternion CQuaternion::operator-(const CQuaternion& q) const {
{ return mSimd - q.mSimd;
return CNUQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
} }
CQuaternion CQuaternion::operator/(const CQuaternion& q) const CQuaternion CQuaternion::operator*(const CQuaternion& q) const {
{ return CQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w(),
z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w(),
x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w());
}
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const {
return CNUQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w(),
z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w(),
x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w());
}
CQuaternion CQuaternion::operator/(const CQuaternion& q) const {
CQuaternion p(q); CQuaternion p(q);
p.invert(); p.invert();
return *this * p; return *this * p;
} }
CQuaternion CQuaternion::operator*(float scale) const { return CQuaternion(w * scale, x * scale, y * scale, z * scale); } CQuaternion CQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
}
CNUQuaternion CNUQuaternion::operator*(float scale) const { return CNUQuaternion(w * scale, x * scale, y * scale, z * scale); } CNUQuaternion CNUQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
}
CQuaternion CQuaternion::operator/(float scale) const { return CQuaternion(w / scale, x / scale, y / scale, z / scale); } CQuaternion CQuaternion::operator/(float scale) const {
return mSimd / simd<float>(scale);
}
CQuaternion CQuaternion::operator-() const { return CQuaternion(-w, -x, -y, -z); } CQuaternion CQuaternion::operator-() const { return -mSimd; }
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q) const CQuaternion& CQuaternion::operator+=(const CQuaternion& q) {
{ mSimd += q.mSimd;
w += q.w;
x += q.x;
y += q.y;
z += q.z;
return *this; return *this;
} }
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q) const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q) {
{ mSimd += q.mSimd;
w += q.w;
x += q.x;
y += q.y;
z += q.z;
return *this; return *this;
} }
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q) const CQuaternion& CQuaternion::operator-=(const CQuaternion& q) {
{ mSimd -= q.mSimd;
w -= q.w;
x -= q.x;
y -= q.y;
z -= q.z;
return *this; return *this;
} }
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q) const CQuaternion& CQuaternion::operator*=(const CQuaternion& q) {
{
CQuaternion orig = *this; CQuaternion orig = *this;
w = orig.w * q.w - CVector3f(orig.x, orig.y, orig.z).dot({q.x, q.y, q.z}); w() = orig.w() * q.w() - CVector3f(orig.x(), orig.y(), orig.z()).dot({q.x(), q.y(), q.z()});
x = orig.y * q.z - orig.z * q.y + orig.w * q.x + orig.x * q.w; x() = orig.y() * q.z() - orig.z() * q.y() + orig.w() * q.x() + orig.x() * q.w();
y = orig.z * q.x - orig.x * q.z + orig.w * q.y + orig.y * q.w; y() = orig.z() * q.x() - orig.x() * q.z() + orig.w() * q.y() + orig.y() * q.w();
z = orig.x * q.y - orig.y * q.x + orig.w * q.z + orig.z * q.w; z() = orig.x() * q.y() - orig.y() * q.x() + orig.w() * q.z() + orig.z() * q.w();
return *this; return *this;
} }
const CQuaternion& CQuaternion::operator*=(float scale) const CQuaternion& CQuaternion::operator*=(float scale) {
{ mSimd *= simd<float>(scale);
w *= scale;
x *= scale;
y *= scale;
z *= scale;
return *this; return *this;
} }
const CQuaternion& CQuaternion::operator/=(float scale) const CQuaternion& CQuaternion::operator/=(float scale) {
{ mSimd /= simd<float>(scale);
w /= scale;
x /= scale;
y /= scale;
z /= scale;
return *this; return *this;
} }
void CQuaternion::invert() static const simd<float> InvertQuat(1.f, -1.f, -1.f, -1.f);
{
x = -x; void CQuaternion::invert() {
y = -y; mSimd *= InvertQuat;
z = -z;
} }
CQuaternion CQuaternion::inverse() const { return CQuaternion(w, -x, -y, -z); } CQuaternion CQuaternion::inverse() const { return mSimd * InvertQuat; }
CQuaternion CQuaternion::log() const CQuaternion CQuaternion::log() const {
{ float a = std::acos(w());
float a = std::acos(w);
float sina = std::sin(a); float sina = std::sin(a);
CQuaternion ret; CQuaternion ret;
ret.w = 0.f;
if (sina > 0.f) if (sina > 0.f)
{ ret = a * *this / sina;
ret.x = a * x / sina;
ret.y = a * y / sina;
ret.z = a * z / sina;
}
else else
{ ret = simd<float>(0.f);
ret.x = 0.f;
ret.y = 0.f; ret.w() = 0.f;
ret.z = 0.f;
}
return ret; return ret;
} }
CQuaternion CQuaternion::exp() const CQuaternion CQuaternion::exp() const {
{ float a = (CVector3f(mSimd.shuffle<1, 2, 3, 3>()).magnitude());
float a = (CVector3f(x, y, z).magnitude());
float sina = std::sin(a); float sina = std::sin(a);
float cosa = std::cos(a); float cosa = std::cos(a);
CQuaternion ret; CQuaternion ret;
ret.w = cosa;
if (a > 0.f) if (a > 0.f)
{ ret = sina * *this / a;
ret.x = sina * x / a;
ret.y = sina * y / a;
ret.z = sina * z / a;
}
else else
{ ret = simd<float>(0.f);
ret.x = 0.f;
ret.y = 0.f; ret.w() = cosa;
ret.z = 0.f;
}
return ret; return ret;
} }
CQuaternion CQuaternion::lerp(const CQuaternion& a, const CQuaternion& b, double t) { return (a + t * (b - a)); } CQuaternion CQuaternion::lerp(const CQuaternion& a, const CQuaternion& b, double t) { return (a + t * (b - a)); }
CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) { return lerp(a, b, t).normalized(); } CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) {
return lerp(a, b, t).normalized();
}
CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t) CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t) {
{
if (t <= 0.0f) if (t <= 0.0f)
return a; return a;
if (t >= 1.0f) if (t >= 1.0f)
@ -268,8 +218,7 @@ CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, doubl
float prod = a.dot(b) / mag; float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) if (std::fabs(prod) < 1.0f) {
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f; const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod); const double theta = std::acos(sign * prod);
@ -277,18 +226,14 @@ CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, doubl
const double d = 1.0 / std::sin(theta); const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0 - t) * theta); const double s0 = std::sin((1.0 - t) * theta);
ret.x = float((a.x * s0 + b.x * s1) * d); ret = (a * s0 + b * s1) * d;
ret.y = float((a.y * s0 + b.y * s1) * d);
ret.z = float((a.z * s0 + b.z * s1) * d);
ret.w = float((a.w * s0 + b.w * s1) * d);
return ret; return ret;
} }
return a; return a;
} }
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1) CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1) {
{
CVector3f v0N = v0; CVector3f v0N = v0;
CVector3f v1N = v1; CVector3f v1N = v1;
@ -299,92 +244,78 @@ CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const ze
CVector3f cross = v0N.cross(v1N); CVector3f cross = v0N.cross(v1N);
if (cross.magSquared() < 0.001f) if (cross.magSquared() < 0.001f) {
{
if (v0N.dot(v1N) > 0.f) if (v0N.dot(v1N) > 0.f)
return CQuaternion::skNoRotation; return CQuaternion::skNoRotation;
if (cross.canBeNormalized()) if (cross.canBeNormalized())
return CQuaternion(0.0f, cross.normalized()); return CQuaternion(0.0f, cross.normalized());
return CQuaternion::skNoRotation; return CQuaternion::skNoRotation;
} } else {
else
{
float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f); float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f);
return CQuaternion(0.5f * w, cross * (1.f / w)); return CQuaternion(0.5f * w, cross * (1.f / w));
} }
} }
CQuaternion CQuaternion::clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1, CQuaternion CQuaternion::clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle) const zeus::CRelAngle& angle) {
{
CQuaternion arc = shortestRotationArc(v0, v1); CQuaternion arc = shortestRotationArc(v0, v1);
if (angle >= 2.f * std::acos(arc.w)) if (angle >= 2.f * std::acos(arc.w()))
return arc; return arc;
return fromAxisAngle(arc.getImaginary(), angle); return fromAxisAngle(arc.getImaginary(), angle);
} }
CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t) CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t) {
{
return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t); return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
} }
CQuaternion operator+(float lhs, const CQuaternion& rhs) CQuaternion operator+(float lhs, const CQuaternion& rhs) {
{ return simd<float>(lhs) + rhs.mSimd;
return CQuaternion(lhs + rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CQuaternion operator-(float lhs, const CQuaternion& rhs) CQuaternion operator-(float lhs, const CQuaternion& rhs) {
{ return simd<float>(lhs) - rhs.mSimd;
return CQuaternion(lhs - rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CQuaternion operator*(float lhs, const CQuaternion& rhs) CQuaternion operator*(float lhs, const CQuaternion& rhs) {
{ return simd<float>(lhs) * rhs.mSimd;
return CQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) {
{ return simd<float>(lhs) * rhs.mSimd;
return CNUQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CQuaternion CQuaternion::buildEquivalent() const CQuaternion CQuaternion::buildEquivalent() const {
{ float tmp = std::acos(clamp(-1.f, w(), 1.f)) * 2.f;
float tmp = std::acos(clamp(-1.f, w, 1.f)) * 2.0;
if (std::fabs(tmp) < 1.0e-7) if (std::fabs(tmp) < 1.0e-7)
return {-1.f, 0.f, 0.f, 0.f}; return {-1.f, 0.f, 0.f, 0.f};
else else
return CQuaternion::fromAxisAngle(CUnitVector3f(x, y, z), tmp + 2.0 * M_PI); return CQuaternion::fromAxisAngle(CUnitVector3f(mSimd.shuffle<1, 2, 3, 3>()), tmp + 2.0 * M_PI);
} }
CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other) CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other) {
{
return std::acos(zeus::clamp(-1.f, dot(other), 1.f)); return std::acos(zeus::clamp(-1.f, dot(other), 1.f));
} }
CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng) CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng) {
{
CQuaternion q = skNoRotation; CQuaternion q = skNoRotation;
zeus::CVector3f destNoZ = dest; zeus::CVector3f destNoZ = dest;
zeus::CVector3f sourceNoZ = source; zeus::CVector3f sourceNoZ = source;
destNoZ.z = 0.f; destNoZ.z() = 0.f;
sourceNoZ.z = 0.f; sourceNoZ.z() = 0.f;
zeus::CVector3f tmp; zeus::CVector3f tmp;
if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f) if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f) {
{
sourceNoZ.normalize(); sourceNoZ.normalize();
destNoZ.normalize(); destNoZ.normalize();
float angleBetween = float angleBetween =
normalize_angle(std::atan2(destNoZ.x, destNoZ.y) - std::atan2(sourceNoZ.x, sourceNoZ.y)); normalize_angle(std::atan2(destNoZ.x(), destNoZ.y()) - std::atan2(sourceNoZ.x(), sourceNoZ.y()));
float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians()); float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians());
CQuaternion tmpQ; CQuaternion tmpQ;
tmpQ.rotateZ(-realAngle); tmpQ.rotateZ(-realAngle);
q = tmpQ; q = tmpQ;
tmp = q.transform(sourceNoZ); tmp = q.transform(sourceNoZ);
} } else if (sourceNoZ.magSquared() > 0.0001f)
else if (sourceNoZ.magSquared() > 0.0001f)
tmp = sourceNoZ.normalized(); tmp = sourceNoZ.normalized();
else if (destNoZ.magSquared() > 0.0001f) else if (destNoZ.magSquared() > 0.0001f)
tmp = destNoZ.normalized(); tmp = destNoZ.normalized();
@ -392,7 +323,7 @@ CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f
return skNoRotation; return skNoRotation;
float realAngle = float realAngle =
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z) - std::acos(source.z)), maxAng.asRadians()); zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z()) - std::acos(source.z())), maxAng.asRadians());
return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q; return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q;
} }

View File

@ -1,9 +1,7 @@
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
namespace zeus namespace zeus {
{ CTransform CTransformFromEditorEuler(const CVector3f& eulerVec) {
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec)
{
CTransform result; CTransform result;
double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss; double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
@ -36,8 +34,7 @@ CTransform CTransformFromEditorEuler(const CVector3f& eulerVec)
return result; return result;
} }
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle) CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle) {
{
CTransform result; CTransform result;
CVector3f axisN = axis.normalized(); CVector3f axisN = axis.normalized();
@ -45,23 +42,22 @@ CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle)
float s = std::sin(angle); float s = std::sin(angle);
float t = 1.f - c; float t = 1.f - c;
result.basis.m[0][0] = t * axisN.v[0] * axisN.v[0] + c; result.basis.m[0][0] = t * axisN[0] * axisN[0] + c;
result.basis.m[1][0] = t * axisN.v[0] * axisN.v[1] - axisN.v[2] * s; result.basis.m[1][0] = t * axisN[0] * axisN[1] - axisN[2] * s;
result.basis.m[2][0] = t * axisN.v[0] * axisN.v[2] + axisN.v[1] * s; result.basis.m[2][0] = t * axisN[0] * axisN[2] + axisN[1] * s;
result.basis.m[0][1] = t * axisN.v[0] * axisN.v[1] + axisN.v[2] * s; result.basis.m[0][1] = t * axisN[0] * axisN[1] + axisN[2] * s;
result.basis.m[1][1] = t * axisN.v[1] * axisN.v[1] + c; result.basis.m[1][1] = t * axisN[1] * axisN[1] + c;
result.basis.m[2][1] = t * axisN.v[1] * axisN.v[2] - axisN.v[0] * s; result.basis.m[2][1] = t * axisN[1] * axisN[2] - axisN[0] * s;
result.basis.m[0][2] = t * axisN.v[0] * axisN.v[2] - axisN.v[1] * s; result.basis.m[0][2] = t * axisN[0] * axisN[2] - axisN[1] * s;
result.basis.m[1][2] = t * axisN.v[1] * axisN.v[2] + axisN.v[0] * s; result.basis.m[1][2] = t * axisN[1] * axisN[2] + axisN[0] * s;
result.basis.m[2][2] = t * axisN.v[2] * axisN.v[2] + c; result.basis.m[2][2] = t * axisN[2] * axisN[2] + c;
return result; return result;
} }
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin) CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin) {
{
CTransform ret = CTransformFromEditorEuler(eulerVec); CTransform ret = CTransformFromEditorEuler(eulerVec);
ret.origin = origin; ret.origin = origin;
return ret; return ret;

View File

@ -4,14 +4,12 @@
#include <cassert> #include <cassert>
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{
const CVector2f CVector2f::skOne = CVector2f(1.0); const CVector2f CVector2f::skOne = CVector2f(1.0);
const CVector2f CVector2f::skNegOne = CVector2f(-1.0); const CVector2f CVector2f::skNegOne = CVector2f(-1.0);
const CVector2f CVector2f::skZero(0.f, 0.f); const CVector2f CVector2f::skZero(0.f, 0.f);
float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b) float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b) {
{
float mag1 = a.magnitude(); float mag1 = a.magnitude();
float mag2 = b.magnitude(); float mag2 = b.magnitude();
@ -23,8 +21,7 @@ float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b)
return theta; return theta;
} }
CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) {
{
if (t <= 0.0f) if (t <= 0.0f)
return a; return a;
if (t >= 1.0f) if (t >= 1.0f)
@ -36,8 +33,7 @@ CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t)
float prod = a.dot(b) / mag; float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) if (std::fabs(prod) < 1.0f) {
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f; const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod); const double theta = std::acos(sign * prod);

View File

@ -5,8 +5,7 @@
#include <cassert> #include <cassert>
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{
const CVector3f CVector3f::skOne(1.f); const CVector3f CVector3f::skOne(1.f);
const CVector3f CVector3f::skNegOne(-1.f); const CVector3f CVector3f::skNegOne(-1.f);
const CVector3f CVector3f::skZero; const CVector3f CVector3f::skZero;
@ -20,20 +19,9 @@ const CVector3f CVector3f::skRadToDegVec(180.0f / M_PIF);
const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f); const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f);
const CVector3d CVector3d::skZero(0.0, 0.0, 0.0); const CVector3d CVector3d::skZero(0.0, 0.0, 0.0);
// Constructs a single-precision vector from a double-precision one by
// initializing this vector's SIMD storage from vec's SIMD storage.
CVector3f::CVector3f(const CVector3d& vec)
: mSimd(vec.mSimd) {}
float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) {
{
float mag1 = a.magnitude(); float mag1 = a.magnitude();
float mag2 = b.magnitude(); float mag2 = b.magnitude();
@ -45,8 +33,7 @@ float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b)
return theta; return theta;
} }
CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t) CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t) {
{
if (t <= 0.0f) if (t <= 0.0f)
return a; return a;
if (t >= 1.0f) if (t >= 1.0f)
@ -58,8 +45,7 @@ CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
float prod = a.dot(b) / mag; float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) if (std::fabs(prod) < 1.0f) {
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f; const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = acos(sign * prod); const double theta = acos(sign * prod);
@ -67,9 +53,7 @@ CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
const double d = 1.0 / sin(theta); const double d = 1.0 / sin(theta);
const double s0 = sin((1.0 - t) * theta); const double s0 = sin((1.0 - t) * theta);
ret.x = (float)(a.x * s0 + b.x * s1) * d; ret = (a * s0 + b * s1) * d;
ret.y = (float)(a.y * s0 + b.y * s1) * d;
ret.z = (float)(a.z * s0 + b.z * s1) * d;
return ret; return ret;
} }

View File

@ -1,19 +1,13 @@
#include "zeus/CVector4f.hpp" #include "zeus/CVector4f.hpp"
#include "zeus/CColor.hpp" #include "zeus/CColor.hpp"
namespace zeus namespace zeus {
{
const CVector4f CVector4f::skZero(0.f, 0.f, 0.f, 0.f); const CVector4f CVector4f::skZero(0.f, 0.f, 0.f, 0.f);
// Builds a four-component vector from a color by initializing the SIMD storage
// from the color's SIMD storage.
CVector4f::CVector4f(const zeus::CColor& other)
: mSimd(other.mSimd) {}
// Assigns from a color: the color's SIMD storage replaces this vector's
// storage. Returns *this so assignments can be chained.
CVector4f& CVector4f::operator=(const CColor& other) {
  mSimd = other.mSimd;
  return *this;
}
} }

View File

@ -2,21 +2,22 @@
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "zeus/CVector2f.hpp" #include "zeus/CVector2f.hpp"
#if _WIN32 #if _WIN32
#include <intrin.h> #include <intrin.h>
#else #else
#include <cpuid.h> #include <cpuid.h>
#endif #endif
namespace zeus namespace zeus {
{
static bool isCPUInit = false; static bool isCPUInit = false;
static CPUInfo g_cpuFeatures = {}; static CPUInfo g_cpuFeatures = {};
static CPUInfo g_missingFeatures = {}; static CPUInfo g_missingFeatures = {};
void getCpuInfo(int eax, int regs[4]) void getCpuInfo(int eax, int regs[4]) {
{
#if !GEKKO #if !GEKKO
#if _WIN32 #if _WIN32
__cpuid(regs, eax); __cpuid(regs, eax);
@ -26,8 +27,7 @@ void getCpuInfo(int eax, int regs[4])
#endif #endif
} }
void getCpuInfoEx(int eax, int ecx, int regs[4]) void getCpuInfoEx(int eax, int ecx, int regs[4]) {
{
#if !GEKKO #if !GEKKO
#if _WIN32 #if _WIN32
__cpuidex(regs, eax, ecx); __cpuidex(regs, eax, ecx);
@ -37,8 +37,7 @@ void getCpuInfoEx(int eax, int ecx, int regs[4])
#endif #endif
} }
void detectCPU() void detectCPU() {
{
#if !GEKKO #if !GEKKO
if (isCPUInit) if (isCPUInit)
return; return;
@ -46,65 +45,61 @@ void detectCPU()
int regs[4]; int regs[4];
getCpuInfo(0, regs); getCpuInfo(0, regs);
int highestFeature = regs[0]; int highestFeature = regs[0];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = regs[1]; *reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = regs[3]; *reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = regs[2]; *reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 8) = regs[2];
getCpuInfo(0x80000000, regs); getCpuInfo(0x80000000, regs);
if (regs[0] >= 0x80000004) if (regs[0] >= 0x80000004) {
{ for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
for (unsigned int i = 0x80000002; i <= 0x80000004; i++)
{
getCpuInfo(i, regs); getCpuInfo(i, regs);
// Interpret CPU brand string and cache information. // Interpret CPU brand string and cache information.
if (i == 0x80000002) if (i == 0x80000002)
memcpy((char*)g_cpuFeatures.cpuBrand, regs, sizeof(regs)); memcpy((char*) g_cpuFeatures.cpuBrand, regs, sizeof(regs));
else if (i == 0x80000003) else if (i == 0x80000003)
memcpy((char*)g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs)); memcpy((char*) g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
else if (i == 0x80000004) else if (i == 0x80000004)
memcpy((char*)g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs)); memcpy((char*) g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
} }
} }
if (highestFeature >= 1) if (highestFeature >= 1) {
{
getCpuInfo(1, regs); getCpuInfo(1, regs);
memset((bool*)&g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1); memset((bool*) &g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1); memset((bool*) &g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*)&g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1); memset((bool*) &g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1); memset((bool*) &g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
} }
if (highestFeature >= 7) if (highestFeature >= 7) {
{
getCpuInfoEx(7, 0, regs); getCpuInfoEx(7, 0, regs);
memset((bool*)&g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1); memset((bool*) &g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
} }
isCPUInit = true; isCPUInit = true;
#endif #endif
} }
// Returns the file-level CPU feature record. detectCPU() is called first so
// the record is filled in before a reference to it is handed out.
const CPUInfo& cpuFeatures() {
  detectCPU();
  return g_cpuFeatures;
}
std::pair<bool, const CPUInfo&> validateCPU() std::pair<bool, const CPUInfo&> validateCPU() {
{
detectCPU(); detectCPU();
bool ret = true; bool ret = true;
#if __AVX2__ #if __AVX2__
if (!g_cpuFeatures.AVX2) if (!g_cpuFeatures.AVX2) {
{
*(bool*) &g_missingFeatures.AVX2 = true; *(bool*) &g_missingFeatures.AVX2 = true;
ret = false; ret = false;
} }
#endif #endif
#if __AVX__ #if __AVX__
if (!g_cpuFeatures.AVX) if (!g_cpuFeatures.AVX) {
{
*(bool*) &g_missingFeatures.AVX = true; *(bool*) &g_missingFeatures.AVX = true;
ret = false; ret = false;
} }
@ -117,43 +112,37 @@ std::pair<bool, const CPUInfo&> validateCPU()
} }
#endif #endif
#if __SSE4_2__ #if __SSE4_2__
if (!g_cpuFeatures.SSE42) if (!g_cpuFeatures.SSE42) {
{
*(bool*) &g_missingFeatures.SSE42 = true; *(bool*) &g_missingFeatures.SSE42 = true;
ret = false; ret = false;
} }
#endif #endif
#if __SSE4_1__ #if __SSE4_1__
if (!g_cpuFeatures.SSE41) if (!g_cpuFeatures.SSE41) {
{
*(bool*) &g_missingFeatures.SSE41 = true; *(bool*) &g_missingFeatures.SSE41 = true;
ret = false; ret = false;
} }
#endif #endif
#if __SSSE3__ #if __SSSE3__
if (!g_cpuFeatures.SSSE3) if (!g_cpuFeatures.SSSE3) {
{
*(bool*) &g_missingFeatures.SSSE3 = true; *(bool*) &g_missingFeatures.SSSE3 = true;
ret = false; ret = false;
} }
#endif #endif
#if __SSE3__ #if __SSE3__
if (!g_cpuFeatures.SSE3) if (!g_cpuFeatures.SSE3) {
{
*(bool*) &g_missingFeatures.SSE3 = true; *(bool*) &g_missingFeatures.SSE3 = true;
ret = false; ret = false;
} }
#endif #endif
#if __SSE2__ #if __SSE2__
if (!g_cpuFeatures.SSE2) if (!g_cpuFeatures.SSE2) {
{
*(bool*) &g_missingFeatures.SSE2 = true; *(bool*) &g_missingFeatures.SSE2 = true;
ret = false; ret = false;
} }
#endif #endif
#if __SSE__ #if __SSE__
if (!g_cpuFeatures.SSE1) if (!g_cpuFeatures.SSE1) {
{
*(bool*) &g_missingFeatures.SSE1 = true; *(bool*) &g_missingFeatures.SSE1 = true;
ret = false; ret = false;
} }
@ -162,8 +151,7 @@ std::pair<bool, const CPUInfo&> validateCPU()
return {ret, g_missingFeatures}; return {ret, g_missingFeatures};
} }
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up) CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up) {
{
CVector3f vLook, vRight, vUp; CVector3f vLook, vRight, vUp;
vLook = lookPos - pos; vLook = lookPos - pos;
@ -173,11 +161,10 @@ CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3
vLook.normalize(); vLook.normalize();
vUp = up - vLook * clamp(-1.f, up.dot(vLook), 1.f); vUp = up - vLook * clamp(-1.f, up.dot(vLook), 1.f);
if (vUp.magnitude() <= FLT_EPSILON) {
vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z();
if (vUp.magnitude() <= FLT_EPSILON) if (vUp.magnitude() <= FLT_EPSILON)
{ vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y();
vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z;
if (vUp.magnitude() <= FLT_EPSILON)
vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y;
} }
vUp.normalize(); vUp.normalize();
vRight = vLook.cross(vUp); vRight = vLook.cross(vUp);
@ -187,15 +174,13 @@ CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3
} }
// Evaluates the cubic Bezier curve with control points a, b, c, d at
// parameter t using repeated linear interpolation (de Casteljau).
//
// Each first-level interpolation is computed once; the middle one (between b
// and c) feeds both second-level terms, so it is no longer evaluated twice.
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b,
                         const CVector3f& c, const CVector3f& d, float t) {
  const float omt = 1.f - t;

  // First level: interpolate between neighbouring control points.
  const CVector3f ab = a * omt + b * t;
  const CVector3f bc = b * omt + c * t;
  const CVector3f cd = c * omt + d * t;

  // Second and third levels, in the same operation order as before.
  return (ab * omt + bc * t) * omt + (bc * omt + cd * t) * t;
}
int floorPowerOfTwo(int x) int floorPowerOfTwo(int x) {
{
if (x == 0) if (x == 0)
return 0; return 0;
/* /*
@ -211,8 +196,7 @@ int floorPowerOfTwo(int x)
return x - (x >> 1); return x - (x >> 1);
} }
int ceilingPowerOfTwo(int x) int ceilingPowerOfTwo(int x) {
{
if (x == 0) if (x == 0)
return 0; return 0;
@ -227,8 +211,7 @@ int ceilingPowerOfTwo(int x)
return x; return x;
} }
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t) float getCatmullRomSplinePoint(float a, float b, float c, float d, float t) {
{
if (t <= 0.0f) if (t <= 0.0f)
return b; return b;
if (t >= 1.0f) if (t >= 1.0f)
@ -237,12 +220,13 @@ float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
const float t2 = t * t; const float t2 = t * t;
const float t3 = t2 * t; const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) + c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) + return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2)); d * (0.5f * t3 - 0.5f * t2));
} }
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) CVector3f
{ getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
if (t <= 0.0f) if (t <= 0.0f)
return b; return b;
if (t >= 1.0f) if (t >= 1.0f)
@ -251,12 +235,13 @@ CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const
const float t2 = t * t; const float t2 = t * t;
const float t3 = t2 * t; const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) + c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) + return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2)); d * (0.5f * t3 - 0.5f * t2));
} }
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) CVector3f
{ getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
if (t >= 0.0f) if (t >= 0.0f)
return b; return b;
if (t <= 1.0f) if (t <= 1.0f)
@ -282,32 +267,27 @@ CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b,
return zeus::getCatmullRomSplinePoint(b, c, bVelocity * cbDistance, cVelocity * cbDistance, t); return zeus::getCatmullRomSplinePoint(b, c, bVelocity * cbDistance, cVelocity * cbDistance, t);
} }
// Returns the weighted sum of p0, p1 and p2, using the x, y and z components
// of bary as the respective weights.
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary) {
  const float w0 = bary.x();
  const float w1 = bary.y();
  const float w2 = bary.z();
  return w0 * p0 + w1 * p1 + w2 * p2;
}
bool close_enough(const CVector3f& a, const CVector3f &b, float epsilon) bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon) {
{ return std::fabs(a.x() - b.x()) < epsilon &&
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon && std::fabs(a.z - b.z) < epsilon) std::fabs(a.y() - b.y()) < epsilon &&
return true; std::fabs(a.z() - b.z()) < epsilon;
return false;
} }
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) {
{ return std::fabs(a.x() - b.x()) < epsilon && std::fabs(a.y() - b.y()) < epsilon;
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon)
return true;
return false;
} }
// Specialization for CVector3f: takes the minimum of each component pair.
template <>
CVector3f min(const CVector3f& a, const CVector3f& b) {
  const float lowX = min(a.x(), b.x());
  const float lowY = min(a.y(), b.y());
  const float lowZ = min(a.z(), b.z());
  return {lowX, lowY, lowZ};
}
// Specialization for CVector3f: takes the maximum of each component pair.
template <>
CVector3f max(const CVector3f& a, const CVector3f& b) {
  const float highX = max(a.x(), b.x());
  const float highY = max(a.y(), b.y());
  const float highZ = max(a.z(), b.z());
  return {highX, highY, highZ};
}
} }

View File

@ -30,6 +30,9 @@ int main()
CAABox test2{{-100, -100, -100}, {100, 100, 100}}; CAABox test2{{-100, -100, -100}, {100, 100, 100}};
CAABox test3{{-50, -50, -50}, {50, 50, 50}}; CAABox test3{{-50, -50, -50}, {50, 50, 50}};
CAABox test4{{-50, -50, -105}, {50, 50, 105}}; CAABox test4{{-50, -50, -105}, {50, 50, 105}};
CVector2f point2(-90, 67);
CVector2f point3(-90, 67);
CVector3f point4 = point2 + point3;
CVector3f point(-90, 67, -105); CVector3f point(-90, 67, -105);
test.closestPointAlongVector(point); test.closestPointAlongVector(point);
CVector3d(100, -100, -200); CVector3d(100, -100, -200);
@ -72,7 +75,7 @@ int main()
ctest1.fromHSV(0, 255 / 255.f, .5); ctest1.fromHSV(0, 255 / 255.f, .5);
float h, s, v; float h, s, v;
ctest1.toHSV(h, s, v); ctest1.toHSV(h, s, v);
std::cout << (int)ctest1.r << " " << (int)ctest1.g << " " << (int)ctest1.b << " " << (int)ctest1.a << std::endl; std::cout << (int)ctest1.r() << " " << (int)ctest1.g() << " " << (int)ctest1.b() << " " << (int)ctest1.a() << std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a / 255.f) << std::endl; std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a() / 255.f) << std::endl;
return 0; return 0;
} }