SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:16:50 -10:00
parent d881e58f62
commit e8dfecbb6e
49 changed files with 6047 additions and 4721 deletions

View File

@ -1,5 +1,5 @@
--- ---
IndentWidth: 4 IndentWidth: 2
ColumnLimit: 128 ColumnLimit: 128
UseTab: Never UseTab: Never
--- ---

View File

@ -40,7 +40,6 @@ add_library(zeus
include/zeus/CColor.hpp include/zeus/CColor.hpp
include/zeus/Global.hpp include/zeus/Global.hpp
include/zeus/zeus.hpp include/zeus/zeus.hpp
include/zeus/TVectorUnion.hpp
include/zeus/CVector2i.hpp include/zeus/CVector2i.hpp
include/zeus/CVector2f.hpp include/zeus/CVector2f.hpp
include/zeus/CVector3f.hpp include/zeus/CVector3f.hpp
@ -56,7 +55,11 @@ add_library(zeus
include/zeus/CSphere.hpp include/zeus/CSphere.hpp
include/zeus/CUnitVector.hpp include/zeus/CUnitVector.hpp
include/zeus/CMRay.hpp include/zeus/CMRay.hpp
include/zeus/CEulerAngles.hpp) include/zeus/CEulerAngles.hpp
include/zeus/simd/simd.hpp
include/zeus/simd/simd_sse.hpp
include/zeus/simd/simd_avx.hpp
include/zeus/simd/parallelism_v2_simd.hpp)
add_subdirectory(test) add_subdirectory(test)

View File

@ -6,412 +6,367 @@
#include "zeus/CLineSeg.hpp" #include "zeus/CLineSeg.hpp"
#include "zeus/CSphere.hpp" #include "zeus/CSphere.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include <athena/IStreamReader.hpp>
#endif #endif
namespace zeus namespace zeus {
{ class CAABox {
class alignas(16) CAABox
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); enum class EBoxEdgeId {
Z0,
X0,
Z1,
X1,
Z2,
X2,
Z3,
X3,
Y0,
Y1,
Y2,
Y3
};
enum class EBoxEdgeId enum class EBoxFaceID {
{ };
Z0,
X0,
Z1,
X1,
Z2,
X2,
Z3,
X3,
Y0,
Y1,
Y2,
Y3
};
enum class EBoxFaceID static const CAABox skInvertedBox;
{ static const CAABox skNullBox;
};
static const CAABox skInvertedBox; CVector3f min;
static const CAABox skNullBox; CVector3f max;
CVector3f min; // set default AABox to insane inverse min/max to allow for accumulation
CVector3f max; CAABox() : CAABox(1e16f, -1e16f) {}
// set default AABox to insane inverse min/max to allow for accumulation CAABox(const CVector3f& min, const CVector3f& max) : min(min), max(max) {}
CAABox() : CAABox(1e16f, -1e16f) {}
CAABox(const CVector3f& min, const CVector3f& max) : min(min), max(max) {} CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {} CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ) {
}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ)
{
}
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
inline void readBoundingBoxBig(athena::io::IStreamReader& in)
{ void readBoundingBoxBig(athena::io::IStreamReader& in) {
min.readBig(in); min.readBig(in);
max.readBig(in); max.readBig(in);
} }
static inline CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in)
{ static CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in) {
CAABox ret; CAABox ret;
ret.readBoundingBoxBig(in); ret.readBoundingBoxBig(in);
return ret; return ret;
} }
#endif #endif
float distanceFromPointSquared(const CVector3f& other) const float distanceFromPointSquared(const CVector3f& other) const {
{ float dist = 0;
float dist = 0; for (int i = 0; i < 3; i++) {
for (int i = 0; i < 3; i++) if (other[i] < min[i]) {
{ const float tmp = (min[i] - other[i]);
if (other[i] < min[i]) dist += tmp * tmp;
{ } else if (other[i] > max[i]) {
const float tmp = (min[i] - other[i]); const float tmp = (other[i] - max[i]);
dist += tmp * tmp; dist += tmp * tmp;
} }
else if (other[i] > max[i])
{
const float tmp = (other[i] - max[i]);
dist += tmp * tmp;
}
}
return dist;
} }
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); } return dist;
}
inline bool intersects(const CAABox& other) const float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
{
bool x1 = (max[0] >= other.min[0]); bool intersects(const CAABox& other) const {
bool x2 = (min[0] <= other.max[0]); bool x1 = (max[0] >= other.min[0]);
bool y1 = (max[1] >= other.min[1]); bool x2 = (min[0] <= other.max[0]);
bool y2 = (min[1] <= other.max[1]); bool y1 = (max[1] >= other.min[1]);
bool z1 = (max[2] >= other.min[2]); bool y2 = (min[1] <= other.max[1]);
bool z2 = (min[2] <= other.max[2]); bool z1 = (max[2] >= other.min[2]);
return x1 && x2 && y1 && y2 && z1 && z2; bool z2 = (min[2] <= other.max[2]);
return x1 && x2 && y1 && y2 && z1 && z2;
}
bool intersects(const CSphere& other) const {
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
}
float intersectionRadius(const CSphere& other) const {
float dist = distanceFromPoint(other.position);
return (dist < other.radius) ? dist : -1.f;
}
CAABox booleanIntersection(const CAABox& other) const {
CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i) {
if (min[i] <= other.min[i] && max[i] >= other.max[i]) {
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
} else if (other.min[i] <= min[i] && other.max[i] >= max[i]) {
minVec[i] = min[i];
maxVec[i] = max[i];
} else if (other.min[i] <= min[i] && other.max[i] >= min[i]) {
minVec[i] = min[i];
maxVec[i] = other.max[i];
} else if (other.min[i] <= max[i] && other.max[i] >= max[i]) {
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
} }
bool intersects(const CSphere& other) const return {minVec, maxVec};
{ }
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
bool inside(const CAABox& other) const {
bool x = min[0] >= other.min[0] && max[0] <= other.max[0];
bool y = min[1] >= other.min[1] && max[1] <= other.max[1];
bool z = min[2] >= other.min[2] && max[2] <= other.max[2];
return x && y && z;
}
bool insidePlane(const CPlane& plane) const {
CVector3f vmax;
/* X axis */
if (plane.x() >= 0.f)
vmax[0] = max[0];
else
vmax[0] = min[0];
/* Y axis */
if (plane.y() >= 0.f)
vmax[1] = max[1];
else
vmax[1] = min[1];
/* Z axis */
if (plane.z() >= 0.f)
vmax[2] = max[2];
else
vmax[2] = min[2];
return plane.normal().dot(vmax) + plane.d() >= 0.f;
}
CVector3f center() const { return (min + max) * 0.5f; }
CVector3f extents() const { return (max - min) * 0.5f; }
float volume() const {
auto delta = max - min;
return delta.x() * delta.y() * delta.z();
}
CLineSeg getEdge(EBoxEdgeId id) const {
switch (id) {
case EBoxEdgeId::Z0:
default:
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), min.y(), min.z()});
case EBoxEdgeId::X0:
return CLineSeg({min.x(), min.y(), min.z()}, {max.x(), min.y(), min.z()});
case EBoxEdgeId::Z1:
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), min.y(), max.z()});
case EBoxEdgeId::X1:
return CLineSeg({max.x(), min.y(), max.z()}, {min.x(), min.y(), max.z()});
case EBoxEdgeId::Z2:
return CLineSeg({max.x(), max.y(), max.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::X2:
return CLineSeg({max.x(), max.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Z3:
return CLineSeg({min.x(), max.y(), min.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::X3:
return CLineSeg({min.x(), max.y(), max.z()}, {max.x(), max.y(), max.z()});
case EBoxEdgeId::Y0:
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::Y1:
return CLineSeg({min.x(), min.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Y2:
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::Y3:
return CLineSeg({max.x(), min.y(), max.z()}, {max.x(), max.y(), max.z()});
}
}
CAABox getTransformedAABox(const CTransform& xfrm) const {
CAABox box;
CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point);
point = xfrm * getPoint(1);
box.accumulateBounds(point);
point = xfrm * getPoint(2);
box.accumulateBounds(point);
point = xfrm * getPoint(3);
box.accumulateBounds(point);
point = xfrm * getPoint(4);
box.accumulateBounds(point);
point = xfrm * getPoint(5);
box.accumulateBounds(point);
point = xfrm * getPoint(6);
box.accumulateBounds(point);
point = xfrm * getPoint(7);
box.accumulateBounds(point);
return box;
}
void accumulateBounds(const CVector3f& point) {
if (min.x() > point.x())
min.x() = point.x();
if (min.y() > point.y())
min.y() = point.y();
if (min.z() > point.z())
min.z() = point.z();
if (max.x() < point.x())
max.x() = point.x();
if (max.y() < point.y())
max.y() = point.y();
if (max.z() < point.z())
max.z() = point.z();
}
void accumulateBounds(const CAABox& other) {
accumulateBounds(other.min);
accumulateBounds(other.max);
}
bool pointInside(const CVector3f& other) const {
return (min.x() <= other.x() && other.x() <= max.x() &&
min.y() <= other.y() && other.y() <= max.y() &&
min.z() <= other.z() && other.z() <= max.z());
}
CVector3f closestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? min.x() : max.x()),
(other.y() >= 0.f ? min.y() : max.y()),
(other.z() >= 0.f ? min.z() : max.z())};
}
CVector3f furthestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? max.x() : min.x()),
(other.y() >= 0.f ? max.y() : min.y()),
(other.z() >= 0.f ? max.z() : min.z())};
}
float distanceBetween(const CAABox& other) {
int intersects = 0;
if (max.x() >= other.min.x() && min.x() <= other.max.x())
intersects |= 0x1;
if (max.y() >= other.min.y() && min.y() <= other.max.y())
intersects |= 0x2;
if (max.z() >= other.min.z() && min.z() <= other.max.z())
intersects |= 0x4;
float minX, maxX;
if (max.x() < other.min.x()) {
minX = max.x();
maxX = other.min.x();
} else {
minX = min.x();
maxX = other.max.x();
} }
float intersectionRadius(const CSphere& other) const float minY, maxY;
{ if (max.y() < other.min.y()) {
float dist = distanceFromPoint(other.position); minY = max.y();
return (dist < other.radius) ? dist : -1.f; maxY = other.min.y();
} else {
minY = min.y();
maxY = other.max.y();
} }
inline CAABox booleanIntersection(const CAABox& other) const float minZ, maxZ;
{ if (max.z() < other.min.z()) {
CVector3f minVec = CVector3f::skZero; minZ = max.z();
CVector3f maxVec = CVector3f::skZero; maxZ = other.min.z();
} else {
for (int i = 0; i < 3; ++i) minZ = min.z();
{ maxZ = other.max.z();
if (min[i] <= other.min[i] && max[i] >= other.max[i])
{
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= max[i])
{
minVec[i] = min[i];
maxVec[i] = max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= min[i])
{
minVec[i] = min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= max[i] && other.max[i] >= max[i])
{
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
}
return {minVec, maxVec};
} }
inline bool inside(const CAABox& other) const switch (intersects) {
{ case 0:
bool x = min[0] >= other.min[0] && max[0] <= other.max[0]; return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
bool y = min[1] >= other.min[1] && max[1] <= other.max[1]; case 1:
bool z = min[2] >= other.min[2] && max[2] <= other.max[2]; return zeus::CVector2f(maxY - minY, maxZ - minZ).magnitude();
return x && y && z; case 2:
return zeus::CVector2f(maxX - minX, maxZ - minZ).magnitude();
case 3:
return std::fabs(maxZ - minZ);
case 4:
return zeus::CVector2f(maxX - minX, maxY - minY).magnitude();
case 5:
return std::fabs(maxY - minY);
case 6:
return std::fabs(maxX - minX);
case 7:
default:
return 0.f;
} }
}
inline bool insidePlane(const CPlane& plane) const CVector3f getPoint(const int point) const {
{ const CVector3f* vecs = &min;
CVector3f vmax; return CVector3f(vecs[(point & 1) != 0].x(), vecs[(point & 2) != 0].y(), vecs[(point & 4) != 0].z());
/* X axis */ }
if (plane.a >= 0)
vmax[0] = max[0];
else
vmax[0] = min[0];
/* Y axis */
if (plane.b >= 0)
vmax[1] = max[1];
else
vmax[1] = min[1];
/* Z axis */
if (plane.c >= 0)
vmax[2] = max[2];
else
vmax[2] = min[2];
return plane.vec.dot(vmax) + plane.d >= 0.f;
}
CVector3f center() const { return (min + max) * 0.5f; } CVector3f clampToBox(const CVector3f& vec) const {
CVector3f ret = vec;
ret.x() = clamp(min.x(), float(ret.x()), max.x());
ret.y() = clamp(min.y(), float(ret.y()), max.y());
ret.z() = clamp(min.z(), float(ret.z()), max.z());
return ret;
}
CVector3f extents() const { return (max - min) * 0.5f; } void splitX(CAABox& negX, CAABox& posX) const {
float midX = (max.x() - min.x()) * .5f + min.x();
posX.max = max;
posX.min = min;
posX.min.x() = midX;
negX.max = max;
negX.max.x() = midX;
negX.min = min;
}
float volume() const { return (max.x - min.x) * (max.y - min.y) * (max.z - min.z); } void splitY(CAABox& negY, CAABox& posY) const {
float midY = (max.y() - min.y()) * .5f + min.y();
posY.max = max;
posY.min = min;
posY.min.y() = midY;
negY.max = max;
negY.max.y() = midY;
negY.min = min;
}
inline CLineSeg getEdge(EBoxEdgeId id) const void splitZ(CAABox& negZ, CAABox& posZ) const {
{ float midZ = (max.z() - min.z()) * .5f + min.z();
switch (id) posZ.max = max;
{ posZ.min = min;
case EBoxEdgeId::Z0: posZ.min.z() = midZ;
default: negZ.max = max;
return CLineSeg({min.x, min.y, max.z}, {min.x, min.y, min.z}); negZ.max.z() = midZ;
case EBoxEdgeId::X0: negZ.min = min;
return CLineSeg({min.x, min.y, min.z}, {max.x, min.y, min.z}); }
case EBoxEdgeId::Z1:
return CLineSeg({max.x, min.y, min.z}, {max.x, min.y, max.z});
case EBoxEdgeId::X1:
return CLineSeg({max.x, min.y, max.z}, {min.x, min.y, max.z});
case EBoxEdgeId::Z2:
return CLineSeg({max.x, max.y, max.z}, {max.x, max.y, min.z});
case EBoxEdgeId::X2:
return CLineSeg({max.x, max.y, min.z}, {min.x, max.y, min.z});
case EBoxEdgeId::Z3:
return CLineSeg({min.x, max.y, min.z}, {min.x, max.y, max.z});
case EBoxEdgeId::X3:
return CLineSeg({min.x, max.y, max.z}, {max.x, max.y, max.z});
case EBoxEdgeId::Y0:
return CLineSeg({min.x, min.y, max.z}, {min.x, max.y, max.z});
case EBoxEdgeId::Y1:
return CLineSeg({min.x, min.y, min.z}, {min.x, max.y, min.z});
case EBoxEdgeId::Y2:
return CLineSeg({max.x, min.y, min.z}, {max.x, max.y, min.z});
case EBoxEdgeId::Y3:
return CLineSeg({max.x, min.y, max.z}, {max.x, max.y, max.z});
}
}
inline CAABox getTransformedAABox(const CTransform& xfrm) const bool invalid() { return (max.x() < min.x() || max.y() < min.y() || max.z() < min.z()); }
{
CAABox box;
CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point);
point = xfrm * getPoint(1);
box.accumulateBounds(point);
point = xfrm * getPoint(2);
box.accumulateBounds(point);
point = xfrm * getPoint(3);
box.accumulateBounds(point);
point = xfrm * getPoint(4);
box.accumulateBounds(point);
point = xfrm * getPoint(5);
box.accumulateBounds(point);
point = xfrm * getPoint(6);
box.accumulateBounds(point);
point = xfrm * getPoint(7);
box.accumulateBounds(point);
return box;
}
inline void accumulateBounds(const CVector3f& point) float operator[](size_t idx) const {
{ assert(idx < 6);
if (min.x > point.x) if (idx < 3)
min.x = point.x; return min[idx];
if (min.y > point.y) else
min.y = point.y; return max[idx - 3];
if (min.z > point.z) }
min.z = point.z;
if (max.x < point.x)
max.x = point.x;
if (max.y < point.y)
max.y = point.y;
if (max.z < point.z)
max.z = point.z;
}
inline void accumulateBounds(const CAABox& other)
{
accumulateBounds(other.min);
accumulateBounds(other.max);
}
inline bool pointInside(const CVector3f& other) const
{
return (min.x <= other.x && other.x <= max.x &&
min.y <= other.y && other.y <= max.y &&
min.z <= other.z && other.z <= max.z);
}
inline CVector3f closestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? min.x : max.x),
(other.y >= 0.f ? min.y : max.y),
(other.z >= 0.f ? min.z : max.z)};
}
inline CVector3f furthestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? max.x : min.x),
(other.y >= 0.f ? max.y : min.y),
(other.z >= 0.f ? max.z : min.z)};
}
inline float distanceBetween(const CAABox& other)
{
int intersects = 0;
if (max.x >= other.min.x && min.x <= other.max.x)
intersects |= 0x1;
if (max.y >= other.min.y && min.y <= other.max.y)
intersects |= 0x2;
if (max.z >= other.min.z && min.z <= other.max.z)
intersects |= 0x4;
float minX, maxX;
if (max.x < other.min.x)
{
minX = max.x;
maxX = other.min.x;
}
else
{
minX = min.x;
maxX = other.max.x;
}
float minY, maxY;
if (max.y < other.min.y)
{
minY = max.y;
maxY = other.min.y;
}
else
{
minY = min.y;
maxY = other.max.y;
}
float minZ, maxZ;
if (max.z < other.min.z)
{
minZ = max.z;
maxZ = other.min.z;
}
else
{
minZ = min.z;
maxZ = other.max.z;
}
switch (intersects)
{
case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1:
return zeus::CVector2f(maxY - minY, maxZ - minZ).magnitude();
case 2:
return zeus::CVector2f(maxX - minX, maxZ - minZ).magnitude();
case 3:
return std::fabs(maxZ - minZ);
case 4:
return zeus::CVector2f(maxX - minX, maxY - minY).magnitude();
case 5:
return std::fabs(maxY - minY);
case 6:
return std::fabs(maxX - minX);
case 7:
default:
return 0.f;
}
}
inline CVector3f getPoint(const int point) const
{
const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x, vecs[(point & 2) != 0].y, vecs[(point & 4) != 0].z);
}
inline CVector3f clampToBox(const CVector3f& vec)
{
CVector3f ret = vec;
clamp(min.x, ret.x, max.x);
clamp(min.y, ret.y, max.y);
clamp(min.z, ret.z, max.z);
return ret;
}
inline void splitX(CAABox& negX, CAABox& posX) const
{
float midX = (max.x - min.x) * .5f + min.x;
posX.max = max;
posX.min = min;
posX.min.x = midX;
negX.max = max;
negX.max.x = midX;
negX.min = min;
}
inline void splitY(CAABox& negY, CAABox& posY) const
{
float midY = (max.y - min.y) * .5f + min.y;
posY.max = max;
posY.min = min;
posY.min.y = midY;
negY.max = max;
negY.max.y = midY;
negY.min = min;
}
inline void splitZ(CAABox& negZ, CAABox& posZ) const
{
float midZ = (max.z - min.z) * .5f + min.z;
posZ.max = max;
posZ.min = min;
posZ.min.z = midZ;
negZ.max = max;
negZ.max.z = midZ;
negZ.min = min;
}
inline bool invalid() { return (max.x < min.x || max.y < min.y || max.z < min.z); }
inline float operator[](size_t idx) const
{
assert(idx < 6);
if (idx < 3)
return min[idx];
else
return max[idx-3];
}
}; };
inline bool operator==(const CAABox& left, const CAABox& right) inline bool operator==(const CAABox& left, const CAABox& right) {
{ return (left.min == right.min && left.max == right.max);
return (left.min == right.min && left.max == right.max);
} }
inline bool operator!=(const CAABox& left, const CAABox& right)
{ inline bool operator!=(const CAABox& left, const CAABox& right) {
return (left.min != right.min || left.max != right.max); return (left.min != right.min || left.max != right.max);
} }
} }

View File

@ -4,22 +4,15 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "CUnitVector.hpp" #include "CUnitVector.hpp"
namespace zeus namespace zeus {
{ struct CAxisAngle : CVector3f {
struct alignas(16) CAxisAngle : CVector3f CAxisAngle() = default;
{ CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
ZE_DECLARE_ALIGNED_ALLOCATOR(); CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
CAxisAngle() = default; float angle() const { return magnitude(); }
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {} const CVector3f& getVector() const { return *this; }
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {} static const CAxisAngle sIdentity;
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
float angle() const { return magnitude(); }
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
}; };
} }

View File

@ -2,11 +2,15 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "TVectorUnion.hpp" #include "CVector4f.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/FileReader.hpp>
#include <athena/FileWriter.hpp> #include "athena/FileReader.hpp"
#include "athena/FileWriter.hpp"
#endif #endif
#include <iostream> #include <iostream>
#include <cassert> #include <cassert>
@ -20,410 +24,315 @@
#define COLOR(rgba) rgba #define COLOR(rgba) rgba
#endif #endif
namespace zeus namespace zeus {
{
typedef uint8_t Comp8; typedef uint8_t Comp8;
typedef uint32_t Comp32; typedef uint32_t Comp32;
constexpr float OneOver255 = 1.f / 255.f; constexpr float OneOver255 = 1.f / 255.f;
typedef union { typedef union {
struct struct {
{ Comp8 r, g, b, a;
Comp8 r, g, b, a; };
}; Comp32 rgba;
Comp32 rgba;
} RGBA32; } RGBA32;
class CVector4f; class CVector4f;
class alignas(16) CColor class CColor {
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); simd<float> mSimd;
static const CColor skRed;
static const CColor skBlack;
static const CColor skBlue;
static const CColor skGreen;
static const CColor skGrey;
static const CColor skOrange;
static const CColor skPurple;
static const CColor skYellow;
static const CColor skWhite;
static const CColor skClear;
static const CColor skRed; CColor() : mSimd(1.f) {}
static const CColor skBlack;
static const CColor skBlue;
static const CColor skGreen;
static const CColor skGrey;
static const CColor skOrange;
static const CColor skPurple;
static const CColor skYellow;
static const CColor skWhite;
static const CColor skClear;
#if __SSE__ CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor(const __m128& mVec128) : mVec128(mVec128) {}
#endif
CColor() : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {} CColor(float r, float g, float b, float a = 1.0f) : mSimd(r, g, b, a) {}
CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor(float r, float g, float b, float a = 1.0f)
{
v[0] = r;
v[1] = g;
v[2] = b;
v[3] = a;
}
#if ZE_ATHENA_TYPES
CColor(const atVec4f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
r = vec.vec[0], g = vec.vec[1], b = vec.vec[2], a = vec.vec[3];
}
#endif
#endif
CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other);
CColor& operator=(const CVector4f& other);
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
static inline CColor ReadRGBABig(athena::io::IStreamReader& reader) CColor(const atVec4f& vec) : mSimd(vec.simd) {}
{
CColor ret;
ret.readRGBABig(reader);
return ret;
}
inline void readRGBABig(athena::io::IStreamReader& reader)
{
r = reader.readFloatBig();
g = reader.readFloatBig();
b = reader.readFloatBig();
a = reader.readFloatBig();
}
inline void readBGRABig(athena::io::IStreamReader& reader)
{
b = reader.readFloatBig();
g = reader.readFloatBig();
r = reader.readFloatBig();
a = reader.readFloatBig();
}
inline void writeRGBABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(r);
writer.writeFloatBig(g);
writer.writeFloatBig(b);
writer.writeFloatBig(a);
}
inline void writeBGRABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(b);
writer.writeFloatBig(g);
writer.writeFloatBig(r);
writer.writeFloatBig(a);
}
inline void writeRGBA8(athena::io::IStreamWriter& writer) const
{
writer.writeUByte(this->r * 255);
writer.writeUByte(this->g * 255);
writer.writeUByte(this->b * 255);
writer.writeUByte(this->a * 255);
}
#endif #endif
inline bool operator==(const CColor& rhs) const { return (r == rhs.r && g == rhs.g && b == rhs.b && a == rhs.a); } CColor(Comp32 rgba) { fromRGBA32(rgba); }
inline bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
inline CColor operator+(const CColor& rhs) const CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
{
#if __SSE__ CColor(const CVector4f& other) : mSimd(other.mSimd) {}
return CColor(_mm_add_ps(mVec128, rhs.mVec128));
#else template <typename T>
return CColor(r + rhs.r, g + rhs.g, b + rhs.b, a + rhs.a); CColor(const simd<T>& s) : mSimd(s) {}
#endif
} CColor& operator=(const CVector4f& other) {
inline CColor operator-(const CColor& rhs) const mSimd = other.mSimd;
{ return *this;
#if __SSE__ }
return CColor(_mm_sub_ps(mVec128, rhs.mVec128));
#else #if ZE_ATHENA_TYPES
return CColor(r - rhs.r, g - rhs.g, b - rhs.b, a - rhs.a);
#endif static CColor ReadRGBABig(athena::io::IStreamReader& reader) {
} CColor ret;
inline CColor operator*(const CColor& rhs) const ret.readRGBABig(reader);
{ return ret;
#if __SSE__ }
return CColor(_mm_mul_ps(mVec128, rhs.mVec128));
#else void readRGBABig(athena::io::IStreamReader& reader) {
return CColor(r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a); simd_floats f;
#endif f[0] = reader.readFloatBig();
} f[1] = reader.readFloatBig();
inline CColor operator/(const CColor& rhs) const f[2] = reader.readFloatBig();
{ f[3] = reader.readFloatBig();
#if __SSE__ mSimd.copy_from(f);
return CColor(_mm_div_ps(mVec128, rhs.mVec128)); }
#else
return CColor(r / rhs.r, g / rhs.g, b / rhs.b, a / rhs.a); void readBGRABig(athena::io::IStreamReader& reader) {
#endif simd_floats f;
} f[2] = reader.readFloatBig();
inline CColor operator+(float val) const f[1] = reader.readFloatBig();
{ f[0] = reader.readFloatBig();
#if __SSE__ f[3] = reader.readFloatBig();
TVectorUnion splat = {{val, val, val, val}}; mSimd.copy_from(f);
return CColor(_mm_add_ps(mVec128, splat.mVec128)); }
#else
return CColor(r + val, g + val, b + val, a + val); void writeRGBABig(athena::io::IStreamWriter& writer) const {
#endif simd_floats f(mSimd);
} writer.writeFloatBig(f[0]);
inline CColor operator-(float val) const writer.writeFloatBig(f[1]);
{ writer.writeFloatBig(f[2]);
#if __SSE__ writer.writeFloatBig(f[3]);
TVectorUnion splat = {{val, val, val, val}}; }
return CColor(_mm_sub_ps(mVec128, splat.mVec128));
#else void writeBGRABig(athena::io::IStreamWriter& writer) const {
return CColor(r - val, g - val, b - val, a - val); simd_floats f(mSimd);
#endif writer.writeFloatBig(f[2]);
} writer.writeFloatBig(f[1]);
inline CColor operator*(float val) const writer.writeFloatBig(f[0]);
{ writer.writeFloatBig(f[3]);
#if __SSE__ }
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_mul_ps(mVec128, splat.mVec128)); void writeRGBA8(athena::io::IStreamWriter& writer) const {
#else simd_floats f(mSimd);
return CColor(r * val, g * val, b * val, a * val); writer.writeUByte(atUint8(f[0] * 255));
#endif writer.writeUByte(atUint8(f[1] * 255));
} writer.writeUByte(atUint8(f[2] * 255));
inline CColor operator/(float val) const writer.writeUByte(atUint8(f[3] * 255));
{ }
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_div_ps(mVec128, splat.mVec128));
#else
return CColor(r / val, g / val, b / val, a / val);
#endif
}
inline const CColor& operator+=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
r += rhs.r;
g += rhs.g;
b += rhs.b;
a += rhs.a;
#endif
return *this;
}
inline const CColor& operator-=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
r -= rhs.r;
g -= rhs.g;
b -= rhs.b;
a -= rhs.a;
#endif
return *this;
}
inline const CColor& operator*=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
r *= rhs.r;
g *= rhs.g;
b *= rhs.b;
a *= rhs.a;
#endif
return *this;
}
inline const CColor& operator/=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
r /= rhs.r;
g /= rhs.g;
b /= rhs.b;
a /= rhs.a;
#endif
return *this;
}
inline void normalize()
{
float mag = magnitude();
mag = 1.f / mag;
*this *= mag;
}
inline CColor normalized() const
{
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif #endif
#else
return r * r + g * g + b * b + a * a;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
static inline CColor lerp(const CColor& a, const CColor& b, float t) { return (a + (b - a) * t); }
static inline CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
inline float& operator[](const size_t& idx) { assert(idx < 4); return (&r)[idx]; }
inline const float& operator[](const size_t& idx) const { assert(idx < 4); return (&r)[idx]; }
inline void splat(float rgb, float a)
{
#if __SSE__
TVectorUnion splat = {{rgb, rgb, rgb, a}};
mVec128 = splat.mVec128;
#else
v[0] = rgb;
v[1] = rgb;
v[2] = rgb;
v[3] = a;
#endif
}
inline float rgbDot(const CColor& rhs) const bool operator==(const CColor& rhs) const {
{ return (r() == rhs.r() && g() == rhs.g() && b() == rhs.b() && a() == rhs.a());
#if __SSE__ }
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif
}
union { bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
struct
{
float r, g, b, a;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
void fromRGBA8(Comp8 r, Comp8 g, Comp8 b, Comp8 a) CColor operator+(const CColor& rhs) const {
{ return mSimd + rhs.mSimd;
this->r = r * OneOver255; }
this->g = g * OneOver255;
this->b = b * OneOver255;
this->a = a * OneOver255;
}
void fromRGBA32(Comp32 rgba) CColor operator-(const CColor& rhs) const {
{ return mSimd - rhs.mSimd;
static RGBA32 tmp; }
tmp.rgba = COLOR(rgba);
fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
}
/*! CColor operator*(const CColor& rhs) const {
* \brief Converts a CColor to RGBA8 return mSimd * rhs.mSimd;
* \param r }
* \param g
* \param b
* \param a
*/
void toRGBA8(Comp8& r, Comp8& g, Comp8& b, Comp8& a)
{
r = this->r * 255;
g = this->g * 255;
b = this->b * 255;
a = this->a * 255;
}
/** CColor operator/(const CColor& rhs) const {
* @brief Assigns rgba from hsv return mSimd / rhs.mSimd;
* @param h[0-1] The hue percentagee of the color. }
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void fromHSV(float h, float s, float v, float _a = 1.0);
/** CColor operator+(float val) const {
* @brief Converts rgba to hsv return mSimd + simd<float>(val);
* @param h[0-1] The hue percentagee of the color. }
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void toHSV(float& h, float& s, float& v) const;
void fromHSL(float h, float s, float l, float _a = 1.0); CColor operator-(float val) const {
return mSimd - simd<float>(val);
}
void toHSL(float& h, float& s, float& l); CColor operator*(float val) const {
return mSimd * simd<float>(val);
}
CColor toGrayscale() { return {std::sqrt((r * r + g * g + b * b) / 3), a}; } CColor operator/(float val) const {
return mSimd / simd<float>(val);
}
/** const CColor& operator+=(const CColor& rhs) {
* @brief Clamps to GPU-safe RGBA values [0,1] mSimd += rhs.mSimd;
*/ return *this;
void Clamp() }
{
this->r = std::min(1.f, std::max(0.f, this->r)); const CColor& operator-=(const CColor& rhs) {
this->g = std::min(1.f, std::max(0.f, this->g)); mSimd -= rhs.mSimd;
this->b = std::min(1.f, std::max(0.f, this->b)); return *this;
this->a = std::min(1.f, std::max(0.f, this->a)); }
}
const CColor& operator*=(const CColor& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CColor& operator/=(const CColor& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
const CColor& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
const CColor& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
const CColor& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
const CColor& operator/=(float rhs) {
mSimd /= simd<float>(rhs);
return *this;
}
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= mag;
}
CColor normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
float magSquared() const {
return mSimd.dot4(mSimd);
}
float magnitude() const { return std::sqrt(magSquared()); }
/**
 * @brief Linearly blends two colors: (1 - t) * a + b * t.
 *
 * t is not clamped here, so values outside [0,1] extrapolate.
 *
 * @param a Color weighted by (1 - t).
 * @param b Color weighted by t.
 * @param t Blend factor.
 * @return The blended color.
 */
static CColor lerp(const CColor& a, const CColor& b, float t) {
  const zeus::simd<float> weightA(1.f - t);
  const zeus::simd<float> weightB(t);
  return weightA * a.mSimd + b.mSimd * weightB;
}
/**
 * @brief Blends two colors with lerp() and returns the normalized() result.
 * @param a Color weighted by (1 - t).
 * @param b Color weighted by t.
 * @param t Blend factor, forwarded unchanged to lerp().
 * @return The normalized blend.
 */
static CColor nlerp(const CColor& a, const CColor& b, float t) {
  const CColor blended = lerp(a, b, t);
  return blended.normalized();
}
/**
 * @brief Mutable access to one channel by index.
 *
 * Returns whatever mSimd[idx] yields (a simd<float>::reference), so the
 * result can be assigned through.
 *
 * @param idx Channel index; must be below 4. This is only enforced by the
 *            assert, i.e. not at all when NDEBUG is defined.
 */
simd<float>::reference operator[](const size_t& idx) {
assert(idx < 4);
return mSimd[idx];
}
/**
 * @brief Read-only access to one channel by index.
 *
 * @param idx Channel index; must be below 4. This is only enforced by the
 *            assert, i.e. not at all when NDEBUG is defined.
 * @return The value of mSimd[idx], by value.
 */
float operator[](const size_t& idx) const {
assert(idx < 4);
return mSimd[idx];
}
void splat(float rgb, float a) {
mSimd = simd<float>(rgb);
mSimd[3] = a;
}
/**
 * @brief Returns dot3 of this color's mSimd with rhs.mSimd.
 * @param rhs Color forming the right-hand operand of the dot product.
 * @return Result of mSimd.dot3(rhs.mSimd).
 */
float rgbDot(const CColor& rhs) const {
  const auto& other = rhs.mSimd;
  return mSimd.dot3(other);
}
void fromRGBA8(const Comp8 ri, const Comp8 gi, const Comp8 bi, const Comp8 ai) {
mSimd = simd<float>(ri * OneOver255, gi * OneOver255, bi * OneOver255, ai * OneOver255);
}
/**
 * @brief Assigns rgba from a packed 32-bit color value.
 *
 * The value is passed through COLOR(), stored in an RGBA32 scratch object,
 * and that object's r/g/b/a members are forwarded to fromRGBA8().
 *
 * @param rgba Packed color; the channel layout is whatever COLOR() and RGBA32 define.
 */
void fromRGBA32(Comp32 rgba) {
  // Automatic storage on purpose: a function-local static scratch object is
  // shared by every call and races when colors are unpacked on several threads.
  RGBA32 tmp;
  tmp.rgba = COLOR(rgba);
  fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
}
/*!
 * \brief Converts a CColor to RGBA8
 *
 * Each channel is multiplied by 255 and converted to Comp8. No clamping or
 * rounding is applied in this function.
 *
 * \param ro Receives the converted red channel.
 * \param go Receives the converted green channel.
 * \param bo Receives the converted blue channel.
 * \param ao Receives the converted alpha channel.
 */
void toRGBA8(Comp8& ro, Comp8& go, Comp8& bo, Comp8& ao) const {
  ro = static_cast<Comp8>(r() * 255);
  go = static_cast<Comp8>(g() * 255);
  bo = static_cast<Comp8>(b() * 255);
  ao = static_cast<Comp8>(a() * 255);
}
/**
 * @brief Assigns rgba from hsv
 * @param h[0-1] The hue percentage of the color.
 * @param s[0-1] The saturation percentage of the color.
 * @param v[0-1] The value percentage of the color.
 * @param _a[0-1] The alpha percentage of the color (defaults to 1.0).
 */
void fromHSV(float h, float s, float v, float _a = 1.0);
/**
 * @brief Converts rgba to hsv
 *
 * The three components are handed back through the reference parameters;
 * there is no alpha parameter.
 *
 * @param h[0-1] Output: the hue percentage of the color.
 * @param s[0-1] Output: the saturation percentage of the color.
 * @param v[0-1] Output: the value percentage of the color.
 */
void toHSV(float& h, float& s, float& v) const;
/**
 * @brief Assigns rgba from hsl; defined out of line.
 * @param h  Hue.
 * @param s  Saturation.
 * @param l  Lightness.
 * @param _a Alpha (defaults to 1.0).
 * @note NOTE(review): inputs are presumably in [0-1] like fromHSV -- confirm against the definition.
 */
void fromHSL(float h, float s, float l, float _a = 1.0);
/**
 * @brief Converts rgba to hsl; defined out of line.
 *
 * The three components are handed back through the reference parameters;
 * there is no alpha parameter.
 *
 * @param h Output: hue.
 * @param s Output: saturation.
 * @param l Output: lightness.
 * @note NOTE(review): outputs are presumably in [0-1] like toHSV -- confirm against the definition.
 */
void toHSL(float& h, float& s, float& l) const;
/**
 * @brief Builds a gray color from the root-mean-square of r, g and b.
 *
 * The gray level is sqrt((r*r + g*g + b*b) / 3); alpha is carried over unchanged.
 *
 * @return A color constructed from the gray level and a().
 */
CColor toGrayscale() const {
  const float red = r();
  const float green = g();
  const float blue = b();
  const float level = std::sqrt((red * red + green * green + blue * blue) / 3);
  return {level, a()};
}
/**
* @brief Clamps to GPU-safe RGBA values [0,1]
*/
void Clamp() {
r() = std::min(1.f, std::max(0.f, float(r())));
g() = std::min(1.f, std::max(0.f, float(g())));
b() = std::min(1.f, std::max(0.f, float(b())));
a() = std::min(1.f, std::max(0.f, float(a())));
}
/**
 * @brief Read-only channel accessors.
 *
 * r, g, b and a return elements 0, 1, 2 and 3 of mSimd by value.
 */
float r() const { return mSimd[0]; }
float g() const { return mSimd[1]; }
float b() const { return mSimd[2]; }
float a() const { return mSimd[3]; }
/**
 * @brief Mutable channel accessors.
 *
 * r, g, b and a return mSimd[0..3] as simd<float>::reference, so a channel
 * can be assigned through the call (as Clamp() does with `r() = ...`).
 */
simd<float>::reference r() { return mSimd[0]; }
simd<float>::reference g() { return mSimd[1]; }
simd<float>::reference b() { return mSimd[2]; }
simd<float>::reference a() { return mSimd[3]; }
}; };
static inline CColor operator+(float lhs, const CColor& rhs) static inline CColor operator+(float lhs, const CColor& rhs) {
{ return simd<float>(lhs) + rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs + rhs.r, lhs + rhs.g, lhs + rhs.b, lhs + rhs.a);
#endif
} }
static inline CColor operator-(float lhs, const CColor& rhs) static inline CColor operator-(float lhs, const CColor& rhs) {
{ return simd<float>(lhs) - rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs - rhs.r, lhs - rhs.g, lhs - rhs.b, lhs - rhs.a);
#endif
} }
static inline CColor operator*(float lhs, const CColor& rhs) static inline CColor operator*(float lhs, const CColor& rhs) {
{ return simd<float>(lhs) * rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs * rhs.r, lhs * rhs.g, lhs * rhs.b, lhs * rhs.a);
#endif
} }
static inline CColor operator/(float lhs, const CColor& rhs) static inline CColor operator/(float lhs, const CColor& rhs) {
{ return simd<float>(lhs) / rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs / rhs.r, lhs / rhs.g, lhs / rhs.b, lhs / rhs.a);
#endif
} }
} }

View File

@ -2,16 +2,14 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{
class CQuaternion; class CQuaternion;
class CEulerAngles : public CVector3f class CEulerAngles : public CVector3f {
{
public: public:
CEulerAngles(float x, float y, float z) { assign(x, y, z); } CEulerAngles(float x, float y, float z) { assign(x, y, z); }
CEulerAngles(const CQuaternion& quat); CEulerAngles(const CQuaternion& quat);
CEulerAngles(const CTransform& xf); CEulerAngles(const CTransform& xf);
}; };
} }

View File

@ -4,19 +4,16 @@
#include "zeus/CAABox.hpp" #include "zeus/CAABox.hpp"
#include "zeus/CProjection.hpp" #include "zeus/CProjection.hpp"
namespace zeus namespace zeus {
{ class CFrustum {
class CFrustum CPlane planes[6];
{ bool valid = false;
CPlane planes[6];
bool valid = false;
public: public:
void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection); void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection);
void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection); void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection);
bool aabbFrustumTest(const CAABox& aabb) const;
bool aabbFrustumTest(const CAABox& aabb) const; bool sphereFrustumTest(const CSphere& sphere) const;
bool sphereFrustumTest(const CSphere& sphere) const; bool pointFrustumTest(const CVector3f& point) const;
bool pointFrustumTest(const CVector3f& point) const;
}; };
} }

View File

@ -3,14 +3,13 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CLine {
class CLine
{
public: public:
CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {} CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {}
CVector3f origin;
CVector3f dir; CVector3f origin;
CVector3f dir;
}; };
} }

View File

@ -3,23 +3,20 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CLineSeg {
class CLineSeg
{
public: public:
CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end) CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end) {
{ CVector3f tmp = (end - start).normalized();
CVector3f tmp = (end - start).normalized(); if (tmp.x() != 0.f || tmp.y() != 0.f || tmp.z() != 0.f)
if (tmp.x != 0 || tmp.y != 0 || tmp.z != 0) xc_dir = tmp.normalized();
xc_dir = tmp.normalized(); else
else xc_dir = CVector3f::skZero;
xc_dir = CVector3f::skZero; }
}
CVector3f x0_start; CVector3f x0_start;
CVector3f xc_dir; CVector3f xc_dir;
CVector3f x18_end; CVector3f x18_end;
}; };
} }

View File

@ -1,38 +1,34 @@
#pragma once #pragma once
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{ struct CMRay {
struct CMRay CMRay(const CVector3f& start, const CVector3f& dirin, float len)
{ : start(start), length(len), invLength(1.f / len), dir(dirin) {
CMRay(const CVector3f& start, const CVector3f& dirin, float len) end = start + (len * dirin);
: start(start), length(len), invLength(1.f / len), dir(dirin) delta = end - start;
{ }
end = start + (len * dirin);
delta = end - start;
}
CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen) CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen)
: start(start), end(end), length(len), invLength(invLen) : start(start), end(end), length(len), invLength(invLen) {
{ delta = end - start;
delta = end - start; dir = invLen * delta;
dir = invLen * delta; }
}
CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const {
{ const CTransform inv = xfrm.inverse();
const CTransform inv = xfrm.inverse(); return CMRay(inv * start, inv * end, length, invLength);
return CMRay(inv * start, inv * end, length, invLength); }
}
CVector3f start; // x0 CVector3f start; // x0
CVector3f end; // xc CVector3f end; // xc
CVector3f delta; // x18 CVector3f delta; // x18
float length; // x24 float length; // x24
float invLength; // x28 float invLength; // x28
CVector3f dir; // x2c CVector3f dir; // x2c
}; };
} }

View File

@ -6,254 +6,185 @@
#include <cstring> #include <cstring>
/* Column-major matrix class */ /* Column-major matrix class */
namespace zeus namespace zeus {
{
class CQuaternion; class CQuaternion;
class alignas(16) CMatrix3f
{ class CMatrix3f {
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix3f(bool zero = false) explicit CMatrix3f(bool zero = false) {
{ m[0] = simd<float>(0.f);
memset(m, 0, sizeof(m)); m[1] = simd<float>(0.f);
if (!zero) m[2] = simd<float>(0.f);
{ if (!zero) {
m[0][0] = 1.0; m[0][0] = 1.0;
m[1][1] = 1.0; m[1][1] = 1.0;
m[2][2] = 1.0; m[2][2] = 1.0;
}
} }
CMatrix3f(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22) }
{
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02; CMatrix3f(float m00, float m01, float m02,
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12; float m10, float m11, float m12,
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22; float m20, float m21, float m22)
} : m{{m00, m10, m20},
CMatrix3f(const CVector3f& scaleVec) {m01, m11, m21},
{ {m02, m12, m22}} {}
memset(m, 0, sizeof(m));
m[0][0] = scaleVec[0]; CMatrix3f(const CVector3f& scaleVec) {
m[1][1] = scaleVec[1]; m[0] = simd<float>(0.f);
m[2][2] = scaleVec[2]; m[1] = simd<float>(0.f);
} m[2] = simd<float>(0.f);
CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {} m[0][0] = scaleVec[0];
CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2) m[1][1] = scaleVec[1];
{ m[2][2] = scaleVec[2];
vec[0] = r0; }
vec[1] = r1;
vec[2] = r2; CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {}
}
CMatrix3f(const CMatrix3f& other) CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2) {
{ m[0] = r0;
vec[0] = other.vec[0]; m[1] = r1;
vec[1] = other.vec[1]; m[2] = r2;
vec[2] = other.vec[2]; }
}
#if __SSE__ CMatrix3f(const CMatrix3f& other) {
CMatrix3f(const __m128& r0, const __m128& r1, const __m128& r2) m[0] = other.m[0];
{ m[1] = other.m[1];
vec[0].mVec128 = r0; m[2] = other.m[2];
vec[1].mVec128 = r1; }
vec[2].mVec128 = r2;
} CMatrix3f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2) {
#endif m[0].mSimd = r0;
m[1].mSimd = r1;
m[2].mSimd = r2;
}
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2)
{ CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2) {
#if __SSE__ m[0].mSimd = r0.simd;
vec[0].mVec128 = r0.mVec128; m[1].mSimd = r1.simd;
vec[1].mVec128 = r1.mVec128; m[2].mSimd = r2.simd;
vec[2].mVec128 = r2.mVec128; }
#else
vec[0].x = r0.vec[0]; void readBig(athena::io::IStreamReader& input) {
vec[0].y = r0.vec[1]; m[0][0] = input.readFloatBig();
vec[0].z = r0.vec[2]; m[1][0] = input.readFloatBig();
vec[1].x = r1.vec[0]; m[2][0] = input.readFloatBig();
vec[1].y = r1.vec[1]; m[0][1] = input.readFloatBig();
vec[1].z = r1.vec[2]; m[1][1] = input.readFloatBig();
vec[2].x = r2.vec[0]; m[2][1] = input.readFloatBig();
vec[2].y = r2.vec[1]; m[0][2] = input.readFloatBig();
vec[2].z = r2.vec[2]; m[1][2] = input.readFloatBig();
m[2][2] = input.readFloatBig();
}
static CMatrix3f ReadBig(athena::io::IStreamReader& input) {
CMatrix3f ret;
ret.readBig(input);
return ret;
}
#endif #endif
}
void readBig(athena::io::IStreamReader& input)
{
m[0][0] = input.readFloatBig();
m[1][0] = input.readFloatBig();
m[2][0] = input.readFloatBig();
m[0][1] = input.readFloatBig();
m[1][1] = input.readFloatBig();
m[2][1] = input.readFloatBig();
m[0][2] = input.readFloatBig();
m[1][2] = input.readFloatBig();
m[2][2] = input.readFloatBig();
}
static CMatrix3f ReadBig(athena::io::IStreamReader& input) CMatrix3f(const CVector3f& axis, float angle);
{
CMatrix3f ret;
ret.readBig(input);
return ret;
}
#endif
CMatrix3f(const CVector3f& axis, float angle);
CMatrix3f(const CQuaternion& quat);
CMatrix3f(const TVectorUnion& r0, const TVectorUnion& r1, const TVectorUnion& r2)
{
#if __SSE__
vec[0].mVec128 = r0.mVec128;
vec[1].mVec128 = r1.mVec128;
vec[2].mVec128 = r2.mVec128;
#else
vec[0].x = r0.vec[0];
vec[0].y = r0.vec[1];
vec[0].z = r0.vec[2];
vec[1].x = r1.vec[0];
vec[1].y = r1.vec[1];
vec[1].z = r1.vec[2];
vec[2].x = r2.vec[0];
vec[2].y = r2.vec[1];
vec[2].z = r2.vec[2];
#endif
}
inline CMatrix3f& operator=(const CMatrix3f& other) CMatrix3f(const CQuaternion& quat);
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
return *this;
}
inline CVector3f operator*(const CVector3f& other) const CMatrix3f& operator=(const CMatrix3f& other) {
{ m[0] = other.m[0];
#if __SSE__ m[1] = other.m[1];
TVectorUnion res; m[2] = other.m[2];
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)), return *this;
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))), }
_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)));
return CVector3f(res.mVec128);
#else
return CVector3f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2],
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2],
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2]);
#endif
}
inline CVector3f& operator[](int i) CVector3f operator*(const CVector3f& other) const {
{ return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
assert(0 <= i && i < 3); m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
return vec[i]; m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>();
} }
inline const CVector3f& operator[](int i) const CVector3f& operator[](size_t i) {
{ assert(i < 3);
assert(0 <= i && i < 3); return m[i];
return vec[i]; }
}
inline CMatrix3f orthonormalized() const const CVector3f& operator[](size_t i) const {
{ assert(i < 3);
CMatrix3f ret; return m[i];
ret[0] = vec[0].normalized(); }
ret[2] = ret[0].cross(vec[1]);
ret[2].normalize();
ret[1] = ret[2].cross(ret[0]);
return ret;
}
inline bool operator==(const CMatrix3f& other) const CMatrix3f orthonormalized() const {
{ CMatrix3f ret;
return vec[0] == other.vec[0] && vec[1] == other.vec[1] && vec[2] == other.vec[2]; ret[0] = m[0].normalized();
} ret[2] = ret[0].cross(m[1]);
ret[2].normalize();
ret[1] = ret[2].cross(ret[0]);
return ret;
}
static const CMatrix3f skIdentityMatrix3f; bool operator==(const CMatrix3f& other) const {
return m[0] == other.m[0] && m[1] == other.m[1] && m[2] == other.m[2];
}
void transpose(); static const CMatrix3f skIdentityMatrix3f;
void transposeSSE3();
CMatrix3f transposed() const;
CMatrix3f transposedSSE3() const;
inline void invert() { *this = inverted(); } void transpose();
CMatrix3f inverted() const;
void addScaledMatrix(const CMatrix3f& other, float scale) CMatrix3f transposed() const;
{
CVector3f scaleVec(scale);
vec[0] += other.vec[0] * scaleVec;
vec[1] += other.vec[1] * scaleVec;
vec[2] += other.vec[2] * scaleVec;
}
static inline CMatrix3f RotateX(float theta) void invert() { *this = inverted(); }
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, cosT, sinT, 0.f}},
TVectorUnion{{0.f, -sinT, cosT, 0.f}});
}
static inline CMatrix3f RotateY(float theta) CMatrix3f inverted() const;
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}},
TVectorUnion{{sinT, 0.f, cosT, 0.f}});
}
static inline CMatrix3f RotateZ(float theta) void addScaledMatrix(const CMatrix3f& other, float scale) {
{ CVector3f scaleVec(scale);
float sinT = std::sin(theta); m[0] += other.m[0] * scaleVec;
float cosT = std::cos(theta); m[1] += other.m[1] * scaleVec;
return CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}}, m[2] += other.m[2] * scaleVec;
TVectorUnion{{-sinT, cosT, 0.f, 0.f}}, }
TVectorUnion{{0.f, 0.f, 1.f, 0.f}});
}
float determinant() const static CMatrix3f RotateX(float theta) {
{ float sinT = std::sin(theta);
return float cosT = std::cos(theta);
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) + return CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) + simd<float>{0.f, cosT, sinT, 0.f},
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); simd<float>{0.f, -sinT, cosT, 0.f});
} }
union { static CMatrix3f RotateY(float theta) {
float m[3][4]; /* 4th row for union-alignment */ float sinT = std::sin(theta);
struct float cosT = std::cos(theta);
{ return CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
CVector3f vec[3]; simd<float>{0.f, 1.f, 0.f, 0.f},
}; simd<float>{sinT, 0.f, cosT, 0.f});
}; }
static CMatrix3f RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f});
}
float determinant() const {
return
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) +
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
}
CVector3f m[3];
}; };
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs) static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs) {
{ simd<float> v[3];
#if __SSE__ for (int i = 0; i < 3; ++i)
unsigned i; v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
TVectorUnion resVec[3]; lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
for (i = 0; i < 3; ++i) lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>();
{ return CMatrix3f(v[0], v[1], v[2]);
resVec[i].mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
_mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
_mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
resVec[i].v[3] = 0.0;
}
return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
#else
return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
#endif
} }
} }

View File

@ -1,176 +1,116 @@
#pragma once #pragma once
#include "zeus/CMatrix3f.hpp" #include "zeus/CMatrix3f.hpp"
#include "zeus/CVector4f.hpp" #include "zeus/CVector4f.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CMatrix4f {
class alignas(16) CMatrix4f
{
public: public:
static const CMatrix4f skIdentityMatrix4f; static const CMatrix4f skIdentityMatrix4f;
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix4f(bool zero = false)
{
memset(m, 0, sizeof(m));
if (!zero) explicit CMatrix4f(bool zero = false) {
{ if (!zero) {
m[0][0] = 1.0; m[0][0] = 1.0;
m[1][1] = 1.0; m[1][1] = 1.0;
m[2][2] = 1.0; m[2][2] = 1.0;
m[3][3] = 1.0; m[3][3] = 1.0;
}
} }
CMatrix4f(float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20, float m21, }
float m22, float m23, float m30, float m31, float m32, float m33)
{
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02, m[3][0] = m03;
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12, m[3][1] = m13;
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22, m[3][2] = m23;
m[0][3] = m30, m[1][3] = m31, m[2][3] = m32, m[3][3] = m33;
}
CMatrix4f(const CVector3f& scaleVec)
{
memset(m, 0, sizeof(m));
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
m[3][3] = 1.0f;
}
CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3)
{
vec[0] = r0;
vec[1] = r1;
vec[2] = r2;
vec[3] = r3;
}
CMatrix4f(const CMatrix4f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = other.vec[3];
}
#if __SSE__
CMatrix4f(const __m128& r0, const __m128& r1, const __m128& r2, const __m128& r3)
{
vec[0].mVec128 = r0;
vec[1].mVec128 = r1;
vec[2].mVec128 = r2;
vec[3].mVec128 = r3;
}
#endif
CMatrix4f(const CMatrix3f& other)
{
memset(m, 0, sizeof(m));
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = CVector4f(0, 0, 0, 1.0f);
}
inline CMatrix4f& operator=(const CMatrix4f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = other.vec[3];
return *this;
}
inline CVector4f operator*(const CVector4f& other) const
{
#if __SSE__
TVectorUnion res;
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
_mm_add_ps(_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)),
_mm_mul_ps(vec[3].mVec128, ze_splat_ps(other.mVec128, 3))));
return CVector4f(res.mVec128); CMatrix4f(float m00, float m01, float m02, float m03,
#else float m10, float m11, float m12, float m13,
return CVector4f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2] + m[3][0] * other.v[3], float m20, float m21, float m22, float m23,
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2] + m[3][1] * other.v[3], float m30, float m31, float m32, float m33)
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2] + m[3][2] * other.v[3], : m{{m00, m10, m20, m30},
m[0][3] * other.v[0] + m[1][3] * other.v[1] + m[2][3] * other.v[2] + m[3][3] * other.v[3]); {m01, m11, m21, m31},
#endif {m02, m12, m22, m32},
} {m03, m13, m23, m33}} {}
inline CVector4f& operator[](int i) CMatrix4f(const CVector3f& scaleVec) {
{ m[0][0] = scaleVec[0];
assert(0 <= i && i < 4); m[1][1] = scaleVec[1];
return vec[i]; m[2][2] = scaleVec[2];
} m[3][3] = 1.0f;
}
inline const CVector4f& operator[](int i) const CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3) {
{ m[0] = r0;
assert(0 <= i && i < 4); m[1] = r1;
return vec[i]; m[2] = r2;
} m[3] = r3;
}
CMatrix4f transposed() const; CMatrix4f(const CMatrix4f& other) {
CMatrix4f transposedSSE3() const; m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = other.m[3];
}
inline CVector3f multiplyOneOverW(const CVector3f& point) const CMatrix4f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2, const simd<float>& r3) {
{ m[0].mSimd = r0;
CVector4f xfVec = *this * point; m[1].mSimd = r1;
return xfVec.toVec3f() / xfVec.w; m[2].mSimd = r2;
} m[3].mSimd = r3;
}
inline CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const CMatrix4f(const CMatrix3f& other) {
{ m[0] = other.m[0];
CVector4f xfVec = *this * point; m[1] = other.m[1];
wOut = xfVec.w; m[2] = other.m[2];
return xfVec.toVec3f() / xfVec.w; m[3] = CVector4f(0.f, 0.f, 0.f, 1.0f);
} }
union { CMatrix4f& operator=(const CMatrix4f& other) {
float m[4][4]; m[0] = other.m[0];
struct m[1] = other.m[1];
{ m[2] = other.m[2];
CVector4f vec[4]; m[3] = other.m[3];
}; return *this;
}; }
CVector4f operator*(const CVector4f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>() +
m[3].mSimd * other.mSimd.shuffle<3, 3, 3, 3>();
}
CVector4f& operator[](size_t i) {
assert(i < 4);
return m[i];
}
const CVector4f& operator[](size_t i) const {
assert(i < 4);
return m[i];
}
CMatrix4f transposed() const;
CVector3f multiplyOneOverW(const CVector3f& point) const {
CVector4f xfVec = *this * point;
return xfVec.toVec3f() / xfVec.w();
}
CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const {
CVector4f xfVec = *this * point;
wOut = xfVec.w();
return xfVec.toVec3f() / xfVec.w();
}
CVector4f m[4];
}; };
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs)
{
CMatrix4f ret;
#if __SSE__
unsigned i;
for (i = 0; i < 4; ++i) static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs) {
{ simd<float> v[4];
ret.vec[i].mVec128 = _mm_add_ps( for (int i = 0; i < 4; ++i)
_mm_add_ps(_mm_add_ps(_mm_mul_ps(lhs.vec[0].mVec128, v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(0, 0, 0, 0))), lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
_mm_mul_ps(lhs.vec[1].mVec128, lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>() +
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(1, 1, 1, 1)))), lhs.m[3].mSimd * rhs[i].mSimd.shuffle<3, 3, 3, 3>();
_mm_mul_ps(lhs.vec[2].mVec128, return CMatrix4f(v[0], v[1], v[2], v[3]);
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(2, 2, 2, 2)))),
_mm_mul_ps(lhs.vec[3].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(3, 3, 3, 3))));
}
#else
ret.m[0][0] = lhs.m[0][0] * rhs.m[0][0] + lhs.m[1][0] * rhs.m[0][1] + lhs.m[2][0] * rhs.m[0][2] + lhs.m[3][0] * rhs.m[0][3];
ret.m[1][0] = lhs.m[0][0] * rhs.m[1][0] + lhs.m[1][0] * rhs.m[1][1] + lhs.m[2][0] * rhs.m[1][2] + lhs.m[3][0] * rhs.m[1][3];
ret.m[2][0] = lhs.m[0][0] * rhs.m[2][0] + lhs.m[1][0] * rhs.m[2][1] + lhs.m[2][0] * rhs.m[2][2] + lhs.m[3][0] * rhs.m[2][3];
ret.m[3][0] = lhs.m[0][0] * rhs.m[3][0] + lhs.m[1][0] * rhs.m[3][1] + lhs.m[2][0] * rhs.m[3][2] + lhs.m[3][0] * rhs.m[3][3];
ret.m[0][1] = lhs.m[0][1] * rhs.m[0][0] + lhs.m[1][1] * rhs.m[0][1] + lhs.m[2][1] * rhs.m[0][2] + lhs.m[3][1] * rhs.m[0][3];
ret.m[1][1] = lhs.m[0][1] * rhs.m[1][0] + lhs.m[1][1] * rhs.m[1][1] + lhs.m[2][1] * rhs.m[1][2] + lhs.m[3][1] * rhs.m[1][3];
ret.m[2][1] = lhs.m[0][1] * rhs.m[2][0] + lhs.m[1][1] * rhs.m[2][1] + lhs.m[2][1] * rhs.m[2][2] + lhs.m[3][1] * rhs.m[2][3];
ret.m[3][1] = lhs.m[0][1] * rhs.m[3][0] + lhs.m[1][1] * rhs.m[3][1] + lhs.m[2][1] * rhs.m[3][2] + lhs.m[3][1] * rhs.m[3][3];
ret.m[0][2] = lhs.m[0][2] * rhs.m[0][0] + lhs.m[1][2] * rhs.m[0][1] + lhs.m[2][2] * rhs.m[0][2] + lhs.m[3][2] * rhs.m[0][3];
ret.m[1][2] = lhs.m[0][2] * rhs.m[1][0] + lhs.m[1][2] * rhs.m[1][1] + lhs.m[2][2] * rhs.m[1][2] + lhs.m[3][2] * rhs.m[1][3];
ret.m[2][2] = lhs.m[0][2] * rhs.m[2][0] + lhs.m[1][2] * rhs.m[2][1] + lhs.m[2][2] * rhs.m[2][2] + lhs.m[3][2] * rhs.m[2][3];
ret.m[3][2] = lhs.m[0][2] * rhs.m[3][0] + lhs.m[1][2] * rhs.m[3][1] + lhs.m[2][2] * rhs.m[3][2] + lhs.m[3][2] * rhs.m[3][3];
ret.m[0][3] = lhs.m[0][3] * rhs.m[0][0] + lhs.m[1][3] * rhs.m[0][1] + lhs.m[2][3] * rhs.m[0][2] + lhs.m[3][3] * rhs.m[0][3];
ret.m[1][3] = lhs.m[0][3] * rhs.m[1][0] + lhs.m[1][3] * rhs.m[1][1] + lhs.m[2][3] * rhs.m[1][2] + lhs.m[3][3] * rhs.m[1][3];
ret.m[2][3] = lhs.m[0][3] * rhs.m[2][0] + lhs.m[1][3] * rhs.m[2][1] + lhs.m[2][3] * rhs.m[2][2] + lhs.m[3][3] * rhs.m[2][3];
ret.m[3][3] = lhs.m[0][3] * rhs.m[3][0] + lhs.m[1][3] * rhs.m[3][1] + lhs.m[2][3] * rhs.m[2][2] + lhs.m[3][3] * rhs.m[3][3];
#endif
return ret;
} }
} }

View File

@ -5,51 +5,46 @@
#include "zeus/CAABox.hpp" #include "zeus/CAABox.hpp"
#include "zeus/CMRay.hpp" #include "zeus/CMRay.hpp"
namespace zeus namespace zeus {
{ class COBBox {
class alignas(16) COBBox
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
void readBig(athena::io::IStreamReader& in)
{ void readBig(athena::io::IStreamReader& in) {
transform.read34RowMajor(in); transform.read34RowMajor(in);
extents.readBig(in); extents.readBig(in);
} }
static COBBox ReadBig(athena::io::IStreamReader& in)
{ static COBBox ReadBig(athena::io::IStreamReader& in) {
COBBox out; COBBox out;
out.readBig(in); out.readBig(in);
return out; return out;
} }
#endif #endif
CTransform transform; CTransform transform;
CVector3f extents; CVector3f extents;
COBBox() {} COBBox() = default;
COBBox(const CAABox& aabb) : extents(aabb.extents()) { transform.origin = aabb.center(); } COBBox(const CAABox& aabb) : extents(aabb.extents()) { transform.origin = aabb.center(); }
COBBox(const CTransform& xf, const CVector3f& extents) : transform(xf), extents(extents) {} COBBox(const CTransform& xf, const CVector3f& extents) : transform(xf), extents(extents) {}
CAABox calculateAABox(const CTransform& worldXf = CTransform()) const; CAABox calculateAABox(const CTransform& worldXf = CTransform()) const;
static COBBox FromAABox(const CAABox& box, const CTransform& xf) static COBBox FromAABox(const CAABox& box, const CTransform& xf) {
{ const CVector3f extents = box.max - box.center();
const CVector3f extents = box.max - box.center(); const CTransform newXf = CTransform::Translate(box.center()) * xf;
const CTransform newXf = CTransform::Translate(box.center()) * xf; return COBBox(newXf, extents);
return COBBox(newXf, extents); }
}
bool OBBIntersectsBox(const COBBox& other) const; bool OBBIntersectsBox(const COBBox& other) const;
bool AABoxIntersectsBox(const CAABox& other) bool AABoxIntersectsBox(const CAABox& other) {
{ return OBBIntersectsBox(FromAABox(other, CTransform::Identity()));
return OBBIntersectsBox(FromAABox(other, CTransform::Identity())); }
}
}; };
} }

View File

@ -4,72 +4,67 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{ class CPlane {
class alignas(16) CPlane
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CPlane() : mSimd(1.0, 0.f, 0.f, 0.f) {}
inline CPlane() : a(1.f), b(0.f), c(0.f), d(0.f) {} CPlane(float a, float b, float c, float d) : mSimd(a, b, c, d) {}
CPlane(float a, float b, float c, float d) : a(a), b(b), c(c), d(d) {}
CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c)
{
vec = (b - a).cross(c - a).normalized();
d = a.dot(vec);
}
CPlane(const CVector3f& point, float displacement) CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c) {
{ mSimd = (b - a).cross(c - a).normalized().mSimd;
#if __SSE__ mSimd[3] = a.dot(normal());
mVec128 = point.mVec128; }
#else
a = point[0];
b = point[1];
c = point[2];
#endif
d = displacement;
}
float clipLineSegment(const CVector3f& a, const CVector3f& b) CPlane(const CVector3f& point, float displacement) {
{ mSimd = point.mSimd;
float mag = (b-a).dot(vec); mSimd[3] = displacement;
float dis = (-(vec.y - d)) / mag; }
return clamp(0.0f, dis, 1.0f);
}
inline void normalize() float clipLineSegment(const CVector3f& a, const CVector3f& b) {
{ float mag = (b - a).dot(normal());
float nd = d; float dis = (-(y() - d())) / mag;
float mag = vec.magnitude(); return clamp(0.0f, dis, 1.0f);
mag = 1.f / mag; }
vec = vec * mag;
d = nd * mag;
}
float pointToPlaneDist(const CVector3f& pos) const void normalize() {
{ float nd = d();
return pos.dot(vec) - d; auto norm = normal();
} float mag = norm.magnitude();
mag = 1.f / mag;
mSimd = (norm * mag).mSimd;
mSimd[3] = nd * mag;
}
bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const; float pointToPlaneDist(const CVector3f& pos) const {
return pos.dot(normal()) - d();
}
const CVector3f& normal() const { return vec; } bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const;
inline float& operator[](size_t idx) { assert(idx < 4); return p[idx]; } CVector3f normal() const { return mSimd; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return p[idx]; }
union { zeus::simd<float>::reference operator[](size_t idx) {
struct assert(idx < 4);
{ return mSimd[idx];
float a, b, c, d; }
};
float p[4]; float operator[](size_t idx) const {
CVector3f vec; assert(idx < 4);
#ifdef __SSE__ return mSimd[idx];
__m128 mVec128; }
#endif
}; float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float d() const { return mSimd[3]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference d() { return mSimd[3]; }
zeus::simd<float> mSimd;
}; };
} }

View File

@ -6,121 +6,117 @@
#include <cstdio> #include <cstdio>
#include <cmath> #include <cmath>
namespace zeus namespace zeus {
/**
 * @brief Selects which kind of projection a CProjection currently holds.
 */
enum class EProjType {
  None = 0,
  Orthographic = 1,
  Perspective = 2
};
/**
 * @brief Plain parameter bundle describing an orthographic projection.
 *
 * All six extents are public and default to top/right/near = 1 and
 * bottom/left/far = -1.
 */
class SProjOrtho {
public:
  float top, bottom, left, right, znear, zfar;

  /* explicit: with every argument defaulted this would otherwise act as a float -> SProjOrtho conversion */
  explicit SProjOrtho(float p_top = 1.0f, float p_bottom = -1.0f, float p_left = -1.0f, float p_right = 1.0f,
                      float p_near = 1.0f, float p_far = -1.0f)
  : top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far) {}
};
struct SProjPersp
{ struct SProjPersp {
float fov, aspect, znear, zfar; float fov, aspect, znear, zfar;
SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
{ : fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) {
} }
}; };
/* Shared SProjOrtho constant; only declared here, its definition (and exact values) live elsewhere. */
extern const SProjOrtho kOrthoIdentity;
class alignas(16) CProjection class CProjection {
{ void _updateCachedMatrix();
void _updateCachedMatrix();
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CProjection() {
m_projType = EProjType::Orthographic;
m_ortho = SProjOrtho();
m_mtx = CMatrix4f::skIdentityMatrix4f;
}
CProjection() CProjection(const CProjection& other) { *this = other; }
{
m_projType = EProjType::Orthographic;
m_ortho = SProjOrtho();
m_mtx = CMatrix4f::skIdentityMatrix4f;
}
CProjection(const CProjection& other) { *this = other; }
CProjection(const SProjOrtho& ortho) { setOrtho(ortho); }
CProjection(const SProjPersp& persp) { setPersp(persp); }
inline CProjection& operator=(const CProjection& other) CProjection(const SProjOrtho& ortho) { setOrtho(ortho); }
{
if (this != &other)
{
m_projType = other.m_projType;
m_ortho = other.m_ortho;
m_mtx = other.m_mtx;
}
return *this;
}
inline void setOrtho(const SProjOrtho& ortho) CProjection(const SProjPersp& persp) { setPersp(persp); }
{
m_projType = EProjType::Orthographic;
m_ortho = ortho;
_updateCachedMatrix();
}
inline void setPersp(const SProjPersp& persp)
{
m_projType = EProjType::Perspective;
m_persp = persp;
_updateCachedMatrix();
}
inline EProjType getType() const { return m_projType; } CProjection& operator=(const CProjection& other) {
inline const SProjOrtho& getOrtho() const if (this != &other) {
{ m_projType = other.m_projType;
if (m_projType != EProjType::Orthographic) m_ortho = other.m_ortho;
{ m_mtx = other.m_mtx;
std::fprintf(stderr, "attempted to access orthographic structure of non-ortho projection");
std::abort();
}
return m_ortho;
}
inline const SProjPersp& getPersp() const
{
if (m_projType != EProjType::Perspective)
{
std::fprintf(stderr, "attempted to access perspective structure of non-persp projection");
std::abort();
}
return m_persp;
} }
return *this;
}
inline const CMatrix4f& getCachedMatrix() const { return m_mtx; } void setOrtho(const SProjOrtho& ortho) {
m_projType = EProjType::Orthographic;
m_ortho = ortho;
_updateCachedMatrix();
}
void setPersp(const SProjPersp& persp) {
m_projType = EProjType::Perspective;
m_persp = persp;
_updateCachedMatrix();
}
EProjType getType() const { return m_projType; }
const SProjOrtho& getOrtho() const {
#ifndef NDEBUG
if (m_projType != EProjType::Orthographic) {
std::fprintf(stderr, "attempted to access orthographic structure of non-ortho projection");
std::abort();
}
#endif
return m_ortho;
}
const SProjPersp& getPersp() const {
#ifndef NDEBUG
if (m_projType != EProjType::Perspective) {
std::fprintf(stderr, "attempted to access perspective structure of non-persp projection");
std::abort();
}
#endif
return m_persp;
}
const CMatrix4f& getCachedMatrix() const { return m_mtx; }
protected: protected:
/* Projection type */ /* Projection type */
EProjType m_projType; EProjType m_projType;
/* Projection intermediate */ /* Projection intermediate */
union { union {
#ifdef _MSC_VER #ifdef _MSC_VER
struct struct
{ {
SProjOrtho m_ortho;
};
struct
{
SProjPersp m_persp;
};
#else
SProjOrtho m_ortho; SProjOrtho m_ortho;
SProjPersp m_persp;
#endif
}; };
struct
{
SProjPersp m_persp;
};
#else
SProjOrtho m_ortho;
SProjPersp m_persp;
#endif
};
/* Cached projection matrix */ /* Cached projection matrix */
CMatrix4f m_mtx; CMatrix4f m_mtx;
}; };
} }

View File

@ -8,262 +8,303 @@
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "zeus/CRelAngle.hpp" #include "zeus/CRelAngle.hpp"
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include <athena/IStreamReader.hpp>
#endif #endif
namespace zeus namespace zeus {
{
static inline float normalize_angle(float angle) static float normalize_angle(float angle) {
{ if (angle > M_PIF)
if (angle > M_PIF) angle -= 2.f * M_PIF;
angle -= 2.f * M_PIF; else if (angle < -M_PIF)
else if (angle < -M_PIF) angle += 2.f * M_PIF;
angle += 2.f * M_PIF;
return angle; return angle;
} }
/* Forward declaration: CQuaternion::fromNUQuaternion takes a CNUQuaternion before that class is defined. */
class CNUQuaternion;
/** Unit quaternion, used for all quaternion arithmetic */ /** Unit quaternion, used for all quaternion arithmetic */
class alignas(16) CQuaternion class CQuaternion {
{
#if __atdna__ && ZE_ATHENA_TYPES
float clangVec __attribute__((__vector_size__(16)));
#endif
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
mSimd[0] = wi;
}
template <typename T>
CQuaternion(const simd<T>& s) : mSimd(s) {}
CQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {}
CQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : w(wi), x(vec.x), y(vec.y), z(vec.z) {}
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
inline void readBig(athena::io::IStreamReader& input)
{
w = input.readFloatBig();
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
}
CQuaternion(const atVec4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec.vec[1];
y = vec.vec[2];
z = vec.vec[3];
w = vec.vec[0];
#endif
}
operator atVec4f&() void readBig(athena::io::IStreamReader& input) {
{ simd_floats f;
return *reinterpret_cast<atVec4f*>(v); f[0] = input.readFloatBig();
} f[1] = input.readFloatBig();
operator const atVec4f&() const f[2] = input.readFloatBig();
{ f[3] = input.readFloatBig();
return *reinterpret_cast<const atVec4f*>(v); mSimd.copy_from(f);
} }
CQuaternion(const atVec4f& vec) : mSimd(vec.simd) {}
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
}
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
}
#endif #endif
CQuaternion(const CMatrix3f& mat); CQuaternion(const CMatrix3f& mat);
CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
CQuaternion(const CVector4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec[1];
y = vec[2];
z = vec[3];
w = vec[0];
#endif
}
CQuaternion(const CVector3f& vecA, const CVector3f& vecB) CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
{
CVector3f vecAN = vecA.normalized();
CVector3f vecBN = vecB.normalized();
CVector3f w = vecAN.cross(vecBN);
*this = CQuaternion(1.f + vecAN.dot(vecBN), w.x, w.y, w.z).normalized();
}
void fromVector3f(const CVector3f& vec); CQuaternion(const CVector4f& vec) : mSimd(vec.mSimd) {}
CQuaternion& operator=(const CQuaternion& q); CQuaternion(const CVector3f& vecA, const CVector3f& vecB) {
CQuaternion operator+(const CQuaternion& q) const; CVector3f vecAN = vecA.normalized();
CQuaternion operator-(const CQuaternion& q) const; CVector3f vecBN = vecB.normalized();
CQuaternion operator*(const CQuaternion& q) const; CVector3f w = vecAN.cross(vecBN);
CQuaternion operator/(const CQuaternion& q) const; *this = CQuaternion(1.f + vecAN.dot(vecBN), w).normalized();
CQuaternion operator*(float scale) const; }
CQuaternion operator/(float scale) const;
CQuaternion operator-() const;
const CQuaternion& operator+=(const CQuaternion& q);
const CQuaternion& operator-=(const CQuaternion& q);
const CQuaternion& operator*=(const CQuaternion& q);
const CQuaternion& operator*=(float scale);
const CQuaternion& operator/=(float scale);
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return w * w + x * x + y * y + z * z; }
void normalize() { *this /= magnitude(); }
CQuaternion normalized() const { return *this / magnitude(); }
void invert();
CQuaternion inverse() const;
/** void fromVector3f(const CVector3f& vec);
* @brief Set the rotation using axis angle notation
* @param axis The axis to rotate around
* @param angle The magnitude of the rotation in radians
* @return
*/
static inline CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{
return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f));
}
void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); } CQuaternion& operator=(const CQuaternion& q);
void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); }
void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); }
static inline CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v) CQuaternion operator+(const CQuaternion& q) const;
{
CQuaternion q = rotation * v;
q *= rotation.inverse();
return {q.x, q.y, q.z}; CQuaternion operator-(const CQuaternion& q) const;
}
static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng); CQuaternion operator*(const CQuaternion& q) const;
CVector3f transform(const CVector3f& v) const CQuaternion operator/(const CQuaternion& q) const;
{
CQuaternion r(0.f, v);
return (*this * r * inverse()).getImaginary();
}
CQuaternion log() const; CQuaternion operator*(float scale) const;
CQuaternion exp() const; CQuaternion operator/(float scale) const;
inline CTransform toTransform() const { return CTransform(CMatrix3f(*this)); } CQuaternion operator-() const;
inline CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
inline float dot(const CQuaternion& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
}
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t); const CQuaternion& operator+=(const CQuaternion& q);
static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1);
static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle);
inline float roll() const { return std::atan2(2.f * (x * y + w * z), w * w + x * x - y * y - z * z); } const CQuaternion& operator-=(const CQuaternion& q);
inline float pitch() const { return std::atan2(2.f * (y * z + w * x), w * w - x * x - y * y + z * z); } const CQuaternion& operator*=(const CQuaternion& q);
inline float yaw() const { return std::asin(-2.f * (x * z - w * y)); } const CQuaternion& operator*=(float scale);
CQuaternion buildEquivalent() const; const CQuaternion& operator/=(float scale);
zeus::CVector3f getImaginary() const { return {x, y, z}; } float magnitude() const { return std::sqrt(magSquared()); }
void setImaginary(const zeus::CVector3f& i) { x = i.x; y = i.y; z = i.z; }
CRelAngle angleFrom(const zeus::CQuaternion& other); float magSquared() const { return mSimd.dot4(mSimd); }
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; } void normalize() { *this /= magnitude(); }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; }
union CQuaternion normalized() const { return *this / magnitude(); }
{
__m128 mVec128;
struct
{
float w, x, y, z;
};
float v[4];
};
static const CQuaternion skNoRotation; void invert();
static CQuaternion fromNUQuaternion(const CNUQuaternion& q); CQuaternion inverse() const;
/**
* @brief Set the rotation using axis angle notation
* @param axis The axis to rotate around
* @param angle The magnitude of the rotation in radians
* @return
*/
static CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f));
}
void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); }
void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); }
void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); }
static CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v) {
CQuaternion q = rotation * v;
q *= rotation.inverse();
return {q.mSimd.shuffle<1, 2, 3, 3>()};
}
static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng);
CVector3f transform(const CVector3f& v) const {
CQuaternion r(0.f, v);
return (*this * r * inverse()).getImaginary();
}
CQuaternion log() const;
CQuaternion exp() const;
CTransform toTransform() const { return CTransform(CMatrix3f(*this)); }
CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
float dot(const CQuaternion& rhs) const {
return mSimd.dot4(rhs.mSimd);
}
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1);
static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle);
float roll() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[1] * f[2] + f[0] * f[3]), f[0] * f[0] + f[1] * f[1] - f[2] * f[2] - f[3] * f[3]);
}
float pitch() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[2] * f[3] + f[0] * f[1]), f[0] * f[0] - f[1] * f[1] - f[2] * f[2] + f[3] * f[3]);
}
float yaw() const {
simd_floats f(mSimd);
return std::asin(-2.f * (f[1] * f[3] - f[0] * f[2]));
}
CQuaternion buildEquivalent() const;
zeus::CVector3f getImaginary() const { return mSimd.shuffle<1, 2, 3, 3>(); }
void setImaginary(const zeus::CVector3f& i) {
x() = i.x();
y() = i.y();
z() = i.z();
}
CRelAngle angleFrom(const zeus::CQuaternion& other);
simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float w() const { return mSimd[0]; }
float x() const { return mSimd[1]; }
float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
static const CQuaternion skNoRotation;
static CQuaternion fromNUQuaternion(const CNUQuaternion& q);
}; };
/** Non-unit quaternion, no guarantee that it's normalized. /** Non-unit quaternion, no guarantee that it's normalized.
* Converting to CQuaternion will perform normalize operation. * Converting to CQuaternion will perform normalize operation.
*/ */
class alignas(16) CNUQuaternion class CNUQuaternion {
{
public: public:
CNUQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {} CNUQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CNUQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CNUQuaternion(float win, const zeus::CVector3f& vec) { w = win; x = vec.x; y = vec.y; z = vec.z; }
CNUQuaternion(const CQuaternion& other) { w = other.w; x = other.x; y = other.y; z = other.z; }
CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {}
static inline CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{
return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle));
}
float magnitude() const { return std::sqrt(magSquared()); } CNUQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
float magSquared() const { return w * w + x * x + y * y + z * z; }
void normalize()
{
float magDiv = 1.f / magnitude();
w *= magDiv;
x *= magDiv;
y *= magDiv;
z *= magDiv;
}
CNUQuaternion normalized() const
{
float magDiv = 1.f / magnitude();
return { w * magDiv, x * magDiv, y * magDiv, z * magDiv };
}
CNUQuaternion operator*(const CNUQuaternion& q) const; CNUQuaternion(float win, const zeus::CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
CNUQuaternion operator*(float f) const; w() = win;
const CNUQuaternion& operator+=(const CNUQuaternion& q); }
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; } CNUQuaternion(const CQuaternion& other) : mSimd(other.mSimd) {}
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; }
union CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {}
{
__m128 mVec128; CNUQuaternion(const simd<float>& s) : mSimd(s) {}
struct
{ static CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
float w, x, y, z; return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle));
}; }
};
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return mSimd.dot4(mSimd); }
void normalize() {
float magDiv = 1.f / magnitude();
mSimd *= magDiv;
}
CNUQuaternion normalized() const {
float magDiv = 1.f / magnitude();
return mSimd * simd<float>(magDiv);
}
CNUQuaternion operator*(const CNUQuaternion& q) const;
CNUQuaternion operator*(float f) const;
const CNUQuaternion& operator+=(const CNUQuaternion& q);
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float w() const { return mSimd[0]; }
float x() const { return mSimd[1]; }
float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
}; };
/**
 * @brief Converts a non-unit quaternion to a CQuaternion by normalizing it.
 * @param q Source quaternion; it is not modified.
 * @return CQuaternion built from the lanes of q.normalized().
 */
inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q) {
  auto norm = q.normalized();
  /* The simd value converts implicitly through CQuaternion's simd constructor. */
  return norm.mSimd;
}
/* Scalar-on-the-left arithmetic overloads. Declarations only; the definitions are not part of this header. */
CQuaternion operator+(float lhs, const CQuaternion& rhs);

CQuaternion operator-(float lhs, const CQuaternion& rhs);

CQuaternion operator*(float lhs, const CQuaternion& rhs);

CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs);
} }

View File

@ -1,32 +1,30 @@
#pragma once #pragma once
#include "zeus/CVector2f.hpp" #include "zeus/CVector2f.hpp"
namespace zeus namespace zeus {
{ class CRectangle {
class CRectangle
{
public: public:
CRectangle() {} CRectangle() {}
CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {}
inline bool contains(const CVector2f& point) const CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {}
{
if (point.x < position.x || point.x > position.x + size.x)
return false;
if (point.y < position.y || point.y > position.y + size.y)
return false;
return true; bool contains(const CVector2f& point) const {
} if (point.x() < position.x() || point.x() > position.x() + size.x())
return false;
if (point.y() < position.y() || point.y() > position.y() + size.y())
return false;
inline bool intersects(const CRectangle& rect) const return true;
{ }
return !(position.x > rect.position.x + rect.size.x || rect.position.x > position.x + size.x ||
position.y > rect.position.y + rect.size.y || rect.position.y > position.y + size.y);
}
CVector2f position; bool intersects(const CRectangle& rect) const {
CVector2f size; return !(position.x() > rect.position.x() + rect.size.x() || rect.position.x() > position.x() + size.x() ||
position.y() > rect.position.y() + rect.size.y() || rect.position.y() > position.y() + size.y());
}
CVector2f position;
CVector2f size;
}; };
} }

View File

@ -4,51 +4,92 @@
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cmath> #include <cmath>
namespace zeus namespace zeus {
{
/** /**
* @brief The CRelAngle class represents relative angle in radians * @brief The CRelAngle class represents relative angle in radians
*/ */
struct CRelAngle struct CRelAngle {
{ float angle = 0.f;
float angle = 0.f;
static float MakeRelativeAngle(float angle) static float MakeRelativeAngle(float angle) {
{ float absAngle = std::fabs(angle);
float absAngle = std::fabs(angle); if (absAngle == 2.f * M_PIF)
if (absAngle == 2.f * M_PIF) return std::copysign(absAngle, angle);
return std::copysign(absAngle, angle); float ret = absAngle - std::floor(absAngle / (2.f * M_PIF)) * (2.f * M_PIF);
float ret = absAngle - std::floor(absAngle / (2.f * M_PIF)) * (2.f * M_PIF); return std::copysign(ret, angle);
return std::copysign(ret, angle); }
}
CRelAngle() = default; CRelAngle() = default;
CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {}
CRelAngle& operator=(float ang) { angle = MakeRelativeAngle(ang); return *this; }
CRelAngle& operator=(const CRelAngle& ang) { angle = ang.angle; return *this; }
float asDegrees() const { return radToDeg(angle); }
float asRadians() const { return angle; }
float arcCosine() const { return std::acos(angle); }
static CRelAngle FromDegrees(float angle) CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {}
{
CRelAngle ret;
ret.angle = MakeRelativeAngle(degToRad(angle));
return ret;
}
operator float() const { return angle; } CRelAngle& operator=(float ang) {
static CRelAngle FromRadians(float angle) { return CRelAngle(angle); } angle = MakeRelativeAngle(ang);
return *this;
}
bool operator <(const CRelAngle& other) const { return angle < other.angle; } CRelAngle& operator=(const CRelAngle& ang) {
CRelAngle& operator +=(const CRelAngle& other) { angle = MakeRelativeAngle(angle + other.angle); return *this; } angle = ang.angle;
CRelAngle& operator +=(float r) { angle = MakeRelativeAngle(angle + r); return *this; } return *this;
CRelAngle& operator -=(const CRelAngle& other) { angle = MakeRelativeAngle(angle - other.angle); return *this; } }
CRelAngle& operator -=(float r) { angle = MakeRelativeAngle(angle - r); return *this; }
CRelAngle& operator *=(const CRelAngle& other) { angle = MakeRelativeAngle(angle * other.angle); return *this; } float asDegrees() const { return radToDeg(angle); }
CRelAngle& operator *=(float r) { angle = MakeRelativeAngle(angle * r); return *this; }
CRelAngle& operator /=(const CRelAngle& other) { angle = MakeRelativeAngle(angle / other.angle); return *this; } float asRadians() const { return angle; }
CRelAngle& operator /=(float r) { angle = MakeRelativeAngle(angle / r); return *this; }
float arcCosine() const { return std::acos(angle); }
static CRelAngle FromDegrees(float angle) {
CRelAngle ret;
ret.angle = MakeRelativeAngle(degToRad(angle));
return ret;
}
operator float() const { return angle; }
static CRelAngle FromRadians(float angle) { return CRelAngle(angle); }
bool operator<(const CRelAngle& other) const { return angle < other.angle; }
CRelAngle& operator+=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle + other.angle);
return *this;
}
CRelAngle& operator+=(float r) {
angle = MakeRelativeAngle(angle + r);
return *this;
}
CRelAngle& operator-=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle - other.angle);
return *this;
}
CRelAngle& operator-=(float r) {
angle = MakeRelativeAngle(angle - r);
return *this;
}
CRelAngle& operator*=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle * other.angle);
return *this;
}
CRelAngle& operator*=(float r) {
angle = MakeRelativeAngle(angle * r);
return *this;
}
CRelAngle& operator/=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle / other.angle);
return *this;
}
CRelAngle& operator/=(float r) {
angle = MakeRelativeAngle(angle / r);
return *this;
}
}; };
} }

View File

@ -2,25 +2,20 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CSphere {
class alignas(16) CSphere
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {}
CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {} CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); }
inline CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); } bool intersects(const CSphere& other) {
float dist = (position - other.position).magnitude();
return dist < (radius + other.radius);
}
inline bool intersects(const CSphere& other) CVector3f position;
{ float radius;
float dist = (position - other.position).magnitude();
return dist < (radius + other.radius);
}
CVector3f position;
float radius;
}; };
} }

View File

@ -8,273 +8,277 @@
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
namespace zeus namespace zeus {
{ class CTransform {
class alignas(16) CTransform
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CTransform() : basis(false) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero)
: basis(basis), origin(offset) {}
CTransform() : basis(false) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero) : basis(basis), origin(offset) {}
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CTransform(const atVec4f* mtx) : basis(mtx[0], mtx[1], mtx[2]), origin(mtx[0].vec[3], mtx[1].vec[3], mtx[2].vec[3]) {}
void read34RowMajor(athena::io::IStreamReader& r) CTransform(const atVec4f* mtx)
{ : basis(mtx[0], mtx[1], mtx[2])
atVec4f r0 = r.readVec4fBig(); , origin(mtx[0].simd[3], mtx[1].simd[3], mtx[2].simd[3]) {}
atVec4f r1 = r.readVec4fBig();
atVec4f r2 = r.readVec4fBig(); void read34RowMajor(athena::io::IStreamReader& r) {
basis = CMatrix3f(r0, r1, r2); atVec4f r0 = r.readVec4fBig();
basis.transpose(); atVec4f r1 = r.readVec4fBig();
origin = CVector3f(r0.vec[3], r1.vec[3], r2.vec[3]); atVec4f r2 = r.readVec4fBig();
} basis = CMatrix3f(r0, r1, r2);
basis.transpose();
origin = CVector3f(r0.simd[3], r1.simd[3], r2.simd[3]);
}
#endif #endif
/* Column constructor */ /* Column constructor */
CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3) CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3)
: basis(c0, c1, c2), origin(c3) {} : basis(c0, c1, c2), origin(c3) {}
static inline CTransform Identity() { return CTransform(CMatrix3f::skIdentityMatrix3f); } static CTransform Identity() {
return CTransform(CMatrix3f::skIdentityMatrix3f);
}
inline bool operator ==(const CTransform& other) const bool operator==(const CTransform& other) const {
{ return origin == other.origin && basis == other.basis;
return origin == other.origin && basis == other.basis; }
}
inline CTransform operator*(const CTransform& rhs) const CTransform operator*(const CTransform& rhs) const {
{ return CTransform(basis * rhs.basis, origin + (basis * rhs.origin));
return CTransform(basis * rhs.basis, origin + (basis * rhs.origin)); }
}
inline CTransform inverse() const CTransform inverse() const {
{ CMatrix3f inv = basis.inverted();
CMatrix3f inv = basis.inverted(); return CTransform(inv, inv * -origin);
return CTransform(inv, inv * -origin); }
}
static inline CTransform Translate(const CVector3f& position) { return {CMatrix3f::skIdentityMatrix3f, position}; } static CTransform Translate(const CVector3f& position) {
return {CMatrix3f::skIdentityMatrix3f, position};
}
static inline CTransform Translate(float x, float y, float z) { return Translate({x, y, z}); } static CTransform Translate(float x, float y, float z) {
return Translate({x, y, z});
}
inline CTransform operator+(const CVector3f& other) { return CTransform(basis, origin + other); } CTransform operator+(const CVector3f& other) {
return CTransform(basis, origin + other);
}
inline CTransform& operator+=(const CVector3f& other) CTransform& operator+=(const CVector3f& other) {
{ origin += other;
origin += other; return *this;
return *this; }
}
inline CTransform operator-(const CVector3f& other) { return CTransform(basis, origin - other); } CTransform operator-(const CVector3f& other) {
return CTransform(basis, origin - other);
}
inline CTransform& operator-=(const CVector3f& other) CTransform& operator-=(const CVector3f& other) {
{ origin -= other;
origin -= other; return *this;
return *this; }
}
inline zeus::CVector3f rotate(const CVector3f& vec) const { return basis * vec; } zeus::CVector3f rotate(const CVector3f& vec) const {
return basis * vec;
}
static inline CTransform RotateX(float theta) static CTransform RotateX(float theta) {
{ float sinT = std::sin(theta);
float sinT = std::sin(theta); float cosT = std::cos(theta);
float cosT = std::cos(theta); return CTransform(CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}}, simd<float>{0.f, cosT, sinT, 0.f},
TVectorUnion{{0.f, cosT, sinT, 0.f}}, simd<float>{0.f, -sinT, cosT, 0.f}));
TVectorUnion{{0.f, -sinT, cosT, 0.f}})); }
}
static inline CTransform RotateY(float theta) static CTransform RotateY(float theta) {
{ float sinT = std::sin(theta);
float sinT = std::sin(theta); float cosT = std::cos(theta);
float cosT = std::cos(theta); return CTransform(CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}}, simd<float>{0.f, 1.f, 0.f, 0.f},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}}, simd<float>{sinT, 0.f, cosT, 0.f}));
TVectorUnion{{sinT, 0.f, cosT, 0.f}})); }
}
static inline CTransform RotateZ(float theta) static CTransform RotateZ(float theta) {
{ float sinT = std::sin(theta);
float sinT = std::sin(theta); float cosT = std::cos(theta);
float cosT = std::cos(theta); return CTransform(CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}}, simd<float>{-sinT, cosT, 0.f, 0.f},
TVectorUnion{{-sinT, cosT, 0.f, 0.f}}, simd<float>{0.f, 0.f, 1.f, 0.f}));
TVectorUnion{{0.f, 0.f, 1.f, 0.f}})); }
}
inline void rotateLocalX(float theta) void rotateLocalX(float theta) {
{ float sinT = std::sin(theta);
float sinT = std::sin(theta); float cosT = std::cos(theta);
float cosT = std::cos(theta);
zeus::CVector3f b2 = basis[2] * sinT; zeus::CVector3f b2 = basis[2] * sinT;
zeus::CVector3f b1 = basis[1] * sinT; zeus::CVector3f b1 = basis[1] * sinT;
zeus::CVector3f cosV(cosT); zeus::CVector3f cosV(cosT);
basis[1] *= cosV; basis[1] *= cosV;
basis[2] *= cosV; basis[2] *= cosV;
basis[1] += b2; basis[1] += b2;
basis[2] -= b1; basis[2] -= b1;
} }
inline void rotateLocalY(float theta) void rotateLocalY(float theta) {
{ float sinT = std::sin(theta);
float sinT = std::sin(theta); float cosT = std::cos(theta);
float cosT = std::cos(theta);
zeus::CVector3f b0 = basis[0] * sinT; zeus::CVector3f b0 = basis[0] * sinT;
zeus::CVector3f b2 = basis[2] * sinT; zeus::CVector3f b2 = basis[2] * sinT;
zeus::CVector3f cosV(cosT); zeus::CVector3f cosV(cosT);
basis[0] *= cosV; basis[0] *= cosV;
basis[2] *= cosV; basis[2] *= cosV;
basis[2] += b0; basis[2] += b0;
basis[0] -= b2; basis[0] -= b2;
} }
inline void rotateLocalZ(float theta) void rotateLocalZ(float theta) {
{ float sinT = std::sin(theta);
float sinT = std::sin(theta); float cosT = std::cos(theta);
float cosT = std::cos(theta);
zeus::CVector3f b0 = basis[0] * sinT; zeus::CVector3f b0 = basis[0] * sinT;
zeus::CVector3f b1 = basis[1] * sinT; zeus::CVector3f b1 = basis[1] * sinT;
zeus::CVector3f cosV(cosT); zeus::CVector3f cosV(cosT);
basis[0] *= cosV; basis[0] *= cosV;
basis[1] *= cosV; basis[1] *= cosV;
basis[0] += b1; basis[0] += b1;
basis[1] -= b0; basis[1] -= b0;
} }
inline CVector3f transposeRotate(const CVector3f& in) const CVector3f transposeRotate(const CVector3f& in) const {
{ return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in));
return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in)); }
}
inline void scaleBy(float factor) void scaleBy(float factor) {
{ CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor)));
CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor))); *this = *this * xfrm;
*this = *this * xfrm; }
}
static inline CTransform Scale(const CVector3f& factor) static CTransform Scale(const CVector3f& factor) {
{ return CTransform(CMatrix3f(simd<float>{factor.x(), 0.f, 0.f, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{factor.x, 0.f, 0.f, 0.f}}, simd<float>{0.f, factor.y(), 0.f, 0.f},
TVectorUnion{{0.f, factor.y, 0.f, 0.f}}, simd<float>{0.f, 0.f, factor.z(), 0.f}));
TVectorUnion{{0.f, 0.f, factor.z, 0.f}})); }
}
static inline CTransform Scale(float x, float y, float z) static CTransform Scale(float x, float y, float z) {
{ return CTransform(CMatrix3f(simd<float>{x, 0.f, 0.f, 0.f},
return CTransform( simd<float>{0.f, y, 0.f, 0.f},
CMatrix3f(TVectorUnion{{x, 0.f, 0.f, 0.f}}, simd<float>{0.f, 0.f, z, 0.f}));
TVectorUnion{{0.f, y, 0.f, 0.f}}, }
TVectorUnion{{0.f, 0.f, z, 0.f}}));
}
static inline CTransform Scale(float factor) static CTransform Scale(float factor) {
{ return CTransform(CMatrix3f(simd<float>{factor, 0.f, 0.f, 0.f},
return CTransform(CMatrix3f(TVectorUnion{{factor, 0.f, 0.f, 0.f}}, simd<float>{0.f, factor, 0.f, 0.f},
TVectorUnion{{0.f, factor, 0.f, 0.f}}, simd<float>{0.f, 0.f, factor, 0.f}));
TVectorUnion{{0.f, 0.f, factor, 0.f}})); }
}
inline CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const {
{ CTransform ret;
CTransform ret; ret.basis = basis * xfrm.basis;
ret.basis = basis * xfrm.basis; return ret;
return ret; }
}
inline CTransform getRotation() const CTransform getRotation() const {
{ CTransform ret = *this;
CTransform ret = *this; ret.origin.zeroOut();
ret.origin.zeroOut(); return ret;
return ret; }
}
void setRotation(const CMatrix3f& mat) { basis = mat; }
void setRotation(const CTransform& xfrm) { setRotation(xfrm.basis); }
/** void setRotation(const CMatrix3f& mat) {
* @brief buildMatrix3f Returns the stored matrix basis = mat;
* buildMatrix3f is here for compliance with Retro's Math API }
* @return The Matrix (Neo, you are the one)
*/
inline const CMatrix3f& buildMatrix3f() const { return basis; }
inline CVector3f operator*(const CVector3f& other) const { return origin + basis * other; } void setRotation(const CTransform& xfrm) {
setRotation(xfrm.basis);
}
inline CMatrix4f toMatrix4f() const /**
{ * @brief buildMatrix3f Returns the stored matrix
CMatrix4f ret(basis[0], basis[1], basis[2], origin); * buildMatrix3f is here for compliance with Retro's Math API
ret[0][3] = 0.0f; * @return The Matrix (Neo, you are the one)
ret[1][3] = 0.0f; */
ret[2][3] = 0.0f; const CMatrix3f& buildMatrix3f() const {
ret[3][3] = 1.0f; return basis;
return ret; }
}
inline CVector3f upVector() const CVector3f operator*(const CVector3f& other) const {
{ return origin + basis * other;
return basis.vec[2]; }
}
inline CVector3f frontVector() const CMatrix4f toMatrix4f() const {
{ CMatrix4f ret(basis[0], basis[1], basis[2], origin);
return basis.vec[1]; ret[0][3] = 0.0f;
} ret[1][3] = 0.0f;
ret[2][3] = 0.0f;
ret[3][3] = 1.0f;
return ret;
}
inline CVector3f rightVector() const CVector3f upVector() const {
{ return basis.m[2];
return basis.vec[0]; }
}
inline void orthonormalize() CVector3f frontVector() const {
{ return basis.m[1];
basis[0].normalize(); }
basis[2] = basis[0].cross(basis[1]);
basis[2].normalize();
basis[1] = basis[2].cross(basis[0]);
}
void printMatrix() const CVector3f rightVector() const {
{ return basis.m[0];
printf("%f %f %f %f\n" }
"%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n",
basis[0][0], basis[1][0], basis[2][0], origin[0],
basis[0][1], basis[1][1], basis[2][1], origin[1],
basis[0][2], basis[1][2], basis[2][2], origin[2],
0.f, 0.f, 0.f, 1.f);
}
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) void orthonormalize() {
{ basis[0].normalize();
uint32_t i; basis[2] = basis[0].cross(basis[1]);
if (uVec.y < uVec.x || uVec.z < uVec.y || uVec.z < uVec.x) basis[2].normalize();
i = 2; basis[1] = basis[2].cross(basis[0]);
else }
i = 1;
CVector3f v = CVector3f::skZero; void printMatrix() const {
v[i] = 1.f; printf("%f %f %f %f\n"
CUnitVector3f newUVec(uVec.cross(v)); "%f %f %f %f\n"
return {newUVec, uVec, uVec.cross(newUVec), CVector3f::skZero}; "%f %f %f %f\n"
} "%f %f %f %f\n",
basis[0][0], basis[1][0], basis[2][0], origin[0],
basis[0][1], basis[1][1], basis[2][1], origin[1],
basis[0][2], basis[1][2], basis[2][2], origin[2],
0.f, 0.f, 0.f, 1.f);
}
CMatrix3f basis; static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) {
CVector3f origin; uint32_t i;
if (uVec.y() < uVec.x() || uVec.z() < uVec.y() || uVec.z() < uVec.x())
i = 2;
else
i = 1;
CVector3f v = CVector3f::skZero;
v[i] = 1.f;
CUnitVector3f newUVec(uVec.cross(v));
return {newUVec, uVec, uVec.cross(newUVec), CVector3f::skZero};
}
CMatrix3f basis;
CVector3f origin;
}; };
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) { return CTransform(CMatrix3f(scale)); } static inline CTransform CTransformFromScaleVector(const CVector3f& scale) {
return CTransform(CMatrix3f(scale));
}
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec); CTransform CTransformFromEditorEuler(const CVector3f& eulerVec);
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin); CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin);
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle); CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle);
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp); CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp);
} }

View File

@ -2,23 +2,19 @@
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{ class CUnitVector3f : public CVector3f {
class alignas(16) CUnitVector3f : public CVector3f
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); CUnitVector3f() : CVector3f(0.f, 1.f, 0.f) {}
CUnitVector3f() : CVector3f(0, 1, 0) {} CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z) {
CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z) if (doNormalize && canBeNormalized())
{ normalize();
if (doNormalize && canBeNormalized()) }
normalize();
} CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec) {
CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec) if (doNormalize && canBeNormalized())
{ normalize();
if (doNormalize && canBeNormalized()) }
normalize();
}
}; };
} }

View File

@ -2,431 +2,260 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cassert> #include <cassert>
namespace zeus namespace zeus {
{ class CVector2f {
class alignas(16) CVector2f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(8)));
#endif
public: public:
// ZE_DECLARE_ALIGNED_ALLOCATOR(); simd<float> mSimd;
union { CVector2f() : mSimd(0.f) {}
struct
{ template <typename T>
float x, y; CVector2f(const simd<T>& s) : mSimd(s) {}
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
inline CVector2f() { zeroOut(); }
#if __SSE__
CVector2f(const __m128& mVec128) : mVec128(mVec128)
{
v[2] = 0.0f;
v[3] = 0.0f;
}
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector2f(const atVec2f& vec)
#if __SSE__ CVector2f(const atVec2f& vec) : mSimd(vec.simd) {}
: mVec128(vec.mVec128)
{ operator atVec2f&() {
} return *reinterpret_cast<atVec2f*>(this);
#else }
{
x = vec.vec[0], y = vec.vec[1], v[2] = 0.0f, v[3] = 0.0f; operator const atVec2f&() const {
} return *reinterpret_cast<const atVec2f*>(this);
}
void readBig(athena::io::IStreamReader& input) {
mSimd[0] = input.readFloatBig();
mSimd[1] = input.readFloatBig();
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
static CVector2f ReadBig(athena::io::IStreamReader& input) {
CVector2f ret;
ret.readBig(input);
return ret;
}
#endif #endif
operator atVec2f&() explicit CVector2f(float xy) { splat(xy); }
{
return *reinterpret_cast<atVec2f*>(v);
}
operator const atVec2f&() const
{
return *reinterpret_cast<const atVec2f*>(v);
}
void readBig(athena::io::IStreamReader& input) void assign(float x, float y) {
{ mSimd[0] = x;
x = input.readFloatBig(); mSimd[1] = y;
y = input.readFloatBig(); mSimd[2] = 0.0f;
v[2] = 0.0f; mSimd[3] = 0.0f;
v[3] = 0.0f; }
}
static CVector2f ReadBig(athena::io::IStreamReader& input) CVector2f(float x, float y) { assign(x, y); }
{
CVector2f ret;
ret.readBig(input);
return ret;
}
#endif
explicit CVector2f(float xy) { splat(xy); } bool operator==(const CVector2f& rhs) const {
void assign(float x, float y) return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1];
{ }
v[0] = x;
v[1] = y;
v[2] = 0.0f;
v[3] = 0.0f;
}
CVector2f(float x, float y) { assign(x, y); }
inline bool operator==(const CVector2f& rhs) const { return (x == rhs.x && y == rhs.y); } bool operator!=(const CVector2f& rhs) const {
inline bool operator!=(const CVector2f& rhs) const { return !(*this == rhs); } return mSimd[0] != rhs.mSimd[0] || mSimd[1] != rhs.mSimd[1];
inline bool operator<(const CVector2f& rhs) const }
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x < rhs.x || y < rhs.y);
#endif
}
inline bool operator<=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x <= rhs.x || y <= rhs.y);
#endif
}
inline bool operator>(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x > rhs.x || y > rhs.y);
#endif
}
inline bool operator>=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x >= rhs.x || y >= rhs.y);
#endif
}
inline CVector2f operator+(const CVector2f& rhs) const bool operator<(const CVector2f& rhs) const {
{ return mSimd[0] < rhs.mSimd[0] && mSimd[1] < rhs.mSimd[1];
#if __SSE__ }
return CVector2f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x + rhs.x, y + rhs.y);
#endif
}
inline CVector2f operator-(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x - rhs.x, y - rhs.y);
#endif
}
inline CVector2f operator-() const
{
#if __SSE__
return CVector2f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector2f(-x, -y);
#endif
}
inline CVector2f operator*(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x * rhs.x, y * rhs.y);
#endif
}
inline CVector2f operator/(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x / rhs.x, y / rhs.y);
#endif
}
inline CVector2f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector2f(x + val, y + val);
#endif
}
inline CVector2f operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector2f(x - val, y - val);
#endif
}
inline CVector2f operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * val, y * val);
#endif
}
inline CVector2f operator/(float val) const
{
float ooval = 1.f / val;
#if __SSE__
TVectorUnion splat = {{ooval, ooval, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * ooval, y * ooval);
#endif
}
inline const CVector2f& operator+=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
#endif
return *this;
}
inline const CVector2f& operator-=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
#endif
return *this;
}
inline const CVector2f& operator*=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
#endif
return *this;
}
inline const CVector2f& operator/=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
#endif
return *this;
}
inline const CVector2f& operator+=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_add_ps(mVec128, splat.mVec128);
#else
x += rhs;
y += rhs;
#endif
return *this;
}
inline const CVector2f& operator-=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_sub_ps(mVec128, splat.mVec128);
#else
x -= rhs;
y -= rhs;
#endif
return *this;
}
inline const CVector2f& operator*=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= rhs;
y *= rhs;
#endif
return *this;
}
inline const CVector2f& operator/=(float rhs)
{
float oorhs = 1.f / rhs;
#if __SSE__
TVectorUnion splat = {{oorhs, oorhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= oorhs;
y *= oorhs;
#endif
return *this;
}
inline void normalize()
{
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector2f(mag);
}
inline CVector2f normalized() const bool operator<=(const CVector2f& rhs) const {
{ return mSimd[0] <= rhs.mSimd[0] && mSimd[1] <= rhs.mSimd[1];
float mag = magnitude(); }
mag = 1.f / mag;
return *this * mag;
}
inline CVector2f perpendicularVector() const { return {-y, x}; } bool operator>(const CVector2f& rhs) const {
return mSimd[0] > rhs.mSimd[0] && mSimd[1] > rhs.mSimd[1];
}
inline float cross(const CVector2f& rhs) const { return (x * rhs.y) - (y * rhs.x); } bool operator>=(const CVector2f& rhs) const {
inline float dot(const CVector2f& rhs) const return mSimd[0] >= rhs.mSimd[0] && mSimd[1] >= rhs.mSimd[1];
{ }
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1];
#endif
#else
return (x * rhs.x) + (y * rhs.y);
#endif
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1];
#endif
#else
return x * x + y * y;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
inline void zeroOut() CVector2f operator+(const CVector2f& rhs) const {
{ return mSimd + rhs.mSimd;
*this = CVector2f::skZero; }
}
inline void splat(float xy) CVector2f operator-(const CVector2f& rhs) const {
{ return mSimd - rhs.mSimd;
#if __SSE__ }
TVectorUnion splat = {{xy, xy, 0.0f, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xy;
v[1] = xy;
v[2] = 0.0f;
v[3] = 0.0f;
#endif
}
static float getAngleDiff(const CVector2f& a, const CVector2f& b); CVector2f operator-() const {
return -mSimd;
}
static inline CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) { return (a + (b - a) * t); } CVector2f operator*(const CVector2f& rhs) const {
static inline CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) { return lerp(a, b, t).normalized(); } return mSimd * rhs.mSimd;
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t); }
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; } CVector2f operator/(const CVector2f& rhs) const {
return mSimd / rhs.mSimd;
}
inline bool canBeNormalized() const CVector2f operator+(float val) const {
{ return mSimd + simd<float>(val);
if (std::isinf(x) || std::isinf(y)) }
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON;
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; } CVector2f operator-(float val) const {
return mSimd - simd<float>(val);
}
inline bool isEqu(const CVector2f& other, float epsilon = 1.1920929e-7f) CVector2f operator*(float val) const {
{ return mSimd * simd<float>(val);
const CVector2f diffVec = other - *this; }
return (diffVec.x <= epsilon && diffVec.y <= epsilon);
}
inline float& operator[](size_t idx) { assert(idx < 2); return (&x)[idx]; } CVector2f operator/(float val) const {
inline const float& operator[](size_t idx) const { assert(idx < 2); return (&x)[idx]; } float ooval = 1.f / val;
return mSimd * simd<float>(ooval);
}
static const CVector2f skOne; const CVector2f& operator+=(const CVector2f& rhs) {
static const CVector2f skNegOne; mSimd += rhs.mSimd;
static const CVector2f skZero; return *this;
}
/** In-place subtraction of another vector's lanes; yields this vector so calls can be chained. */
const CVector2f& operator-=(const CVector2f& rhs) {
  CVector2f& self = *this;
  self.mSimd -= rhs.mSimd;
  return self;
}
/** In-place lane-by-lane multiplication with another vector; yields this vector. */
const CVector2f& operator*=(const CVector2f& rhs) {
  CVector2f& self = *this;
  self.mSimd *= rhs.mSimd;
  return self;
}
/** In-place lane-by-lane division by another vector; yields this vector. */
const CVector2f& operator/=(const CVector2f& rhs) {
  CVector2f& self = *this;
  self.mSimd /= rhs.mSimd;
  return self;
}
/** Adds a scalar, expanded to a simd value, to the stored lanes in place; yields this vector. */
const CVector2f& operator+=(float rhs) {
  const simd<float> addend(rhs);
  mSimd += addend;
  return *this;
}
/** Subtracts a scalar, expanded to a simd value, from the stored lanes in place; yields this vector. */
const CVector2f& operator-=(float rhs) {
  const simd<float> subtrahend(rhs);
  mSimd -= subtrahend;
  return *this;
}
/** Scales the stored lanes in place by a scalar expanded to a simd value; yields this vector. */
const CVector2f& operator*=(float rhs) {
  const simd<float> factor(rhs);
  mSimd *= factor;
  return *this;
}
/**
 * Divides this vector by a scalar in place.
 *
 * The reciprocal of rhs is computed once and applied with a multiply, the
 * same strategy operator/(float) uses. The earlier body divided by the
 * reciprocal, which scaled the vector by rhs instead of dividing by it.
 *
 * @param rhs scalar divisor (no zero check is performed)
 * @return this vector after the division
 */
const CVector2f& operator/=(float rhs) {
  float oorhs = 1.f / rhs;
  mSimd *= simd<float>(oorhs);
  return *this;
}
/** Rescales this vector in place by the reciprocal of its magnitude (no zero-length guard). */
void normalize() {
  const float invMag = 1.f / magnitude();
  *this *= CVector2f(invMag);
}
/** Returns a copy of this vector scaled by the reciprocal of its magnitude (no zero-length guard). */
CVector2f normalized() const {
  const float invMag = 1.f / magnitude();
  return *this * invMag;
}
/** Builds the vector (-y, x) from this vector's components. */
CVector2f perpendicularVector() const {
  return CVector2f(-y(), x());
}
/** Scalar 2D cross product: x * rhs.y - y * rhs.x. */
float cross(const CVector2f& rhs) const {
  return x() * rhs.y() - y() * rhs.x();
}
float dot(const CVector2f& rhs) const {
return mSimd.dot2(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot2(mSimd);
}
/** Magnitude of the vector: square root of magSquared(). */
float magnitude() const {
  const float lengthSq = magSquared();
  return std::sqrt(lengthSq);
}
void zeroOut() {
*this = CVector2f::skZero;
}
void splat(float xy) {
mSimd = zeus::simd<float>(xy);
}
static float getAngleDiff(const CVector2f& a, const CVector2f& b);
/**
 * Linear blend of a and b, computed as (1 - t) * a + b * t.
 * t is not clamped.
 */
static CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) {
  const simd<float> weightA(1.f - t);
  const simd<float> weightB(t);
  return weightA * a.mSimd + b.mSimd * weightB;
}
/** Linear blend of a and b (see lerp) followed by normalization of the result. */
static CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) {
  const CVector2f blended = lerp(a, b, t);
  return blended.normalized();
}
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
/** True when the squared magnitude is within 0.01 of 1. */
bool isNormalized() const {
  const float deviation = std::fabs(1.f - magSquared());
  return deviation < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
}
/** True when the squared magnitude does not exceed FLT_EPSILON. */
bool isZero() const {
  const float lengthSq = magSquared();
  return lengthSq <= FLT_EPSILON;
}
bool isEqu(const CVector2f& other, float epsilon = FLT_EPSILON) {
const CVector2f diffVec = other - *this;
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
}
/**
 * Mutable component access by index.
 * Returns the simd type's reference object for lane idx; only indices 0 and 1
 * pass the debug assertion.
 */
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 2);
return mSimd[idx];
}
/** Read-only component access by index; only indices 0 and 1 pass the debug assertion. */
float operator[](size_t idx) const {
  assert(idx < 2);
  const float lane = mSimd[idx];
  return lane;
}
/** Value of lane 0. */
float x() const {
  return mSimd[0];
}
/** Value of lane 1. */
float y() const {
  return mSimd[1];
}
/** Writable handle to lane 0. */
simd<float>::reference x() {
  return mSimd[0];
}
/** Writable handle to lane 1. */
simd<float>::reference y() {
  return mSimd[1];
}
static const CVector2f skOne;
static const CVector2f skNegOne;
static const CVector2f skZero;
}; };
static inline CVector2f operator+(float lhs, const CVector2f& rhs) static inline CVector2f operator+(float lhs, const CVector2f& rhs) {
{ return zeus::simd<float>(lhs) + rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs + rhs.x, lhs + rhs.y);
#endif
} }
static inline CVector2f operator-(float lhs, const CVector2f& rhs) static inline CVector2f operator-(float lhs, const CVector2f& rhs) {
{ return zeus::simd<float>(lhs) - rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs - rhs.x, lhs - rhs.y);
#endif
} }
static inline CVector2f operator*(float lhs, const CVector2f& rhs) static inline CVector2f operator*(float lhs, const CVector2f& rhs) {
{ return zeus::simd<float>(lhs) * rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs * rhs.x, lhs * rhs.y);
#endif
} }
static inline CVector2f operator/(float lhs, const CVector2f& rhs) static inline CVector2f operator/(float lhs, const CVector2f& rhs) {
{ return zeus::simd<float>(lhs) / rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs / rhs.x, lhs / rhs.y);
#endif
} }
} }

View File

@ -5,56 +5,57 @@
#include "CVector2f.hpp" #include "CVector2f.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include <athena/IStreamReader.hpp>
#endif #endif
namespace zeus namespace zeus {
{
class CVector2i class CVector2i {
{
public: public:
union { union {
struct struct {
{ int x, y;
int x, y;
};
int v[2];
}; };
CVector2i() = default; int v[2];
CVector2i(int xin, int yin) : x(xin), y(yin) {} };
CVector2i(const CVector2f& vec) : x(int(vec.x)), y(int(vec.y)) {}
CVector2f toVec2f() const { return CVector2f(x, y); } CVector2i() = default;
inline CVector2i operator+(const CVector2i& val) const CVector2i(int xin, int yin) : x(xin), y(yin) {}
{
return CVector2i(x + val.x, y + val.y); CVector2i(const CVector2f& vec) : x(int(vec.x())), y(int(vec.y())) {}
}
inline CVector2i operator-(const CVector2i& val) const CVector2f toVec2f() const { return CVector2f(x, y); }
{
return CVector2i(x - val.x, y - val.y); CVector2i operator+(const CVector2i& val) const {
} return CVector2i(x + val.x, y + val.y);
inline CVector2i operator*(const CVector2i& val) const }
{
return CVector2i(x * val.x, y * val.y); CVector2i operator-(const CVector2i& val) const {
} return CVector2i(x - val.x, y - val.y);
inline CVector2i operator/(const CVector2i& val) const }
{
return CVector2i(x / val.x, y / val.y); CVector2i operator*(const CVector2i& val) const {
} return CVector2i(x * val.x, y * val.y);
inline bool operator==(const CVector2i& other) const }
{
return x == other.x && y == other.y; CVector2i operator/(const CVector2i& val) const {
} return CVector2i(x / val.x, y / val.y);
inline bool operator!=(const CVector2i& other) const }
{
return x != other.x || y != other.y; bool operator==(const CVector2i& other) const {
} return x == other.x && y == other.y;
inline CVector2i operator*(int val) const }
{
return CVector2i(x * val, y * val); bool operator!=(const CVector2i& other) const {
} return x != other.x || y != other.y;
}
CVector2i operator*(int val) const {
return CVector2i(x * val, y * val);
}
}; };
} }

View File

@ -1,288 +1,118 @@
#pragma once #pragma once
#include <athena/Types.hpp> #include "athena/Types.hpp"
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
namespace zeus namespace zeus {
{
class alignas(32) CVector3d class CVector3d {
{
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR32(); zeus::simd<double> mSimd;
CVector3d() { zeroOut(); } CVector3d() : mSimd(0.0) {}
template <typename T>
CVector3d(const simd<T>& s) : mSimd(s) {}
#if __AVX__
CVector3d(const __m256d& mVec256)
{
this->mVec256 = mVec256;
v[3] = 0.0;
}
#elif __SSE__
CVector3d(const __m128d mVec128[2])
{
this->mVec128[0] = mVec128[0];
this->mVec128[1] = mVec128[1];
v[3] = 0.0;
}
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector3d(const atVec3d& vec) CVector3d(const atVec3d& vec) : mSimd(vec.simd) {}
{
#if __AVX__
mVec256 = vec.mVec256;
#elif __SSE__
mVec128[0] = vec.mVec128[0];
mVec128[1] = vec.mVec128[1];
#else
x = v[0], y = v[1], z = v[2], v[3] = 0.0f;
#endif
}
#endif #endif
explicit CVector3d(double xyz) { splat(xyz); } explicit CVector3d(double xyz) : mSimd(xyz) {}
CVector3d(const CVector3f& vec) CVector3d(const CVector3f& vec) : mSimd(vec.mSimd) {}
{
#if __AVX__
mVec256 = _mm256_cvtps_pd(vec.mVec128);
#elif __SSE__
mVec128[0] = _mm_cvtps_pd(vec.mVec128);
v[2] = vec[2];
#else
v[0] = vec[0];
v[1] = vec[1];
v[2] = vec[2];
v[3] = 0.0;
#endif
}
CVector3d(double x, double y, double z) CVector3d(double x, double y, double z) : mSimd(x, y, z) {}
{
#if __AVX__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0;
#endif
}
CVector3f asCVector3f() CVector3f asCVector3f() {
{ return mSimd;
#if __AVX__ }
return CVector3f(_mm256_cvtpd_ps(mVec256));
#else
return CVector3f(float(x), float(y), float(z));
#endif
}
double magSquared() const double magSquared() const {
{ return mSimd.dot3(mSimd);
#if __SSE__ }
TDblVectorUnion result;
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
}
double magnitude() const { return sqrt(magSquared()); } double magnitude() const {
inline CVector3d cross(const CVector3d& rhs) const return sqrt(magSquared());
{ }
return {y * rhs.z - z * rhs.y,
z * rhs.x - x * rhs.z,
x * rhs.y - y * rhs.x};
}
double dot(const CVector3d& rhs) const CVector3d cross(const CVector3d& rhs) const {
{ return {y() * rhs.z() - z() * rhs.y(),
#if __SSE__ z() * rhs.x() - x() * rhs.z(),
TDblVectorUnion result; x() * rhs.y() - y() * rhs.x()};
#if __SSE4_1__ }
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
}
CVector3d asNormalized() double dot(const CVector3d& rhs) const {
{ return mSimd.dot3(rhs.mSimd);
double mag = magnitude(); }
mag = 1.0 / mag;
return {x * mag, y * mag, z * mag};
}
void splat(double xyz) CVector3d asNormalized() {
{ double mag = magnitude();
#if __AVX__ mag = 1.0 / mag;
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}}; return mSimd * zeus::simd<double>(mag);
mVec256 = splat.mVec256; }
#elif __SSE__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0;
#endif
}
void zeroOut() void splat(double xyz) {
{ mSimd = zeus::simd<double>(xyz);
*this = skZero; }
}
inline CVector3d operator+(const CVector3d& rhs) const void zeroOut() {
{ *this = skZero;
#if __AVX__ }
return _mm256_add_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_add_pd(mVec128[0], rhs.mVec128[0]),
_mm_add_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
}
inline CVector3d operator-(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_sub_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_sub_pd(mVec128[0], rhs.mVec128[0]),
_mm_sub_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
}
inline CVector3d operator*(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_mul_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_mul_pd(mVec128[0], rhs.mVec128[0]),
_mm_mul_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
}
inline CVector3d operator/(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_div_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_div_pd(mVec128[0], rhs.mVec128[0]),
_mm_div_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
}
inline double& operator[](size_t idx) { assert(idx < 3); return v[idx]; } CVector3d operator+(const CVector3d& rhs) const {
inline const double& operator[](size_t idx) const { assert(idx < 3); return v[idx]; } return mSimd + rhs.mSimd;
}
union { CVector3d operator-(const CVector3d& rhs) const {
struct return mSimd - rhs.mSimd;
{ }
double x, y, z;
};
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
};
static const CVector3d skZero; CVector3d operator*(const CVector3d& rhs) const {
return mSimd * rhs.mSimd;
}
/** Lane-by-lane quotient of this vector and rhs, returned as a new vector. */
CVector3d operator/(const CVector3d& rhs) const {
  return CVector3d(mSimd / rhs.mSimd);
}
/**
 * Mutable component access by index.
 * Returns the simd type's reference object for lane idx; only indices 0..2
 * pass the debug assertion.
 */
zeus::simd<double>::reference operator[](size_t idx) {
assert(idx < 3);
return mSimd[idx];
}
/** Read-only component access by index; only indices 0..2 pass the debug assertion. */
double operator[](size_t idx) const {
  assert(idx < 3);
  const double lane = mSimd[idx];
  return lane;
}
/** Value of lane 0. */
double x() const {
  return mSimd[0];
}
/** Value of lane 1. */
double y() const {
  return mSimd[1];
}
/** Value of lane 2. */
double z() const {
  return mSimd[2];
}
/** Writable handle to lane 0. */
simd<double>::reference x() {
  return mSimd[0];
}
/** Writable handle to lane 1. */
simd<double>::reference y() {
  return mSimd[1];
}
/** Writable handle to lane 2. */
simd<double>::reference z() {
  return mSimd[2];
}
static const CVector3d skZero;
}; };
static inline CVector3d operator+(double lhs, const CVector3d& rhs) static inline CVector3d operator+(double lhs, const CVector3d& rhs) {
{ return zeus::simd<double>(lhs) + rhs.mSimd;
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_add_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_add_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_add_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs + rhs.x, lhs + rhs.y, lhs + rhs.z};
#endif
} }
static inline CVector3d operator-(double lhs, const CVector3d& rhs) static inline CVector3d operator-(double lhs, const CVector3d& rhs) {
{ return zeus::simd<double>(lhs) - rhs.mSimd;
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_sub_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_sub_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_sub_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs - rhs.x, lhs - rhs.y, lhs - rhs.z};
#endif
} }
static inline CVector3d operator*(double lhs, const CVector3d& rhs) static inline CVector3d operator*(double lhs, const CVector3d& rhs) {
{ return zeus::simd<double>(lhs) * rhs.mSimd;
#if __AVX__ }
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_mul_pd(splat.mVec256, rhs.mVec256); static inline CVector3d operator/(double lhs, const CVector3d& rhs) {
#elif __SSE__ return zeus::simd<double>(lhs) / rhs.mSimd;
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_mul_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_mul_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs * rhs.x, lhs * rhs.y, lhs * rhs.z};
#endif
} }
// Divides a scalar by each component of a vector: (lhs / rhs.x, lhs / rhs.y, lhs / rhs.z).
// The SIMD paths broadcast lhs into the first three lanes and leave the padding lane at 0.
static inline CVector3d operator/(double lhs, const CVector3d& rhs)
{
#if __AVX__
  TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
  return _mm256_div_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
  TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
  splat.mVec128[0] = _mm_div_pd(splat.mVec128[0], rhs.mVec128[0]);
  splat.mVec128[1] = _mm_div_pd(splat.mVec128[1], rhs.mVec128[1]);
  return {splat.mVec128};
#else
  // lhs is a plain double: it has no .x/.y/.z members, so divide the scalar itself.
  return {lhs / rhs.x, lhs / rhs.y, lhs / rhs.z};
#endif
}
} }

View File

@ -3,440 +3,278 @@
#include "Global.hpp" #include "Global.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include "zeus/CVector2f.hpp" #include "zeus/CVector2f.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp> #include "athena/IStreamReader.hpp"
#endif #endif
namespace zeus namespace zeus {
{
class CVector3d; class CVector3d;
class alignas(16) CVector3f
{ class CVector3f {
#if __atdna__
float clangVec __attribute__((__vector_size__(12)));
#endif
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); zeus::simd<float> mSimd;
CVector3f() : mSimd(0.f) {}
union { template <typename T>
struct CVector3f(const simd<T>& s) : mSimd(s) {}
{
float x, y, z;
};
float v[4];
#if __SSE__
__m128 mVec128;
#elif __GEKKO_PS__
ps128_t mVec128;
#endif
};
inline CVector3f() { zeroOut(); }
#if __SSE__ || __GEKKO_PS__
CVector3f(const __m128& mVec128) : mVec128(mVec128) { v[3] = 0.0f; }
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector3f(const atVec3f& vec)
#if __SSE__ || __GEKKO_PS__ CVector3f(const atVec3f& vec) : mSimd(vec.simd) {}
: mVec128(vec.mVec128)
{ operator atVec3f&() {
} return *reinterpret_cast<atVec3f*>(this);
#else }
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], v[3] = 0.0f; operator const atVec3f&() const {
} return *reinterpret_cast<const atVec3f*>(this);
}
// Reads x, y, z (in that order) with readFloatBig() into lanes 0..2 and zeroes lane 3.
void readBig(athena::io::IStreamReader& input) {
  simd_floats lanes;
  for (size_t i = 0; i < 3; ++i)
    lanes[i] = input.readFloatBig();
  lanes[3] = 0.0f;
  mSimd.copy_from(lanes);
}
// Factory form of readBig(): default-constructs a vector, fills it from the stream, returns it.
static CVector3f ReadBig(athena::io::IStreamReader& input) {
  CVector3f result;
  result.readBig(input);
  return result;
}
#endif #endif
operator atVec3f&() CVector3f(const CVector3d& vec);
{
return *reinterpret_cast<atVec3f*>(v); explicit CVector3f(float xyz) : mSimd(xyz) {}
}
operator const atVec3f&() const void assign(float x, float y, float z) {
{ mSimd = zeus::simd<float>(x, y, z);
return *reinterpret_cast<const atVec3f*>(v); }
// Builds the vector from three explicit components.
CVector3f(float x, float y, float z) : mSimd(x, y, z) {}
// Builds the vector from the first three floats at `floats`; the caller must supply at least 3 readable elements.
CVector3f(const float* floats) : mSimd(floats[0], floats[1], floats[2]) {}
// Promotes a 2D vector: copies its simd storage, then forces lanes 2 (z) and 3 to zero.
CVector3f(const CVector2f& other) {
mSimd = other.mSimd;
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
// Converts to a 2D vector by handing the whole simd value to CVector2f's constructor.
CVector2f toVec2f() const {
return CVector2f(mSimd);
}
// Exact equality of the x, y and z lanes; lane 3 does not take part in the comparison.
bool operator==(const CVector3f& rhs) const {
  for (size_t lane = 0; lane < 3; ++lane) {
    if (!(mSimd[lane] == rhs.mSimd[lane]))
      return false;
  }
  return true;
}
// Logical negation of operator==.
bool operator!=(const CVector3f& rhs) const { return !operator==(rhs); }
// Vector-vector arithmetic. Each operator forwards to the matching simd<float> operator,
// so the operation is applied lane by lane.

// Component-wise sum.
CVector3f operator+(const CVector3f& rhs) const {
return mSimd + rhs.mSimd;
}
// Component-wise difference.
CVector3f operator-(const CVector3f& rhs) const {
return mSimd - rhs.mSimd;
}
// Negation of every lane.
CVector3f operator-() const {
return -mSimd;
}
// Component-wise product (not a dot or cross product).
CVector3f operator*(const CVector3f& rhs) const {
return mSimd * rhs.mSimd;
}
// Component-wise quotient.
CVector3f operator/(const CVector3f& rhs) const {
return mSimd / rhs.mSimd;
}
// Vector-scalar arithmetic. The scalar is turned into a simd<float> built from a single value
// and then combined with mSimd.

// Adds val to the vector.
CVector3f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
}
// Subtracts val from the vector.
CVector3f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
}
// Scales the vector by val.
CVector3f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
// Divides by val, implemented as multiplication by the reciprocal 1/val.
CVector3f operator/(float val) const {
float ooval = 1.f / val;
return mSimd * zeus::simd<float>(ooval);
}
// Compound assignment operators. Each updates mSimd in place and returns *this
// as a const reference.

// In-place component-wise addition.
const CVector3f& operator+=(const CVector3f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
// In-place component-wise subtraction.
const CVector3f& operator-=(const CVector3f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
// In-place component-wise multiplication.
const CVector3f& operator*=(const CVector3f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
// In-place component-wise division.
const CVector3f& operator/=(const CVector3f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
void normalize() {
float mag = 1.f / magnitude();
*this *= CVector3f(mag);
}
CVector3f normalized() const {
float mag = 1.f / magnitude();
return *this * mag;
}
// Cross product this x rhs, computed component by component in scalar code.
CVector3f cross(const CVector3f& rhs) const {
  const float cx = y() * rhs.z() - z() * rhs.y();
  const float cy = z() * rhs.x() - x() * rhs.z();
  const float cz = x() * rhs.y() - y() * rhs.x();
  return CVector3f(cx, cy, cz);
}
float dot(const CVector3f& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot3(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
// True when none of x, y, z is an infinity.
bool isNotInf() const {
  return !std::isinf(x()) && !std::isinf(y()) && !std::isinf(z());
}
// True when no component is infinite and the squared length is at least 9.9999994e-29.
bool isMagnitudeSafe() const {
  if (!isNotInf())
    return false;
  return magSquared() >= 9.9999994e-29;
}
// Resets this vector by assigning the shared skZero constant.
void zeroOut() {
*this = CVector3f::skZero;
}
// Replaces the storage with a simd<float> built from the single value xyz.
void splat(float xyz) {
mSimd = zeus::simd<float>(xyz);
}
// Declared here, defined out of line.
static float getAngleDiff(const CVector3f& a, const CVector3f& b);
// Linear interpolation evaluated as (1 - t) * a + t * b; t is not clamped.
static CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
// lerp() followed by normalized().
static CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) {
return lerp(a, b, t).normalized();
}
// Declared here, defined out of line.
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
// True when the squared length is within 0.01 of 1.
bool isNormalized() const {
  const float deviation = std::fabs(1.f - magSquared());
  return deviation < 0.01f;
}
// False if any of x, y, z is infinite; otherwise true when at least one of them
// has an absolute value of FLT_EPSILON or more.
bool canBeNormalized() const {
  const float comps[3] = {x(), y(), z()};
  for (float c : comps) {
    if (std::isinf(c))
      return false;
  }
  for (float c : comps) {
    if (std::fabs(c) >= FLT_EPSILON)
      return true;
  }
  return false;
}
// True when the squared length does not exceed FLT_EPSILON.
bool isZero() const {
  return !(magSquared() > FLT_EPSILON);
}
void scaleToLength(float newLength) {
float length = magSquared();
if (length < FLT_EPSILON) {
mSimd[0] = newLength, mSimd[1] = 0.f, mSimd[2] = 0.f;
return;
} }
void readBig(athena::io::IStreamReader& input) length = std::sqrt(length);
{ float scalar = newLength / length;
x = input.readFloatBig(); *this *= CVector3f(scalar);
y = input.readFloatBig(); }
z = input.readFloatBig();
v[3] = 0.0f;
}
static CVector3f ReadBig(athena::io::IStreamReader& input) CVector3f scaledToLength(float newLength) const {
{ CVector3f v = *this;
CVector3f ret; v.scaleToLength(newLength);
ret.readBig(input); return v;
return ret; }
}
#endif
CVector3f(const CVector3d& vec); bool isEqu(const CVector3f& other, float epsilon = FLT_EPSILON) {
const CVector3f diffVec = other - *this;
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon && diffVec.z() <= epsilon);
}
explicit CVector3f(float xyz) { splat(xyz); } zeus::simd<float>::reference operator[](size_t idx) {
void assign(float x, float y, float z) assert(idx < 3);
{ return mSimd[idx];
v[0] = x; }
v[1] = y;
v[2] = z;
v[3] = 0.0f;
}
CVector3f(float x, float y, float z) { assign(x, y, z); }
CVector3f(const float* floats) float operator[](size_t idx) const {
{ assert(idx < 3);
#if __SSE__ return mSimd[idx];
mVec128 = _mm_loadu_ps(floats); }
#else
x = floats[0];
y = floats[1];
z = floats[2];
#endif
v[3] = 0.0f;
}
CVector3f(const CVector2f& other) float x() const { return mSimd[0]; }
{ float y() const { return mSimd[1]; }
x = other.x; float z() const { return mSimd[2]; }
y = other.y;
z = 0.0f;
v[3] = 0.0f;
}
inline CVector2f toVec2f() const simd<float>::reference x() { return mSimd[0]; }
{ simd<float>::reference y() { return mSimd[1]; }
#if __SSE__ simd<float>::reference z() { return mSimd[2]; }
return CVector2f(mVec128);
#else
return CVector2f(x, y);
#endif
}
inline bool operator==(const CVector3f& rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z); } static const CVector3f skOne;
inline bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); } static const CVector3f skNegOne;
inline CVector3f operator+(const CVector3f& rhs) const static const CVector3f skZero;
{ static const CVector3f skForward;
#if __SSE__ static const CVector3f skBack;
return CVector3f(_mm_add_ps(mVec128, rhs.mVec128)); static const CVector3f skLeft;
#elif __GEKKO_PS__ static const CVector3f skRight;
return CVector3f(__mm_gekko_add_ps(mVec128, rhs.mVec128)); static const CVector3f skUp;
#else static const CVector3f skDown;
return CVector3f(x + rhs.x, y + rhs.y, z + rhs.z); static const CVector3f skRadToDegVec;
#endif static const CVector3f skDegToRadVec;
}
// Component-wise difference; SSE path subtracts the packed registers, fallback is scalar.
inline CVector3f operator-(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
}
// Negation. The SSE path builds a zero register with xor(v, v) and computes 0 - v.
inline CVector3f operator-() const
{
#if __SSE__
return CVector3f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_neg_ps(mVec128));
#else
return CVector3f(-x, -y, -z);
#endif
}
// Component-wise product (not a dot or cross product).
inline CVector3f operator*(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
}
// Component-wise quotient. The SSE path divides all four lanes, including the padding lane.
inline CVector3f operator/(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
}
// Vector-scalar arithmetic. The vector paths broadcast the scalar into lanes 0..2
// of a TVectorUnion and leave lane 3 at 0.

// Adds val to x, y and z.
inline CVector3f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, 0.0f}};
return CVector3f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector3f(x + val, y + val, z + val);
#endif
}
// Subtracts val from x, y and z.
inline CVector3f operator-(float val) const
{
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_sub_ps(mVec128, splat.mVec128));
#else
return CVector3f(x - val, y - val, z - val);
#endif
}
// Scales x, y and z by val.
inline CVector3f operator*(float val) const
{
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * val, y * val, z * val);
#endif
}
// Divides by val, implemented on every path as multiplication by the reciprocal 1/val.
inline CVector3f operator/(float val) const
{
float ooval = 1.f / val;
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{ooval, ooval, ooval, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * ooval, y * ooval, z * ooval);
#endif
}
// Compound assignment operators. Each modifies this vector in place and returns *this
// as a const reference.

// In-place component-wise addition.
inline const CVector3f& operator+=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#elif __GEKKO_PS__
mVec128 = _mm_gekko_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
#endif
return *this;
}
// In-place component-wise subtraction.
inline const CVector3f& operator-=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
#endif
return *this;
}
// In-place component-wise multiplication.
inline const CVector3f& operator*=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
#endif
return *this;
}
// In-place component-wise division. The SSE path also divides the padding lane.
inline const CVector3f& operator/=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
#endif
return *this;
}
inline void normalize() static CVector3f radToDeg(const CVector3f& rad) { return rad * skRadToDegVec; }
{
float mag = 1.f / magnitude();
*this *= CVector3f(mag);
}
// Returns a copy scaled by 1/magnitude(); no guard against a zero magnitude.
inline CVector3f normalized() const
{
  const float invMag = 1.f / magnitude();
  return *this * invMag;
}
// Cross product this x rhs, computed in scalar code.
inline CVector3f cross(const CVector3f& rhs) const
{
  const float cx = y * rhs.z - z * rhs.y;
  const float cy = z * rhs.x - x * rhs.z;
  const float cz = x * rhs.y - y * rhs.x;
  return CVector3f(cx, cy, cz);
}
inline float dot(const CVector3f& rhs) const static CVector3f degToRad(const CVector3f& deg) { return deg * skDegToRadVec; }
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
}
// Squared length x*x + y*y + z*z.
// SSE4.1 path: _mm_dp_ps with mask 0x71, result read from lane 0.
// Plain SSE path: packed multiply, then the first three lanes are summed in scalar code.
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
}
// Euclidean length: square root of magSquared().
inline float magnitude() const { return std::sqrt(magSquared()); }
// True when none of x, y, z is an infinity.
inline bool isNotInf() const
{
return !(std::isinf(x) || std::isinf(y) || std::isinf(z));
}
// True when no component is infinite and the squared length is at least 9.9999994e-29.
inline bool isMagnitudeSafe() const
{
return isNotInf() && magSquared() >= 9.9999994e-29;
}
// Resets this vector by assigning the shared skZero constant.
inline void zeroOut()
{
*this = CVector3f::skZero;
}
inline void splat(float xyz)
{
#if __SSE__
TVectorUnion splat = {{xyz, xyz, xyz, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0f;
#endif
}
// Declared here, defined out of line.
static float getAngleDiff(const CVector3f& a, const CVector3f& b);
// Linear interpolation a + (b - a) * t; t is not clamped.
static inline CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) { return (a + (b - a) * t); }
// lerp() followed by normalized().
static inline CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) { return lerp(a, b, t).normalized(); }
// Declared here, defined out of line.
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
// True when the squared length is within 0.01 of 1.
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
// False if any component is infinite; otherwise true when at least one component
// has an absolute value of FLT_EPSILON or more.
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y) || std::isinf(z))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON;
}
// True when the squared length does not exceed 1.1920929e-7f.
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }
// Rescales this vector so its length becomes newLength. When the squared length is
// below 1.1920929e-7f there is no usable direction, so the result is (newLength, 0, 0).
inline void scaleToLength(float newLength)
{
  const float lenSq = magSquared();
  if (lenSq < 1.1920929e-7f)
  {
    x = newLength;
    y = 0.f;
    z = 0.f;
    return;
  }
  const float len = std::sqrt(lenSq);
  const float factor = newLength / len;
  *this *= CVector3f(factor);
}
// Non-mutating variant: returns a rescaled copy.
inline CVector3f scaledToLength(float newLength) const
{
  CVector3f scaled = *this;
  scaled.scaleToLength(newLength);
  return scaled;
}
inline bool isEqu(const CVector3f& other, float epsilon = 1.1920929e-7f)
{
const CVector3f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon);
}
// Indexed component access for idx 0..2 (asserted). Implemented by indexing from the address of x.
inline float& operator[](size_t idx) { assert(idx < 3); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 3); return (&x)[idx]; }

// Shared constants; values are defined out of line.
static const CVector3f skOne;
static const CVector3f skNegOne;
static const CVector3f skZero;
static const CVector3f skForward;
static const CVector3f skBack;
static const CVector3f skLeft;
static const CVector3f skRight;
static const CVector3f skUp;
static const CVector3f skDown;
static const CVector3f skRadToDegVec;
static const CVector3f skDegToRadVec;

// Unit conversions: component-wise multiply by the corresponding conversion vector.
static CVector3f radToDeg(const CVector3f& rad) { return rad * skRadToDegVec; }
static CVector3f degToRad(const CVector3f& deg) { return deg * skDegToRadVec; }
}; };
static inline CVector3f operator+(float lhs, const CVector3f& rhs) static inline CVector3f operator+(float lhs, const CVector3f& rhs) {
{ return zeus::simd<float>(lhs) + rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs + rhs.x, lhs + rhs.y, lhs + rhs.z);
#endif
} }
static inline CVector3f operator-(float lhs, const CVector3f& rhs) static inline CVector3f operator-(float lhs, const CVector3f& rhs) {
{ return zeus::simd<float>(lhs) - rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs - rhs.x, lhs - rhs.y, lhs - rhs.z);
#endif
} }
static inline CVector3f operator*(float lhs, const CVector3f& rhs) static inline CVector3f operator*(float lhs, const CVector3f& rhs) {
{ return zeus::simd<float>(lhs) * rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
#endif
} }
static inline CVector3f operator/(float lhs, const CVector3f& rhs) static inline CVector3f operator/(float lhs, const CVector3f& rhs) {
{ return zeus::simd<float>(lhs) / rhs.mSimd;
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs / rhs.x, lhs / rhs.y, lhs / rhs.z);
#endif
} }
} }

View File

@ -1,420 +1,261 @@
#pragma once #pragma once
#include "Global.hpp" #include "Global.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#include "athena/IStreamReader.hpp"
#endif #endif
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cfloat> #include <cfloat>
#include <cassert> #include <cassert>
namespace zeus namespace zeus {
{
class CColor; class CColor;
class alignas(16) CVector4f
{ class CVector4f {
#if __atdna__
float clangVec __attribute__((__vector_size__(16)));
#endif
public: public:
ZE_DECLARE_ALIGNED_ALLOCATOR(); zeus::simd<float> mSimd;
union {
struct CVector4f() : mSimd(0.f) {}
{
float x, y, z, w; template <typename T>
}; CVector4f(const simd<T>& s) : mSimd(s) {}
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
inline CVector4f() { zeroOut(); }
#if __SSE__
CVector4f(const __m128& mVec128) : mVec128(mVec128) {}
#endif
#if ZE_ATHENA_TYPES #if ZE_ATHENA_TYPES
CVector4f(const atVec4f& vec)
#if __SSE__ CVector4f(const atVec4f& vec) : mSimd(vec.simd) {}
: mVec128(vec.mVec128)
{ operator atVec4f&() {
} return *reinterpret_cast<atVec4f*>(this);
#else }
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], w = vec.vec[3]; operator const atVec4f&() const {
} return *reinterpret_cast<const atVec4f*>(this);
}
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
}
#endif #endif
operator atVec4f&() explicit CVector4f(float xyzw) : mSimd(xyzw) {}
{
return *reinterpret_cast<atVec4f*>(v);
}
operator const atVec4f&() const
{
return *reinterpret_cast<const atVec4f*>(v);
}
void readBig(athena::io::IStreamReader& input) void assign(float x, float y, float z, float w) {
{ mSimd = simd<float>(x, y, z, w);
x = input.readFloatBig(); }
y = input.readFloatBig();
z = input.readFloatBig();
w = input.readFloatBig();
}
#endif
explicit CVector4f(float xyzw) { splat(xyzw); } CVector4f(float x, float y, float z, float w) : mSimd(x, y, z, w) {}
void assign(float x, float y, float z, float w)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = w;
}
CVector4f(float x, float y, float z, float w) { assign(x, y, z, w); }
CVector4f(const CColor& other);
CVector4f(const CVector3f& other, float wIn = 1.f) CVector4f(const CColor& other);
{
#if __SSE__
mVec128 = other.mVec128;
#else
x = other.x;
y = other.y;
z = other.z;
#endif
w = wIn;
}
static CVector4f ToClip(const zeus::CVector3f& v, float w) CVector4f(const CVector3f& other, float wIn = 1.f) : mSimd(other.mSimd) {
{ mSimd[3] = wIn;
return CVector4f(v * w, w); }
}
inline CVector3f toVec3f() const static CVector4f ToClip(const zeus::CVector3f& v, float w) {
{ return CVector4f(v * w, w);
#if __SSE__ }
return CVector3f(mVec128);
#else
return CVector3f(x, y, z);
#endif
}
CVector4f& operator=(const CColor& other); CVector3f toVec3f() const {
inline bool operator==(const CVector4f& rhs) const return CVector3f(mSimd);
{ }
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpeq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w);
#endif
}
// Inequality, defined as the exact negation of operator== on every build configuration.
// The previous SSE branch required ALL four lanes to differ, so two vectors that
// differed in only one component compared as neither equal nor unequal.
inline bool operator!=(const CVector4f& rhs) const
{
  return !(*this == rhs);
}
// Relational operators. On both the SSE and scalar paths each one returns true when
// ANY of the four components satisfies the relation (the lane results are OR-ed).
// NOTE(review): any-lane semantics is unusual for ordering operators — confirm callers expect it.

// True if at least one component of *this is < the matching component of rhs.
inline bool operator<(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x < rhs.x || y < rhs.y || z < rhs.z || w < rhs.w);
#endif
}
// True if at least one component of *this is <= the matching component of rhs.
inline bool operator<=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x <= rhs.x || y <= rhs.y || z <= rhs.z || w <= rhs.w);
#endif
}
// True if at least one component of *this is > the matching component of rhs.
inline bool operator>(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x > rhs.x || y > rhs.y || z > rhs.z || w > rhs.w);
#endif
}
// True if at least one component of *this is >= the matching component of rhs.
inline bool operator>=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x >= rhs.x || y >= rhs.y || z >= rhs.z || w >= rhs.w);
#endif
}
// Vector-vector arithmetic over all four components (x, y, z, w).

// Component-wise sum.
inline CVector4f operator+(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
#endif
}
// Component-wise difference.
inline CVector4f operator-(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
#endif
}
// Negation. The SSE path builds a zero register with xor(v, v) and computes 0 - v.
inline CVector4f operator-() const
{
#if __SSE__
return CVector4f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector4f(-x, -y, -z, -w);
#endif
}
// Component-wise product.
inline CVector4f operator*(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
#endif
}
// Component-wise quotient.
inline CVector4f operator/(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
#endif
}
// Vector-scalar arithmetic. The SSE paths broadcast the scalar into all four lanes.

// Adds val to every component.
inline CVector4f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector4f(x + val, y + val, z + val, w + val);
#endif
}
// Subtracts val from every component.
inline CVector4f operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector4f(x - val, y - val, z - val, w - val);
#endif
}
// Scales every component by val.
inline CVector4f operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * val, y * val, z * val, w * val);
#endif
}
// Divides by val, implemented as multiplication by the reciprocal 1/val.
inline CVector4f operator/(float val) const
{
float ooval = 1.f / val;
#if __SSE__
TVectorUnion splat = {{ooval, ooval, ooval, ooval}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * ooval, y * ooval, z * ooval, w * ooval);
#endif
}
// Compound assignment operators. Each modifies this vector in place and returns *this
// as a const reference.

// In-place component-wise addition.
inline const CVector4f& operator+=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
w += rhs.w;
#endif
return *this;
}
// In-place component-wise subtraction.
inline const CVector4f& operator-=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
w -= rhs.w;
#endif
return *this;
}
// In-place component-wise multiplication.
inline const CVector4f& operator*=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
w *= rhs.w;
#endif
return *this;
}
// In-place component-wise division.
inline const CVector4f& operator/=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
w /= rhs.w;
#endif
return *this;
}
// Scales this vector in place by 1/magnitude(). No guard against a zero magnitude.
inline void normalize()
{
  const float invMag = 1.f / magnitude();
  *this *= CVector4f(invMag);
}
// Returns a copy scaled by 1/magnitude(); *this is untouched.
inline CVector4f normalized() const
{
  const float invMag = 1.f / magnitude();
  return *this * invMag;
}
inline float dot(const CVector4f& rhs) const CVector4f& operator=(const CColor& other);
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
}
// Squared length over all four components: x*x + y*y + z*z + w*w.
// SSE4.1 path: _mm_dp_ps with mask 0xF1, result read from lane 0.
// Plain SSE path: packed multiply, then all four lanes are summed in scalar code.
inline float magSquared() const
{
#if __SSE__
  TVectorUnion result;
#if __SSE4_1__
  result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
  return result.v[0];
#else
  result.mVec128 = _mm_mul_ps(mVec128, mVec128);
  // The w lane must be included, matching the SSE4.1 and scalar branches.
  return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
  return x * x + y * y + z * z + w * w;
#endif
}
// Euclidean length: square root of magSquared().
inline float magnitude() const { return std::sqrt(magSquared()); }
inline void zeroOut() bool operator==(const CVector4f& rhs) const {
{ auto eq_mask = mSimd == rhs.mSimd;
*this = CVector4f::skZero; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
} }
inline void splat(float xyzw) bool operator!=(const CVector4f& rhs) const {
{ auto eq_mask = mSimd != rhs.mSimd;
#if __SSE__ return eq_mask[0] || eq_mask[1] || eq_mask[2] || eq_mask[3];
TVectorUnion splat = {{xyzw, xyzw, xyzw, xyzw}}; }
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = xyzw;
#endif
}
static inline CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) { return (a + (b - a) * t); } bool operator<(const CVector4f& rhs) const {
static inline CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) { return lerp(a, b, t).normalized(); } auto eq_mask = mSimd < rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; } bool operator<=(const CVector4f& rhs) const {
auto eq_mask = mSimd <= rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool canBeNormalized() const bool operator>(const CVector4f& rhs) const {
{ auto eq_mask = mSimd > rhs.mSimd;
if (std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w)) return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
return false; }
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON || std::fabs(w) >= FLT_EPSILON;
}
inline bool isEqu(const CVector4f& other, float epsilon = 1.1920929e-7f) bool operator>=(const CVector4f& rhs) const {
{ auto eq_mask = mSimd >= rhs.mSimd;
const CVector4f diffVec = other - *this; return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon && diffVec.w <= epsilon); }
}
inline float& operator[](size_t idx) { assert(idx < 4); return (&x)[idx]; } CVector4f operator+(const CVector4f& rhs) const {
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&x)[idx]; } return mSimd + rhs.mSimd;
}
static const CVector4f skOne; CVector4f operator-(const CVector4f& rhs) const {
static const CVector4f skNegOne; return mSimd - rhs.mSimd;
static const CVector4f skZero; }
// Negation of every lane.
CVector4f operator-() const {
return -mSimd;
}
// Component-wise product.
CVector4f operator*(const CVector4f& rhs) const {
return mSimd * rhs.mSimd;
}
// Component-wise quotient.
CVector4f operator/(const CVector4f& rhs) const {
return mSimd / rhs.mSimd;
}
// Vector-scalar arithmetic. The scalar is turned into a simd<float> built from a single value
// and then combined with mSimd.

// Adds val to the vector.
CVector4f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
}
// Subtracts val from the vector.
CVector4f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
}
// Scales the vector by val.
CVector4f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
// Divides by val, implemented as multiplication by the reciprocal 1/val.
CVector4f operator/(float val) const {
float ooval = 1.f / val;
return mSimd * zeus::simd<float>(ooval);
}
// Compound assignment operators. Each updates mSimd in place and returns *this
// as a const reference.

// In-place component-wise addition.
const CVector4f& operator+=(const CVector4f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
// In-place component-wise subtraction.
const CVector4f& operator-=(const CVector4f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
// In-place component-wise multiplication.
const CVector4f& operator*=(const CVector4f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
// In-place component-wise division.
const CVector4f& operator/=(const CVector4f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
// Scales this vector in place by 1/magnitude(). No guard against a zero magnitude.
void normalize() {
  const float invMag = 1.f / magnitude();
  *this *= CVector4f(invMag);
}
// Returns a copy scaled by 1/magnitude(); *this is untouched.
CVector4f normalized() const {
  const float invMag = 1.f / magnitude();
  return *this * invMag;
}
float dot(const CVector4f& rhs) const {
return mSimd.dot4(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot4(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
// Resets this vector by assigning the shared skZero constant.
void zeroOut() {
*this = CVector4f::skZero;
}
// Replaces the storage with a simd<float> built from the single value xyzw.
void splat(float xyzw) {
mSimd = zeus::simd<float>(xyzw);
}
// Linear interpolation evaluated as (1 - t) * a + t * b; t is not clamped.
static CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
// lerp() followed by normalized().
static CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) {
return lerp(a, b, t).normalized();
}
// True when the squared length is within 0.01 of 1.
bool isNormalized() const {
  const float deviation = std::fabs(1.f - magSquared());
  return deviation < 0.01f;
}
// False if any of x, y, z, w is infinite; otherwise true when at least one of them
// has an absolute value of FLT_EPSILON or more.
bool canBeNormalized() const {
  const float comps[4] = {x(), y(), z(), w()};
  for (float c : comps) {
    if (std::isinf(c))
      return false;
  }
  for (float c : comps) {
    if (std::fabs(c) >= FLT_EPSILON)
      return true;
  }
  return false;
}
bool isEqu(const CVector4f& other, float epsilon = FLT_EPSILON) {
const CVector4f diffVec = other - *this;
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon &&
diffVec.z() <= epsilon && diffVec.w() <= epsilon);
}
// Writable lane access for indices 0..3 (asserted).
// Returns simd<float>::reference rather than float& (presumably a proxy type — confirm in simd.hpp).
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
// Read-only lane access; returns the lane by value. Same 0..3 index restriction.
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
// Read-only component accessors: x, y, z, w are lanes 0, 1, 2, 3 of mSimd, returned by value.
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float w() const { return mSimd[3]; }
// Writable component accessors for the same lanes, returned as simd<float>::reference.
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference w() { return mSimd[3]; }
// Shared constants; values are defined out of line.
static const CVector4f skOne;
static const CVector4f skNegOne;
static const CVector4f skZero;
}; };
// Scalar-on-the-left arithmetic for CVector4f.
//
// Each operator builds a zeus::simd<float> from `lhs` and combines it with
// rhs.mSimd using the matching simd operator. The TVectorUnion/_mm_* variants
// that predate the SIMD refactor are gone; the simd type now carries the
// platform-specific implementation.
// NOTE(review): assumes simd<float>(float) broadcasts the scalar to every lane
// and that CVector4f is implicitly constructible from simd<float> -- confirm.
// Declared `inline` (not `static`) so every translation unit shares one
// definition of these header functions.
inline CVector4f operator+(float lhs, const CVector4f& rhs) {
  return zeus::simd<float>(lhs) + rhs.mSimd;
}

inline CVector4f operator-(float lhs, const CVector4f& rhs) {
  return zeus::simd<float>(lhs) - rhs.mSimd;
}

inline CVector4f operator*(float lhs, const CVector4f& rhs) {
  return zeus::simd<float>(lhs) * rhs.mSimd;
}

inline CVector4f operator/(float lhs, const CVector4f& rhs) {
  return zeus::simd<float>(lhs) / rhs.mSimd;
}
} }

View File

@ -1,61 +1,19 @@
#pragma once #pragma once
#if _M_IX86_FP >= 1 || _M_X64 #if ZE_ATHENA_TYPES
#define __SSE__ 1 #include "athena/IStreamReader.hpp"
#endif #include "athena/simd/simd.hpp"
#if __SSE__
#include <immintrin.h>
#ifndef _MSC_VER
#include <mm_malloc.h>
#endif
#define zeAlloc(sz, align) _mm_malloc(sz, align)
#define zeFree(ptr) _mm_free(ptr)
#elif GEKKO
#include <ps_intrins.h>
#define zeAlloc(sz, align) _ps_malloc(sz, align)
#define zeFree(ptr) _ps_free(ptr)
#endif
#if __SSE__ || __GEKKO_PS__
#define ZE_DECLARE_ALIGNED_ALLOCATOR() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#else #else
#define ZE_DECLARE_ALIGNED_ALLOCATOR() void __unused__() #include "simd/simd.hpp"
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() void __unused__()
#endif #endif
#if __SSE__ namespace zeus {
#define ZE_SHUFFLE(x, y, z, w) ((w) << 6 | (z) << 4 | (y) << 2 | (x)) #if ZE_ATHENA_TYPES
#define ze_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask)) template<typename T> using simd = athena::simd<T>;
#define ze_splat3_ps(_a, _i) ze_pshufd_ps((_a), ZE_SHUFFLE(_i, _i, _i, 3)) using simd_floats = athena::simd_floats;
#define ze_splat_ps(_a, _i) ze_pshufd_ps((_a), ZE_SHUFFLE(_i, _i, _i, _i)) using simd_doubles = athena::simd_doubles;
#if _WIN32
#define zeCastiTo128f(a) (_mm_castsi128_ps(a))
#else
#define zeCastiTo128f(a) ((__m128)(a))
#endif
#elif __GEKKO_PS__
#endif #endif
}
// Rotates the 32-bit pattern of `x` right by `n` bits (n is taken modulo 32).
// The work is done on unsigned values: a signed right shift would smear the
// sign bit into the result, and shifting by the full width (the n == 0 case
// in the naive formula) is undefined behaviour.
inline int rotr(int x, int n) {
  const unsigned int bits = static_cast<unsigned int>(x);
  const unsigned int shift = static_cast<unsigned int>(n) & 31u;
  return static_cast<int>((bits >> shift) | (bits << ((32u - shift) & 31u)));
}
// Rotates the 32-bit pattern of `x` left by `n` bits (n is taken modulo 32).
// The work is done on unsigned values: the wrap-around half needs a logical
// right shift, and shifting by the full width (the n == 0 case in the naive
// formula) is undefined behaviour.
inline int rotl(int x, int n) {
  const unsigned int bits = static_cast<unsigned int>(x);
  const unsigned int shift = static_cast<unsigned int>(n) & 31u;
  return static_cast<int>((bits << shift) | (bits >> ((32u - shift) & 31u)));
}

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <cfloat> #include <cfloat>
#undef min #undef min
#undef max #undef max
@ -26,8 +27,7 @@
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
namespace zeus namespace zeus {
{
#if _MSC_VER #if _MSC_VER
#if defined(_M_IX86) #if defined(_M_IX86)
@ -43,115 +43,131 @@ namespace zeus
#endif #endif
#endif #endif
struct CPUInfo struct CPUInfo {
{ const char cpuBrand[48] = {0};
const char cpuBrand[48] = {0}; const char cpuVendor[32] = {0};
const char cpuVendor[32] = {0};
#if ZEUS_ARCH_X86_64 || ZEUS_ARCH_X86 #if ZEUS_ARCH_X86_64 || ZEUS_ARCH_X86
const bool isIntel = false; const bool isIntel = false;
const bool SSE1 = false; const bool SSE1 = false;
const bool SSE2 = false; const bool SSE2 = false;
const bool SSE3 = false; const bool SSE3 = false;
const bool SSSE3 = false; const bool SSSE3 = false;
const bool SSE41 = false; const bool SSE41 = false;
const bool SSE42 = false; const bool SSE42 = false;
const bool SSE4a = false; const bool SSE4a = false;
const bool AESNI = false; const bool AESNI = false;
const bool AVX = false; const bool AVX = false;
const bool AVX2 = false; const bool AVX2 = false;
#endif #endif
}; };
/** /**
* Detects CPU capabilities and returns true if SSE4.1 or SSE4.2 is available * Detects CPU capabilities and returns true if SSE4.1 or SSE4.2 is available
*/ */
void detectCPU(); void detectCPU();
const CPUInfo& cpuFeatures(); const CPUInfo& cpuFeatures();
std::pair<bool, const CPUInfo&> validateCPU(); std::pair<bool, const CPUInfo&> validateCPU();
void getCpuInfo(int eax, int regs[4]); void getCpuInfo(int eax, int regs[4]);
void getCpuInfoEx(int eax, int ecx, int regs[4]); void getCpuInfoEx(int eax, int ecx, int regs[4]);
class CVector3f; class CVector3f;
class CVector2f; class CVector2f;
class CTransform; class CTransform;
template <typename T> template<typename T>
inline constexpr T min(const T& a, const T& b) inline constexpr T min(const T& a, const T& b) {
{ return a < b ? a : b;
return a < b ? a : b;
} }
template <typename T>
inline constexpr T max(const T& a, const T& b)
{
return a > b ? a : b;
}
template <> CVector3f min(const CVector3f& a, const CVector3f& b);
template <> CVector3f max(const CVector3f& a, const CVector3f& b);
template <typename T> template<typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b) inline constexpr T max(const T& a, const T& b) {
{ return a > b ? a : b;
return max<T>(a, min<T>(b, val)); }
template<>
CVector3f min(const CVector3f& a, const CVector3f& b);
template<>
CVector3f max(const CVector3f& a, const CVector3f& b);
template<typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b) {
return max<T>(a, min<T>(b, val));
} }
inline constexpr float radToDeg(float rad) { return rad * (180.f / M_PIF); } inline constexpr float radToDeg(float rad) { return rad * (180.f / M_PIF); }
inline constexpr float degToRad(float deg) { return deg * (M_PIF / 180.f); } inline constexpr float degToRad(float deg) { return deg * (M_PIF / 180.f); }
inline constexpr double radToDeg(double rad) { return rad * (180.0 / M_PI); } inline constexpr double radToDeg(double rad) { return rad * (180.0 / M_PI); }
inline constexpr double degToRad(double deg) { return deg * (M_PI / 180.0); } inline constexpr double degToRad(double deg) { return deg * (M_PI / 180.0); }
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary); CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary);
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t); CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t); float getCatmullRomSplinePoint(float a, float b, float c, float d, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t); float t);
// Since round(double) doesn't exist in some <cmath> implementations // Since round(double) doesn't exist in some <cmath> implementations
// we'll define our own // we'll define our own
// Rounds to the nearest integer, with halves going away from zero.
// Negative inputs use ceil(val - 0.5); non-negative inputs must use
// floor(val + 0.5) -- ceil(val + 0.5) would turn 0.0 into 1.0 and 1.2 into 2.0.
inline double round(double val) { return (val < 0.0 ? std::ceil(val - 0.5) : std::floor(val + 0.5)); }
inline double powD(float a, float b) { return std::exp(b * std::log(a)); } inline double powD(float a, float b) { return std::exp(b * std::log(a)); }
inline double invSqrtD(double val) { return 1.0 / std::sqrt(val); } inline double invSqrtD(double val) { return 1.0 / std::sqrt(val); }
inline float invSqrtF(float val) { return float(1.0 / std::sqrt(val)); } inline float invSqrtF(float val) { return float(1.0 / std::sqrt(val)); }
int floorPowerOfTwo(int x); int floorPowerOfTwo(int x);
int ceilingPowerOfTwo(int x); int ceilingPowerOfTwo(int x);
// Counts the set bits in the value representation of an integral (non-enum) `x`.
//
// The value is first converted to unsigned long long and masked down to the
// width of U. Without the mask a negative value of a type narrower than
// 64 bits is sign-extended, so e.g. PopCount(-1) would report 64 instead of
// the width of int.
// NOTE(review): assumes unsigned long long is 64 bits wide and 8-bit bytes.
template<typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x) {
  const unsigned long long widthMask =
      sizeof(U) >= sizeof(unsigned long long) ? ~0ull : (~0ull >> (64 - sizeof(U) * 8));
  const unsigned long long bits = static_cast<unsigned long long>(x) & widthMask;
#if __GNUC__ >= 4
  return __builtin_popcountll(bits);
#else
  // Portable SWAR population count carried out on the unsigned 64-bit copy.
  const unsigned long long m1 = 0x5555555555555555ull;  // binary: 0101...
  const unsigned long long m2 = 0x3333333333333333ull;  // binary: 00110011..
  const unsigned long long m4 = 0x0f0f0f0f0f0f0f0full;  // binary: 4 zeros, 4 ones ...
  const unsigned long long h01 = 0x0101010101010101ull; // the sum of 256 to the power of 0,1,2,3...
  unsigned long long v = bits;
  v -= (v >> 1) & m1;             // put count of each 2 bits into those 2 bits
  v = (v & m2) + ((v >> 2) & m2); // put count of each 4 bits into those 4 bits
  v = (v + (v >> 4)) & m4;        // put count of each 8 bits into those 8 bits
  return static_cast<int>((v * h01) >> 56); // top byte holds the sum of all byte counts
#endif
}
template <typename E> template<typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) {
{ return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
} }
bool close_enough(const CVector3f &a, const CVector3f &b, float epsilon = 0.000099999997f); bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon = 0.000099999997f);
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f); bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f);
inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752) inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752) {
{ return std::fabs(a - b) < epsilon;
return std::fabs(a - b) < epsilon;
} }
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) {
{ return std::fabs(a - b) < epsilon;
return std::fabs(a - b) < epsilon;
} }
} }

View File

@ -1,22 +0,0 @@
#pragma once
namespace zeus
{
typedef union {
float v[4];
#if __SSE__
__m128 mVec128;
#endif
} TVectorUnion;
typedef union {
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
} TDblVectorUnion;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
#pragma once
// Marker checked by simd_sse.hpp / simd_avx.hpp so they are only reachable
// through this header.
#define _ZEUS_SIMD_INCLUDED
namespace zeus::_simd { using namespace std; }
#include "parallelism_v2_simd.hpp"

// MSVC exposes SSE availability through _M_IX86_FP / _M_X64 rather than
// __SSE__; define it only when the compiler has not already done so.
#if !defined(__SSE__) && (_M_IX86_FP >= 1 || _M_X64)
#define __SSE__ 1
#endif

// Pick the widest backend the build targets. Each backend header provides
// zeus::_simd::simd_abi::zeus_native<T>.
#if __AVX__
#include "simd_avx.hpp"
#elif __SSE__
#include "simd_sse.hpp"
#else
// Backend-less fallback. This must live in zeus::_simd::simd_abi (not in a
// global ::simd_abi) so that `fixed_size` resolves and so that the
// `_simd::simd_abi::zeus_native<T>` lookup below finds it.
// NOTE(review): assumes parallelism_v2_simd.hpp declares fixed_size<N> in
// zeus::_simd::simd_abi, matching how the SSE/AVX headers use that namespace.
namespace zeus::_simd::simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = fixed_size<4>; };
template<> struct zeus_native<double> { using type = fixed_size<4>; };
} // namespace zeus::_simd::simd_abi
#endif

namespace zeus {
// simd<T>: the simd vector type using the ABI selected for T above.
template<typename T> using simd = _simd::simd<T,
typename _simd::simd_abi::zeus_native<T>::type>;
// simd_values<T>: the simd_data buffer type associated with simd<T>.
template<typename T>
using simd_values = _simd::simd_data<simd<T>>;
using simd_floats = simd_values<float>;
using simd_doubles = simd_values<double>;
}

View File

@ -0,0 +1,188 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_avx.hpp must not be included directly. Include simd.hpp instead.
#endif
#include "simd_sse.hpp"
#include <immintrin.h>
namespace zeus::_simd {
// AVX storage: four doubles kept in a single __m256d register.
template<>
class __simd_storage<double, m256d_abi> {
public:
  using storage_type = __m256d;
  storage_type __storage_;

  // Lane read: spill the register to an aligned scratch array and index it.
  double __get(size_t __index) const noexcept {
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), __storage_);
    return lanes[__index];
  }

  // Lane write: spill, patch the requested element, reload the register.
  void __set(size_t __index, double __val) noexcept {
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), __storage_);
    lanes[__index] = __val;
    __storage_ = _mm256_load_pd(lanes.data());
  }

  // _mm256_set_pd lists its arguments from the highest lane down, so the
  // values are passed in reverse to land `a` in lane 0.
  void __set4(double a, double b, double c, double d) noexcept {
    __storage_ = _mm256_set_pd(d, c, b, a);
  }

  // Copies one value into all four lanes.
  void __broadcast(double __val) noexcept {
    __storage_ = _mm256_set1_pd(__val);
  }

  // Dot product over lanes 0..1.
  double __dot2(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const storage_type products = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1];
  }

  // Dot product over lanes 0..2.
  double __dot3(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const storage_type products = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2];
  }

  // Dot product over all four lanes.
  double __dot4(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const storage_type products = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
  }

  // Aligned load from / store to the matching simd_data buffer.
  void __copy_from(const simd_data<simd<double, m256d_abi>>& __buffer) noexcept {
    __storage_ = _mm256_load_pd(__buffer.data());
  }

  void __copy_to(simd_data<simd<double, m256d_abi>>& __buffer) const noexcept {
    _mm256_store_pd(__buffer.data(), __storage_);
  }

  __simd_storage() = default;

  // Widens four floats held in an SSE register to four doubles.
  explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
    __storage_ = _mm256_cvtps_pd(other.__storage_);
  }

  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  const storage_type& __native() const { return __storage_; }
};
// AVX mask storage: each 64-bit lane is either all-ones (true) or zero (false).
template<>
class __simd_mask_storage<double, m256d_abi> : public __simd_storage<double, m256d_abi> {
public:
  // A lane counts as set when any of its bits is non-zero.
  bool __get(size_t __index) const noexcept {
    alignas(32) uint64_t lane_bits[4];
    _mm256_store_pd(reinterpret_cast<double*>(lane_bits), __storage_);
    return lane_bits[__index] != 0;
  }

  // Spill the mask, overwrite one lane with all-ones or zero, reload.
  void __set(size_t __index, bool __val) noexcept {
    alignas(32) uint64_t lane_bits[4];
    _mm256_store_pd(reinterpret_cast<double*>(lane_bits), __storage_);
    if (__val)
      lane_bits[__index] = UINT64_MAX;
    else
      lane_bits[__index] = 0;
    __storage_ = _mm256_load_pd(reinterpret_cast<double*>(lane_bits));
  }
};
// Unary minus: XOR with -0.0 flips only the sign bit of every lane.
template <>
inline simd<double, m256d_abi> simd<double, m256d_abi>::operator-() const {
  const __m256d sign_bits = _mm256_set1_pd(-0.0);
  return _mm256_xor_pd(__s_.__storage_, sign_bits);
}
// Lane-wise binary arithmetic on four doubles; each maps to one AVX instruction.
inline simd<double, m256d_abi>
operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> sum;
  sum.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
  return sum;
}

inline simd<double, m256d_abi>
operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> difference;
  difference.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
  return difference;
}

inline simd<double, m256d_abi>
operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> product;
  product.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
  return product;
}

inline simd<double, m256d_abi>
operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> quotient;
  quotient.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
  return quotient;
}
// Lane-wise compound assignment on four doubles; `a` is updated in place and returned.
inline simd<double, m256d_abi>&
operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  const __m256d updated = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}

inline simd<double, m256d_abi>&
operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  const __m256d updated = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}

inline simd<double, m256d_abi>&
operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  const __m256d updated = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}

inline simd<double, m256d_abi>&
operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  const __m256d updated = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}
// Lane-wise equality; _CMP_EQ_OQ is the ordered, quiet predicate, so a lane
// holding NaN on either side compares false.
inline simd<double, m256d_abi>::mask_type
operator==(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_EQ_OQ);
  return mask;
}
// Lane-wise inequality.
//
// Uses the unordered predicate _CMP_NEQ_UQ so that a lane holding NaN on
// either side compares "not equal", matching scalar IEEE `!=` and the SSE
// backend (_mm_cmpneq_ps/_mm_cmpneq_pd are unordered). The ordered
// _CMP_NEQ_OQ predicate would report false for NaN lanes, making both
// `a == b` and `a != b` false for the same lane.
inline simd<double, m256d_abi>::mask_type
operator!=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type ret;
  ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_NEQ_UQ);
  return ret;
}
// Lane-wise ordering comparisons. All use ordered, quiet predicates, so a lane
// holding NaN on either side compares false.
inline simd<double, m256d_abi>::mask_type
operator>=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GE_OQ);
  return mask;
}

inline simd<double, m256d_abi>::mask_type
operator<=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LE_OQ);
  return mask;
}

inline simd<double, m256d_abi>::mask_type
operator>(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GT_OQ);
  return mask;
}

inline simd<double, m256d_abi>::mask_type
operator<(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LT_OQ);
  return mask;
}
// Narrowing conversion: four doubles in an AVX register become four floats in
// an SSE register. Defined here because it needs the complete m256d storage type.
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m256d_abi>& other) {
  const __m128 narrowed = _mm256_cvtpd_ps(other.__storage_);
  __storage_ = narrowed;
}
namespace simd_abi {
// With AVX available, double vectors use the single-register __m256d ABI.
template<>
struct zeus_native<double> {
  using type = m256d_abi;
};
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -0,0 +1,455 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_sse.hpp must not be included directly. Include simd.hpp instead.
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#if __SSE4_1__
#include <smmintrin.h>
#endif
namespace zeus::_simd {
// __m128 ABI
using m128_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 1), 4>;
// __m128d ABI
using m128d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 2), 4>;
#ifdef __AVX__
// __m256d ABI
using m256d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 3), 4>;
#endif
template <>
class __simd_storage<double, m128d_abi>;
#ifdef __AVX__
template <>
class __simd_storage<double, m256d_abi>;
#endif
// SSE storage: four floats kept in a single __m128 register.
template <>
class __simd_storage<float, m128_abi> {
public:
  using storage_type = __m128;
  storage_type __storage_;

  // Lane read: spill the register to an aligned scratch array and index it.
  float __get(size_t __index) const noexcept {
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), __storage_);
    return lanes[__index];
  }

  // Lane write: spill, patch the requested element, reload the register.
  void __set(size_t __index, float __val) noexcept {
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), __storage_);
    lanes[__index] = __val;
    __storage_ = _mm_load_ps(lanes.data());
  }

  // _mm_set_ps lists its arguments from the highest lane down, so the values
  // are passed in reverse to land `a` in lane 0.
  void __set4(float a, float b, float c, float d) noexcept {
    __storage_ = _mm_set_ps(d, c, b, a);
  }

  // Copies one value into all four lanes.
  void __broadcast(float __val) noexcept {
    __storage_ = _mm_set1_ps(__val);
  }

  // Dot product over lanes 0..1. With SSE4.1 the dpps mask 0x3F multiplies
  // lanes 0-1 and writes the sum to every output lane.
  float __dot2(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    float dot;
    _mm_store_ss(&dot, _mm_dp_ps(__storage_, other.__storage_, 0x3F));
    return dot;
#else
    const storage_type products = _mm_mul_ps(__storage_, other.__storage_);
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), products);
    return lanes[0] + lanes[1];
#endif
  }

  // Dot product over lanes 0..2 (dpps mask 0x7F).
  float __dot3(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    float dot;
    _mm_store_ss(&dot, _mm_dp_ps(__storage_, other.__storage_, 0x7F));
    return dot;
#else
    const storage_type products = _mm_mul_ps(__storage_, other.__storage_);
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2];
#endif
  }

  // Dot product over all four lanes (dpps mask 0xFF).
  float __dot4(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    float dot;
    _mm_store_ss(&dot, _mm_dp_ps(__storage_, other.__storage_, 0xFF));
    return dot;
#else
    const storage_type products = _mm_mul_ps(__storage_, other.__storage_);
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
#endif
  }

  // Returns a copy whose lanes 0..3 are taken from source lanes x, y, z, w.
  template<int x, int y, int z, int w>
  __simd_storage __shuffle() const noexcept {
    __simd_storage shuffled;
    shuffled.__storage_ = _mm_shuffle_ps(__storage_, __storage_, _MM_SHUFFLE(w, z, y, x));
    return shuffled;
  }

  // Aligned load from / store to the matching simd_data buffer.
  void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
    __storage_ = _mm_load_ps(__buffer.data());
  }

  void __copy_to(simd_data<simd<float, m128_abi>>& __buffer) const noexcept {
    _mm_store_ps(__buffer.data(), __storage_);
  }

  __simd_storage() = default;

  // Narrowing conversions from the double storages; defined out of line once
  // those types are complete.
  explicit __simd_storage(const __simd_storage<double, m128d_abi>& other);
#ifdef __AVX__
  explicit __simd_storage(const __simd_storage<double, m256d_abi>& other);
#endif

  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  const storage_type& __native() const { return __storage_; }
};
// SSE mask storage: each 32-bit lane is either all-ones (true) or zero (false).
template <>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi>
{
public:
  // A lane counts as set when any of its bits is non-zero.
  bool __get(size_t __index) const noexcept {
    alignas(16) uint32_t lane_bits[4];
    _mm_store_ps(reinterpret_cast<float*>(lane_bits), __storage_);
    return lane_bits[__index] != 0;
  }

  // Spill the mask, overwrite one lane with all-ones or zero, reload.
  void __set(size_t __index, bool __val) noexcept {
    alignas(16) uint32_t lane_bits[4];
    _mm_store_ps(reinterpret_cast<float*>(lane_bits), __storage_);
    if (__val)
      lane_bits[__index] = UINT32_MAX;
    else
      lane_bits[__index] = 0;
    __storage_ = _mm_load_ps(reinterpret_cast<float*>(lane_bits));
  }
};
// Unary minus: XOR with -0.f flips only the sign bit of every lane.
template <>
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
  const __m128 sign_bits = _mm_set1_ps(-0.f);
  return _mm_xor_ps(__s_.__storage_, sign_bits);
}
// Lane-wise binary arithmetic on four floats; each maps to one SSE instruction.
inline simd<float, m128_abi>
operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> sum;
  sum.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
  return sum;
}

inline simd<float, m128_abi>
operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> difference;
  difference.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
  return difference;
}

inline simd<float, m128_abi>
operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> product;
  product.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
  return product;
}

inline simd<float, m128_abi>
operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> quotient;
  quotient.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
  return quotient;
}
// Lane-wise compound assignment on four floats; `a` is updated in place and returned.
inline simd<float, m128_abi>&
operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  const __m128 updated = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}

inline simd<float, m128_abi>&
operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  const __m128 updated = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}

inline simd<float, m128_abi>&
operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  const __m128 updated = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}

inline simd<float, m128_abi>&
operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  const __m128 updated = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
  a.__s_.__storage_ = updated;
  return a;
}
// Lane-wise comparisons on four floats. Each result lane is all-ones when the
// relation holds for that lane and zero otherwise.
inline simd<float, m128_abi>::mask_type
operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpeq_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

inline simd<float, m128_abi>::mask_type
operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpneq_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

inline simd<float, m128_abi>::mask_type
operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpge_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

inline simd<float, m128_abi>::mask_type
operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmple_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

inline simd<float, m128_abi>::mask_type
operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpgt_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

inline simd<float, m128_abi>::mask_type
operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmplt_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}
// SSE2 storage: four doubles split across two __m128d registers.
// Register 0 holds lanes 0-1, register 1 holds lanes 2-3.
template <>
class __simd_storage<double, m128d_abi> {
public:
  using storage_type = std::array<__m128d, 2>;
  storage_type __storage_;

  // Lane read: spill the register that owns the lane, then pick its half.
  double __get(size_t __index) const noexcept {
    alignas(16) std::array<double, 2> pair;
    _mm_store_pd(pair.data(), __storage_[__index / 2]);
    return pair[__index % 2];
  }

  // Lane write: spill the owning register, patch one half, reload it.
  void __set(size_t __index, double __val) noexcept {
    alignas(16) std::array<double, 2> pair;
    _mm_store_pd(pair.data(), __storage_[__index / 2]);
    pair[__index % 2] = __val;
    __storage_[__index / 2] = _mm_load_pd(pair.data());
  }

  // _mm_set_pd takes the high element first, hence (b, a) and (d, c).
  void __set4(double a, double b, double c, double d) noexcept {
    __storage_[0] = _mm_set_pd(b, a);
    __storage_[1] = _mm_set_pd(d, c);
  }

  // Copies one value into all four lanes.
  void __broadcast(double __val) noexcept {
    for (int half = 0; half < 2; ++half)
      __storage_[half] = _mm_set1_pd(__val);
  }

  // Dot product over lanes 0..1 (only the first register takes part).
  double __dot2(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    double dot;
    _mm_store_sd(&dot, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    return dot;
#else
    alignas(16) std::array<double, 2> low;
    _mm_store_pd(low.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    return low[0] + low[1];
#endif
  }

  // Dot product over lanes 0..2: full first register plus the low half of the second.
  double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    double low_dot;
    _mm_store_sd(&low_dot, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    alignas(16) std::array<double, 2> high;
    _mm_store_pd(high.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return low_dot + high[0];
#else
    alignas(16) std::array<double, 2> low;
    _mm_store_pd(low.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    alignas(16) std::array<double, 2> high;
    _mm_store_pd(high.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return low[0] + low[1] + high[0];
#endif
  }

  // Dot product over all four lanes.
  double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    double low_dot;
    _mm_store_sd(&low_dot, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    double high_dot;
    _mm_store_sd(&high_dot, _mm_dp_pd(__storage_[1], other.__storage_[1], 0x3F));
    return low_dot + high_dot;
#else
    alignas(16) std::array<double, 2> low;
    _mm_store_pd(low.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    alignas(16) std::array<double, 2> high;
    _mm_store_pd(high.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return low[0] + low[1] + high[0] + high[1];
#endif
  }

  // Aligned load from / store to the matching simd_data buffer, two doubles at a time.
  void __copy_from(const simd_data<simd<double, m128d_abi>>& __buffer) noexcept {
    __storage_[0] = _mm_load_pd(__buffer.data());
    __storage_[1] = _mm_load_pd(__buffer.data() + 2);
  }

  void __copy_to(simd_data<simd<double, m128d_abi>>& __buffer) const noexcept {
    _mm_store_pd(__buffer.data(), __storage_[0]);
    _mm_store_pd(__buffer.data() + 2, __storage_[1]);
  }

  __simd_storage() = default;

  // Widens four floats: _mm_cvtps_pd converts the low two lanes, and
  // _mm_movehl_ps brings the high two lanes down so they can be converted too.
  explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
    const __m128 upper_pair = _mm_movehl_ps(other.__storage_, other.__storage_);
    __storage_[0] = _mm_cvtps_pd(other.__storage_);
    __storage_[1] = _mm_cvtps_pd(upper_pair);
  }

  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  const storage_type& __native() const { return __storage_; }
};
// SSE2 mask storage for four double lanes split across two __m128d registers.
// Each 64-bit lane is either all-ones (true) or zero (false).
template <>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi>
{
public:
  // Returns whether lane `__index` (0..3) is set.
  // Register `__index / 2` owns the lane and `__index % 2` selects its half of
  // the two-element scratch array; indexing the scratch array with the raw
  // lane index would read past its end for lanes 2 and 3.
  bool __get(size_t __index) const noexcept {
    alignas(16) uint64_t sse_data[2];
    _mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
    return sse_data[__index % 2] != 0;
  }
  // Sets or clears lane `__index` (0..3): spill the owning register, overwrite
  // the selected half with all-ones or zero, then reload it.
  void __set(size_t __index, bool __val) noexcept {
    alignas(16) uint64_t sse_data[2];
    _mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
    sse_data[__index % 2] = __val ? UINT64_MAX : 0;
    __storage_[__index / 2] = _mm_load_pd(reinterpret_cast<double*>(sse_data));
  }
};
// Unary minus: XOR with -0.0 flips only the sign bit of each lane in both registers.
template <>
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
  simd<double, m128d_abi> negated;
  for (int half = 0; half < 2; ++half)
    negated.__s_.__storage_[half] = _mm_xor_pd(__s_.__storage_[half], _mm_set1_pd(-0.0));
  return negated;
}
// Lane-wise binary arithmetic on four doubles, applied to each of the two
// __m128d registers in turn.
inline simd<double, m128d_abi>
operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi> sum;
  for (int half = 0; half < 2; ++half)
    sum.__s_.__storage_[half] = _mm_add_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return sum;
}

inline simd<double, m128d_abi>
operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi> difference;
  for (int half = 0; half < 2; ++half)
    difference.__s_.__storage_[half] = _mm_sub_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return difference;
}

inline simd<double, m128d_abi>
operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi> product;
  for (int half = 0; half < 2; ++half)
    product.__s_.__storage_[half] = _mm_mul_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return product;
}

inline simd<double, m128d_abi>
operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi> quotient;
  for (int half = 0; half < 2; ++half)
    quotient.__s_.__storage_[half] = _mm_div_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return quotient;
}
// Lane-wise compound assignment on four doubles; both registers of `a` are
// updated in place and `a` is returned.
inline simd<double, m128d_abi>&
operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  for (int half = 0; half < 2; ++half)
    a.__s_.__storage_[half] = _mm_add_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return a;
}

inline simd<double, m128d_abi>&
operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  for (int half = 0; half < 2; ++half)
    a.__s_.__storage_[half] = _mm_sub_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return a;
}

inline simd<double, m128d_abi>&
operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  for (int half = 0; half < 2; ++half)
    a.__s_.__storage_[half] = _mm_mul_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return a;
}

inline simd<double, m128d_abi>&
operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  for (int half = 0; half < 2; ++half)
    a.__s_.__storage_[half] = _mm_div_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return a;
}
// Lane-wise comparisons on four doubles, evaluated per __m128d register. Each
// result lane is all-ones when the relation holds for that lane and zero otherwise.
inline simd<double, m128d_abi>::mask_type
operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi>::mask_type mask;
  for (int half = 0; half < 2; ++half)
    mask.__s_.__storage_[half] = _mm_cmpeq_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return mask;
}

inline simd<double, m128d_abi>::mask_type
operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi>::mask_type mask;
  for (int half = 0; half < 2; ++half)
    mask.__s_.__storage_[half] = _mm_cmpneq_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return mask;
}

inline simd<double, m128d_abi>::mask_type
operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi>::mask_type mask;
  for (int half = 0; half < 2; ++half)
    mask.__s_.__storage_[half] = _mm_cmpge_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return mask;
}

inline simd<double, m128d_abi>::mask_type
operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi>::mask_type mask;
  for (int half = 0; half < 2; ++half)
    mask.__s_.__storage_[half] = _mm_cmple_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return mask;
}

inline simd<double, m128d_abi>::mask_type
operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi>::mask_type mask;
  for (int half = 0; half < 2; ++half)
    mask.__s_.__storage_[half] = _mm_cmpgt_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return mask;
}

inline simd<double, m128d_abi>::mask_type
operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  simd<double, m128d_abi>::mask_type mask;
  for (int half = 0; half < 2; ++half)
    mask.__s_.__storage_[half] = _mm_cmplt_pd(a.__s_.__storage_[half], b.__s_.__storage_[half]);
  return mask;
}
// Narrowing conversion from the two-register double storage. _mm_cvtpd_ps
// leaves two floats in the low half of its result; _mm_movelh_ps then joins
// the two low halves so lanes come out in the order 0, 1, 2, 3.
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m128d_abi>& other) {
  const __m128 low_pair = _mm_cvtpd_ps(other.__storage_[0]);
  const __m128 high_pair = _mm_cvtpd_ps(other.__storage_[1]);
  __storage_ = _mm_movelh_ps(low_pair, high_pair);
}
namespace simd_abi {
// Trait mapping an element type to the ABI tag zeus uses for it by default.
// The primary template is intentionally empty: element types without a
// specialization have no nested `type`.
template<typename T> struct zeus_native {};
// float vectors use the 128-bit SSE float ABI.
template<> struct zeus_native<float> { using type = m128_abi; };
// double vectors use the SSE2 double ABI only when AVX is not enabled.
// NOTE(review): presumably the AVX header provides the zeus_native<double>
// specialization when __AVX__ is defined — confirm against simd_avx.hpp.
#ifndef __AVX__
template<> struct zeus_native<double> { using type = m128d_abi; };
#endif
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -3,7 +3,6 @@
namespace zeus namespace zeus
{ {
const CAABox CAABox::skInvertedBox = CAABox(); const CAABox CAABox::skInvertedBox = CAABox();
const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero); const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero);
} }

View File

@ -1,8 +1,7 @@
#include "zeus/CColor.hpp" #include "zeus/CColor.hpp"
#include "zeus/CVector4f.hpp" #include "zeus/CVector4f.hpp"
namespace zeus namespace zeus {
{
const CColor CColor::skRed(Comp32(0xFF0000FFul)); const CColor CColor::skRed(Comp32(0xFF0000FFul));
const CColor CColor::skBlack(Comp32(0x000000FFul)); const CColor CColor::skBlack(Comp32(0x000000FFul));
const CColor CColor::skBlue(Comp32(0x0000FFFFul)); const CColor CColor::skBlue(Comp32(0x0000FFFFul));
@ -14,133 +13,106 @@ const CColor CColor::skYellow(Comp32(0xFFFF00FFul));
const CColor CColor::skWhite(Comp32(0xFFFFFFFFul)); const CColor CColor::skWhite(Comp32(0xFFFFFFFFul));
const CColor CColor::skClear(Comp32(0x00000000ul)); const CColor CColor::skClear(Comp32(0x00000000ul));
float hueToRgb(float p, float q, float t) float hueToRgb(float p, float q, float t) {
{ if (t < 0.0f)
if (t < 0.0f) t += 1.0f;
t += 1.0f; if (t > 1.0f)
if (t > 1.0f) t -= 1.0f;
t -= 1.0f; if (t < 1.f / 6.f)
if (t < 1.f / 6.f) return p + (q - p) * 6.f * t;
return p + (q - p) * 6.f * t; if (t < 1.f / 2.f)
if (t < 1.f / 2.f) return q;
return q; if (t < 2.f / 3.f)
if (t < 2.f / 3.f) return p + (q - p) * (2.f / 3.f - t) * 6.f;
return p + (q - p) * (2.f / 3.f - t) * 6.f; return p;
return p;
} }
CColor::CColor(const CVector4f& other) void CColor::fromHSV(float h, float s, float v, float _a) {
{ int i = int(h * 6.f);
r = other.x; float f = h * 6.f - i;
g = other.y; float p = v * (1.f - s);
b = other.z; float q = v * (1.f - f * s);
a = other.w; float t = v * (1.f - (1.f - f) * s);
simd_floats fo;
switch (i % 6) {
case 0:
fo[0] = v, fo[1] = t, fo[2] = p;
break;
case 1:
fo[0] = q, fo[1] = v, fo[2] = p;
break;
case 2:
fo[0] = p, fo[1] = v, fo[2] = t;
break;
case 3:
fo[0] = p, fo[1] = q, fo[2] = v;
break;
case 4:
fo[0] = t, fo[1] = p, fo[2] = v;
break;
case 5:
fo[0] = v, fo[1] = p, fo[2] = q;
break;
default:
break;
}
fo[3] = _a;
mSimd.copy_from(fo);
} }
CColor& CColor::operator=(const CVector4f& other) void CColor::toHSV(float& h, float& s, float& v) const {
{ float min = std::min(r(), std::min(g(), b()));
r = other.x; float max = std::max(r(), std::max(g(), b()));
g = other.y; v = max;
b = other.z;
a = other.w;
return *this; float delta = max - min;
s = max == 0.f ? 0.f : delta / max;
if (max == min)
h = 0.f;
else {
if (max == r())
h = (g() - b()) / delta + (g() < b() ? 6.f : 0.f);
else if (max == g())
h = (b() - r()) / delta + 2.f;
else if (max == b())
h = (r() - g()) / delta + 4.f;
h /= 6.f;
}
} }
void CColor::fromHSV(float h, float s, float v, float _a) void CColor::fromHSL(float h, float s, float l, float _a) {
{ if (s == 0.0f) {
int i = int(h * 6); mSimd = simd<float>(l);
float f = h * 6 - i; } else {
float p = v * (1 - s); const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
float q = v * (1 - f * s); const float p = 2.f * l - q;
float t = v * (1 - (1 - f) * s); r() = hueToRgb(p, q, h + 1.f / 3.f);
float _r, _g, _b; g() = hueToRgb(p, q, h);
b() = hueToRgb(p, q, h - 1.f / 3.f);
switch (i % 6) }
{ a() = _a;
case 0:
_r = v, _g = t, _b = p;
break;
case 1:
_r = q, _g = v, _b = p;
break;
case 2:
_r = p, _g = v, _b = t;
break;
case 3:
_r = p, _g = q, _b = v;
break;
case 4:
_r = t, _g = p, _b = v;
break;
case 5:
_r = v, _g = p, _b = q;
break;
}
r = _r;
g = _g;
b = _b;
a = _a;
} }
void CColor::toHSV(float& h, float& s, float& v) const void CColor::toHSL(float& h, float& s, float& l) const {
{ const float min = std::min(r(), std::min(g(), b()));
float min = std::min(r, std::min(g, b)); const float max = std::max(r(), std::max(g(), b()));
float max = std::max(r, std::max(g, b)); const float d = max - min;
v = max;
float delta = max - min; if (max == min)
s = max == 0 ? 0 : delta / max; h = s = 0.f;
else {
s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
if (max == r())
h = (g() - b()) / d + (g() < b() ? 6.f : 0.f);
else if (max == g())
h = (b() - r()) / d + 2.f;
else if (max == b())
h = (r() - g()) / d + 4.f;
if (max == min) h /= 6.f;
h = 0; }
else
{
if (max == r)
h = (g - b) / delta + (g < b ? 6 : 0);
else if (max == g)
h = (b - r) / delta + 2;
else if (max == b)
h = (r - g) / delta + 4;
h /= 6;
}
}
void CColor::fromHSL(float h, float s, float l, float _a)
{
if (s == 0.0f)
r = g = b = l;
else
{
const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
const float p = 2 * l - q;
r = hueToRgb(p, q, h + 1.f / 3);
g = hueToRgb(p, q, h);
b = hueToRgb(p, q, h - 1.f / 3);
}
a = _a;
}
void CColor::toHSL(float& h, float& s, float& l)
{
const float min = std::min(r, std::min(g, b));
const float max = std::max(r, std::max(g, b));
const float d = max - min;
if (max == min)
h = s = 0;
else
{
s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
if (max == r)
h = (g - b) / d + (g < b ? 6.f : 0.f);
else if (max == g)
h = (b - r) / d + 2.f;
else if (max == b)
h = (r - g) / d + 4.f;
h /= 6;
}
} }
} }

View File

@ -10,29 +10,29 @@ CEulerAngles::CEulerAngles(const CQuaternion& quat)
float t0 = 0.f; float t0 = 0.f;
if (quatDot > 0.f) if (quatDot > 0.f)
t0 = 2.f / quatDot; t0 = 2.f / quatDot;
double t1 = 1.0 - (t0 * quat.x * quat.x + t0 * quat.z * quat.z); double t1 = 1.0 - (t0 * quat.x() * quat.x() + t0 * quat.z() * quat.z());
double t2 = t0 * quat.y * quat.x - t0 * quat.z * quat.w; double t2 = t0 * quat.y() * quat.x() - t0 * quat.z() * quat.w();
double t3 = t1 * t1 + t2 * t2; double t3 = t1 * t1 + t2 * t2;
double t4 = 0.0; double t4 = 0.0;
if (t3 > 0.0) if (t3 > 0.0)
t4 = std::sqrt(t3); t4 = std::sqrt(t3);
double t5 = t0 * quat.z * quat.y + t0 * quat.x * quat.w; double t5 = t0 * quat.z() * quat.y() + t0 * quat.x() * quat.w();
if (std::abs(t4) > 0.00001) if (std::abs(t4) > 0.00001)
{ {
x = -std::atan2(-t5, t4); x() = -std::atan2(-t5, t4);
y = -std::atan2(t0 * quat.z * quat.x - t0 * quat.y * quat.w, y() = -std::atan2(t0 * quat.z() * quat.x() - t0 * quat.y() * quat.w(),
1.0 - (t0 * quat.x * quat.x + t0 * quat.y * quat.y)); 1.0 - (t0 * quat.x() * quat.x() + t0 * quat.y() * quat.y()));
z = -std::atan2(t2, t1); z() = -std::atan2(t2, t1);
} }
else else
{ {
x = -std::atan2(-t5, t4); x() = -std::atan2(-t5, t4);
y = -std::atan2(-(t0 * quat.z * quat.x + t0 * quat.y * quat.w), y() = -std::atan2(-(t0 * quat.z() * quat.x() + t0 * quat.y() * quat.w()),
1.0 - (t0 * quat.y * quat.y + t0 * quat.z * quat.z)); 1.0 - (t0 * quat.y() * quat.y() + t0 * quat.z() * quat.z()));
z = 0.f; z() = 0.f;
} }
} }
@ -58,15 +58,15 @@ CEulerAngles::CEulerAngles(const CTransform& xf)
if (std::fabs(f1) >= 0.00001) if (std::fabs(f1) >= 0.00001)
{ {
x = -std::atan2(-xf.basis[1][2], f1); x() = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(xf.basis[0][2], xf.basis[2][2]); y() = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z = -std::atan2(xf.basis[1][0], xf.basis[1][1]); z() = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
} }
else else
{ {
x = -std::atan2(-xf.basis[1][2], f1); x() = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(-xf.basis[2][0], xf.basis[0][0]); y() = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z = 0.f; z() = 0.f;
} }
} }

View File

@ -1,138 +1,88 @@
#include "zeus/CFrustum.hpp" #include "zeus/CFrustum.hpp"
namespace zeus namespace zeus {
{
void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection) void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection) {
{ CMatrix4f mvp = projection * viewMtx;
CMatrix4f mvp = projection * viewMtx; CMatrix4f mvp_rm = mvp.transposed();
CMatrix4f mvp_rm = mvp.transposed();
#if __SSE__ /* Left */
planes[0].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[0].mSimd;
/* Left */ /* Right */
planes[0].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128); planes[1].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[0].mSimd;
/* Right */ /* Bottom */
planes[1].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128); planes[2].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[1].mSimd;
/* Bottom */ /* Top */
planes[2].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128); planes[3].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[1].mSimd;
/* Top */ /* Near */
planes[3].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128); planes[4].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[2].mSimd;
/* Near */ /* Far */
planes[4].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128); planes[5].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[2].mSimd;
/* Far */ planes[0].normalize();
planes[5].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128); planes[1].normalize();
planes[2].normalize();
planes[3].normalize();
planes[4].normalize();
planes[5].normalize();
#else valid = true;
/* Left */
planes[0].a = mvp.m[0][0] + mvp.m[3][0];
planes[0].b = mvp.m[0][1] + mvp.m[3][1];
planes[0].c = mvp.m[0][2] + mvp.m[3][2];
planes[0].d = mvp.m[0][3] + mvp.m[3][3];
/* Right */
planes[1].a = -mvp.m[0][0] + mvp.m[3][0];
planes[1].b = -mvp.m[0][1] + mvp.m[3][1];
planes[1].c = -mvp.m[0][2] + mvp.m[3][2];
planes[1].d = -mvp.m[0][3] + mvp.m[3][3];
/* Bottom */
planes[2].a = mvp.m[1][0] + mvp.m[3][0];
planes[2].b = mvp.m[1][1] + mvp.m[3][1];
planes[2].c = mvp.m[1][2] + mvp.m[3][2];
planes[2].d = mvp.m[1][3] + mvp.m[3][3];
/* Top */
planes[3].a = -mvp.m[1][0] + mvp.m[3][0];
planes[3].b = -mvp.m[1][1] + mvp.m[3][1];
planes[3].c = -mvp.m[1][2] + mvp.m[3][2];
planes[3].d = -mvp.m[1][3] + mvp.m[3][3];
/* Near */
planes[4].a = mvp.m[2][0] + mvp.m[3][0];
planes[4].b = mvp.m[2][1] + mvp.m[3][1];
planes[4].c = mvp.m[2][2] + mvp.m[3][2];
planes[4].d = mvp.m[2][3] + mvp.m[3][3];
/* Far */
planes[5].a = -mvp.m[2][0] + mvp.m[3][0];
planes[5].b = -mvp.m[2][1] + mvp.m[3][1];
planes[5].c = -mvp.m[2][2] + mvp.m[3][2];
planes[5].d = -mvp.m[2][3] + mvp.m[3][3];
#endif
planes[0].normalize();
planes[1].normalize();
planes[2].normalize();
planes[3].normalize();
planes[4].normalize();
planes[5].normalize();
valid = true;
} }
void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection) void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection) {
{ zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]);
zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]); zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed());
zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed()); zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin);
zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin);
updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix()); updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix());
} }
bool CFrustum::aabbFrustumTest(const CAABox& aabb) const bool CFrustum::aabbFrustumTest(const CAABox& aabb) const {
{ if (!valid)
if (!valid)
return true;
CVector3f center = aabb.center();
CVector3f extents = aabb.extents();
for (uint32_t i = 0; i < 6; ++i)
{
const CPlane& plane = planes[i];
float m = plane.vec.dot(center) + plane.d;
float n = extents.dot({std::fabs(plane.a), std::fabs(plane.b), std::fabs(plane.c)});
if (m + n < 0)
return false;
}
return true; return true;
CVector3f center = aabb.center();
CVector3f extents = aabb.extents();
for (uint32_t i = 0; i < 6; ++i) {
const CPlane& plane = planes[i];
float m = plane.normal().dot(center) + plane.d();
float n = extents.dot({std::fabs(plane.x()), std::fabs(plane.y()), std::fabs(plane.z())});
if (m + n < 0.f)
return false;
}
return true;
} }
bool CFrustum::sphereFrustumTest(const CSphere& sphere) const bool CFrustum::sphereFrustumTest(const CSphere& sphere) const {
{ if (!valid)
if (!valid)
return true;
for (uint32_t i = 0 ; i<6 ; ++i)
{
float dadot = planes[i].vec.dot(sphere.position);
if ((dadot + planes[i].d + sphere.radius) < 0)
return false;
}
return true; return true;
for (uint32_t i = 0; i < 6; ++i) {
float dadot = planes[i].normal().dot(sphere.position);
if ((dadot + planes[i].d() + sphere.radius) < 0.f)
return false;
}
return true;
} }
bool CFrustum::pointFrustumTest(const CVector3f& point) const bool CFrustum::pointFrustumTest(const CVector3f& point) const {
{ if (!valid)
if (!valid)
return true;
for (uint32_t i = 0 ; i<6 ; ++i)
{
float dadot = planes[i].vec.dot(point);
if ((dadot + planes[i].d) < 0)
return false;
}
return true; return true;
for (uint32_t i = 0; i < 6; ++i) {
float dadot = planes[i].normal().dot(point);
if ((dadot + planes[i].d()) < 0.f)
return false;
}
return true;
} }
} }

View File

@ -2,104 +2,113 @@
#include "zeus/CQuaternion.hpp" #include "zeus/CQuaternion.hpp"
#include "zeus/Global.hpp" #include "zeus/Global.hpp"
namespace zeus namespace zeus {
{
const CMatrix3f CMatrix3f::skIdentityMatrix3f = CMatrix3f(); const CMatrix3f CMatrix3f::skIdentityMatrix3f = CMatrix3f();
CMatrix3f::CMatrix3f(const CQuaternion& quat) CMatrix3f::CMatrix3f(const CQuaternion& quat) {
{ CQuaternion nq = quat.normalized();
CQuaternion nq = quat.normalized(); float x2 = nq.x() * nq.x();
float x2 = nq.x * nq.x; float y2 = nq.y() * nq.y();
float y2 = nq.y * nq.y; float z2 = nq.z() * nq.z();
float z2 = nq.z * nq.z;
m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2; m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2;
m[1][0] = 2.0 * nq.x * nq.y - 2.0 * nq.z * nq.w; m[1][0] = 2.0 * nq.x() * nq.y() - 2.0 * nq.z() * nq.w();
m[2][0] = 2.0 * nq.x * nq.z + 2.0 * nq.y * nq.w; m[2][0] = 2.0 * nq.x() * nq.z() + 2.0 * nq.y() * nq.w();
m[0][1] = 2.0 * nq.x * nq.y + 2.0 * nq.z * nq.w; m[0][1] = 2.0 * nq.x() * nq.y() + 2.0 * nq.z() * nq.w();
m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2; m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2;
m[2][1] = 2.0 * nq.y * nq.z - 2.0 * nq.x * nq.w; m[2][1] = 2.0 * nq.y() * nq.z() - 2.0 * nq.x() * nq.w();
m[0][2] = 2.0 * nq.x * nq.z - 2.0 * nq.y * nq.w; m[0][2] = 2.0 * nq.x() * nq.z() - 2.0 * nq.y() * nq.w();
m[1][2] = 2.0 * nq.y * nq.z + 2.0 * nq.x * nq.w; m[1][2] = 2.0 * nq.y() * nq.z() + 2.0 * nq.x() * nq.w();
m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2; m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2;
m[0][3] = 0.0f;
m[1][3] = 0.0f;
m[2][3] = 0.0f;
} }
void CMatrix3f::transpose() void CMatrix3f::transpose() {
{
#if __SSE__ #if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128); __m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128); __m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero); __m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128); __m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero); __m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
vec[0].mVec128 = _mm_movelh_ps(T0, T2); m[0].mSimd = _mm_movelh_ps(T0, T2);
vec[1].mVec128 = _mm_movehl_ps(T2, T0); m[1].mSimd = _mm_movehl_ps(T2, T0);
vec[2].mVec128 = _mm_movelh_ps(T1, T3); m[2].mSimd = _mm_movelh_ps(T1, T3);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
m[0].mSimd = T0.val[0];
m[1].mSimd = T0.val[1];
m[2].mSimd = T1.val[0];
#else #else
float tmp; float tmp;
tmp = m[0][1]; tmp = m[0][1];
m[0][1] = m[1][0]; m[0][1] = m[1][0];
m[1][0] = tmp; m[1][0] = tmp;
tmp = m[0][2]; tmp = m[0][2];
m[0][2] = m[2][0]; m[0][2] = m[2][0];
m[2][0] = tmp; m[2][0] = tmp;
tmp = m[1][2]; tmp = m[1][2];
m[1][2] = m[2][1]; m[1][2] = m[2][1];
m[2][1] = tmp; m[2][1] = tmp;
#endif #endif
} }
CMatrix3f CMatrix3f::transposed() const CMatrix3f CMatrix3f::transposed() const {
{
#if __SSE__ #if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128); __m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128); __m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero); __m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128); __m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero); __m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3)); return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
return CMatrix3f(T0.val[0], T0.val[1], T1.val[0]);
#else #else
CMatrix3f ret(*this); CMatrix3f ret(*this);
float tmp; float tmp;
tmp = ret.m[0][1]; tmp = ret.m[0][1];
ret.m[0][1] = ret.m[1][0]; ret.m[0][1] = ret.m[1][0];
ret.m[1][0] = tmp; ret.m[1][0] = tmp;
tmp = m[0][2]; tmp = m[0][2];
ret.m[0][2] = ret.m[2][0]; ret.m[0][2] = ret.m[2][0];
ret.m[2][0] = tmp; ret.m[2][0] = tmp;
tmp = m[1][2]; tmp = m[1][2];
ret.m[1][2] = ret.m[2][1]; ret.m[1][2] = ret.m[2][1];
ret.m[2][1] = tmp; ret.m[2][1] = tmp;
return ret; return ret;
#endif #endif
} }
CMatrix3f CMatrix3f::inverted() const CMatrix3f CMatrix3f::inverted() const {
{ float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] -
float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0];
m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0];
if (det == 0.0) if (det == 0.0)
return CMatrix3f(); return CMatrix3f();
det = 1.0f / det; det = 1.0f / det;
return CMatrix3f((m[1][1] * m[2][2] - m[1][2] * m[2][1]) * det, -(m[1][0] * m[2][2] - m[1][2] * m[2][0]) * det, return CMatrix3f((m[1][1] * m[2][2] - m[1][2] * m[2][1]) * det, -(m[1][0] * m[2][2] - m[1][2] * m[2][0]) * det,
(m[1][0] * m[2][1] - m[1][1] * m[2][0]) * det, -(m[0][1] * m[2][2] - m[0][2] * m[2][1]) * det, (m[1][0] * m[2][1] - m[1][1] * m[2][0]) * det, -(m[0][1] * m[2][2] - m[0][2] * m[2][1]) * det,
(m[0][0] * m[2][2] - m[0][2] * m[2][0]) * det, -(m[0][0] * m[2][1] - m[0][1] * m[2][0]) * det, (m[0][0] * m[2][2] - m[0][2] * m[2][0]) * det, -(m[0][0] * m[2][1] - m[0][1] * m[2][0]) * det,
(m[0][1] * m[1][2] - m[0][2] * m[1][1]) * det, -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * det, (m[0][1] * m[1][2] - m[0][2] * m[1][1]) * det, -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * det,
(m[0][0] * m[1][1] - m[0][1] * m[1][0]) * det); (m[0][0] * m[1][1] - m[0][1] * m[1][0]) * det);
} }
} }

View File

@ -9,14 +9,25 @@ CMatrix4f CMatrix4f::transposed() const
{ {
CMatrix4f ret; CMatrix4f ret;
#if __SSE__ #if __SSE__
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128); __m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128); __m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), m[3].mSimd.native());
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128); __m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128); __m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), m[3].mSimd.native());
ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2); ret.m[0].mSimd = _mm_movelh_ps(T0, T2);
ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0); ret.m[1].mSimd = _mm_movehl_ps(T2, T0);
ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3); ret.m[2].mSimd = _mm_movelh_ps(T1, T3);
ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1); ret.m[3].mSimd = _mm_movehl_ps(T3, T1);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
ret.m[0].mSimd = T0.val[0];
ret.m[1].mSimd = T0.val[1];
ret.m[2].mSimd = T1.val[0];
ret.m[3].mSimd = T1.val[1];
#else #else
ret.m[0][0] = m[0][0]; ret.m[0][0] = m[0][0];
ret.m[1][0] = m[0][1]; ret.m[1][0] = m[0][1];

View File

@ -1,139 +1,140 @@
#include "zeus/COBBox.hpp" #include "zeus/COBBox.hpp"
namespace zeus namespace zeus {
{
CAABox COBBox::calculateAABox(const CTransform& worldXf) const CAABox COBBox::calculateAABox(const CTransform& worldXf) const {
{ CAABox ret = CAABox::skInvertedBox;
CAABox ret = CAABox::skInvertedBox;
CTransform trans = worldXf * transform; CTransform trans = worldXf * transform;
static const CVector3f basis[8] = {{1.f, 1.f, 1.f}, {1.f, 1.f, -1.f}, {1.f, -1.f, 1.f}, {1.f, -1.f, -1.f}, static const CVector3f basis[8] = {{1.f, 1.f, 1.f},
{-1.f, -1.f, -1.f}, {-1.f, -1.f, 1.f}, {-1.f, 1.f, -1.f}, {-1.f, 1.f, 1.f}}; {1.f, 1.f, -1.f},
CVector3f p = extents * basis[0]; {1.f, -1.f, 1.f},
ret.accumulateBounds(trans * p); {1.f, -1.f, -1.f},
p = extents * basis[1]; {-1.f, -1.f, -1.f},
ret.accumulateBounds(trans * p); {-1.f, -1.f, 1.f},
p = extents * basis[2]; {-1.f, 1.f, -1.f},
ret.accumulateBounds(trans * p); {-1.f, 1.f, 1.f}};
p = extents * basis[3]; CVector3f p = extents * basis[0];
ret.accumulateBounds(trans * p); ret.accumulateBounds(trans * p);
p = extents * basis[4]; p = extents * basis[1];
ret.accumulateBounds(trans * p); ret.accumulateBounds(trans * p);
p = extents * basis[5]; p = extents * basis[2];
ret.accumulateBounds(trans * p); ret.accumulateBounds(trans * p);
p = extents * basis[6]; p = extents * basis[3];
ret.accumulateBounds(trans * p); ret.accumulateBounds(trans * p);
p = extents * basis[7]; p = extents * basis[4];
ret.accumulateBounds(trans * p); ret.accumulateBounds(trans * p);
p = extents * basis[5];
ret.accumulateBounds(trans * p);
p = extents * basis[6];
ret.accumulateBounds(trans * p);
p = extents * basis[7];
ret.accumulateBounds(trans * p);
return ret; return ret;
} }
bool COBBox::OBBIntersectsBox(const COBBox& other) const bool COBBox::OBBIntersectsBox(const COBBox& other) const {
{ CVector3f v = other.transform.origin - transform.origin;
CVector3f v = other.transform.origin - transform.origin; CVector3f T = CVector3f(v.dot(transform.basis[0]),
CVector3f T = CVector3f(v.dot(transform.basis[0]), v.dot(transform.basis[1]),
v.dot(transform.basis[1]), v.dot(transform.basis[2]));
v.dot(transform.basis[2]));
CMatrix3f R; CMatrix3f R;
float ra, rb, t; float ra, rb, t;
for (int i = 0; i < 3; ++i)
for (int k = 0; k < 3; ++k)
R[i][k] = transform.basis[i].dot(other.transform.basis[k]);
for (int i = 0; i < 3; ++i)
{
ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) +
(other.extents[2] * std::fabs(R[i][2]));
t = std::fabs(T[i]);
if (t > (ra + rb + FLT_EPSILON))
return false;
}
for (int i = 0; i < 3; ++i)
for (int k = 0; k < 3; ++k) for (int k = 0; k < 3; ++k)
{ R[i][k] = transform.basis[i].dot(other.transform.basis[k]);
ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k]));
rb = other.extents[k];
t = std::fabs(T[0] * R[0][k] + T[1] * R[1][k] + T[2] * R[2][k]); for (int i = 0; i < 3; ++i) {
ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) +
(other.extents[2] * std::fabs(R[i][2]));
t = std::fabs(T[i]);
if (t > (ra + rb + FLT_EPSILON))
return false;
}
/* A0 x B0 */
ra = (extents[1] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[1][0]));
rb = (other.extents[1] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][1]));
t = std::fabs((T[2] * R[1][0]) - (T[1] * R[2][0]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
}
for (int k = 0; k < 3; ++k) {
ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k]));
rb = other.extents[k];
t = std::fabs(T[0] * R[0][k] + T[1] * R[1][k] + T[2] * R[2][k]);
/* A0 x B1 */
ra = (extents[1] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[1][1]));
rb = (other.extents[0] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][0]));
t = std::fabs((T[2] * R[1][1]) - (T[1] * R[2][1]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
}
/* A0 x B2 */ /* A0 x B0 */
ra = (extents[1] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[1][2])); ra = (extents[1] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[1][0]));
rb = (other.extents[0] * std::fabs(R[0][1])) + (other.extents[1] * std::fabs(R[0][0])); rb = (other.extents[1] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][1]));
t = std::fabs((T[2] * R[1][2]) - (T[1] * R[2][2])); t = std::fabs((T[2] * R[1][0]) - (T[1] * R[2][0]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
/* A1 x B0 */ /* A0 x B1 */
ra = (extents[0] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[0][0])); ra = (extents[1] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[1][1]));
rb = (other.extents[1] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][1])); rb = (other.extents[0] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][0]));
t = std::fabs((T[0] * R[2][0]) - (T[2] * R[0][0])); t = std::fabs((T[2] * R[1][1]) - (T[1] * R[2][1]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
/* A1 x B1 */ /* A0 x B2 */
ra = (extents[0] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[0][1])); ra = (extents[1] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[1][2]));
rb = (other.extents[0] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][0])); rb = (other.extents[0] * std::fabs(R[0][1])) + (other.extents[1] * std::fabs(R[0][0]));
t = std::fabs((T[0] * R[2][1]) - (T[2] * R[0][1])); t = std::fabs((T[2] * R[1][2]) - (T[1] * R[2][2]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
/* A1 x B2 */ /* A1 x B0 */
ra = (extents[0] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[0][2])); ra = (extents[0] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[0][0]));
rb = (other.extents[0] * std::fabs(R[1][1])) + (other.extents[1] * std::fabs(R[1][0])); rb = (other.extents[1] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][1]));
t = std::fabs((T[0] * R[2][2]) - (T[2] * R[0][2])); t = std::fabs((T[0] * R[2][0]) - (T[2] * R[0][0]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
/* A2 x B0 */ /* A1 x B1 */
ra = (extents[0] * std::fabs(R[1][0])) + (extents[1] * std::fabs(R[0][0])); ra = (extents[0] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[0][1]));
rb = (other.extents[1] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][1])); rb = (other.extents[0] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][0]));
t = std::fabs((T[1] * R[0][0]) - (T[0] * R[1][0])); t = std::fabs((T[0] * R[2][1]) - (T[2] * R[0][1]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
/* A2 x B1 */ /* A1 x B2 */
ra = (extents[0] * std::fabs(R[1][1])) + (extents[1] * std::fabs(R[0][1])); ra = (extents[0] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[0][2]));
rb = (other.extents[0] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][0])); rb = (other.extents[0] * std::fabs(R[1][1])) + (other.extents[1] * std::fabs(R[1][0]));
t = std::fabs((T[1] * R[0][1]) - (T[0] * R[1][1])); t = std::fabs((T[0] * R[2][2]) - (T[2] * R[0][2]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
/* A2 x B2 */ /* A2 x B0 */
ra = (extents[0] * std::fabs(R[1][2])) + (extents[1] * std::fabs(R[0][2])); ra = (extents[0] * std::fabs(R[1][0])) + (extents[1] * std::fabs(R[0][0]));
rb = (other.extents[0] * std::fabs(R[2][1])) + (other.extents[1] * std::fabs(R[2][0])); rb = (other.extents[1] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][1]));
t = std::fabs((T[1] * R[0][2]) - (T[0] * R[1][2])); t = std::fabs((T[1] * R[0][0]) - (T[0] * R[1][0]));
if (t > (ra + rb + FLT_EPSILON)) if (t > (ra + rb + FLT_EPSILON))
return false; return false;
return true; /* A2 x B1 */
ra = (extents[0] * std::fabs(R[1][1])) + (extents[1] * std::fabs(R[0][1]));
rb = (other.extents[0] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][0]));
t = std::fabs((T[1] * R[0][1]) - (T[0] * R[1][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A2 x B2 */
ra = (extents[0] * std::fabs(R[1][2])) + (extents[1] * std::fabs(R[0][2]));
rb = (other.extents[0] * std::fabs(R[2][1])) + (other.extents[1] * std::fabs(R[2][0]));
t = std::fabs((T[1] * R[0][2]) - (T[0] * R[1][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
return true;
} }
} }

View File

@ -1,18 +1,16 @@
#include "zeus/CPlane.hpp" #include "zeus/CPlane.hpp"
namespace zeus namespace zeus {
{
bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const {
{ zeus::CVector3f delta = to - from;
zeus::CVector3f delta = to - from; if (std::fabs(delta.normalized().dot(normal())) < 0.01f)
if (std::fabs(delta.normalized().dot(vec)) < 0.01f) return false;
return false; float tmp = -pointToPlaneDist(from) / delta.dot(normal());
float tmp = -pointToPlaneDist(from) / delta.dot(vec); if (tmp < -0.f || tmp > 1.0001f)
if (tmp < -0.f || tmp > 1.0001f) return false;
return false; point = delta * tmp + from;
point = delta * tmp + from; return true;
return true;
} }
} }

View File

@ -2,72 +2,67 @@
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
#include <cassert> #include <cassert>
namespace zeus namespace zeus {
{ void CProjection::_updateCachedMatrix() {
void CProjection::_updateCachedMatrix() assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective);
{ if (m_projType == EProjType::Orthographic) {
assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective); float tmp;
if (m_projType == EProjType::Orthographic)
{
float tmp;
tmp = 1.0f / (m_ortho.right - m_ortho.left); tmp = 1.0f / (m_ortho.right - m_ortho.left);
m_mtx.m[0][0] = 2.0f * tmp; m_mtx.m[0][0] = 2.0f * tmp;
m_mtx.m[1][0] = 0.0f; m_mtx.m[1][0] = 0.0f;
m_mtx.m[2][0] = 0.0f; m_mtx.m[2][0] = 0.0f;
m_mtx.m[3][0] = -(m_ortho.right + m_ortho.left) * tmp; m_mtx.m[3][0] = -(m_ortho.right + m_ortho.left) * tmp;
tmp = 1.0f / (m_ortho.top - m_ortho.bottom); tmp = 1.0f / (m_ortho.top - m_ortho.bottom);
m_mtx.m[0][1] = 0.0f; m_mtx.m[0][1] = 0.0f;
m_mtx.m[1][1] = 2.0f * tmp; m_mtx.m[1][1] = 2.0f * tmp;
m_mtx.m[2][1] = 0.0f; m_mtx.m[2][1] = 0.0f;
m_mtx.m[3][1] = -(m_ortho.top + m_ortho.bottom) * tmp; m_mtx.m[3][1] = -(m_ortho.top + m_ortho.bottom) * tmp;
tmp = 1.0f / (m_ortho.zfar - m_ortho.znear); tmp = 1.0f / (m_ortho.zfar - m_ortho.znear);
m_mtx.m[0][2] = 0.0f; m_mtx.m[0][2] = 0.0f;
m_mtx.m[1][2] = 0.0f; m_mtx.m[1][2] = 0.0f;
m_mtx.m[2][2] = -tmp; m_mtx.m[2][2] = -tmp;
m_mtx.m[3][2] = -m_ortho.zfar * tmp; m_mtx.m[3][2] = -m_ortho.zfar * tmp;
m_mtx.m[0][3] = 0.0f; m_mtx.m[0][3] = 0.0f;
m_mtx.m[1][3] = 0.0f; m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = 0.0f; m_mtx.m[2][3] = 0.0f;
m_mtx.m[3][3] = 1.0f; m_mtx.m[3][3] = 1.0f;
} } else if (m_projType == EProjType::Perspective) {
else if (m_projType == EProjType::Perspective) float tfov = std::tan(m_persp.fov * 0.5f);
{ float top = m_persp.znear * tfov;
float tfov = std::tan(m_persp.fov * 0.5f); float bottom = -top;
float top = m_persp.znear * tfov; float right = m_persp.aspect * m_persp.znear * tfov;
float bottom = -top; float left = -right;
float right = m_persp.aspect * m_persp.znear * tfov;
float left = -right;
float rml = right - left; float rml = right - left;
float rpl = right + left; float rpl = right + left;
float tmb = top - bottom; float tmb = top - bottom;
float tpb = top + bottom; float tpb = top + bottom;
float fpn = m_persp.zfar + m_persp.znear; float fpn = m_persp.zfar + m_persp.znear;
float fmn = m_persp.zfar - m_persp.znear; float fmn = m_persp.zfar - m_persp.znear;
m_mtx.m[0][0] = 2.f * m_persp.znear / rml; m_mtx.m[0][0] = 2.f * m_persp.znear / rml;
m_mtx.m[1][0] = 0.0f; m_mtx.m[1][0] = 0.0f;
m_mtx.m[2][0] = rpl / rml; m_mtx.m[2][0] = rpl / rml;
m_mtx.m[3][0] = 0.0f; m_mtx.m[3][0] = 0.0f;
m_mtx.m[0][1] = 0.0f; m_mtx.m[0][1] = 0.0f;
m_mtx.m[1][1] = 2.f * m_persp.znear / tmb; m_mtx.m[1][1] = 2.f * m_persp.znear / tmb;
m_mtx.m[2][1] = tpb / tmb; m_mtx.m[2][1] = tpb / tmb;
m_mtx.m[3][1] = 0.0f; m_mtx.m[3][1] = 0.0f;
m_mtx.m[0][2] = 0.0f; m_mtx.m[0][2] = 0.0f;
m_mtx.m[1][2] = 0.0f; m_mtx.m[1][2] = 0.0f;
m_mtx.m[2][2] = -fpn / fmn; m_mtx.m[2][2] = -fpn / fmn;
m_mtx.m[3][2] = -2.f * m_persp.zfar * m_persp.znear / fmn; m_mtx.m[3][2] = -2.f * m_persp.zfar * m_persp.znear / fmn;
m_mtx.m[0][3] = 0.0f; m_mtx.m[0][3] = 0.0f;
m_mtx.m[1][3] = 0.0f; m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = -1.0f; m_mtx.m[2][3] = -1.0f;
m_mtx.m[3][3] = 0.0f; m_mtx.m[3][3] = 0.0f;
} }
} }
} }

View File

@ -1,399 +1,330 @@
#include "zeus/CQuaternion.hpp" #include "zeus/CQuaternion.hpp"
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{
const CQuaternion CQuaternion::skNoRotation; const CQuaternion CQuaternion::skNoRotation;
CQuaternion::CQuaternion(const CMatrix3f& mat) CQuaternion::CQuaternion(const CMatrix3f& mat) {
{ float trace = mat[0][0] + mat[1][1] + mat[2][2];
float trace = mat[0][0] + mat[1][1] + mat[2][2]; if (trace >= 0.f) {
if (trace >= 0.f) float st = std::sqrt(trace + 1.0f);
{ float s = 0.5f / st;
float st = std::sqrt(trace + 1.0f); w() = 0.5f * st;
float s = 0.5f / st; x() = (mat[1][2] - mat[2][1]) * s;
w = 0.5f * st; y() = (mat[2][0] - mat[0][2]) * s;
x = (mat[1][2] - mat[2][1]) * s; z() = (mat[0][1] - mat[1][0]) * s;
y = (mat[2][0] - mat[0][2]) * s; } else {
z = (mat[0][1] - mat[1][0]) * s; int idx = 0;
} if (mat[1][1] > mat[0][0]) {
else idx = 1;
{ if (mat[2][2] > mat[1][1])
int idx = 0; idx = 2;
if (mat[1][1] > mat[0][0]) } else if (mat[2][2] > mat[0][0]) {
{ idx = 2;
idx = 1;
if (mat[2][2] > mat[1][1])
idx = 2;
}
else if (mat[2][2] > mat[0][0])
{
idx = 2;
}
switch (idx)
{
case 0:
{
float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f);
float s = 0.5f / st;
w = (mat[1][2] - mat[2][1]) * s;
x = 0.5f * st;
y = (mat[1][0] + mat[0][1]) * s;
z = (mat[2][0] + mat[0][2]) * s;
break;
}
case 1:
{
float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f);
float s = 0.5f / st;
w = (mat[2][0] - mat[0][2]) * s;
x = (mat[1][0] + mat[0][1]) * s;
y = 0.5f * st;
z = (mat[2][1] + mat[1][2]) * s;
break;
}
case 2:
{
float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f);
float s = 0.5f / st;
w = (mat[0][1] - mat[1][0]) * s;
x = (mat[2][0] + mat[0][2]) * s;
y = (mat[2][1] + mat[1][2]) * s;
z = 0.5f * st;
break;
}
default:
w = 0.f;
x = 0.f;
y = 0.f;
z = 0.f;
break;
}
}
}
// Fills w, x, y and z from the three angles held in `vec`.
// Every component of `vec` is halved before its sine and cosine are taken;
// the quaternion components are sums/differences of triple products of those.
// NOTE(review): presumably `vec` carries Euler angles in radians -- confirm with callers.
void CQuaternion::fromVector3f(const CVector3f& vec)
{
    const float halfX = 0.5f * vec.x;
    const float halfY = 0.5f * vec.y;
    const float halfZ = 0.5f * vec.z;

    const float cX = std::cos(halfX);
    const float sX = std::sin(halfX);
    const float cY = std::cos(halfY);
    const float sY = std::sin(halfY);
    const float cZ = std::cos(halfZ);
    const float sZ = std::sin(halfZ);

    w = cZ * cY * cX + sZ * sY * sX;
    x = cZ * cY * sX - sZ * sY * cX;
    y = cZ * sY * cX + sZ * cY * sX;
    z = sZ * cY * cX - cZ * sY * sX;
}
// Copy assignment: takes all four components from `q`.
// Builds with __SSE__ copy the packed mVec128 member; all other builds copy
// w, x, y and z individually.
CQuaternion& CQuaternion::operator=(const CQuaternion& q)
{
#if !__SSE__
    w = q.w;
    x = q.x;
    y = q.y;
    z = q.z;
#else
    mVec128 = q.mVec128;
#endif
    return *this;
}
// Component-wise sum of *this and `q`.
CQuaternion CQuaternion::operator+(const CQuaternion& q) const
{
    const float sumW = w + q.w;
    const float sumX = x + q.x;
    const float sumY = y + q.y;
    const float sumZ = z + q.z;
    return CQuaternion(sumW, sumX, sumY, sumZ);
}
// Component-wise difference (*this) - q.
CQuaternion CQuaternion::operator-(const CQuaternion& q) const
{
    const float diffW = w - q.w;
    const float diffX = x - q.x;
    const float diffY = y - q.y;
    const float diffZ = z - q.z;
    return CQuaternion(diffW, diffX, diffY, diffZ);
}
// Hamilton product (*this) * q.
// Real part: w * q.w minus the dot product of the two (x, y, z) parts.
// Vector part: cross product of the (x, y, z) parts plus each of them scaled
// by the other operand's w.
CQuaternion CQuaternion::operator*(const CQuaternion& q) const
{
    const float real = w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z});
    const float imagX = y * q.z - z * q.y + w * q.x + x * q.w;
    const float imagY = z * q.x - x * q.z + w * q.y + y * q.w;
    const float imagZ = x * q.y - y * q.x + w * q.z + z * q.w;
    return CQuaternion(real, imagX, imagY, imagZ);
}
// Hamilton product (*this) * q for CNUQuaternion; same formula as
// CQuaternion::operator*(const CQuaternion&).
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
{
    const float real = w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z});
    const float imagX = y * q.z - z * q.y + w * q.x + x * q.w;
    const float imagY = z * q.x - x * q.z + w * q.y + y * q.w;
    const float imagZ = x * q.y - y * q.x + w * q.z + z * q.w;
    return CNUQuaternion(real, imagX, imagY, imagZ);
}
// "Division" by a quaternion: multiplies *this by a copy of `q` that has been
// passed through invert().
CQuaternion CQuaternion::operator/(const CQuaternion& q) const
{
    CQuaternion inverted(q);
    inverted.invert();

    const CQuaternion& self = *this;
    return self * inverted;
}
// Multiplies every component by `scale`.
CQuaternion CQuaternion::operator*(float scale) const
{
    const float scaledW = w * scale;
    const float scaledX = x * scale;
    const float scaledY = y * scale;
    const float scaledZ = z * scale;
    return CQuaternion(scaledW, scaledX, scaledY, scaledZ);
}
// Multiplies every component by `scale`.
CNUQuaternion CNUQuaternion::operator*(float scale) const
{
    const float scaledW = w * scale;
    const float scaledX = x * scale;
    const float scaledY = y * scale;
    const float scaledZ = z * scale;
    return CNUQuaternion(scaledW, scaledX, scaledY, scaledZ);
}
// Divides every component by `scale`. No check is made for scale == 0.
CQuaternion CQuaternion::operator/(float scale) const
{
    const float dividedW = w / scale;
    const float dividedX = x / scale;
    const float dividedY = y / scale;
    const float dividedZ = z / scale;
    return CQuaternion(dividedW, dividedX, dividedY, dividedZ);
}
// Unary minus: negates all four components (w included).
CQuaternion CQuaternion::operator-() const
{
    const float negW = -w;
    const float negX = -x;
    const float negY = -y;
    const float negZ = -z;
    return CQuaternion(negW, negX, negY, negZ);
}
// Adds `q` to this quaternion component by component and returns *this.
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q)
{
    // The four updates are independent of each other.
    x += q.x;
    y += q.y;
    z += q.z;
    w += q.w;

    return *this;
}
// Adds `q` to this quaternion component by component and returns *this.
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q)
{
    // The four updates are independent of each other.
    x += q.x;
    y += q.y;
    z += q.z;
    w += q.w;

    return *this;
}
// Subtracts `q` from this quaternion component by component and returns *this.
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q)
{
    // The four updates are independent of each other.
    x -= q.x;
    y -= q.y;
    z -= q.z;
    w -= q.w;

    return *this;
}
// In-place Hamilton product: *this = (*this) * q. Returns *this.
//
// Both operands are snapshotted into locals before any member is written.
// Without that, `a *= a` (q aliasing *this) would read the freshly written w
// through q.w while computing x, y and z and produce a wrong result.
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q)
{
    const float lw = w;
    const float lx = x;
    const float ly = y;
    const float lz = z;

    const float rw = q.w;
    const float rx = q.x;
    const float ry = q.y;
    const float rz = q.z;

    w = lw * rw - CVector3f(lx, ly, lz).dot({rx, ry, rz});
    x = ly * rz - lz * ry + lw * rx + lx * rw;
    y = lz * rx - lx * rz + lw * ry + ly * rw;
    z = lx * ry - ly * rx + lw * rz + lz * rw;
    return *this;
}
// Scales every component by `scale` in place and returns *this.
const CQuaternion& CQuaternion::operator*=(float scale)
{
    // The four updates are independent of each other.
    x *= scale;
    y *= scale;
    z *= scale;
    w *= scale;

    return *this;
}
// Divides every component by `scale` in place and returns *this.
// No check is made for scale == 0.
const CQuaternion& CQuaternion::operator/=(float scale)
{
    // The four updates are independent of each other.
    x /= scale;
    y /= scale;
    z /= scale;
    w /= scale;

    return *this;
}
// Flips the sign of x, y and z in place; w is left untouched.
// NOTE(review): this is the conjugate, which is the true inverse only for a
// unit-length quaternion -- confirm callers rely on that.
void CQuaternion::invert()
{
    z = -z;
    y = -y;
    x = -x;
}
// Returns a copy with x, y and z negated and w unchanged (non-mutating
// counterpart of invert()).
CQuaternion CQuaternion::inverse() const
{
    const float negX = -x;
    const float negY = -y;
    const float negZ = -z;
    return CQuaternion(w, negX, negY, negZ);
}
CQuaternion CQuaternion::log() const
{
float a = std::acos(w);
float sina = std::sin(a);
CQuaternion ret;
ret.w = 0.f;
if (sina > 0.f)
{
ret.x = a * x / sina;
ret.y = a * y / sina;
ret.z = a * z / sina;
}
else
{
ret.x = 0.f;
ret.y = 0.f;
ret.z = 0.f;
} }
return ret; switch (idx) {
case 0: {
float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f);
float s = 0.5f / st;
w() = (mat[1][2] - mat[2][1]) * s;
x() = 0.5f * st;
y() = (mat[1][0] + mat[0][1]) * s;
z() = (mat[2][0] + mat[0][2]) * s;
break;
}
case 1: {
float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f);
float s = 0.5f / st;
w() = (mat[2][0] - mat[0][2]) * s;
x() = (mat[1][0] + mat[0][1]) * s;
y() = 0.5f * st;
z() = (mat[2][1] + mat[1][2]) * s;
break;
}
case 2: {
float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f);
float s = 0.5f / st;
w() = (mat[0][1] - mat[1][0]) * s;
x() = (mat[2][0] + mat[0][2]) * s;
y() = (mat[2][1] + mat[1][2]) * s;
z() = 0.5f * st;
break;
}
default:
w() = 0.f;
x() = 0.f;
y() = 0.f;
z() = 0.f;
break;
}
}
} }
CQuaternion CQuaternion::exp() const void CQuaternion::fromVector3f(const CVector3f& vec) {
{ float cosX = std::cos(0.5f * vec.x());
float a = (CVector3f(x, y, z).magnitude()); float cosY = std::cos(0.5f * vec.y());
float sina = std::sin(a); float cosZ = std::cos(0.5f * vec.z());
float cosa = std::cos(a);
CQuaternion ret;
ret.w = cosa; float sinX = std::sin(0.5f * vec.x());
if (a > 0.f) float sinY = std::sin(0.5f * vec.y());
{ float sinZ = std::sin(0.5f * vec.z());
ret.x = sina * x / a;
ret.y = sina * y / a;
ret.z = sina * z / a;
}
else
{
ret.x = 0.f;
ret.y = 0.f;
ret.z = 0.f;
}
return ret; simd_floats f;
f[0] = cosZ * cosY * cosX + sinZ * sinY * sinX;
f[1] = cosZ * cosY * sinX - sinZ * sinY * cosX;
f[2] = cosZ * sinY * cosX + sinZ * cosY * sinX;
f[3] = sinZ * cosY * cosX - cosZ * sinY * sinX;
mSimd.copy_from(f);
}
// Copy assignment: copies the packed SIMD storage of `q` and returns *this.
CQuaternion& CQuaternion::operator=(const CQuaternion& q) {
  CQuaternion& self = *this;
  self.mSimd = q.mSimd;
  return self;
}
// Lane-wise sum of the two quaternions' SIMD storage.
CQuaternion CQuaternion::operator+(const CQuaternion& q) const {
  const auto sum = mSimd + q.mSimd;
  return sum;
}
// Lane-wise difference (*this) - q of the SIMD storage.
CQuaternion CQuaternion::operator-(const CQuaternion& q) const {
  const auto difference = mSimd - q.mSimd;
  return difference;
}
// Hamilton product (*this) * q.
// Real part: w * q.w minus the dot product of the two (x, y, z) parts.
// Vector part: cross product of the (x, y, z) parts plus each of them scaled
// by the other operand's w.
CQuaternion CQuaternion::operator*(const CQuaternion& q) const {
  const float real = w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()});
  const float imagX = y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w();
  const float imagY = z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w();
  const float imagZ = x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w();
  return CQuaternion(real, imagX, imagY, imagZ);
}
// Hamilton product (*this) * q for CNUQuaternion; same formula as
// CQuaternion::operator*(const CQuaternion&).
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const {
  const float real = w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()});
  const float imagX = y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w();
  const float imagY = z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w();
  const float imagZ = x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w();
  return CNUQuaternion(real, imagX, imagY, imagZ);
}
// "Division" by a quaternion: multiplies *this by a copy of `q` that has been
// passed through invert().
CQuaternion CQuaternion::operator/(const CQuaternion& q) const {
  CQuaternion inverted(q);
  inverted.invert();

  const CQuaternion& self = *this;
  return self * inverted;
}
// Multiplies every lane by `scale` (broadcast into a simd<float>).
CQuaternion CQuaternion::operator*(float scale) const {
  const simd<float> factor(scale);
  const auto scaled = mSimd * factor;
  return scaled;
}
// Multiplies every lane by `scale` (broadcast into a simd<float>).
CNUQuaternion CNUQuaternion::operator*(float scale) const {
  const simd<float> factor(scale);
  const auto scaled = mSimd * factor;
  return scaled;
}
// Divides every lane by `scale` (broadcast into a simd<float>).
// No check is made for scale == 0.
CQuaternion CQuaternion::operator/(float scale) const {
  const simd<float> divisor(scale);
  const auto quotient = mSimd / divisor;
  return quotient;
}
// Unary minus: negates every lane of the SIMD storage.
CQuaternion CQuaternion::operator-() const {
  const auto negated = -mSimd;
  return negated;
}
// Adds `q` lane by lane into this quaternion and returns *this.
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q) {
  const auto& addend = q.mSimd;
  mSimd += addend;
  return *this;
}
// Adds `q` lane by lane into this quaternion and returns *this.
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q) {
  const auto& addend = q.mSimd;
  mSimd += addend;
  return *this;
}
// Subtracts `q` lane by lane from this quaternion and returns *this.
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q) {
  const auto& subtrahend = q.mSimd;
  mSimd -= subtrahend;
  return *this;
}
// In-place Hamilton product: *this = (*this) * q. Returns *this.
//
// Both operands are copied before any component is written. Copying only
// *this is not enough: for `a *= a` (q aliasing *this) the write to w() would
// be visible through q.w() while x(), y() and z() are being computed.
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q) {
  const CQuaternion lhs = *this;
  const CQuaternion rhs = q;
  w() = lhs.w() * rhs.w() - CVector3f(lhs.x(), lhs.y(), lhs.z()).dot({rhs.x(), rhs.y(), rhs.z()});
  x() = lhs.y() * rhs.z() - lhs.z() * rhs.y() + lhs.w() * rhs.x() + lhs.x() * rhs.w();
  y() = lhs.z() * rhs.x() - lhs.x() * rhs.z() + lhs.w() * rhs.y() + lhs.y() * rhs.w();
  z() = lhs.x() * rhs.y() - lhs.y() * rhs.x() + lhs.w() * rhs.z() + lhs.z() * rhs.w();
  return *this;
}
// Scales every lane by `scale` in place and returns *this.
const CQuaternion& CQuaternion::operator*=(float scale) {
  const simd<float> factor(scale);
  mSimd *= factor;
  return *this;
}
// Divides every lane by `scale` in place and returns *this.
// No check is made for scale == 0.
const CQuaternion& CQuaternion::operator/=(float scale) {
  const simd<float> divisor(scale);
  mSimd /= divisor;
  return *this;
}
// Per-lane multiplier used by invert() and inverse(): lane 0 is multiplied by
// +1 (left as is) and lanes 1..3 by -1 (sign flipped).
// NOTE(review): lane 0 appears to hold w and lanes 1..3 x/y/z -- confirm
// against the CQuaternion accessors.
static const simd<float> InvertQuat(1.f, -1.f, -1.f, -1.f);
void CQuaternion::invert() {
mSimd *= InvertQuat;
}
// Non-mutating counterpart of invert(): returns the SIMD storage multiplied
// lane-wise by InvertQuat (1, -1, -1, -1).
CQuaternion CQuaternion::inverse() const {
  const auto flipped = mSimd * InvertQuat;
  return flipped;
}
// Quaternion logarithm.
// With a = acos(w): when sin(a) is strictly positive the result is
// (a * *this) / sin(a); otherwise every lane is zero. In both cases the w
// component of the result is then forced to 0.
CQuaternion CQuaternion::log() const {
  const float angle = std::acos(w());
  const float sinAngle = std::sin(angle);

  CQuaternion result;
  // Written as !(x > 0) so a NaN sine still takes the all-zero branch.
  if (!(sinAngle > 0.f)) {
    result = simd<float>(0.f);
  } else {
    result = angle * *this / sinAngle;
  }

  result.w() = 0.f;
  return result;
}
CQuaternion CQuaternion::exp() const {
float a = (CVector3f(mSimd.shuffle<1, 2, 3, 3>()).magnitude());
float sina = std::sin(a);
float cosa = std::cos(a);
CQuaternion ret;
if (a > 0.f)
ret = sina * *this / a;
else
ret = simd<float>(0.f);
ret.w() = cosa;
return ret;
} }
CQuaternion CQuaternion::lerp(const CQuaternion& a, const CQuaternion& b, double t) { return (a + t * (b - a)); } CQuaternion CQuaternion::lerp(const CQuaternion& a, const CQuaternion& b, double t) { return (a + t * (b - a)); }
CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) { return lerp(a, b, t).normalized(); } CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) {
return lerp(a, b, t).normalized();
CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t)
{
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CQuaternion ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0 - t) * theta);
ret.x = float((a.x * s0 + b.x * s1) * d);
ret.y = float((a.y * s0 + b.y * s1) * d);
ret.z = float((a.z * s0 + b.z * s1) * d);
ret.w = float((a.w * s0 + b.w * s1) * d);
return ret;
}
return a;
} }
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1) CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t) {
{ if (t <= 0.0f)
CVector3f v0N = v0; return a;
CVector3f v1N = v1; if (t >= 1.0f)
return b;
if (!v0N.isZero()) CQuaternion ret;
v0N.normalize();
if (!v1N.isZero())
v1N.normalize();
CVector3f cross = v0N.cross(v1N); float mag = std::sqrt(a.dot(a) * b.dot(b));
if (cross.magSquared() < 0.001f) float prod = a.dot(b) / mag;
{
if (v0N.dot(v1N) > 0.f) if (std::fabs(prod) < 1.0f) {
return CQuaternion::skNoRotation; const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
if (cross.canBeNormalized())
return CQuaternion(0.0f, cross.normalized()); const double theta = std::acos(sign * prod);
return CQuaternion::skNoRotation; const double s1 = std::sin(sign * t * theta);
} const double d = 1.0 / std::sin(theta);
else const double s0 = std::sin((1.0 - t) * theta);
{
float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f); ret = (a * s0 + b * s1) * d;
return CQuaternion(0.5f * w, cross * (1.f / w));
} return ret;
}
return a;
}
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1) {
CVector3f v0N = v0;
CVector3f v1N = v1;
if (!v0N.isZero())
v0N.normalize();
if (!v1N.isZero())
v1N.normalize();
CVector3f cross = v0N.cross(v1N);
if (cross.magSquared() < 0.001f) {
if (v0N.dot(v1N) > 0.f)
return CQuaternion::skNoRotation;
if (cross.canBeNormalized())
return CQuaternion(0.0f, cross.normalized());
return CQuaternion::skNoRotation;
} else {
float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f);
return CQuaternion(0.5f * w, cross * (1.f / w));
}
} }
CQuaternion CQuaternion::clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1, CQuaternion CQuaternion::clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle) const zeus::CRelAngle& angle) {
{ CQuaternion arc = shortestRotationArc(v0, v1);
CQuaternion arc = shortestRotationArc(v0, v1); if (angle >= 2.f * std::acos(arc.w()))
if (angle >= 2.f * std::acos(arc.w)) return arc;
return arc;
return fromAxisAngle(arc.getImaginary(), angle); return fromAxisAngle(arc.getImaginary(), angle);
} }
CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t) CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t) {
{ return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
} }
CQuaternion operator+(float lhs, const CQuaternion& rhs) CQuaternion operator+(float lhs, const CQuaternion& rhs) {
{ return simd<float>(lhs) + rhs.mSimd;
return CQuaternion(lhs + rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CQuaternion operator-(float lhs, const CQuaternion& rhs) CQuaternion operator-(float lhs, const CQuaternion& rhs) {
{ return simd<float>(lhs) - rhs.mSimd;
return CQuaternion(lhs - rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CQuaternion operator*(float lhs, const CQuaternion& rhs) CQuaternion operator*(float lhs, const CQuaternion& rhs) {
{ return simd<float>(lhs) * rhs.mSimd;
return CQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) {
{ return simd<float>(lhs) * rhs.mSimd;
return CNUQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
} }
CQuaternion CQuaternion::buildEquivalent() const CQuaternion CQuaternion::buildEquivalent() const {
{ float tmp = std::acos(clamp(-1.f, w(), 1.f)) * 2.f;
float tmp = std::acos(clamp(-1.f, w, 1.f)) * 2.0; if (std::fabs(tmp) < 1.0e-7)
if (std::fabs(tmp) < 1.0e-7) return {-1.f, 0.f, 0.f, 0.f};
return {-1.f, 0.f, 0.f, 0.f}; else
else return CQuaternion::fromAxisAngle(CUnitVector3f(mSimd.shuffle<1, 2, 3, 3>()), tmp + 2.0 * M_PI);
return CQuaternion::fromAxisAngle(CUnitVector3f(x, y, z), tmp + 2.0 * M_PI);
} }
CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other) CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other) {
{ return std::acos(zeus::clamp(-1.f, dot(other), 1.f));
return std::acos(zeus::clamp(-1.f, dot(other), 1.f));
} }
CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng) CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng) {
{ CQuaternion q = skNoRotation;
CQuaternion q = skNoRotation; zeus::CVector3f destNoZ = dest;
zeus::CVector3f destNoZ = dest; zeus::CVector3f sourceNoZ = source;
zeus::CVector3f sourceNoZ = source; destNoZ.z() = 0.f;
destNoZ.z = 0.f; sourceNoZ.z() = 0.f;
sourceNoZ.z = 0.f; zeus::CVector3f tmp;
zeus::CVector3f tmp; if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f) {
if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f) sourceNoZ.normalize();
{ destNoZ.normalize();
sourceNoZ.normalize();
destNoZ.normalize();
float angleBetween = float angleBetween =
normalize_angle(std::atan2(destNoZ.x, destNoZ.y) - std::atan2(sourceNoZ.x, sourceNoZ.y)); normalize_angle(std::atan2(destNoZ.x(), destNoZ.y()) - std::atan2(sourceNoZ.x(), sourceNoZ.y()));
float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians()); float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians());
CQuaternion tmpQ; CQuaternion tmpQ;
tmpQ.rotateZ(-realAngle); tmpQ.rotateZ(-realAngle);
q = tmpQ; q = tmpQ;
tmp = q.transform(sourceNoZ); tmp = q.transform(sourceNoZ);
} } else if (sourceNoZ.magSquared() > 0.0001f)
else if (sourceNoZ.magSquared() > 0.0001f) tmp = sourceNoZ.normalized();
tmp = sourceNoZ.normalized(); else if (destNoZ.magSquared() > 0.0001f)
else if (destNoZ.magSquared() > 0.0001f) tmp = destNoZ.normalized();
tmp = destNoZ.normalized(); else
else return skNoRotation;
return skNoRotation;
float realAngle = float realAngle =
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z) - std::acos(source.z)), maxAng.asRadians()); zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z()) - std::acos(source.z())), maxAng.asRadians());
return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q; return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q;
} }
} }

View File

@ -1,69 +1,65 @@
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
namespace zeus namespace zeus {
{ CTransform CTransformFromEditorEuler(const CVector3f& eulerVec) {
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec) CTransform result;
{ double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
CTransform result;
double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
ti = eulerVec[0]; ti = eulerVec[0];
tj = eulerVec[1]; tj = eulerVec[1];
th = eulerVec[2]; th = eulerVec[2];
ci = std::cos(ti); ci = std::cos(ti);
cj = std::cos(tj); cj = std::cos(tj);
ch = std::cos(th); ch = std::cos(th);
si = std::sin(ti); si = std::sin(ti);
sj = std::sin(tj); sj = std::sin(tj);
sh = std::sin(th); sh = std::sin(th);
cc = ci * ch; cc = ci * ch;
cs = ci * sh; cs = ci * sh;
sc = si * ch; sc = si * ch;
ss = si * sh; ss = si * sh;
result.basis.m[0][0] = float(cj * ch); result.basis.m[0][0] = float(cj * ch);
result.basis.m[1][0] = float(sj * sc - cs); result.basis.m[1][0] = float(sj * sc - cs);
result.basis.m[2][0] = float(sj * cc + ss); result.basis.m[2][0] = float(sj * cc + ss);
result.basis.m[0][1] = float(cj * sh); result.basis.m[0][1] = float(cj * sh);
result.basis.m[1][1] = float(sj * ss + cc); result.basis.m[1][1] = float(sj * ss + cc);
result.basis.m[2][1] = float(sj * cs - sc); result.basis.m[2][1] = float(sj * cs - sc);
result.basis.m[0][2] = float(-sj); result.basis.m[0][2] = float(-sj);
result.basis.m[1][2] = float(cj * si); result.basis.m[1][2] = float(cj * si);
result.basis.m[2][2] = float(cj * ci); result.basis.m[2][2] = float(cj * ci);
return result; return result;
} }
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle) CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle) {
{ CTransform result;
CTransform result; CVector3f axisN = axis.normalized();
CVector3f axisN = axis.normalized();
float c = std::cos(angle); float c = std::cos(angle);
float s = std::sin(angle); float s = std::sin(angle);
float t = 1.f - c; float t = 1.f - c;
result.basis.m[0][0] = t * axisN.v[0] * axisN.v[0] + c; result.basis.m[0][0] = t * axisN[0] * axisN[0] + c;
result.basis.m[1][0] = t * axisN.v[0] * axisN.v[1] - axisN.v[2] * s; result.basis.m[1][0] = t * axisN[0] * axisN[1] - axisN[2] * s;
result.basis.m[2][0] = t * axisN.v[0] * axisN.v[2] + axisN.v[1] * s; result.basis.m[2][0] = t * axisN[0] * axisN[2] + axisN[1] * s;
result.basis.m[0][1] = t * axisN.v[0] * axisN.v[1] + axisN.v[2] * s; result.basis.m[0][1] = t * axisN[0] * axisN[1] + axisN[2] * s;
result.basis.m[1][1] = t * axisN.v[1] * axisN.v[1] + c; result.basis.m[1][1] = t * axisN[1] * axisN[1] + c;
result.basis.m[2][1] = t * axisN.v[1] * axisN.v[2] - axisN.v[0] * s; result.basis.m[2][1] = t * axisN[1] * axisN[2] - axisN[0] * s;
result.basis.m[0][2] = t * axisN.v[0] * axisN.v[2] - axisN.v[1] * s; result.basis.m[0][2] = t * axisN[0] * axisN[2] - axisN[1] * s;
result.basis.m[1][2] = t * axisN.v[1] * axisN.v[2] + axisN.v[0] * s; result.basis.m[1][2] = t * axisN[1] * axisN[2] + axisN[0] * s;
result.basis.m[2][2] = t * axisN.v[2] * axisN.v[2] + c; result.basis.m[2][2] = t * axisN[2] * axisN[2] + c;
return result; return result;
} }
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin) CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin) {
{ CTransform ret = CTransformFromEditorEuler(eulerVec);
CTransform ret = CTransformFromEditorEuler(eulerVec); ret.origin = origin;
ret.origin = origin; return ret;
return ret;
} }
} }

View File

@ -4,50 +4,46 @@
#include <cassert> #include <cassert>
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{
const CVector2f CVector2f::skOne = CVector2f(1.0); const CVector2f CVector2f::skOne = CVector2f(1.0);
const CVector2f CVector2f::skNegOne = CVector2f(-1.0); const CVector2f CVector2f::skNegOne = CVector2f(-1.0);
const CVector2f CVector2f::skZero(0.f, 0.f); const CVector2f CVector2f::skZero(0.f, 0.f);
float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b) float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b) {
{ float mag1 = a.magnitude();
float mag1 = a.magnitude(); float mag2 = b.magnitude();
float mag2 = b.magnitude();
if (!mag1 || !mag2) if (!mag1 || !mag2)
return 0; return 0;
float dot = a.dot(b); float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2)); float theta = std::acos(dot / (mag1 * mag2));
return theta; return theta;
} }
CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) {
{ if (t <= 0.0f)
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CVector2f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0f - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a; return a;
if (t >= 1.0f)
return b;
CVector2f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0f - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a;
} }
} }

View File

@ -5,8 +5,7 @@
#include <cassert> #include <cassert>
#include "zeus/Math.hpp" #include "zeus/Math.hpp"
namespace zeus namespace zeus {
{
const CVector3f CVector3f::skOne(1.f); const CVector3f CVector3f::skOne(1.f);
const CVector3f CVector3f::skNegOne(-1.f); const CVector3f CVector3f::skNegOne(-1.f);
const CVector3f CVector3f::skZero; const CVector3f CVector3f::skZero;
@ -20,59 +19,44 @@ const CVector3f CVector3f::skRadToDegVec(180.0f / M_PIF);
const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f); const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f);
const CVector3d CVector3d::skZero(0.0, 0.0, 0.0); const CVector3d CVector3d::skZero(0.0, 0.0, 0.0);
CVector3f::CVector3f(const CVector3d& vec) CVector3f::CVector3f(const CVector3d& vec) : mSimd(vec.mSimd) {}
{
#if __SSE__ float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) {
mVec128 = _mm_cvtpd_ps(vec.mVec128[0]); float mag1 = a.magnitude();
v[2] = vec.v[2]; float mag2 = b.magnitude();
#else
v[0] = vec.v[0]; if (!mag1 || !mag2)
v[1] = vec.v[1]; return 0.f;
v[2] = vec.v[2];
#endif float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2));
return theta;
} }
float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t) {
{ if (t <= 0.0f)
float mag1 = a.magnitude();
float mag2 = b.magnitude();
if (!mag1 || !mag2)
return 0.f;
float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2));
return theta;
}
CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
{
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CVector3f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = acos(sign * prod);
const double s1 = sin(sign * t * theta);
const double d = 1.0 / sin(theta);
const double s0 = sin((1.0 - t) * theta);
ret.x = (float)(a.x * s0 + b.x * s1) * d;
ret.y = (float)(a.y * s0 + b.y * s1) * d;
ret.z = (float)(a.z * s0 + b.z * s1) * d;
return ret;
}
return a; return a;
if (t >= 1.0f)
return b;
CVector3f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = acos(sign * prod);
const double s1 = sin(sign * t * theta);
const double d = 1.0 / sin(theta);
const double s0 = sin((1.0 - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a;
} }
} }

View File

@ -1,19 +1,13 @@
#include "zeus/CVector4f.hpp" #include "zeus/CVector4f.hpp"
#include "zeus/CColor.hpp" #include "zeus/CColor.hpp"
namespace zeus namespace zeus {
{
const CVector4f CVector4f::skZero(0.f, 0.f, 0.f, 0.f); const CVector4f CVector4f::skZero(0.f, 0.f, 0.f, 0.f);
CVector4f::CVector4f(const zeus::CColor& other) : x(other.r), y(other.g), z(other.b), w(other.a) {} CVector4f::CVector4f(const zeus::CColor& other) : mSimd(other.mSimd) {}
CVector4f& CVector4f::operator=(const CColor& other) CVector4f& CVector4f::operator=(const CColor& other) {
{ mSimd = other.mSimd;
x = other.r; return *this;
y = other.g;
z = other.b;
w = other.a;
return *this;
} }
} }

View File

@ -2,312 +2,292 @@
#include "zeus/CTransform.hpp" #include "zeus/CTransform.hpp"
#include "zeus/CVector3f.hpp" #include "zeus/CVector3f.hpp"
#include "zeus/CVector2f.hpp" #include "zeus/CVector2f.hpp"
#if _WIN32 #if _WIN32
#include <intrin.h> #include <intrin.h>
#else #else
#include <cpuid.h> #include <cpuid.h>
#endif #endif
namespace zeus namespace zeus {
{
static bool isCPUInit = false; static bool isCPUInit = false;
static CPUInfo g_cpuFeatures = {}; static CPUInfo g_cpuFeatures = {};
static CPUInfo g_missingFeatures = {}; static CPUInfo g_missingFeatures = {};
void getCpuInfo(int eax, int regs[4]) void getCpuInfo(int eax, int regs[4]) {
{
#if !GEKKO #if !GEKKO
#if _WIN32 #if _WIN32
__cpuid(regs, eax); __cpuid(regs, eax);
#else #else
__cpuid(eax, regs[0], regs[1], regs[2], regs[3]); __cpuid(eax, regs[0], regs[1], regs[2], regs[3]);
#endif #endif
#endif #endif
} }
void getCpuInfoEx(int eax, int ecx, int regs[4]) void getCpuInfoEx(int eax, int ecx, int regs[4]) {
{
#if !GEKKO #if !GEKKO
#if _WIN32 #if _WIN32
__cpuidex(regs, eax, ecx); __cpuidex(regs, eax, ecx);
#else #else
__cpuid_count(eax, ecx, regs[0], regs[1], regs[2], regs[3]); __cpuid_count(eax, ecx, regs[0], regs[1], regs[2], regs[3]);
#endif #endif
#endif #endif
} }
void detectCPU() void detectCPU() {
{
#if !GEKKO #if !GEKKO
if (isCPUInit) if (isCPUInit)
return; return;
int regs[4]; int regs[4];
getCpuInfo(0, regs); getCpuInfo(0, regs);
int highestFeature = regs[0]; int highestFeature = regs[0];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = regs[1]; *reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = regs[3]; *reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = regs[2]; *reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 8) = regs[2];
getCpuInfo(0x80000000, regs); getCpuInfo(0x80000000, regs);
if (regs[0] >= 0x80000004) if (regs[0] >= 0x80000004) {
{ for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
for (unsigned int i = 0x80000002; i <= 0x80000004; i++) getCpuInfo(i, regs);
{ // Interpret CPU brand string and cache information.
getCpuInfo(i, regs); if (i == 0x80000002)
// Interpret CPU brand string and cache information. memcpy((char*) g_cpuFeatures.cpuBrand, regs, sizeof(regs));
if (i == 0x80000002) else if (i == 0x80000003)
memcpy((char*)g_cpuFeatures.cpuBrand, regs, sizeof(regs)); memcpy((char*) g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
else if (i == 0x80000003) else if (i == 0x80000004)
memcpy((char*)g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs)); memcpy((char*) g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
else if (i == 0x80000004)
memcpy((char*)g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
}
} }
}
if (highestFeature >= 1) if (highestFeature >= 1) {
{ getCpuInfo(1, regs);
getCpuInfo(1, regs); memset((bool*) &g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1); memset((bool*) &g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*)&g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1); memset((bool*) &g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1); memset((bool*) &g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1); memset((bool*) &g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1); }
}
if (highestFeature >= 7) if (highestFeature >= 7) {
{ getCpuInfoEx(7, 0, regs);
getCpuInfoEx(7, 0, regs); memset((bool*) &g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1); }
}
isCPUInit = true; isCPUInit = true;
#endif #endif
} }
const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; } const CPUInfo& cpuFeatures() {
detectCPU();
return g_cpuFeatures;
}
std::pair<bool, const CPUInfo&> validateCPU() std::pair<bool, const CPUInfo&> validateCPU() {
{ detectCPU();
detectCPU(); bool ret = true;
bool ret = true;
#if __AVX2__ #if __AVX2__
if (!g_cpuFeatures.AVX2) if (!g_cpuFeatures.AVX2) {
{ *(bool*) &g_missingFeatures.AVX2 = true;
*(bool*) &g_missingFeatures.AVX2 = true; ret = false;
ret = false; }
}
#endif #endif
#if __AVX__ #if __AVX__
if (!g_cpuFeatures.AVX) if (!g_cpuFeatures.AVX) {
{ *(bool*) &g_missingFeatures.AVX = true;
*(bool*) &g_missingFeatures.AVX = true; ret = false;
ret = false; }
}
#endif #endif
#if __SSE4A__ #if __SSE4A__
if (!g_cpuFeatures.SSE4a) if (!g_cpuFeatures.SSE4a)
{ {
*(bool*) &g_missingFeatures.SSE4a = true; *(bool*) &g_missingFeatures.SSE4a = true;
ret = false; ret = false;
} }
#endif #endif
#if __SSE4_2__ #if __SSE4_2__
if (!g_cpuFeatures.SSE42) if (!g_cpuFeatures.SSE42) {
{ *(bool*) &g_missingFeatures.SSE42 = true;
*(bool*) &g_missingFeatures.SSE42 = true; ret = false;
ret = false; }
}
#endif #endif
#if __SSE4_1__ #if __SSE4_1__
if (!g_cpuFeatures.SSE41) if (!g_cpuFeatures.SSE41) {
{ *(bool*) &g_missingFeatures.SSE41 = true;
*(bool*) &g_missingFeatures.SSE41 = true; ret = false;
ret = false; }
}
#endif #endif
#if __SSSE3__ #if __SSSE3__
if (!g_cpuFeatures.SSSE3) if (!g_cpuFeatures.SSSE3) {
{ *(bool*) &g_missingFeatures.SSSE3 = true;
*(bool*) &g_missingFeatures.SSSE3 = true; ret = false;
ret = false; }
}
#endif #endif
#if __SSE3__ #if __SSE3__
if (!g_cpuFeatures.SSE3) if (!g_cpuFeatures.SSE3) {
{ *(bool*) &g_missingFeatures.SSE3 = true;
*(bool*) &g_missingFeatures.SSE3 = true; ret = false;
ret = false; }
}
#endif #endif
#if __SSE2__ #if __SSE2__
if (!g_cpuFeatures.SSE2) if (!g_cpuFeatures.SSE2) {
{ *(bool*) &g_missingFeatures.SSE2 = true;
*(bool*) &g_missingFeatures.SSE2 = true; ret = false;
ret = false; }
}
#endif #endif
#if __SSE__ #if __SSE__
if (!g_cpuFeatures.SSE1) if (!g_cpuFeatures.SSE1) {
{ *(bool*) &g_missingFeatures.SSE1 = true;
*(bool*) &g_missingFeatures.SSE1 = true; ret = false;
ret = false; }
}
#endif #endif
return {ret, g_missingFeatures}; return {ret, g_missingFeatures};
} }
/// Builds a transform positioned at `pos` and oriented toward `lookPos`.
///
/// - Look axis: `lookPos - pos`, normalized. If its length is <= FLT_EPSILON the
///   axis falls back to (0, 1, 0).
/// - Up axis: `up` minus the look axis scaled by their dot product (clamped to
///   [-1, 1]). If that is degenerate, (0, 0, 1) and then (0, 1, 0) are tried,
///   each with its look-axis component removed.
/// - Right axis: look x up.
///
/// @return CTransform built from CMatrix3f(right, look, up) and `pos`.
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up) {
  CVector3f vLook = lookPos - pos;
  if (vLook.magnitude() <= FLT_EPSILON)
    vLook = {0.f, 1.f, 0.f};
  else
    vLook.normalize();

  CVector3f vUp = up - vLook * clamp(-1.f, up.dot(vLook), 1.f);
  if (vUp.magnitude() <= FLT_EPSILON) {
    // Requested up is (nearly) parallel to the look axis: try world Z, then world Y.
    vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z();
    if (vUp.magnitude() <= FLT_EPSILON)
      vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y();
  }
  vUp.normalize();

  const CVector3f vRight = vLook.cross(vUp);
  CMatrix3f basis(vRight, vLook, vUp);
  return CTransform(basis, pos);
}
/// Evaluates the cubic Bezier curve with control points a, b, c, d at parameter t
/// by repeated linear interpolation (De Casteljau). `t` is not clamped.
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
  const float omt = 1.f - t;

  // First level: interpolate along each leg of the control polygon.
  const CVector3f ab = a * omt + b * t;
  const CVector3f bc = b * omt + c * t;
  const CVector3f cd = c * omt + d * t;

  // Second and third levels folded into one expression.
  return (ab * omt + bc * t) * omt + (bc * omt + cd * t) * t;
}
/// Returns the largest power of two strictly below `x`.
///
/// Exact powers of two map to the previous power (256 -> 128); that is deliberate
/// and is what the `x - 1` term is for. Inputs with no power of two below them
/// (x <= 1) yield 0.
///
/// @param x value to round down; assumes a 32-bit int (the smear stops at >> 16).
/// @return previous power of two, or 0 when x <= 1.
int floorPowerOfTwo(int x) {
  // Non-positive input: answer is 0. Handling it up front avoids signed overflow in
  // `x - 1` for INT_MIN and implementation-defined right shifts of negative values.
  if (x <= 0)
    return 0;

  // Seed with x - 1 so an exact power of two drops to the previous one, then smear
  // the highest set bit into every lower position.
  x = (x - 1) | (x >> 1);
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;

  // x is now 2^k - 1; keeping only the top bit gives 2^(k-1).
  return x - (x >> 1);
}
/// Returns the smallest power of two that is >= `x`.
///
/// Exact powers of two are returned unchanged. Non-positive input yields 0.
///
/// The bit smearing is done on an unsigned copy so that neither the decrement nor
/// the final increment can overflow a signed int. For x > 2^30 the mathematical
/// result (2^31) does not fit in int; the unsigned value is converted back, which
/// on mainstream two's-complement targets gives INT_MIN.
///
/// @param x value to round up; assumes a 32-bit int (the smear stops at >> 16).
/// @return next power of two, or 0 when x <= 0.
int ceilingPowerOfTwo(int x) {
  if (x <= 0)
    return 0;

  // Start from x - 1 so exact powers of two are not bumped to the next one.
  unsigned int v = static_cast<unsigned int>(x) - 1u;
  v |= v >> 1;
  v |= v >> 2;
  v |= v >> 4;
  v |= v >> 8;
  v |= v >> 16;

  // v is 2^k - 1 here; one more step reaches the power of two.
  return static_cast<int>(v + 1u);
}
/// Evaluates a scalar Catmull-Rom spline through control values a, b, c, d.
///
/// The curve runs from `b` (t = 0) to `c` (t = 1); `a` and `d` only shape it.
/// `t` outside [0, 1] is clamped to the nearer end value.
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t) {
  if (t <= 0.0f)
    return b;
  if (t >= 1.0f)
    return c;

  const float tSq = t * t;
  const float tCu = tSq * t;

  // Basis weights for each control value.
  const float wa = -0.5f * tCu + tSq - 0.5f * t;
  const float wb = 1.5f * tCu + -2.5f * tSq + 1.0f;
  const float wc = -1.5f * tCu + 2.0f * tSq + 0.5f * t;
  const float wd = 0.5f * tCu - 0.5f * tSq;

  return (a * wa + b * wb + c * wc + d * wd);
}
/// Evaluates a Catmull-Rom spline through control points a, b, c, d.
///
/// The curve runs from `b` (t = 0) to `c` (t = 1); `a` and `d` only shape it.
/// `t` outside [0, 1] is clamped to the nearer end point.
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
                                   float t) {
  if (t <= 0.0f)
    return b;
  if (t >= 1.0f)
    return c;

  const float tSq = t * t;
  const float tCu = tSq * t;

  // Basis weights for each control point.
  const float wa = -0.5f * tCu + tSq - 0.5f * t;
  const float wb = 1.5f * tCu + -2.5f * tSq + 1.0f;
  const float wc = -1.5f * tCu + 2.0f * tSq + 0.5f * t;
  const float wd = 0.5f * tCu - 0.5f * tSq;

  return (a * wa + b * wb + c * wc + d * wd);
}
/// Evaluates a "round" spline segment between `b` and `c`, with `a` and `d` as the
/// neighbouring points that steer the end directions.
///
/// `t` <= 0 yields `b` and `t` >= 1 yields `c`. In between, a unit direction is
/// derived at each end from the normalized directions toward its two neighbours,
/// scaled by |c - b|, and handed to getCatmullRomSplinePoint together with `b`
/// and `c`. If `b` and `c` coincide, `b` is returned.
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
                                        float t) {
  // Clamp to the segment end points, as getCatmullRomSplinePoint does. These tests
  // used to be inverted (t >= 0 -> b, t <= 1 -> c), which returned early for every
  // non-NaN t and made everything below dead code.
  if (t <= 0.0f)
    return b;
  if (t >= 1.0f)
    return c;

  CVector3f cb = c - b;
  if (!cb.canBeNormalized())
    return b;

  // Direction at b: bisects the directions toward c and away from a.
  CVector3f ab = a - b;
  if (!ab.canBeNormalized())
    ab = CVector3f(0, 1, 0);
  CVector3f bVelocity = cb.normalized() - ab.normalized();
  if (bVelocity.canBeNormalized())
    bVelocity.normalize();

  // Direction at c: same construction using d and b.
  CVector3f dc = d - c;
  if (!dc.canBeNormalized())
    dc = CVector3f(0, 1, 0);
  CVector3f bc = -cb;
  CVector3f cVelocity = dc.normalized() - bc.normalized();
  if (cVelocity.canBeNormalized())
    cVelocity.normalize();

  const float cbDistance = cb.magnitude();
  // NOTE(review): this passes (b, c, scaled bVelocity, scaled cVelocity) as the four
  // control points of the plain Catmull-Rom evaluator, which interpolates between its
  // 2nd and 3rd arguments. Position/tangent inputs look like they were meant for a
  // Hermite-style evaluator — confirm the intended formula before relying on the
  // interior of the curve. Left unchanged here.
  return zeus::getCatmullRomSplinePoint(b, c, bVelocity * cbDistance, cVelocity * cbDistance, t);
}
/// Converts barycentric coordinates to a point: the x, y and z components of `bary`
/// weight `p0`, `p1` and `p2` respectively, and the weighted points are summed.
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary) {
  const float w0 = bary.x();
  const float w1 = bary.y();
  const float w2 = bary.z();
  return w0 * p0 + w1 * p1 + w2 * p2;
}
bool close_enough(const CVector3f& a, const CVector3f &b, float epsilon) bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon) {
{ return std::fabs(a.x() - b.x()) < epsilon &&
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon && std::fabs(a.z - b.z) < epsilon) std::fabs(a.y() - b.y()) < epsilon &&
return true; std::fabs(a.z() - b.z()) < epsilon;
return false;
} }
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) {
{ return std::fabs(a.x() - b.x()) < epsilon && std::fabs(a.y() - b.y()) < epsilon;
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon)
return true;
return false;
} }
/// CVector3f specialization of min: takes the minimum independently on each of
/// x, y and z.
template <>
CVector3f min(const CVector3f& a, const CVector3f& b) {
  const float lowX = min(a.x(), b.x());
  const float lowY = min(a.y(), b.y());
  const float lowZ = min(a.z(), b.z());
  return {lowX, lowY, lowZ};
}
/// CVector3f specialization of max: takes the maximum independently on each of
/// x, y and z.
template <>
CVector3f max(const CVector3f& a, const CVector3f& b) {
  const float highX = max(a.x(), b.x());
  const float highY = max(a.y(), b.y());
  const float highZ = max(a.z(), b.z());
  return {highX, highY, highZ};
}
} }

View File

@ -30,6 +30,9 @@ int main()
CAABox test2{{-100, -100, -100}, {100, 100, 100}}; CAABox test2{{-100, -100, -100}, {100, 100, 100}};
CAABox test3{{-50, -50, -50}, {50, 50, 50}}; CAABox test3{{-50, -50, -50}, {50, 50, 50}};
CAABox test4{{-50, -50, -105}, {50, 50, 105}}; CAABox test4{{-50, -50, -105}, {50, 50, 105}};
CVector2f point2(-90, 67);
CVector2f point3(-90, 67);
CVector3f point4 = point2 + point3;
CVector3f point(-90, 67, -105); CVector3f point(-90, 67, -105);
test.closestPointAlongVector(point); test.closestPointAlongVector(point);
CVector3d(100, -100, -200); CVector3d(100, -100, -200);
@ -72,7 +75,7 @@ int main()
ctest1.fromHSV(0, 255 / 255.f, .5); ctest1.fromHSV(0, 255 / 255.f, .5);
float h, s, v; float h, s, v;
ctest1.toHSV(h, s, v); ctest1.toHSV(h, s, v);
std::cout << (int)ctest1.r << " " << (int)ctest1.g << " " << (int)ctest1.b << " " << (int)ctest1.a << std::endl; std::cout << (int)ctest1.r() << " " << (int)ctest1.g() << " " << (int)ctest1.b() << " " << (int)ctest1.a() << std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a / 255.f) << std::endl; std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a() / 255.f) << std::endl;
return 0; return 0;
} }