SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:16:50 -10:00
parent d881e58f62
commit e8dfecbb6e
49 changed files with 6047 additions and 4721 deletions

View File

@ -1,5 +1,5 @@
---
IndentWidth: 4
IndentWidth: 2
ColumnLimit: 128
UseTab: Never
---

View File

@ -40,7 +40,6 @@ add_library(zeus
include/zeus/CColor.hpp
include/zeus/Global.hpp
include/zeus/zeus.hpp
include/zeus/TVectorUnion.hpp
include/zeus/CVector2i.hpp
include/zeus/CVector2f.hpp
include/zeus/CVector3f.hpp
@ -56,7 +55,11 @@ add_library(zeus
include/zeus/CSphere.hpp
include/zeus/CUnitVector.hpp
include/zeus/CMRay.hpp
include/zeus/CEulerAngles.hpp)
include/zeus/CEulerAngles.hpp
include/zeus/simd/simd.hpp
include/zeus/simd/simd_sse.hpp
include/zeus/simd/simd_avx.hpp
include/zeus/simd/parallelism_v2_simd.hpp)
add_subdirectory(test)

View File

@ -6,412 +6,367 @@
#include "zeus/CLineSeg.hpp"
#include "zeus/CSphere.hpp"
#include "zeus/Math.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
class alignas(16) CAABox
{
namespace zeus {
class CAABox {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
enum class EBoxEdgeId {
Z0,
X0,
Z1,
X1,
Z2,
X2,
Z3,
X3,
Y0,
Y1,
Y2,
Y3
};
enum class EBoxEdgeId
{
Z0,
X0,
Z1,
X1,
Z2,
X2,
Z3,
X3,
Y0,
Y1,
Y2,
Y3
};
enum class EBoxFaceID {
};
enum class EBoxFaceID
{
};
static const CAABox skInvertedBox;
static const CAABox skNullBox;
static const CAABox skInvertedBox;
static const CAABox skNullBox;
CVector3f min;
CVector3f max;
CVector3f min;
CVector3f max;
// set default AABox to insane inverse min/max to allow for accumulation
CAABox() : CAABox(1e16f, -1e16f) {}
// set default AABox to insane inverse min/max to allow for accumulation
CAABox() : CAABox(1e16f, -1e16f) {}
CAABox(const CVector3f& min, const CVector3f& max) : min(min), max(max) {}
CAABox(const CVector3f& min, const CVector3f& max) : min(min), max(max) {}
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ) {
}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ)
{
}
#if ZE_ATHENA_TYPES
inline void readBoundingBoxBig(athena::io::IStreamReader& in)
{
min.readBig(in);
max.readBig(in);
}
static inline CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in)
{
CAABox ret;
ret.readBoundingBoxBig(in);
return ret;
}
void readBoundingBoxBig(athena::io::IStreamReader& in) {
min.readBig(in);
max.readBig(in);
}
static CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in) {
CAABox ret;
ret.readBoundingBoxBig(in);
return ret;
}
#endif
float distanceFromPointSquared(const CVector3f& other) const
{
float dist = 0;
for (int i = 0; i < 3; i++)
{
if (other[i] < min[i])
{
const float tmp = (min[i] - other[i]);
dist += tmp * tmp;
}
else if (other[i] > max[i])
{
const float tmp = (other[i] - max[i]);
dist += tmp * tmp;
}
}
return dist;
float distanceFromPointSquared(const CVector3f& other) const {
float dist = 0;
for (int i = 0; i < 3; i++) {
if (other[i] < min[i]) {
const float tmp = (min[i] - other[i]);
dist += tmp * tmp;
} else if (other[i] > max[i]) {
const float tmp = (other[i] - max[i]);
dist += tmp * tmp;
}
}
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
return dist;
}
inline bool intersects(const CAABox& other) const
{
bool x1 = (max[0] >= other.min[0]);
bool x2 = (min[0] <= other.max[0]);
bool y1 = (max[1] >= other.min[1]);
bool y2 = (min[1] <= other.max[1]);
bool z1 = (max[2] >= other.min[2]);
bool z2 = (min[2] <= other.max[2]);
return x1 && x2 && y1 && y2 && z1 && z2;
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
bool intersects(const CAABox& other) const {
bool x1 = (max[0] >= other.min[0]);
bool x2 = (min[0] <= other.max[0]);
bool y1 = (max[1] >= other.min[1]);
bool y2 = (min[1] <= other.max[1]);
bool z1 = (max[2] >= other.min[2]);
bool z2 = (min[2] <= other.max[2]);
return x1 && x2 && y1 && y2 && z1 && z2;
}
bool intersects(const CSphere& other) const {
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
}
float intersectionRadius(const CSphere& other) const {
float dist = distanceFromPoint(other.position);
return (dist < other.radius) ? dist : -1.f;
}
CAABox booleanIntersection(const CAABox& other) const {
CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i) {
if (min[i] <= other.min[i] && max[i] >= other.max[i]) {
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
} else if (other.min[i] <= min[i] && other.max[i] >= max[i]) {
minVec[i] = min[i];
maxVec[i] = max[i];
} else if (other.min[i] <= min[i] && other.max[i] >= min[i]) {
minVec[i] = min[i];
maxVec[i] = other.max[i];
} else if (other.min[i] <= max[i] && other.max[i] >= max[i]) {
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
}
bool intersects(const CSphere& other) const
{
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
return {minVec, maxVec};
}
bool inside(const CAABox& other) const {
bool x = min[0] >= other.min[0] && max[0] <= other.max[0];
bool y = min[1] >= other.min[1] && max[1] <= other.max[1];
bool z = min[2] >= other.min[2] && max[2] <= other.max[2];
return x && y && z;
}
bool insidePlane(const CPlane& plane) const {
CVector3f vmax;
/* X axis */
if (plane.x() >= 0.f)
vmax[0] = max[0];
else
vmax[0] = min[0];
/* Y axis */
if (plane.y() >= 0.f)
vmax[1] = max[1];
else
vmax[1] = min[1];
/* Z axis */
if (plane.z() >= 0.f)
vmax[2] = max[2];
else
vmax[2] = min[2];
return plane.normal().dot(vmax) + plane.d() >= 0.f;
}
CVector3f center() const { return (min + max) * 0.5f; }
CVector3f extents() const { return (max - min) * 0.5f; }
float volume() const {
auto delta = max - min;
return delta.x() * delta.y() * delta.z();
}
CLineSeg getEdge(EBoxEdgeId id) const {
switch (id) {
case EBoxEdgeId::Z0:
default:
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), min.y(), min.z()});
case EBoxEdgeId::X0:
return CLineSeg({min.x(), min.y(), min.z()}, {max.x(), min.y(), min.z()});
case EBoxEdgeId::Z1:
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), min.y(), max.z()});
case EBoxEdgeId::X1:
return CLineSeg({max.x(), min.y(), max.z()}, {min.x(), min.y(), max.z()});
case EBoxEdgeId::Z2:
return CLineSeg({max.x(), max.y(), max.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::X2:
return CLineSeg({max.x(), max.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Z3:
return CLineSeg({min.x(), max.y(), min.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::X3:
return CLineSeg({min.x(), max.y(), max.z()}, {max.x(), max.y(), max.z()});
case EBoxEdgeId::Y0:
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::Y1:
return CLineSeg({min.x(), min.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Y2:
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::Y3:
return CLineSeg({max.x(), min.y(), max.z()}, {max.x(), max.y(), max.z()});
}
}
CAABox getTransformedAABox(const CTransform& xfrm) const {
CAABox box;
CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point);
point = xfrm * getPoint(1);
box.accumulateBounds(point);
point = xfrm * getPoint(2);
box.accumulateBounds(point);
point = xfrm * getPoint(3);
box.accumulateBounds(point);
point = xfrm * getPoint(4);
box.accumulateBounds(point);
point = xfrm * getPoint(5);
box.accumulateBounds(point);
point = xfrm * getPoint(6);
box.accumulateBounds(point);
point = xfrm * getPoint(7);
box.accumulateBounds(point);
return box;
}
void accumulateBounds(const CVector3f& point) {
if (min.x() > point.x())
min.x() = point.x();
if (min.y() > point.y())
min.y() = point.y();
if (min.z() > point.z())
min.z() = point.z();
if (max.x() < point.x())
max.x() = point.x();
if (max.y() < point.y())
max.y() = point.y();
if (max.z() < point.z())
max.z() = point.z();
}
void accumulateBounds(const CAABox& other) {
accumulateBounds(other.min);
accumulateBounds(other.max);
}
bool pointInside(const CVector3f& other) const {
return (min.x() <= other.x() && other.x() <= max.x() &&
min.y() <= other.y() && other.y() <= max.y() &&
min.z() <= other.z() && other.z() <= max.z());
}
CVector3f closestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? min.x() : max.x()),
(other.y() >= 0.f ? min.y() : max.y()),
(other.z() >= 0.f ? min.z() : max.z())};
}
CVector3f furthestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? max.x() : min.x()),
(other.y() >= 0.f ? max.y() : min.y()),
(other.z() >= 0.f ? max.z() : min.z())};
}
float distanceBetween(const CAABox& other) {
int intersects = 0;
if (max.x() >= other.min.x() && min.x() <= other.max.x())
intersects |= 0x1;
if (max.y() >= other.min.y() && min.y() <= other.max.y())
intersects |= 0x2;
if (max.z() >= other.min.z() && min.z() <= other.max.z())
intersects |= 0x4;
float minX, maxX;
if (max.x() < other.min.x()) {
minX = max.x();
maxX = other.min.x();
} else {
minX = min.x();
maxX = other.max.x();
}
float intersectionRadius(const CSphere& other) const
{
float dist = distanceFromPoint(other.position);
return (dist < other.radius) ? dist : -1.f;
float minY, maxY;
if (max.y() < other.min.y()) {
minY = max.y();
maxY = other.min.y();
} else {
minY = min.y();
maxY = other.max.y();
}
inline CAABox booleanIntersection(const CAABox& other) const
{
CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i)
{
if (min[i] <= other.min[i] && max[i] >= other.max[i])
{
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= max[i])
{
minVec[i] = min[i];
maxVec[i] = max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= min[i])
{
minVec[i] = min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= max[i] && other.max[i] >= max[i])
{
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
}
return {minVec, maxVec};
float minZ, maxZ;
if (max.z() < other.min.z()) {
minZ = max.z();
maxZ = other.min.z();
} else {
minZ = min.z();
maxZ = other.max.z();
}
inline bool inside(const CAABox& other) const
{
bool x = min[0] >= other.min[0] && max[0] <= other.max[0];
bool y = min[1] >= other.min[1] && max[1] <= other.max[1];
bool z = min[2] >= other.min[2] && max[2] <= other.max[2];
return x && y && z;
switch (intersects) {
case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1:
return zeus::CVector2f(maxY - minY, maxZ - minZ).magnitude();
case 2:
return zeus::CVector2f(maxX - minX, maxZ - minZ).magnitude();
case 3:
return std::fabs(maxZ - minZ);
case 4:
return zeus::CVector2f(maxX - minX, maxY - minY).magnitude();
case 5:
return std::fabs(maxY - minY);
case 6:
return std::fabs(maxX - minX);
case 7:
default:
return 0.f;
}
}
inline bool insidePlane(const CPlane& plane) const
{
CVector3f vmax;
/* X axis */
if (plane.a >= 0)
vmax[0] = max[0];
else
vmax[0] = min[0];
/* Y axis */
if (plane.b >= 0)
vmax[1] = max[1];
else
vmax[1] = min[1];
/* Z axis */
if (plane.c >= 0)
vmax[2] = max[2];
else
vmax[2] = min[2];
return plane.vec.dot(vmax) + plane.d >= 0.f;
}
CVector3f getPoint(const int point) const {
const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x(), vecs[(point & 2) != 0].y(), vecs[(point & 4) != 0].z());
}
CVector3f center() const { return (min + max) * 0.5f; }
CVector3f clampToBox(const CVector3f& vec) const {
CVector3f ret = vec;
ret.x() = clamp(min.x(), float(ret.x()), max.x());
ret.y() = clamp(min.y(), float(ret.y()), max.y());
ret.z() = clamp(min.z(), float(ret.z()), max.z());
return ret;
}
CVector3f extents() const { return (max - min) * 0.5f; }
void splitX(CAABox& negX, CAABox& posX) const {
float midX = (max.x() - min.x()) * .5f + min.x();
posX.max = max;
posX.min = min;
posX.min.x() = midX;
negX.max = max;
negX.max.x() = midX;
negX.min = min;
}
float volume() const { return (max.x - min.x) * (max.y - min.y) * (max.z - min.z); }
void splitY(CAABox& negY, CAABox& posY) const {
float midY = (max.y() - min.y()) * .5f + min.y();
posY.max = max;
posY.min = min;
posY.min.y() = midY;
negY.max = max;
negY.max.y() = midY;
negY.min = min;
}
inline CLineSeg getEdge(EBoxEdgeId id) const
{
switch (id)
{
case EBoxEdgeId::Z0:
default:
return CLineSeg({min.x, min.y, max.z}, {min.x, min.y, min.z});
case EBoxEdgeId::X0:
return CLineSeg({min.x, min.y, min.z}, {max.x, min.y, min.z});
case EBoxEdgeId::Z1:
return CLineSeg({max.x, min.y, min.z}, {max.x, min.y, max.z});
case EBoxEdgeId::X1:
return CLineSeg({max.x, min.y, max.z}, {min.x, min.y, max.z});
case EBoxEdgeId::Z2:
return CLineSeg({max.x, max.y, max.z}, {max.x, max.y, min.z});
case EBoxEdgeId::X2:
return CLineSeg({max.x, max.y, min.z}, {min.x, max.y, min.z});
case EBoxEdgeId::Z3:
return CLineSeg({min.x, max.y, min.z}, {min.x, max.y, max.z});
case EBoxEdgeId::X3:
return CLineSeg({min.x, max.y, max.z}, {max.x, max.y, max.z});
case EBoxEdgeId::Y0:
return CLineSeg({min.x, min.y, max.z}, {min.x, max.y, max.z});
case EBoxEdgeId::Y1:
return CLineSeg({min.x, min.y, min.z}, {min.x, max.y, min.z});
case EBoxEdgeId::Y2:
return CLineSeg({max.x, min.y, min.z}, {max.x, max.y, min.z});
case EBoxEdgeId::Y3:
return CLineSeg({max.x, min.y, max.z}, {max.x, max.y, max.z});
}
}
void splitZ(CAABox& negZ, CAABox& posZ) const {
float midZ = (max.z() - min.z()) * .5f + min.z();
posZ.max = max;
posZ.min = min;
posZ.min.z() = midZ;
negZ.max = max;
negZ.max.z() = midZ;
negZ.min = min;
}
inline CAABox getTransformedAABox(const CTransform& xfrm) const
{
CAABox box;
CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point);
point = xfrm * getPoint(1);
box.accumulateBounds(point);
point = xfrm * getPoint(2);
box.accumulateBounds(point);
point = xfrm * getPoint(3);
box.accumulateBounds(point);
point = xfrm * getPoint(4);
box.accumulateBounds(point);
point = xfrm * getPoint(5);
box.accumulateBounds(point);
point = xfrm * getPoint(6);
box.accumulateBounds(point);
point = xfrm * getPoint(7);
box.accumulateBounds(point);
return box;
}
bool invalid() { return (max.x() < min.x() || max.y() < min.y() || max.z() < min.z()); }
inline void accumulateBounds(const CVector3f& point)
{
if (min.x > point.x)
min.x = point.x;
if (min.y > point.y)
min.y = point.y;
if (min.z > point.z)
min.z = point.z;
if (max.x < point.x)
max.x = point.x;
if (max.y < point.y)
max.y = point.y;
if (max.z < point.z)
max.z = point.z;
}
inline void accumulateBounds(const CAABox& other)
{
accumulateBounds(other.min);
accumulateBounds(other.max);
}
inline bool pointInside(const CVector3f& other) const
{
return (min.x <= other.x && other.x <= max.x &&
min.y <= other.y && other.y <= max.y &&
min.z <= other.z && other.z <= max.z);
}
inline CVector3f closestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? min.x : max.x),
(other.y >= 0.f ? min.y : max.y),
(other.z >= 0.f ? min.z : max.z)};
}
inline CVector3f furthestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? max.x : min.x),
(other.y >= 0.f ? max.y : min.y),
(other.z >= 0.f ? max.z : min.z)};
}
inline float distanceBetween(const CAABox& other)
{
int intersects = 0;
if (max.x >= other.min.x && min.x <= other.max.x)
intersects |= 0x1;
if (max.y >= other.min.y && min.y <= other.max.y)
intersects |= 0x2;
if (max.z >= other.min.z && min.z <= other.max.z)
intersects |= 0x4;
float minX, maxX;
if (max.x < other.min.x)
{
minX = max.x;
maxX = other.min.x;
}
else
{
minX = min.x;
maxX = other.max.x;
}
float minY, maxY;
if (max.y < other.min.y)
{
minY = max.y;
maxY = other.min.y;
}
else
{
minY = min.y;
maxY = other.max.y;
}
float minZ, maxZ;
if (max.z < other.min.z)
{
minZ = max.z;
maxZ = other.min.z;
}
else
{
minZ = min.z;
maxZ = other.max.z;
}
switch (intersects)
{
case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1:
return zeus::CVector2f(maxY - minY, maxZ - minZ).magnitude();
case 2:
return zeus::CVector2f(maxX - minX, maxZ - minZ).magnitude();
case 3:
return std::fabs(maxZ - minZ);
case 4:
return zeus::CVector2f(maxX - minX, maxY - minY).magnitude();
case 5:
return std::fabs(maxY - minY);
case 6:
return std::fabs(maxX - minX);
case 7:
default:
return 0.f;
}
}
inline CVector3f getPoint(const int point) const
{
const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x, vecs[(point & 2) != 0].y, vecs[(point & 4) != 0].z);
}
inline CVector3f clampToBox(const CVector3f& vec)
{
CVector3f ret = vec;
clamp(min.x, ret.x, max.x);
clamp(min.y, ret.y, max.y);
clamp(min.z, ret.z, max.z);
return ret;
}
inline void splitX(CAABox& negX, CAABox& posX) const
{
float midX = (max.x - min.x) * .5f + min.x;
posX.max = max;
posX.min = min;
posX.min.x = midX;
negX.max = max;
negX.max.x = midX;
negX.min = min;
}
inline void splitY(CAABox& negY, CAABox& posY) const
{
float midY = (max.y - min.y) * .5f + min.y;
posY.max = max;
posY.min = min;
posY.min.y = midY;
negY.max = max;
negY.max.y = midY;
negY.min = min;
}
inline void splitZ(CAABox& negZ, CAABox& posZ) const
{
float midZ = (max.z - min.z) * .5f + min.z;
posZ.max = max;
posZ.min = min;
posZ.min.z = midZ;
negZ.max = max;
negZ.max.z = midZ;
negZ.min = min;
}
inline bool invalid() { return (max.x < min.x || max.y < min.y || max.z < min.z); }
inline float operator[](size_t idx) const
{
assert(idx < 6);
if (idx < 3)
return min[idx];
else
return max[idx-3];
}
float operator[](size_t idx) const {
assert(idx < 6);
if (idx < 3)
return min[idx];
else
return max[idx - 3];
}
};
inline bool operator==(const CAABox& left, const CAABox& right)
{
return (left.min == right.min && left.max == right.max);
inline bool operator==(const CAABox& left, const CAABox& right) {
return (left.min == right.min && left.max == right.max);
}
inline bool operator!=(const CAABox& left, const CAABox& right)
{
return (left.min != right.min || left.max != right.max);
inline bool operator!=(const CAABox& left, const CAABox& right) {
return (left.min != right.min || left.max != right.max);
}
}

View File

@ -4,22 +4,15 @@
#include "zeus/CVector3f.hpp"
#include "CUnitVector.hpp"
namespace zeus
{
struct alignas(16) CAxisAngle : CVector3f
{
ZE_DECLARE_ALIGNED_ALLOCATOR();
CAxisAngle() = default;
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
float angle() const { return magnitude(); }
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
namespace zeus {
struct CAxisAngle : CVector3f {
CAxisAngle() = default;
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
float angle() const { return magnitude(); }
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
};
}

View File

@ -2,11 +2,15 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "CVector4f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/FileReader.hpp>
#include <athena/FileWriter.hpp>
#include "athena/FileReader.hpp"
#include "athena/FileWriter.hpp"
#endif
#include <iostream>
#include <cassert>
@ -20,410 +24,315 @@
#define COLOR(rgba) rgba
#endif
namespace zeus
{
namespace zeus {
typedef uint8_t Comp8;
typedef uint32_t Comp32;
constexpr float OneOver255 = 1.f / 255.f;
typedef union {
struct
{
Comp8 r, g, b, a;
};
Comp32 rgba;
struct {
Comp8 r, g, b, a;
};
Comp32 rgba;
} RGBA32;
class CVector4f;
class alignas(16) CColor
{
class CColor {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
simd<float> mSimd;
static const CColor skRed;
static const CColor skBlack;
static const CColor skBlue;
static const CColor skGreen;
static const CColor skGrey;
static const CColor skOrange;
static const CColor skPurple;
static const CColor skYellow;
static const CColor skWhite;
static const CColor skClear;
static const CColor skRed;
static const CColor skBlack;
static const CColor skBlue;
static const CColor skGreen;
static const CColor skGrey;
static const CColor skOrange;
static const CColor skPurple;
static const CColor skYellow;
static const CColor skWhite;
static const CColor skClear;
CColor() : mSimd(1.f) {}
#if __SSE__
CColor(const __m128& mVec128) : mVec128(mVec128) {}
#endif
CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor() : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor(float r, float g, float b, float a = 1.0f)
{
v[0] = r;
v[1] = g;
v[2] = b;
v[3] = a;
}
#if ZE_ATHENA_TYPES
CColor(const atVec4f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
r = vec.vec[0], g = vec.vec[1], b = vec.vec[2], a = vec.vec[3];
}
#endif
#endif
CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other);
CColor& operator=(const CVector4f& other);
CColor(float r, float g, float b, float a = 1.0f) : mSimd(r, g, b, a) {}
#if ZE_ATHENA_TYPES
static inline CColor ReadRGBABig(athena::io::IStreamReader& reader)
{
CColor ret;
ret.readRGBABig(reader);
return ret;
}
CColor(const atVec4f& vec) : mSimd(vec.simd) {}
inline void readRGBABig(athena::io::IStreamReader& reader)
{
r = reader.readFloatBig();
g = reader.readFloatBig();
b = reader.readFloatBig();
a = reader.readFloatBig();
}
inline void readBGRABig(athena::io::IStreamReader& reader)
{
b = reader.readFloatBig();
g = reader.readFloatBig();
r = reader.readFloatBig();
a = reader.readFloatBig();
}
inline void writeRGBABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(r);
writer.writeFloatBig(g);
writer.writeFloatBig(b);
writer.writeFloatBig(a);
}
inline void writeBGRABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(b);
writer.writeFloatBig(g);
writer.writeFloatBig(r);
writer.writeFloatBig(a);
}
inline void writeRGBA8(athena::io::IStreamWriter& writer) const
{
writer.writeUByte(this->r * 255);
writer.writeUByte(this->g * 255);
writer.writeUByte(this->b * 255);
writer.writeUByte(this->a * 255);
}
#endif
inline bool operator==(const CColor& rhs) const { return (r == rhs.r && g == rhs.g && b == rhs.b && a == rhs.a); }
inline bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
inline CColor operator+(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CColor(r + rhs.r, g + rhs.g, b + rhs.b, a + rhs.a);
#endif
}
inline CColor operator-(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CColor(r - rhs.r, g - rhs.g, b - rhs.b, a - rhs.a);
#endif
}
inline CColor operator*(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CColor(r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a);
#endif
}
inline CColor operator/(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CColor(r / rhs.r, g / rhs.g, b / rhs.b, a / rhs.a);
#endif
}
inline CColor operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_add_ps(mVec128, splat.mVec128));
#else
return CColor(r + val, g + val, b + val, a + val);
#endif
}
inline CColor operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CColor(r - val, g - val, b - val, a - val);
#endif
}
inline CColor operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CColor(r * val, g * val, b * val, a * val);
#endif
}
inline CColor operator/(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_div_ps(mVec128, splat.mVec128));
#else
return CColor(r / val, g / val, b / val, a / val);
#endif
}
inline const CColor& operator+=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
r += rhs.r;
g += rhs.g;
b += rhs.b;
a += rhs.a;
#endif
return *this;
}
inline const CColor& operator-=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
r -= rhs.r;
g -= rhs.g;
b -= rhs.b;
a -= rhs.a;
#endif
return *this;
}
inline const CColor& operator*=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
r *= rhs.r;
g *= rhs.g;
b *= rhs.b;
a *= rhs.a;
#endif
return *this;
}
inline const CColor& operator/=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
r /= rhs.r;
g /= rhs.g;
b /= rhs.b;
a /= rhs.a;
#endif
return *this;
}
inline void normalize()
{
float mag = magnitude();
mag = 1.f / mag;
*this *= mag;
}
inline CColor normalized() const
{
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other) : mSimd(other.mSimd) {}
template <typename T>
CColor(const simd<T>& s) : mSimd(s) {}
CColor& operator=(const CVector4f& other) {
mSimd = other.mSimd;
return *this;
}
#if ZE_ATHENA_TYPES
static CColor ReadRGBABig(athena::io::IStreamReader& reader) {
CColor ret;
ret.readRGBABig(reader);
return ret;
}
void readRGBABig(athena::io::IStreamReader& reader) {
simd_floats f;
f[0] = reader.readFloatBig();
f[1] = reader.readFloatBig();
f[2] = reader.readFloatBig();
f[3] = reader.readFloatBig();
mSimd.copy_from(f);
}
void readBGRABig(athena::io::IStreamReader& reader) {
simd_floats f;
f[2] = reader.readFloatBig();
f[1] = reader.readFloatBig();
f[0] = reader.readFloatBig();
f[3] = reader.readFloatBig();
mSimd.copy_from(f);
}
void writeRGBABig(athena::io::IStreamWriter& writer) const {
simd_floats f(mSimd);
writer.writeFloatBig(f[0]);
writer.writeFloatBig(f[1]);
writer.writeFloatBig(f[2]);
writer.writeFloatBig(f[3]);
}
void writeBGRABig(athena::io::IStreamWriter& writer) const {
simd_floats f(mSimd);
writer.writeFloatBig(f[2]);
writer.writeFloatBig(f[1]);
writer.writeFloatBig(f[0]);
writer.writeFloatBig(f[3]);
}
void writeRGBA8(athena::io::IStreamWriter& writer) const {
simd_floats f(mSimd);
writer.writeUByte(atUint8(f[0] * 255));
writer.writeUByte(atUint8(f[1] * 255));
writer.writeUByte(atUint8(f[2] * 255));
writer.writeUByte(atUint8(f[3] * 255));
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return r * r + g * g + b * b + a * a;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
static inline CColor lerp(const CColor& a, const CColor& b, float t) { return (a + (b - a) * t); }
static inline CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
inline float& operator[](const size_t& idx) { assert(idx < 4); return (&r)[idx]; }
inline const float& operator[](const size_t& idx) const { assert(idx < 4); return (&r)[idx]; }
inline void splat(float rgb, float a)
{
#if __SSE__
TVectorUnion splat = {{rgb, rgb, rgb, a}};
mVec128 = splat.mVec128;
#else
v[0] = rgb;
v[1] = rgb;
v[2] = rgb;
v[3] = a;
#endif
}
inline float rgbDot(const CColor& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif
}
bool operator==(const CColor& rhs) const {
return (r() == rhs.r() && g() == rhs.g() && b() == rhs.b() && a() == rhs.a());
}
union {
struct
{
float r, g, b, a;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
void fromRGBA8(Comp8 r, Comp8 g, Comp8 b, Comp8 a)
{
this->r = r * OneOver255;
this->g = g * OneOver255;
this->b = b * OneOver255;
this->a = a * OneOver255;
}
CColor operator+(const CColor& rhs) const {
return mSimd + rhs.mSimd;
}
void fromRGBA32(Comp32 rgba)
{
static RGBA32 tmp;
tmp.rgba = COLOR(rgba);
fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
}
CColor operator-(const CColor& rhs) const {
return mSimd - rhs.mSimd;
}
/*!
* \brief Converts a CColor to RGBA8
* \param r
* \param g
* \param b
* \param a
*/
void toRGBA8(Comp8& r, Comp8& g, Comp8& b, Comp8& a)
{
r = this->r * 255;
g = this->g * 255;
b = this->b * 255;
a = this->a * 255;
}
CColor operator*(const CColor& rhs) const {
return mSimd * rhs.mSimd;
}
/**
* @brief Assigns rgba from hsv
* @param h[0-1] The hue percentagee of the color.
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void fromHSV(float h, float s, float v, float _a = 1.0);
CColor operator/(const CColor& rhs) const {
return mSimd / rhs.mSimd;
}
/**
* @brief Converts rgba to hsv
* @param h[0-1] The hue percentagee of the color.
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void toHSV(float& h, float& s, float& v) const;
CColor operator+(float val) const {
return mSimd + simd<float>(val);
}
void fromHSL(float h, float s, float l, float _a = 1.0);
CColor operator-(float val) const {
return mSimd - simd<float>(val);
}
void toHSL(float& h, float& s, float& l);
CColor operator*(float val) const {
return mSimd * simd<float>(val);
}
CColor toGrayscale() { return {std::sqrt((r * r + g * g + b * b) / 3), a}; }
CColor operator/(float val) const {
return mSimd / simd<float>(val);
}
/**
* @brief Clamps to GPU-safe RGBA values [0,1]
*/
void Clamp()
{
this->r = std::min(1.f, std::max(0.f, this->r));
this->g = std::min(1.f, std::max(0.f, this->g));
this->b = std::min(1.f, std::max(0.f, this->b));
this->a = std::min(1.f, std::max(0.f, this->a));
}
const CColor& operator+=(const CColor& rhs) {
mSimd += rhs.mSimd;
return *this;
}
const CColor& operator-=(const CColor& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
const CColor& operator*=(const CColor& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CColor& operator/=(const CColor& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
const CColor& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
const CColor& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
const CColor& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
const CColor& operator/=(float rhs) {
mSimd /= simd<float>(rhs);
return *this;
}
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= mag;
}
CColor normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
float magSquared() const {
return mSimd.dot4(mSimd);
}
float magnitude() const { return std::sqrt(magSquared()); }
static CColor lerp(const CColor& a, const CColor& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
static CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
simd<float>::reference operator[](const size_t& idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](const size_t& idx) const {
assert(idx < 4);
return mSimd[idx];
}
void splat(float rgb, float a) {
mSimd = simd<float>(rgb);
mSimd[3] = a;
}
float rgbDot(const CColor& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
void fromRGBA8(const Comp8 ri, const Comp8 gi, const Comp8 bi, const Comp8 ai) {
mSimd = simd<float>(ri * OneOver255, gi * OneOver255, bi * OneOver255, ai * OneOver255);
}
void fromRGBA32(Comp32 rgba) {
static RGBA32 tmp;
tmp.rgba = COLOR(rgba);
fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
}
/*!
* \brief Converts a CColor to RGBA8
* \param r
* \param g
* \param b
* \param a
*/
void toRGBA8(Comp8& ro, Comp8& go, Comp8& bo, Comp8& ao) const {
ro = Comp8(r() * 255);
go = Comp8(g() * 255);
bo = Comp8(b() * 255);
ao = Comp8(a() * 255);
}
/**
* @brief Assigns rgba from hsv
* @param h[0-1] The hue percentagee of the color.
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void fromHSV(float h, float s, float v, float _a = 1.0);
/**
* @brief Converts rgba to hsv
* @param h[0-1] The hue percentagee of the color.
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void toHSV(float& h, float& s, float& v) const;
void fromHSL(float h, float s, float l, float _a = 1.0);
void toHSL(float& h, float& s, float& l) const;
CColor toGrayscale() const { return {std::sqrt((r() * r() + g() * g() + b() * b()) / 3), a()}; }
/**
* @brief Clamps to GPU-safe RGBA values [0,1]
*/
void Clamp() {
r() = std::min(1.f, std::max(0.f, float(r())));
g() = std::min(1.f, std::max(0.f, float(g())));
b() = std::min(1.f, std::max(0.f, float(b())));
a() = std::min(1.f, std::max(0.f, float(a())));
}
float r() const { return mSimd[0]; }
float g() const { return mSimd[1]; }
float b() const { return mSimd[2]; }
float a() const { return mSimd[3]; }
simd<float>::reference r() { return mSimd[0]; }
simd<float>::reference g() { return mSimd[1]; }
simd<float>::reference b() { return mSimd[2]; }
simd<float>::reference a() { return mSimd[3]; }
};
static inline CColor operator+(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs + rhs.r, lhs + rhs.g, lhs + rhs.b, lhs + rhs.a);
#endif
static inline CColor operator+(float lhs, const CColor& rhs) {
return simd<float>(lhs) + rhs.mSimd;
}
static inline CColor operator-(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs - rhs.r, lhs - rhs.g, lhs - rhs.b, lhs - rhs.a);
#endif
static inline CColor operator-(float lhs, const CColor& rhs) {
return simd<float>(lhs) - rhs.mSimd;
}
static inline CColor operator*(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs * rhs.r, lhs * rhs.g, lhs * rhs.b, lhs * rhs.a);
#endif
static inline CColor operator*(float lhs, const CColor& rhs) {
return simd<float>(lhs) * rhs.mSimd;
}
static inline CColor operator/(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs / rhs.r, lhs / rhs.g, lhs / rhs.b, lhs / rhs.a);
#endif
static inline CColor operator/(float lhs, const CColor& rhs) {
return simd<float>(lhs) / rhs.mSimd;
}
}

View File

@ -2,16 +2,14 @@
#include "zeus/CVector3f.hpp"
namespace zeus
{
namespace zeus {
class CQuaternion;
class CEulerAngles : public CVector3f
{
class CEulerAngles : public CVector3f {
public:
CEulerAngles(float x, float y, float z) { assign(x, y, z); }
CEulerAngles(const CQuaternion& quat);
CEulerAngles(const CTransform& xf);
CEulerAngles(float x, float y, float z) { assign(x, y, z); }
CEulerAngles(const CQuaternion& quat);
CEulerAngles(const CTransform& xf);
};
}

View File

@ -4,19 +4,16 @@
#include "zeus/CAABox.hpp"
#include "zeus/CProjection.hpp"
namespace zeus
{
class CFrustum
{
CPlane planes[6];
bool valid = false;
namespace zeus {
class CFrustum {
CPlane planes[6];
bool valid = false;
public:
void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection);
void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection);
bool aabbFrustumTest(const CAABox& aabb) const;
bool sphereFrustumTest(const CSphere& sphere) const;
bool pointFrustumTest(const CVector3f& point) const;
void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection);
void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection);
bool aabbFrustumTest(const CAABox& aabb) const;
bool sphereFrustumTest(const CSphere& sphere) const;
bool pointFrustumTest(const CVector3f& point) const;
};
}

View File

@ -3,14 +3,13 @@
#include "Global.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class CLine
{
namespace zeus {
class CLine {
public:
CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {}
CVector3f origin;
CVector3f dir;
CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {}
CVector3f origin;
CVector3f dir;
};
}

View File

@ -3,23 +3,20 @@
#include "Global.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class CLineSeg
{
namespace zeus {
class CLineSeg {
public:
CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end)
{
CVector3f tmp = (end - start).normalized();
if (tmp.x != 0 || tmp.y != 0 || tmp.z != 0)
xc_dir = tmp.normalized();
else
xc_dir = CVector3f::skZero;
}
CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end) {
CVector3f tmp = (end - start).normalized();
if (tmp.x() != 0.f || tmp.y() != 0.f || tmp.z() != 0.f)
xc_dir = tmp.normalized();
else
xc_dir = CVector3f::skZero;
}
CVector3f x0_start;
CVector3f xc_dir;
CVector3f x18_end;
CVector3f x0_start;
CVector3f xc_dir;
CVector3f x18_end;
};
}

View File

@ -1,38 +1,34 @@
#pragma once
#include "zeus/CVector3f.hpp"
#include "zeus/CTransform.hpp"
#include "zeus/Math.hpp"
namespace zeus
{
struct CMRay
{
CMRay(const CVector3f& start, const CVector3f& dirin, float len)
: start(start), length(len), invLength(1.f / len), dir(dirin)
{
end = start + (len * dirin);
delta = end - start;
}
namespace zeus {
struct CMRay {
CMRay(const CVector3f& start, const CVector3f& dirin, float len)
: start(start), length(len), invLength(1.f / len), dir(dirin) {
end = start + (len * dirin);
delta = end - start;
}
CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen)
: start(start), end(end), length(len), invLength(invLen)
{
delta = end - start;
dir = invLen * delta;
}
CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen)
: start(start), end(end), length(len), invLength(invLen) {
delta = end - start;
dir = invLen * delta;
}
CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const
{
const CTransform inv = xfrm.inverse();
return CMRay(inv * start, inv * end, length, invLength);
}
CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const {
const CTransform inv = xfrm.inverse();
return CMRay(inv * start, inv * end, length, invLength);
}
CVector3f start; // x0
CVector3f end; // xc
CVector3f delta; // x18
float length; // x24
float invLength; // x28
CVector3f dir; // x2c
CVector3f start; // x0
CVector3f end; // xc
CVector3f delta; // x18
float length; // x24
float invLength; // x28
CVector3f dir; // x2c
};
}

View File

@ -6,254 +6,185 @@
#include <cstring>
/* Column-major matrix class */
namespace zeus
{
namespace zeus {
class CQuaternion;
class alignas(16) CMatrix3f
{
class CMatrix3f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix3f(bool zero = false)
{
memset(m, 0, sizeof(m));
if (!zero)
{
m[0][0] = 1.0;
m[1][1] = 1.0;
m[2][2] = 1.0;
}
explicit CMatrix3f(bool zero = false) {
m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
m[2] = simd<float>(0.f);
if (!zero) {
m[0][0] = 1.0;
m[1][1] = 1.0;
m[2][2] = 1.0;
}
CMatrix3f(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22)
{
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02;
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12;
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22;
}
CMatrix3f(const CVector3f& scaleVec)
{
memset(m, 0, sizeof(m));
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
}
CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {}
CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2)
{
vec[0] = r0;
vec[1] = r1;
vec[2] = r2;
}
CMatrix3f(const CMatrix3f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
}
#if __SSE__
CMatrix3f(const __m128& r0, const __m128& r1, const __m128& r2)
{
vec[0].mVec128 = r0;
vec[1].mVec128 = r1;
vec[2].mVec128 = r2;
}
#endif
}
CMatrix3f(float m00, float m01, float m02,
float m10, float m11, float m12,
float m20, float m21, float m22)
: m{{m00, m10, m20},
{m01, m11, m21},
{m02, m12, m22}} {}
CMatrix3f(const CVector3f& scaleVec) {
m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
m[2] = simd<float>(0.f);
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
}
CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {}
CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2) {
m[0] = r0;
m[1] = r1;
m[2] = r2;
}
CMatrix3f(const CMatrix3f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
}
CMatrix3f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2) {
m[0].mSimd = r0;
m[1].mSimd = r1;
m[2].mSimd = r2;
}
#if ZE_ATHENA_TYPES
CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2)
{
#if __SSE__
vec[0].mVec128 = r0.mVec128;
vec[1].mVec128 = r1.mVec128;
vec[2].mVec128 = r2.mVec128;
#else
vec[0].x = r0.vec[0];
vec[0].y = r0.vec[1];
vec[0].z = r0.vec[2];
vec[1].x = r1.vec[0];
vec[1].y = r1.vec[1];
vec[1].z = r1.vec[2];
vec[2].x = r2.vec[0];
vec[2].y = r2.vec[1];
vec[2].z = r2.vec[2];
CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2) {
m[0].mSimd = r0.simd;
m[1].mSimd = r1.simd;
m[2].mSimd = r2.simd;
}
void readBig(athena::io::IStreamReader& input) {
m[0][0] = input.readFloatBig();
m[1][0] = input.readFloatBig();
m[2][0] = input.readFloatBig();
m[0][1] = input.readFloatBig();
m[1][1] = input.readFloatBig();
m[2][1] = input.readFloatBig();
m[0][2] = input.readFloatBig();
m[1][2] = input.readFloatBig();
m[2][2] = input.readFloatBig();
}
static CMatrix3f ReadBig(athena::io::IStreamReader& input) {
CMatrix3f ret;
ret.readBig(input);
return ret;
}
#endif
}
void readBig(athena::io::IStreamReader& input)
{
m[0][0] = input.readFloatBig();
m[1][0] = input.readFloatBig();
m[2][0] = input.readFloatBig();
m[0][1] = input.readFloatBig();
m[1][1] = input.readFloatBig();
m[2][1] = input.readFloatBig();
m[0][2] = input.readFloatBig();
m[1][2] = input.readFloatBig();
m[2][2] = input.readFloatBig();
}
static CMatrix3f ReadBig(athena::io::IStreamReader& input)
{
CMatrix3f ret;
ret.readBig(input);
return ret;
}
#endif
CMatrix3f(const CVector3f& axis, float angle);
CMatrix3f(const CQuaternion& quat);
CMatrix3f(const TVectorUnion& r0, const TVectorUnion& r1, const TVectorUnion& r2)
{
#if __SSE__
vec[0].mVec128 = r0.mVec128;
vec[1].mVec128 = r1.mVec128;
vec[2].mVec128 = r2.mVec128;
#else
vec[0].x = r0.vec[0];
vec[0].y = r0.vec[1];
vec[0].z = r0.vec[2];
vec[1].x = r1.vec[0];
vec[1].y = r1.vec[1];
vec[1].z = r1.vec[2];
vec[2].x = r2.vec[0];
vec[2].y = r2.vec[1];
vec[2].z = r2.vec[2];
#endif
}
CMatrix3f(const CVector3f& axis, float angle);
inline CMatrix3f& operator=(const CMatrix3f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
return *this;
}
CMatrix3f(const CQuaternion& quat);
inline CVector3f operator*(const CVector3f& other) const
{
#if __SSE__
TVectorUnion res;
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)));
return CVector3f(res.mVec128);
#else
return CVector3f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2],
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2],
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2]);
#endif
}
CMatrix3f& operator=(const CMatrix3f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
return *this;
}
inline CVector3f& operator[](int i)
{
assert(0 <= i && i < 3);
return vec[i];
}
CVector3f operator*(const CVector3f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>();
}
inline const CVector3f& operator[](int i) const
{
assert(0 <= i && i < 3);
return vec[i];
}
CVector3f& operator[](size_t i) {
assert(i < 3);
return m[i];
}
inline CMatrix3f orthonormalized() const
{
CMatrix3f ret;
ret[0] = vec[0].normalized();
ret[2] = ret[0].cross(vec[1]);
ret[2].normalize();
ret[1] = ret[2].cross(ret[0]);
return ret;
}
const CVector3f& operator[](size_t i) const {
assert(i < 3);
return m[i];
}
inline bool operator==(const CMatrix3f& other) const
{
return vec[0] == other.vec[0] && vec[1] == other.vec[1] && vec[2] == other.vec[2];
}
CMatrix3f orthonormalized() const {
CMatrix3f ret;
ret[0] = m[0].normalized();
ret[2] = ret[0].cross(m[1]);
ret[2].normalize();
ret[1] = ret[2].cross(ret[0]);
return ret;
}
static const CMatrix3f skIdentityMatrix3f;
bool operator==(const CMatrix3f& other) const {
return m[0] == other.m[0] && m[1] == other.m[1] && m[2] == other.m[2];
}
void transpose();
void transposeSSE3();
CMatrix3f transposed() const;
CMatrix3f transposedSSE3() const;
static const CMatrix3f skIdentityMatrix3f;
inline void invert() { *this = inverted(); }
CMatrix3f inverted() const;
void transpose();
void addScaledMatrix(const CMatrix3f& other, float scale)
{
CVector3f scaleVec(scale);
vec[0] += other.vec[0] * scaleVec;
vec[1] += other.vec[1] * scaleVec;
vec[2] += other.vec[2] * scaleVec;
}
CMatrix3f transposed() const;
static inline CMatrix3f RotateX(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, cosT, sinT, 0.f}},
TVectorUnion{{0.f, -sinT, cosT, 0.f}});
}
void invert() { *this = inverted(); }
static inline CMatrix3f RotateY(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}},
TVectorUnion{{sinT, 0.f, cosT, 0.f}});
}
CMatrix3f inverted() const;
static inline CMatrix3f RotateZ(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}},
TVectorUnion{{-sinT, cosT, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, 1.f, 0.f}});
}
void addScaledMatrix(const CMatrix3f& other, float scale) {
CVector3f scaleVec(scale);
m[0] += other.m[0] * scaleVec;
m[1] += other.m[1] * scaleVec;
m[2] += other.m[2] * scaleVec;
}
float determinant() const
{
return
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) +
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
}
static CMatrix3f RotateX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
simd<float>{0.f, cosT, sinT, 0.f},
simd<float>{0.f, -sinT, cosT, 0.f});
}
union {
float m[3][4]; /* 4th row for union-alignment */
struct
{
CVector3f vec[3];
};
};
static CMatrix3f RotateY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
simd<float>{0.f, 1.f, 0.f, 0.f},
simd<float>{sinT, 0.f, cosT, 0.f});
}
static CMatrix3f RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f});
}
float determinant() const {
return
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) +
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
}
CVector3f m[3];
};
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
{
#if __SSE__
unsigned i;
TVectorUnion resVec[3];
for (i = 0; i < 3; ++i)
{
resVec[i].mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
_mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
_mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
resVec[i].v[3] = 0.0;
}
return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
#else
return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
#endif
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs) {
simd<float> v[3];
for (int i = 0; i < 3; ++i)
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>();
return CMatrix3f(v[0], v[1], v[2]);
}
}

View File

@ -1,176 +1,116 @@
#pragma once
#include "zeus/CMatrix3f.hpp"
#include "zeus/CVector4f.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(16) CMatrix4f
{
namespace zeus {
class CMatrix4f {
public:
static const CMatrix4f skIdentityMatrix4f;
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix4f(bool zero = false)
{
memset(m, 0, sizeof(m));
static const CMatrix4f skIdentityMatrix4f;
if (!zero)
{
m[0][0] = 1.0;
m[1][1] = 1.0;
m[2][2] = 1.0;
m[3][3] = 1.0;
}
explicit CMatrix4f(bool zero = false) {
if (!zero) {
m[0][0] = 1.0;
m[1][1] = 1.0;
m[2][2] = 1.0;
m[3][3] = 1.0;
}
CMatrix4f(float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20, float m21,
float m22, float m23, float m30, float m31, float m32, float m33)
{
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02, m[3][0] = m03;
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12, m[3][1] = m13;
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22, m[3][2] = m23;
m[0][3] = m30, m[1][3] = m31, m[2][3] = m32, m[3][3] = m33;
}
CMatrix4f(const CVector3f& scaleVec)
{
memset(m, 0, sizeof(m));
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
m[3][3] = 1.0f;
}
CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3)
{
vec[0] = r0;
vec[1] = r1;
vec[2] = r2;
vec[3] = r3;
}
CMatrix4f(const CMatrix4f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = other.vec[3];
}
#if __SSE__
CMatrix4f(const __m128& r0, const __m128& r1, const __m128& r2, const __m128& r3)
{
vec[0].mVec128 = r0;
vec[1].mVec128 = r1;
vec[2].mVec128 = r2;
vec[3].mVec128 = r3;
}
#endif
CMatrix4f(const CMatrix3f& other)
{
memset(m, 0, sizeof(m));
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = CVector4f(0, 0, 0, 1.0f);
}
inline CMatrix4f& operator=(const CMatrix4f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = other.vec[3];
return *this;
}
inline CVector4f operator*(const CVector4f& other) const
{
#if __SSE__
TVectorUnion res;
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
_mm_add_ps(_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)),
_mm_mul_ps(vec[3].mVec128, ze_splat_ps(other.mVec128, 3))));
}
return CVector4f(res.mVec128);
#else
return CVector4f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2] + m[3][0] * other.v[3],
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2] + m[3][1] * other.v[3],
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2] + m[3][2] * other.v[3],
m[0][3] * other.v[0] + m[1][3] * other.v[1] + m[2][3] * other.v[2] + m[3][3] * other.v[3]);
#endif
}
CMatrix4f(float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23,
float m30, float m31, float m32, float m33)
: m{{m00, m10, m20, m30},
{m01, m11, m21, m31},
{m02, m12, m22, m32},
{m03, m13, m23, m33}} {}
inline CVector4f& operator[](int i)
{
assert(0 <= i && i < 4);
return vec[i];
}
CMatrix4f(const CVector3f& scaleVec) {
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
m[3][3] = 1.0f;
}
inline const CVector4f& operator[](int i) const
{
assert(0 <= i && i < 4);
return vec[i];
}
CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3) {
m[0] = r0;
m[1] = r1;
m[2] = r2;
m[3] = r3;
}
CMatrix4f transposed() const;
CMatrix4f transposedSSE3() const;
CMatrix4f(const CMatrix4f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = other.m[3];
}
inline CVector3f multiplyOneOverW(const CVector3f& point) const
{
CVector4f xfVec = *this * point;
return xfVec.toVec3f() / xfVec.w;
}
CMatrix4f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2, const simd<float>& r3) {
m[0].mSimd = r0;
m[1].mSimd = r1;
m[2].mSimd = r2;
m[3].mSimd = r3;
}
inline CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const
{
CVector4f xfVec = *this * point;
wOut = xfVec.w;
return xfVec.toVec3f() / xfVec.w;
}
CMatrix4f(const CMatrix3f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = CVector4f(0.f, 0.f, 0.f, 1.0f);
}
union {
float m[4][4];
struct
{
CVector4f vec[4];
};
};
CMatrix4f& operator=(const CMatrix4f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = other.m[3];
return *this;
}
CVector4f operator*(const CVector4f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>() +
m[3].mSimd * other.mSimd.shuffle<3, 3, 3, 3>();
}
CVector4f& operator[](size_t i) {
assert(i < 4);
return m[i];
}
const CVector4f& operator[](size_t i) const {
assert(i < 4);
return m[i];
}
CMatrix4f transposed() const;
CVector3f multiplyOneOverW(const CVector3f& point) const {
CVector4f xfVec = *this * point;
return xfVec.toVec3f() / xfVec.w();
}
CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const {
CVector4f xfVec = *this * point;
wOut = xfVec.w();
return xfVec.toVec3f() / xfVec.w();
}
CVector4f m[4];
};
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs)
{
CMatrix4f ret;
#if __SSE__
unsigned i;
for (i = 0; i < 4; ++i)
{
ret.vec[i].mVec128 = _mm_add_ps(
_mm_add_ps(_mm_add_ps(_mm_mul_ps(lhs.vec[0].mVec128,
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_mul_ps(lhs.vec[1].mVec128,
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(1, 1, 1, 1)))),
_mm_mul_ps(lhs.vec[2].mVec128,
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(2, 2, 2, 2)))),
_mm_mul_ps(lhs.vec[3].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(3, 3, 3, 3))));
}
#else
ret.m[0][0] = lhs.m[0][0] * rhs.m[0][0] + lhs.m[1][0] * rhs.m[0][1] + lhs.m[2][0] * rhs.m[0][2] + lhs.m[3][0] * rhs.m[0][3];
ret.m[1][0] = lhs.m[0][0] * rhs.m[1][0] + lhs.m[1][0] * rhs.m[1][1] + lhs.m[2][0] * rhs.m[1][2] + lhs.m[3][0] * rhs.m[1][3];
ret.m[2][0] = lhs.m[0][0] * rhs.m[2][0] + lhs.m[1][0] * rhs.m[2][1] + lhs.m[2][0] * rhs.m[2][2] + lhs.m[3][0] * rhs.m[2][3];
ret.m[3][0] = lhs.m[0][0] * rhs.m[3][0] + lhs.m[1][0] * rhs.m[3][1] + lhs.m[2][0] * rhs.m[3][2] + lhs.m[3][0] * rhs.m[3][3];
ret.m[0][1] = lhs.m[0][1] * rhs.m[0][0] + lhs.m[1][1] * rhs.m[0][1] + lhs.m[2][1] * rhs.m[0][2] + lhs.m[3][1] * rhs.m[0][3];
ret.m[1][1] = lhs.m[0][1] * rhs.m[1][0] + lhs.m[1][1] * rhs.m[1][1] + lhs.m[2][1] * rhs.m[1][2] + lhs.m[3][1] * rhs.m[1][3];
ret.m[2][1] = lhs.m[0][1] * rhs.m[2][0] + lhs.m[1][1] * rhs.m[2][1] + lhs.m[2][1] * rhs.m[2][2] + lhs.m[3][1] * rhs.m[2][3];
ret.m[3][1] = lhs.m[0][1] * rhs.m[3][0] + lhs.m[1][1] * rhs.m[3][1] + lhs.m[2][1] * rhs.m[3][2] + lhs.m[3][1] * rhs.m[3][3];
ret.m[0][2] = lhs.m[0][2] * rhs.m[0][0] + lhs.m[1][2] * rhs.m[0][1] + lhs.m[2][2] * rhs.m[0][2] + lhs.m[3][2] * rhs.m[0][3];
ret.m[1][2] = lhs.m[0][2] * rhs.m[1][0] + lhs.m[1][2] * rhs.m[1][1] + lhs.m[2][2] * rhs.m[1][2] + lhs.m[3][2] * rhs.m[1][3];
ret.m[2][2] = lhs.m[0][2] * rhs.m[2][0] + lhs.m[1][2] * rhs.m[2][1] + lhs.m[2][2] * rhs.m[2][2] + lhs.m[3][2] * rhs.m[2][3];
ret.m[3][2] = lhs.m[0][2] * rhs.m[3][0] + lhs.m[1][2] * rhs.m[3][1] + lhs.m[2][2] * rhs.m[3][2] + lhs.m[3][2] * rhs.m[3][3];
ret.m[0][3] = lhs.m[0][3] * rhs.m[0][0] + lhs.m[1][3] * rhs.m[0][1] + lhs.m[2][3] * rhs.m[0][2] + lhs.m[3][3] * rhs.m[0][3];
ret.m[1][3] = lhs.m[0][3] * rhs.m[1][0] + lhs.m[1][3] * rhs.m[1][1] + lhs.m[2][3] * rhs.m[1][2] + lhs.m[3][3] * rhs.m[1][3];
ret.m[2][3] = lhs.m[0][3] * rhs.m[2][0] + lhs.m[1][3] * rhs.m[2][1] + lhs.m[2][3] * rhs.m[2][2] + lhs.m[3][3] * rhs.m[2][3];
ret.m[3][3] = lhs.m[0][3] * rhs.m[3][0] + lhs.m[1][3] * rhs.m[3][1] + lhs.m[2][3] * rhs.m[2][2] + lhs.m[3][3] * rhs.m[3][3];
#endif
return ret;
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs) {
simd<float> v[4];
for (int i = 0; i < 4; ++i)
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>() +
lhs.m[3].mSimd * rhs[i].mSimd.shuffle<3, 3, 3, 3>();
return CMatrix4f(v[0], v[1], v[2], v[3]);
}
}

View File

@ -5,51 +5,46 @@
#include "zeus/CAABox.hpp"
#include "zeus/CMRay.hpp"
namespace zeus
{
class alignas(16) COBBox
{
namespace zeus {
class COBBox {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
#if ZE_ATHENA_TYPES
void readBig(athena::io::IStreamReader& in)
{
transform.read34RowMajor(in);
extents.readBig(in);
}
static COBBox ReadBig(athena::io::IStreamReader& in)
{
COBBox out;
out.readBig(in);
return out;
}
void readBig(athena::io::IStreamReader& in) {
transform.read34RowMajor(in);
extents.readBig(in);
}
static COBBox ReadBig(athena::io::IStreamReader& in) {
COBBox out;
out.readBig(in);
return out;
}
#endif
CTransform transform;
CVector3f extents;
CTransform transform;
CVector3f extents;
COBBox() {}
COBBox() = default;
COBBox(const CAABox& aabb) : extents(aabb.extents()) { transform.origin = aabb.center(); }
COBBox(const CAABox& aabb) : extents(aabb.extents()) { transform.origin = aabb.center(); }
COBBox(const CTransform& xf, const CVector3f& extents) : transform(xf), extents(extents) {}
COBBox(const CTransform& xf, const CVector3f& extents) : transform(xf), extents(extents) {}
CAABox calculateAABox(const CTransform& worldXf = CTransform()) const;
CAABox calculateAABox(const CTransform& worldXf = CTransform()) const;
static COBBox FromAABox(const CAABox& box, const CTransform& xf)
{
const CVector3f extents = box.max - box.center();
const CTransform newXf = CTransform::Translate(box.center()) * xf;
return COBBox(newXf, extents);
}
static COBBox FromAABox(const CAABox& box, const CTransform& xf) {
const CVector3f extents = box.max - box.center();
const CTransform newXf = CTransform::Translate(box.center()) * xf;
return COBBox(newXf, extents);
}
bool OBBIntersectsBox(const COBBox& other) const;
bool OBBIntersectsBox(const COBBox& other) const;
bool AABoxIntersectsBox(const CAABox& other)
{
return OBBIntersectsBox(FromAABox(other, CTransform::Identity()));
}
bool AABoxIntersectsBox(const CAABox& other) {
return OBBIntersectsBox(FromAABox(other, CTransform::Identity()));
}
};
}

View File

@ -4,72 +4,67 @@
#include "zeus/CVector3f.hpp"
#include "zeus/Math.hpp"
namespace zeus
{
class alignas(16) CPlane
{
namespace zeus {
class CPlane {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CPlane() : mSimd(1.0, 0.f, 0.f, 0.f) {}
inline CPlane() : a(1.f), b(0.f), c(0.f), d(0.f) {}
CPlane(float a, float b, float c, float d) : a(a), b(b), c(c), d(d) {}
CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c)
{
vec = (b - a).cross(c - a).normalized();
d = a.dot(vec);
}
CPlane(float a, float b, float c, float d) : mSimd(a, b, c, d) {}
CPlane(const CVector3f& point, float displacement)
{
#if __SSE__
mVec128 = point.mVec128;
#else
a = point[0];
b = point[1];
c = point[2];
#endif
d = displacement;
}
CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c) {
mSimd = (b - a).cross(c - a).normalized().mSimd;
mSimd[3] = a.dot(normal());
}
float clipLineSegment(const CVector3f& a, const CVector3f& b)
{
float mag = (b-a).dot(vec);
float dis = (-(vec.y - d)) / mag;
return clamp(0.0f, dis, 1.0f);
}
CPlane(const CVector3f& point, float displacement) {
mSimd = point.mSimd;
mSimd[3] = displacement;
}
inline void normalize()
{
float nd = d;
float mag = vec.magnitude();
mag = 1.f / mag;
vec = vec * mag;
d = nd * mag;
}
float clipLineSegment(const CVector3f& a, const CVector3f& b) {
float mag = (b - a).dot(normal());
float dis = (-(y() - d())) / mag;
return clamp(0.0f, dis, 1.0f);
}
float pointToPlaneDist(const CVector3f& pos) const
{
return pos.dot(vec) - d;
}
void normalize() {
float nd = d();
auto norm = normal();
float mag = norm.magnitude();
mag = 1.f / mag;
mSimd = (norm * mag).mSimd;
mSimd[3] = nd * mag;
}
bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const;
float pointToPlaneDist(const CVector3f& pos) const {
return pos.dot(normal()) - d();
}
const CVector3f& normal() const { return vec; }
bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const;
inline float& operator[](size_t idx) { assert(idx < 4); return p[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return p[idx]; }
CVector3f normal() const { return mSimd; }
union {
struct
{
float a, b, c, d;
};
float p[4];
CVector3f vec;
#ifdef __SSE__
__m128 mVec128;
#endif
};
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float d() const { return mSimd[3]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference d() { return mSimd[3]; }
zeus::simd<float> mSimd;
};
}

View File

@ -6,121 +6,117 @@
#include <cstdio>
#include <cmath>
namespace zeus
{
enum class EProjType
{
None = 0,
Orthographic = 1,
Perspective = 2
namespace zeus {
enum class EProjType {
None = 0,
Orthographic = 1,
Perspective = 2
};
class SProjOrtho
{
class SProjOrtho {
public:
float top, bottom, left, right, znear, zfar;
explicit SProjOrtho(float p_top = 1.0f, float p_bottom = -1.0f, float p_left = -1.0f, float p_right = 1.0f,
float p_near = 1.0f, float p_far = -1.0f)
: top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far)
{
}
float top, bottom, left, right, znear, zfar;
explicit SProjOrtho(float p_top = 1.0f, float p_bottom = -1.0f, float p_left = -1.0f, float p_right = 1.0f,
float p_near = 1.0f, float p_far = -1.0f)
: top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far) {
}
};
struct SProjPersp
{
float fov, aspect, znear, zfar;
SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far)
{
}
struct SProjPersp {
float fov, aspect, znear, zfar;
SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) {
}
};
extern const SProjOrtho kOrthoIdentity;
class alignas(16) CProjection
{
void _updateCachedMatrix();
class CProjection {
void _updateCachedMatrix();
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CProjection() {
m_projType = EProjType::Orthographic;
m_ortho = SProjOrtho();
m_mtx = CMatrix4f::skIdentityMatrix4f;
}
CProjection()
{
m_projType = EProjType::Orthographic;
m_ortho = SProjOrtho();
m_mtx = CMatrix4f::skIdentityMatrix4f;
}
CProjection(const CProjection& other) { *this = other; }
CProjection(const SProjOrtho& ortho) { setOrtho(ortho); }
CProjection(const SProjPersp& persp) { setPersp(persp); }
CProjection(const CProjection& other) { *this = other; }
inline CProjection& operator=(const CProjection& other)
{
if (this != &other)
{
m_projType = other.m_projType;
m_ortho = other.m_ortho;
m_mtx = other.m_mtx;
}
return *this;
}
CProjection(const SProjOrtho& ortho) { setOrtho(ortho); }
inline void setOrtho(const SProjOrtho& ortho)
{
m_projType = EProjType::Orthographic;
m_ortho = ortho;
_updateCachedMatrix();
}
inline void setPersp(const SProjPersp& persp)
{
m_projType = EProjType::Perspective;
m_persp = persp;
_updateCachedMatrix();
}
CProjection(const SProjPersp& persp) { setPersp(persp); }
inline EProjType getType() const { return m_projType; }
inline const SProjOrtho& getOrtho() const
{
if (m_projType != EProjType::Orthographic)
{
std::fprintf(stderr, "attempted to access orthographic structure of non-ortho projection");
std::abort();
}
return m_ortho;
}
inline const SProjPersp& getPersp() const
{
if (m_projType != EProjType::Perspective)
{
std::fprintf(stderr, "attempted to access perspective structure of non-persp projection");
std::abort();
}
return m_persp;
CProjection& operator=(const CProjection& other) {
if (this != &other) {
m_projType = other.m_projType;
m_ortho = other.m_ortho;
m_mtx = other.m_mtx;
}
return *this;
}
inline const CMatrix4f& getCachedMatrix() const { return m_mtx; }
void setOrtho(const SProjOrtho& ortho) {
m_projType = EProjType::Orthographic;
m_ortho = ortho;
_updateCachedMatrix();
}
void setPersp(const SProjPersp& persp) {
m_projType = EProjType::Perspective;
m_persp = persp;
_updateCachedMatrix();
}
EProjType getType() const { return m_projType; }
const SProjOrtho& getOrtho() const {
#ifndef NDEBUG
if (m_projType != EProjType::Orthographic) {
std::fprintf(stderr, "attempted to access orthographic structure of non-ortho projection");
std::abort();
}
#endif
return m_ortho;
}
const SProjPersp& getPersp() const {
#ifndef NDEBUG
if (m_projType != EProjType::Perspective) {
std::fprintf(stderr, "attempted to access perspective structure of non-persp projection");
std::abort();
}
#endif
return m_persp;
}
const CMatrix4f& getCachedMatrix() const { return m_mtx; }
protected:
/* Projection type */
EProjType m_projType;
/* Projection type */
EProjType m_projType;
/* Projection intermediate */
union {
/* Projection intermediate */
union {
#ifdef _MSC_VER
struct
{
SProjOrtho m_ortho;
};
struct
{
SProjPersp m_persp;
};
#else
struct
{
SProjOrtho m_ortho;
SProjPersp m_persp;
#endif
};
struct
{
SProjPersp m_persp;
};
#else
SProjOrtho m_ortho;
SProjPersp m_persp;
#endif
};
/* Cached projection matrix */
CMatrix4f m_mtx;
/* Cached projection matrix */
CMatrix4f m_mtx;
};
}

View File

@ -8,262 +8,303 @@
#include "zeus/Math.hpp"
#include "zeus/CRelAngle.hpp"
#include "zeus/CTransform.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
namespace zeus {
static inline float normalize_angle(float angle)
{
if (angle > M_PIF)
angle -= 2.f * M_PIF;
else if (angle < -M_PIF)
angle += 2.f * M_PIF;
static float normalize_angle(float angle) {
if (angle > M_PIF)
angle -= 2.f * M_PIF;
else if (angle < -M_PIF)
angle += 2.f * M_PIF;
return angle;
return angle;
}
class CNUQuaternion;
/** Unit quaternion, used for all quaternion arithmetic */
class alignas(16) CQuaternion
{
#if __atdna__ && ZE_ATHENA_TYPES
float clangVec __attribute__((__vector_size__(16)));
#endif
class CQuaternion {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
mSimd[0] = wi;
}
template <typename T>
CQuaternion(const simd<T>& s) : mSimd(s) {}
CQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {}
CQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : w(wi), x(vec.x), y(vec.y), z(vec.z) {}
#if ZE_ATHENA_TYPES
inline void readBig(athena::io::IStreamReader& input)
{
w = input.readFloatBig();
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
}
CQuaternion(const atVec4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec.vec[1];
y = vec.vec[2];
z = vec.vec[3];
w = vec.vec[0];
#endif
}
operator atVec4f&()
{
return *reinterpret_cast<atVec4f*>(v);
}
operator const atVec4f&() const
{
return *reinterpret_cast<const atVec4f*>(v);
}
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
}
CQuaternion(const atVec4f& vec) : mSimd(vec.simd) {}
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
}
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
}
#endif
CQuaternion(const CMatrix3f& mat);
CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
CQuaternion(const CVector4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec[1];
y = vec[2];
z = vec[3];
w = vec[0];
#endif
}
CQuaternion(const CMatrix3f& mat);
CQuaternion(const CVector3f& vecA, const CVector3f& vecB)
{
CVector3f vecAN = vecA.normalized();
CVector3f vecBN = vecB.normalized();
CVector3f w = vecAN.cross(vecBN);
*this = CQuaternion(1.f + vecAN.dot(vecBN), w.x, w.y, w.z).normalized();
}
CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
void fromVector3f(const CVector3f& vec);
CQuaternion(const CVector4f& vec) : mSimd(vec.mSimd) {}
CQuaternion& operator=(const CQuaternion& q);
CQuaternion operator+(const CQuaternion& q) const;
CQuaternion operator-(const CQuaternion& q) const;
CQuaternion operator*(const CQuaternion& q) const;
CQuaternion operator/(const CQuaternion& q) const;
CQuaternion operator*(float scale) const;
CQuaternion operator/(float scale) const;
CQuaternion operator-() const;
const CQuaternion& operator+=(const CQuaternion& q);
const CQuaternion& operator-=(const CQuaternion& q);
const CQuaternion& operator*=(const CQuaternion& q);
const CQuaternion& operator*=(float scale);
const CQuaternion& operator/=(float scale);
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return w * w + x * x + y * y + z * z; }
void normalize() { *this /= magnitude(); }
CQuaternion normalized() const { return *this / magnitude(); }
void invert();
CQuaternion inverse() const;
CQuaternion(const CVector3f& vecA, const CVector3f& vecB) {
CVector3f vecAN = vecA.normalized();
CVector3f vecBN = vecB.normalized();
CVector3f w = vecAN.cross(vecBN);
*this = CQuaternion(1.f + vecAN.dot(vecBN), w).normalized();
}
/**
* @brief Set the rotation using axis angle notation
* @param axis The axis to rotate around
* @param angle The magnitude of the rotation in radians
* @return
*/
static inline CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{
return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f));
}
void fromVector3f(const CVector3f& vec);
void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); }
void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); }
void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); }
CQuaternion& operator=(const CQuaternion& q);
static inline CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v)
{
CQuaternion q = rotation * v;
q *= rotation.inverse();
CQuaternion operator+(const CQuaternion& q) const;
return {q.x, q.y, q.z};
}
CQuaternion operator-(const CQuaternion& q) const;
static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng);
CQuaternion operator*(const CQuaternion& q) const;
CVector3f transform(const CVector3f& v) const
{
CQuaternion r(0.f, v);
return (*this * r * inverse()).getImaginary();
}
CQuaternion operator/(const CQuaternion& q) const;
CQuaternion log() const;
CQuaternion operator*(float scale) const;
CQuaternion exp() const;
CQuaternion operator/(float scale) const;
inline CTransform toTransform() const { return CTransform(CMatrix3f(*this)); }
inline CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
inline float dot(const CQuaternion& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
}
CQuaternion operator-() const;
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1);
static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle);
const CQuaternion& operator+=(const CQuaternion& q);
inline float roll() const { return std::atan2(2.f * (x * y + w * z), w * w + x * x - y * y - z * z); }
const CQuaternion& operator-=(const CQuaternion& q);
inline float pitch() const { return std::atan2(2.f * (y * z + w * x), w * w - x * x - y * y + z * z); }
const CQuaternion& operator*=(const CQuaternion& q);
inline float yaw() const { return std::asin(-2.f * (x * z - w * y)); }
const CQuaternion& operator*=(float scale);
CQuaternion buildEquivalent() const;
const CQuaternion& operator/=(float scale);
zeus::CVector3f getImaginary() const { return {x, y, z}; }
void setImaginary(const zeus::CVector3f& i) { x = i.x; y = i.y; z = i.z; }
float magnitude() const { return std::sqrt(magSquared()); }
CRelAngle angleFrom(const zeus::CQuaternion& other);
float magSquared() const { return mSimd.dot4(mSimd); }
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; }
void normalize() { *this /= magnitude(); }
union
{
__m128 mVec128;
struct
{
float w, x, y, z;
};
float v[4];
};
CQuaternion normalized() const { return *this / magnitude(); }
static const CQuaternion skNoRotation;
void invert();
static CQuaternion fromNUQuaternion(const CNUQuaternion& q);
CQuaternion inverse() const;
/**
* @brief Set the rotation using axis angle notation
* @param axis The axis to rotate around
* @param angle The magnitude of the rotation in radians
* @return
*/
static CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f));
}
void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); }
void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); }
void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); }
static CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v) {
CQuaternion q = rotation * v;
q *= rotation.inverse();
return {q.mSimd.shuffle<1, 2, 3, 3>()};
}
static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng);
CVector3f transform(const CVector3f& v) const {
CQuaternion r(0.f, v);
return (*this * r * inverse()).getImaginary();
}
CQuaternion log() const;
CQuaternion exp() const;
CTransform toTransform() const { return CTransform(CMatrix3f(*this)); }
CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
float dot(const CQuaternion& rhs) const {
return mSimd.dot4(rhs.mSimd);
}
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1);
static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle);
float roll() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[1] * f[2] + f[0] * f[3]), f[0] * f[0] + f[1] * f[1] - f[2] * f[2] - f[3] * f[3]);
}
float pitch() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[2] * f[3] + f[0] * f[1]), f[0] * f[0] - f[1] * f[1] - f[2] * f[2] + f[3] * f[3]);
}
float yaw() const {
simd_floats f(mSimd);
return std::asin(-2.f * (f[1] * f[3] - f[0] * f[2]));
}
CQuaternion buildEquivalent() const;
zeus::CVector3f getImaginary() const { return mSimd.shuffle<1, 2, 3, 3>(); }
void setImaginary(const zeus::CVector3f& i) {
x() = i.x();
y() = i.y();
z() = i.z();
}
CRelAngle angleFrom(const zeus::CQuaternion& other);
simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float w() const { return mSimd[0]; }
float x() const { return mSimd[1]; }
float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
static const CQuaternion skNoRotation;
static CQuaternion fromNUQuaternion(const CNUQuaternion& q);
};
/** Non-unit quaternion, no guarantee that it's normalized.
* Converting to CQuaternion will perform normalize operation.
*/
class alignas(16) CNUQuaternion
{
class CNUQuaternion {
public:
CNUQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {}
CNUQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CNUQuaternion(float win, const zeus::CVector3f& vec) { w = win; x = vec.x; y = vec.y; z = vec.z; }
CNUQuaternion(const CQuaternion& other) { w = other.w; x = other.x; y = other.y; z = other.z; }
CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {}
static inline CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{
return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle));
}
CNUQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return w * w + x * x + y * y + z * z; }
void normalize()
{
float magDiv = 1.f / magnitude();
w *= magDiv;
x *= magDiv;
y *= magDiv;
z *= magDiv;
}
CNUQuaternion normalized() const
{
float magDiv = 1.f / magnitude();
return { w * magDiv, x * magDiv, y * magDiv, z * magDiv };
}
CNUQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CNUQuaternion operator*(const CNUQuaternion& q) const;
CNUQuaternion operator*(float f) const;
const CNUQuaternion& operator+=(const CNUQuaternion& q);
CNUQuaternion(float win, const zeus::CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
w() = win;
}
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; }
CNUQuaternion(const CQuaternion& other) : mSimd(other.mSimd) {}
union
{
__m128 mVec128;
struct
{
float w, x, y, z;
};
};
CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {}
CNUQuaternion(const simd<float>& s) : mSimd(s) {}
static CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle));
}
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return mSimd.dot4(mSimd); }
void normalize() {
float magDiv = 1.f / magnitude();
mSimd *= magDiv;
}
CNUQuaternion normalized() const {
float magDiv = 1.f / magnitude();
return mSimd * simd<float>(magDiv);
}
CNUQuaternion operator*(const CNUQuaternion& q) const;
CNUQuaternion operator*(float f) const;
const CNUQuaternion& operator+=(const CNUQuaternion& q);
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float w() const { return mSimd[0]; }
float x() const { return mSimd[1]; }
float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
};
inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q)
{
auto norm = q.normalized();
return { norm.w, norm.x, norm.y, norm.z };
inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q) {
auto norm = q.normalized();
return norm.mSimd;
}
CQuaternion operator+(float lhs, const CQuaternion& rhs);
CQuaternion operator-(float lhs, const CQuaternion& rhs);
CQuaternion operator*(float lhs, const CQuaternion& rhs);
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs);
}

View File

@ -1,32 +1,30 @@
#pragma once
#include "zeus/CVector2f.hpp"
namespace zeus
{
class CRectangle
{
namespace zeus {
class CRectangle {
public:
CRectangle() {}
CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {}
CRectangle() {}
inline bool contains(const CVector2f& point) const
{
if (point.x < position.x || point.x > position.x + size.x)
return false;
if (point.y < position.y || point.y > position.y + size.y)
return false;
CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {}
return true;
}
bool contains(const CVector2f& point) const {
if (point.x() < position.x() || point.x() > position.x() + size.x())
return false;
if (point.y() < position.y() || point.y() > position.y() + size.y())
return false;
inline bool intersects(const CRectangle& rect) const
{
return !(position.x > rect.position.x + rect.size.x || rect.position.x > position.x + size.x ||
position.y > rect.position.y + rect.size.y || rect.position.y > position.y + size.y);
}
return true;
}
CVector2f position;
CVector2f size;
bool intersects(const CRectangle& rect) const {
return !(position.x() > rect.position.x() + rect.size.x() || rect.position.x() > position.x() + size.x() ||
position.y() > rect.position.y() + rect.size.y() || rect.position.y() > position.y() + size.y());
}
CVector2f position;
CVector2f size;
};
}

View File

@ -4,51 +4,92 @@
#include "zeus/Math.hpp"
#include <cmath>
namespace zeus
{
namespace zeus {
/**
* @brief The CRelAngle class represents relative angle in radians
*/
struct CRelAngle
{
float angle = 0.f;
struct CRelAngle {
float angle = 0.f;
static float MakeRelativeAngle(float angle)
{
float absAngle = std::fabs(angle);
if (absAngle == 2.f * M_PIF)
return std::copysign(absAngle, angle);
float ret = absAngle - std::floor(absAngle / (2.f * M_PIF)) * (2.f * M_PIF);
return std::copysign(ret, angle);
}
static float MakeRelativeAngle(float angle) {
float absAngle = std::fabs(angle);
if (absAngle == 2.f * M_PIF)
return std::copysign(absAngle, angle);
float ret = absAngle - std::floor(absAngle / (2.f * M_PIF)) * (2.f * M_PIF);
return std::copysign(ret, angle);
}
CRelAngle() = default;
CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {}
CRelAngle& operator=(float ang) { angle = MakeRelativeAngle(ang); return *this; }
CRelAngle& operator=(const CRelAngle& ang) { angle = ang.angle; return *this; }
float asDegrees() const { return radToDeg(angle); }
float asRadians() const { return angle; }
float arcCosine() const { return std::acos(angle); }
CRelAngle() = default;
static CRelAngle FromDegrees(float angle)
{
CRelAngle ret;
ret.angle = MakeRelativeAngle(degToRad(angle));
return ret;
}
CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {}
operator float() const { return angle; }
static CRelAngle FromRadians(float angle) { return CRelAngle(angle); }
CRelAngle& operator=(float ang) {
angle = MakeRelativeAngle(ang);
return *this;
}
bool operator <(const CRelAngle& other) const { return angle < other.angle; }
CRelAngle& operator +=(const CRelAngle& other) { angle = MakeRelativeAngle(angle + other.angle); return *this; }
CRelAngle& operator +=(float r) { angle = MakeRelativeAngle(angle + r); return *this; }
CRelAngle& operator -=(const CRelAngle& other) { angle = MakeRelativeAngle(angle - other.angle); return *this; }
CRelAngle& operator -=(float r) { angle = MakeRelativeAngle(angle - r); return *this; }
CRelAngle& operator *=(const CRelAngle& other) { angle = MakeRelativeAngle(angle * other.angle); return *this; }
CRelAngle& operator *=(float r) { angle = MakeRelativeAngle(angle * r); return *this; }
CRelAngle& operator /=(const CRelAngle& other) { angle = MakeRelativeAngle(angle / other.angle); return *this; }
CRelAngle& operator /=(float r) { angle = MakeRelativeAngle(angle / r); return *this; }
CRelAngle& operator=(const CRelAngle& ang) {
angle = ang.angle;
return *this;
}
float asDegrees() const { return radToDeg(angle); }
float asRadians() const { return angle; }
float arcCosine() const { return std::acos(angle); }
static CRelAngle FromDegrees(float angle) {
CRelAngle ret;
ret.angle = MakeRelativeAngle(degToRad(angle));
return ret;
}
operator float() const { return angle; }
static CRelAngle FromRadians(float angle) { return CRelAngle(angle); }
bool operator<(const CRelAngle& other) const { return angle < other.angle; }
CRelAngle& operator+=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle + other.angle);
return *this;
}
CRelAngle& operator+=(float r) {
angle = MakeRelativeAngle(angle + r);
return *this;
}
CRelAngle& operator-=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle - other.angle);
return *this;
}
CRelAngle& operator-=(float r) {
angle = MakeRelativeAngle(angle - r);
return *this;
}
CRelAngle& operator*=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle * other.angle);
return *this;
}
CRelAngle& operator*=(float r) {
angle = MakeRelativeAngle(angle * r);
return *this;
}
CRelAngle& operator/=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle / other.angle);
return *this;
}
CRelAngle& operator/=(float r) {
angle = MakeRelativeAngle(angle / r);
return *this;
}
};
}

View File

@ -2,25 +2,20 @@
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(16) CSphere
{
namespace zeus {
class CSphere {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {}
CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {}
CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); }
inline CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); }
bool intersects(const CSphere& other) {
float dist = (position - other.position).magnitude();
return dist < (radius + other.radius);
}
inline bool intersects(const CSphere& other)
{
float dist = (position - other.position).magnitude();
return dist < (radius + other.radius);
}
CVector3f position;
float radius;
CVector3f position;
float radius;
};
}

View File

@ -8,273 +8,277 @@
#include <cstdint>
#include <cstdio>
namespace zeus
{
class alignas(16) CTransform
{
namespace zeus {
class CTransform {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CTransform() : basis(false) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero)
: basis(basis), origin(offset) {}
CTransform() : basis(false) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero) : basis(basis), origin(offset) {}
#if ZE_ATHENA_TYPES
CTransform(const atVec4f* mtx) : basis(mtx[0], mtx[1], mtx[2]), origin(mtx[0].vec[3], mtx[1].vec[3], mtx[2].vec[3]) {}
void read34RowMajor(athena::io::IStreamReader& r)
{
atVec4f r0 = r.readVec4fBig();
atVec4f r1 = r.readVec4fBig();
atVec4f r2 = r.readVec4fBig();
basis = CMatrix3f(r0, r1, r2);
basis.transpose();
origin = CVector3f(r0.vec[3], r1.vec[3], r2.vec[3]);
}
CTransform(const atVec4f* mtx)
: basis(mtx[0], mtx[1], mtx[2])
, origin(mtx[0].simd[3], mtx[1].simd[3], mtx[2].simd[3]) {}
void read34RowMajor(athena::io::IStreamReader& r) {
atVec4f r0 = r.readVec4fBig();
atVec4f r1 = r.readVec4fBig();
atVec4f r2 = r.readVec4fBig();
basis = CMatrix3f(r0, r1, r2);
basis.transpose();
origin = CVector3f(r0.simd[3], r1.simd[3], r2.simd[3]);
}
#endif
/* Column constructor */
CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3)
: basis(c0, c1, c2), origin(c3) {}
/* Column constructor */
CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3)
: basis(c0, c1, c2), origin(c3) {}
static inline CTransform Identity() { return CTransform(CMatrix3f::skIdentityMatrix3f); }
static CTransform Identity() {
return CTransform(CMatrix3f::skIdentityMatrix3f);
}
inline bool operator ==(const CTransform& other) const
{
return origin == other.origin && basis == other.basis;
}
bool operator==(const CTransform& other) const {
return origin == other.origin && basis == other.basis;
}
inline CTransform operator*(const CTransform& rhs) const
{
return CTransform(basis * rhs.basis, origin + (basis * rhs.origin));
}
CTransform operator*(const CTransform& rhs) const {
return CTransform(basis * rhs.basis, origin + (basis * rhs.origin));
}
inline CTransform inverse() const
{
CMatrix3f inv = basis.inverted();
return CTransform(inv, inv * -origin);
}
CTransform inverse() const {
CMatrix3f inv = basis.inverted();
return CTransform(inv, inv * -origin);
}
static inline CTransform Translate(const CVector3f& position) { return {CMatrix3f::skIdentityMatrix3f, position}; }
static CTransform Translate(const CVector3f& position) {
return {CMatrix3f::skIdentityMatrix3f, position};
}
static inline CTransform Translate(float x, float y, float z) { return Translate({x, y, z}); }
static CTransform Translate(float x, float y, float z) {
return Translate({x, y, z});
}
inline CTransform operator+(const CVector3f& other) { return CTransform(basis, origin + other); }
CTransform operator+(const CVector3f& other) {
return CTransform(basis, origin + other);
}
inline CTransform& operator+=(const CVector3f& other)
{
origin += other;
return *this;
}
CTransform& operator+=(const CVector3f& other) {
origin += other;
return *this;
}
inline CTransform operator-(const CVector3f& other) { return CTransform(basis, origin - other); }
CTransform operator-(const CVector3f& other) {
return CTransform(basis, origin - other);
}
inline CTransform& operator-=(const CVector3f& other)
{
origin -= other;
return *this;
}
CTransform& operator-=(const CVector3f& other) {
origin -= other;
return *this;
}
inline zeus::CVector3f rotate(const CVector3f& vec) const { return basis * vec; }
zeus::CVector3f rotate(const CVector3f& vec) const {
return basis * vec;
}
static inline CTransform RotateX(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, cosT, sinT, 0.f}},
TVectorUnion{{0.f, -sinT, cosT, 0.f}}));
}
static CTransform RotateX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
simd<float>{0.f, cosT, sinT, 0.f},
simd<float>{0.f, -sinT, cosT, 0.f}));
}
static inline CTransform RotateY(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}},
TVectorUnion{{sinT, 0.f, cosT, 0.f}}));
}
static CTransform RotateY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
simd<float>{0.f, 1.f, 0.f, 0.f},
simd<float>{sinT, 0.f, cosT, 0.f}));
}
static inline CTransform RotateZ(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}},
TVectorUnion{{-sinT, cosT, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, 1.f, 0.f}}));
}
static CTransform RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f}));
}
inline void rotateLocalX(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
void rotateLocalX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
zeus::CVector3f b2 = basis[2] * sinT;
zeus::CVector3f b1 = basis[1] * sinT;
zeus::CVector3f cosV(cosT);
zeus::CVector3f b2 = basis[2] * sinT;
zeus::CVector3f b1 = basis[1] * sinT;
zeus::CVector3f cosV(cosT);
basis[1] *= cosV;
basis[2] *= cosV;
basis[1] *= cosV;
basis[2] *= cosV;
basis[1] += b2;
basis[2] -= b1;
}
basis[1] += b2;
basis[2] -= b1;
}
inline void rotateLocalY(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
void rotateLocalY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
zeus::CVector3f b0 = basis[0] * sinT;
zeus::CVector3f b2 = basis[2] * sinT;
zeus::CVector3f cosV(cosT);
zeus::CVector3f b0 = basis[0] * sinT;
zeus::CVector3f b2 = basis[2] * sinT;
zeus::CVector3f cosV(cosT);
basis[0] *= cosV;
basis[2] *= cosV;
basis[0] *= cosV;
basis[2] *= cosV;
basis[2] += b0;
basis[0] -= b2;
}
basis[2] += b0;
basis[0] -= b2;
}
inline void rotateLocalZ(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
void rotateLocalZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
zeus::CVector3f b0 = basis[0] * sinT;
zeus::CVector3f b1 = basis[1] * sinT;
zeus::CVector3f cosV(cosT);
zeus::CVector3f b0 = basis[0] * sinT;
zeus::CVector3f b1 = basis[1] * sinT;
zeus::CVector3f cosV(cosT);
basis[0] *= cosV;
basis[1] *= cosV;
basis[0] *= cosV;
basis[1] *= cosV;
basis[0] += b1;
basis[1] -= b0;
}
basis[0] += b1;
basis[1] -= b0;
}
inline CVector3f transposeRotate(const CVector3f& in) const
{
return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in));
}
CVector3f transposeRotate(const CVector3f& in) const {
return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in));
}
inline void scaleBy(float factor)
{
CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor)));
*this = *this * xfrm;
}
void scaleBy(float factor) {
CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor)));
*this = *this * xfrm;
}
static inline CTransform Scale(const CVector3f& factor)
{
return CTransform(CMatrix3f(TVectorUnion{{factor.x, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, factor.y, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, factor.z, 0.f}}));
}
static CTransform Scale(const CVector3f& factor) {
return CTransform(CMatrix3f(simd<float>{factor.x(), 0.f, 0.f, 0.f},
simd<float>{0.f, factor.y(), 0.f, 0.f},
simd<float>{0.f, 0.f, factor.z(), 0.f}));
}
static inline CTransform Scale(float x, float y, float z)
{
return CTransform(
CMatrix3f(TVectorUnion{{x, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, y, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, z, 0.f}}));
}
static CTransform Scale(float x, float y, float z) {
return CTransform(CMatrix3f(simd<float>{x, 0.f, 0.f, 0.f},
simd<float>{0.f, y, 0.f, 0.f},
simd<float>{0.f, 0.f, z, 0.f}));
}
static inline CTransform Scale(float factor)
{
return CTransform(CMatrix3f(TVectorUnion{{factor, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, factor, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, factor, 0.f}}));
}
static CTransform Scale(float factor) {
return CTransform(CMatrix3f(simd<float>{factor, 0.f, 0.f, 0.f},
simd<float>{0.f, factor, 0.f, 0.f},
simd<float>{0.f, 0.f, factor, 0.f}));
}
inline CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const
{
CTransform ret;
ret.basis = basis * xfrm.basis;
return ret;
}
CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const {
CTransform ret;
ret.basis = basis * xfrm.basis;
return ret;
}
inline CTransform getRotation() const
{
CTransform ret = *this;
ret.origin.zeroOut();
return ret;
}
void setRotation(const CMatrix3f& mat) { basis = mat; }
void setRotation(const CTransform& xfrm) { setRotation(xfrm.basis); }
CTransform getRotation() const {
CTransform ret = *this;
ret.origin.zeroOut();
return ret;
}
/**
* @brief buildMatrix3f Returns the stored matrix
* buildMatrix3f is here for compliance with Retro's Math API
* @return The Matrix (Neo, you are the one)
*/
inline const CMatrix3f& buildMatrix3f() const { return basis; }
void setRotation(const CMatrix3f& mat) {
basis = mat;
}
inline CVector3f operator*(const CVector3f& other) const { return origin + basis * other; }
void setRotation(const CTransform& xfrm) {
setRotation(xfrm.basis);
}
inline CMatrix4f toMatrix4f() const
{
CMatrix4f ret(basis[0], basis[1], basis[2], origin);
ret[0][3] = 0.0f;
ret[1][3] = 0.0f;
ret[2][3] = 0.0f;
ret[3][3] = 1.0f;
return ret;
}
/**
* @brief buildMatrix3f Returns the stored matrix
* buildMatrix3f is here for compliance with Retro's Math API
* @return The Matrix (Neo, you are the one)
*/
const CMatrix3f& buildMatrix3f() const {
return basis;
}
inline CVector3f upVector() const
{
return basis.vec[2];
}
CVector3f operator*(const CVector3f& other) const {
return origin + basis * other;
}
inline CVector3f frontVector() const
{
return basis.vec[1];
}
CMatrix4f toMatrix4f() const {
CMatrix4f ret(basis[0], basis[1], basis[2], origin);
ret[0][3] = 0.0f;
ret[1][3] = 0.0f;
ret[2][3] = 0.0f;
ret[3][3] = 1.0f;
return ret;
}
inline CVector3f rightVector() const
{
return basis.vec[0];
}
CVector3f upVector() const {
return basis.m[2];
}
inline void orthonormalize()
{
basis[0].normalize();
basis[2] = basis[0].cross(basis[1]);
basis[2].normalize();
basis[1] = basis[2].cross(basis[0]);
}
CVector3f frontVector() const {
return basis.m[1];
}
void printMatrix() const
{
printf("%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n",
basis[0][0], basis[1][0], basis[2][0], origin[0],
basis[0][1], basis[1][1], basis[2][1], origin[1],
basis[0][2], basis[1][2], basis[2][2], origin[2],
0.f, 0.f, 0.f, 1.f);
}
CVector3f rightVector() const {
return basis.m[0];
}
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec)
{
uint32_t i;
if (uVec.y < uVec.x || uVec.z < uVec.y || uVec.z < uVec.x)
i = 2;
else
i = 1;
void orthonormalize() {
basis[0].normalize();
basis[2] = basis[0].cross(basis[1]);
basis[2].normalize();
basis[1] = basis[2].cross(basis[0]);
}
CVector3f v = CVector3f::skZero;
v[i] = 1.f;
CUnitVector3f newUVec(uVec.cross(v));
return {newUVec, uVec, uVec.cross(newUVec), CVector3f::skZero};
}
void printMatrix() const {
printf("%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n",
basis[0][0], basis[1][0], basis[2][0], origin[0],
basis[0][1], basis[1][1], basis[2][1], origin[1],
basis[0][2], basis[1][2], basis[2][2], origin[2],
0.f, 0.f, 0.f, 1.f);
}
CMatrix3f basis;
CVector3f origin;
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) {
uint32_t i;
if (uVec.y() < uVec.x() || uVec.z() < uVec.y() || uVec.z() < uVec.x())
i = 2;
else
i = 1;
CVector3f v = CVector3f::skZero;
v[i] = 1.f;
CUnitVector3f newUVec(uVec.cross(v));
return {newUVec, uVec, uVec.cross(newUVec), CVector3f::skZero};
}
CMatrix3f basis;
CVector3f origin;
};
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) { return CTransform(CMatrix3f(scale)); }
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) {
return CTransform(CMatrix3f(scale));
}
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec);
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin);
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle);
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp);
}

View File

@ -2,23 +2,19 @@
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(16) CUnitVector3f : public CVector3f
{
namespace zeus {
class CUnitVector3f : public CVector3f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CUnitVector3f() : CVector3f(0.f, 1.f, 0.f) {}
CUnitVector3f() : CVector3f(0, 1, 0) {}
CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z)
{
if (doNormalize && canBeNormalized())
normalize();
}
CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec)
{
if (doNormalize && canBeNormalized())
normalize();
}
CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z) {
if (doNormalize && canBeNormalized())
normalize();
}
CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec) {
if (doNormalize && canBeNormalized())
normalize();
}
};
}

View File

@ -2,431 +2,260 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
#include "zeus/Math.hpp"
#include <cassert>
namespace zeus
{
class alignas(16) CVector2f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(8)));
#endif
namespace zeus {
class CVector2f {
public:
// ZE_DECLARE_ALIGNED_ALLOCATOR();
union {
struct
{
float x, y;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
simd<float> mSimd;
CVector2f() : mSimd(0.f) {}
template <typename T>
CVector2f(const simd<T>& s) : mSimd(s) {}
inline CVector2f() { zeroOut(); }
#if __SSE__
CVector2f(const __m128& mVec128) : mVec128(mVec128)
{
v[2] = 0.0f;
v[3] = 0.0f;
}
#endif
#if ZE_ATHENA_TYPES
CVector2f(const atVec2f& vec)
#if __SSE__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], v[2] = 0.0f, v[3] = 0.0f;
}
CVector2f(const atVec2f& vec) : mSimd(vec.simd) {}
operator atVec2f&() {
return *reinterpret_cast<atVec2f*>(this);
}
operator const atVec2f&() const {
return *reinterpret_cast<const atVec2f*>(this);
}
void readBig(athena::io::IStreamReader& input) {
mSimd[0] = input.readFloatBig();
mSimd[1] = input.readFloatBig();
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
static CVector2f ReadBig(athena::io::IStreamReader& input) {
CVector2f ret;
ret.readBig(input);
return ret;
}
#endif
operator atVec2f&()
{
return *reinterpret_cast<atVec2f*>(v);
}
operator const atVec2f&() const
{
return *reinterpret_cast<const atVec2f*>(v);
}
explicit CVector2f(float xy) { splat(xy); }
void readBig(athena::io::IStreamReader& input)
{
x = input.readFloatBig();
y = input.readFloatBig();
v[2] = 0.0f;
v[3] = 0.0f;
}
void assign(float x, float y) {
mSimd[0] = x;
mSimd[1] = y;
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
static CVector2f ReadBig(athena::io::IStreamReader& input)
{
CVector2f ret;
ret.readBig(input);
return ret;
}
#endif
CVector2f(float x, float y) { assign(x, y); }
explicit CVector2f(float xy) { splat(xy); }
void assign(float x, float y)
{
v[0] = x;
v[1] = y;
v[2] = 0.0f;
v[3] = 0.0f;
}
CVector2f(float x, float y) { assign(x, y); }
bool operator==(const CVector2f& rhs) const {
return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1];
}
inline bool operator==(const CVector2f& rhs) const { return (x == rhs.x && y == rhs.y); }
inline bool operator!=(const CVector2f& rhs) const { return !(*this == rhs); }
inline bool operator<(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x < rhs.x || y < rhs.y);
#endif
}
inline bool operator<=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x <= rhs.x || y <= rhs.y);
#endif
}
inline bool operator>(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x > rhs.x || y > rhs.y);
#endif
}
inline bool operator>=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x >= rhs.x || y >= rhs.y);
#endif
}
bool operator!=(const CVector2f& rhs) const {
return mSimd[0] != rhs.mSimd[0] || mSimd[1] != rhs.mSimd[1];
}
inline CVector2f operator+(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x + rhs.x, y + rhs.y);
#endif
}
inline CVector2f operator-(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x - rhs.x, y - rhs.y);
#endif
}
inline CVector2f operator-() const
{
#if __SSE__
return CVector2f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector2f(-x, -y);
#endif
}
inline CVector2f operator*(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x * rhs.x, y * rhs.y);
#endif
}
inline CVector2f operator/(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x / rhs.x, y / rhs.y);
#endif
}
inline CVector2f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector2f(x + val, y + val);
#endif
}
inline CVector2f operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector2f(x - val, y - val);
#endif
}
inline CVector2f operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * val, y * val);
#endif
}
inline CVector2f operator/(float val) const
{
float ooval = 1.f / val;
#if __SSE__
TVectorUnion splat = {{ooval, ooval, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * ooval, y * ooval);
#endif
}
inline const CVector2f& operator+=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
#endif
return *this;
}
inline const CVector2f& operator-=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
#endif
return *this;
}
inline const CVector2f& operator*=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
#endif
return *this;
}
inline const CVector2f& operator/=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
#endif
return *this;
}
inline const CVector2f& operator+=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_add_ps(mVec128, splat.mVec128);
#else
x += rhs;
y += rhs;
#endif
return *this;
}
inline const CVector2f& operator-=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_sub_ps(mVec128, splat.mVec128);
#else
x -= rhs;
y -= rhs;
#endif
return *this;
}
inline const CVector2f& operator*=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= rhs;
y *= rhs;
#endif
return *this;
}
inline const CVector2f& operator/=(float rhs)
{
float oorhs = 1.f / rhs;
#if __SSE__
TVectorUnion splat = {{oorhs, oorhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= oorhs;
y *= oorhs;
#endif
return *this;
}
inline void normalize()
{
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector2f(mag);
}
bool operator<(const CVector2f& rhs) const {
return mSimd[0] < rhs.mSimd[0] && mSimd[1] < rhs.mSimd[1];
}
inline CVector2f normalized() const
{
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
bool operator<=(const CVector2f& rhs) const {
return mSimd[0] <= rhs.mSimd[0] && mSimd[1] <= rhs.mSimd[1];
}
inline CVector2f perpendicularVector() const { return {-y, x}; }
bool operator>(const CVector2f& rhs) const {
return mSimd[0] > rhs.mSimd[0] && mSimd[1] > rhs.mSimd[1];
}
inline float cross(const CVector2f& rhs) const { return (x * rhs.y) - (y * rhs.x); }
inline float dot(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1];
#endif
#else
return (x * rhs.x) + (y * rhs.y);
#endif
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1];
#endif
#else
return x * x + y * y;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
bool operator>=(const CVector2f& rhs) const {
return mSimd[0] >= rhs.mSimd[0] && mSimd[1] >= rhs.mSimd[1];
}
inline void zeroOut()
{
*this = CVector2f::skZero;
}
CVector2f operator+(const CVector2f& rhs) const {
return mSimd + rhs.mSimd;
}
inline void splat(float xy)
{
#if __SSE__
TVectorUnion splat = {{xy, xy, 0.0f, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xy;
v[1] = xy;
v[2] = 0.0f;
v[3] = 0.0f;
#endif
}
CVector2f operator-(const CVector2f& rhs) const {
return mSimd - rhs.mSimd;
}
static float getAngleDiff(const CVector2f& a, const CVector2f& b);
CVector2f operator-() const {
return -mSimd;
}
static inline CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) { return (a + (b - a) * t); }
static inline CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) { return lerp(a, b, t).normalized(); }
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
CVector2f operator*(const CVector2f& rhs) const {
return mSimd * rhs.mSimd;
}
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
CVector2f operator/(const CVector2f& rhs) const {
return mSimd / rhs.mSimd;
}
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON;
}
CVector2f operator+(float val) const {
return mSimd + simd<float>(val);
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }
CVector2f operator-(float val) const {
return mSimd - simd<float>(val);
}
inline bool isEqu(const CVector2f& other, float epsilon = 1.1920929e-7f)
{
const CVector2f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon);
}
CVector2f operator*(float val) const {
return mSimd * simd<float>(val);
}
inline float& operator[](size_t idx) { assert(idx < 2); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 2); return (&x)[idx]; }
CVector2f operator/(float val) const {
float ooval = 1.f / val;
return mSimd * simd<float>(ooval);
}
static const CVector2f skOne;
static const CVector2f skNegOne;
static const CVector2f skZero;
const CVector2f& operator+=(const CVector2f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
const CVector2f& operator-=(const CVector2f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
const CVector2f& operator*=(const CVector2f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CVector2f& operator/=(const CVector2f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
const CVector2f& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
const CVector2f& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
const CVector2f& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
const CVector2f& operator/=(float rhs) {
float oorhs = 1.f / rhs;
mSimd /= simd<float>(oorhs);
return *this;
}
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector2f(mag);
}
CVector2f normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
CVector2f perpendicularVector() const { return {-y(), x()}; }
float cross(const CVector2f& rhs) const { return (x() * rhs.y()) - (y() * rhs.x()); }
float dot(const CVector2f& rhs) const {
return mSimd.dot2(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot2(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
void zeroOut() {
*this = CVector2f::skZero;
}
void splat(float xy) {
mSimd = zeus::simd<float>(xy);
}
static float getAngleDiff(const CVector2f& a, const CVector2f& b);
static CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
static CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) {
return lerp(a, b, t).normalized();
}
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
}
bool isZero() const {
return magSquared() <= FLT_EPSILON;
}
bool isEqu(const CVector2f& other, float epsilon = FLT_EPSILON) {
const CVector2f diffVec = other - *this;
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
}
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 2);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 2);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
static const CVector2f skOne;
static const CVector2f skNegOne;
static const CVector2f skZero;
};
static inline CVector2f operator+(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs + rhs.x, lhs + rhs.y);
#endif
static inline CVector2f operator+(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
}
static inline CVector2f operator-(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs - rhs.x, lhs - rhs.y);
#endif
static inline CVector2f operator-(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
}
static inline CVector2f operator*(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs * rhs.x, lhs * rhs.y);
#endif
static inline CVector2f operator*(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
}
static inline CVector2f operator/(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs / rhs.x, lhs / rhs.y);
#endif
static inline CVector2f operator/(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
}
}

View File

@ -5,56 +5,57 @@
#include "CVector2f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
namespace zeus {
class CVector2i
{
class CVector2i {
public:
union {
struct
{
int x, y;
};
int v[2];
union {
struct {
int x, y;
};
CVector2i() = default;
CVector2i(int xin, int yin) : x(xin), y(yin) {}
CVector2i(const CVector2f& vec) : x(int(vec.x)), y(int(vec.y)) {}
int v[2];
};
CVector2f toVec2f() const { return CVector2f(x, y); }
CVector2i() = default;
inline CVector2i operator+(const CVector2i& val) const
{
return CVector2i(x + val.x, y + val.y);
}
inline CVector2i operator-(const CVector2i& val) const
{
return CVector2i(x - val.x, y - val.y);
}
inline CVector2i operator*(const CVector2i& val) const
{
return CVector2i(x * val.x, y * val.y);
}
inline CVector2i operator/(const CVector2i& val) const
{
return CVector2i(x / val.x, y / val.y);
}
inline bool operator==(const CVector2i& other) const
{
return x == other.x && y == other.y;
}
inline bool operator!=(const CVector2i& other) const
{
return x != other.x || y != other.y;
}
inline CVector2i operator*(int val) const
{
return CVector2i(x * val, y * val);
}
CVector2i(int xin, int yin) : x(xin), y(yin) {}
CVector2i(const CVector2f& vec) : x(int(vec.x())), y(int(vec.y())) {}
CVector2f toVec2f() const { return CVector2f(x, y); }
CVector2i operator+(const CVector2i& val) const {
return CVector2i(x + val.x, y + val.y);
}
CVector2i operator-(const CVector2i& val) const {
return CVector2i(x - val.x, y - val.y);
}
CVector2i operator*(const CVector2i& val) const {
return CVector2i(x * val.x, y * val.y);
}
CVector2i operator/(const CVector2i& val) const {
return CVector2i(x / val.x, y / val.y);
}
bool operator==(const CVector2i& other) const {
return x == other.x && y == other.y;
}
bool operator!=(const CVector2i& other) const {
return x != other.x || y != other.y;
}
CVector2i operator*(int val) const {
return CVector2i(x * val, y * val);
}
};
}

View File

@ -1,288 +1,118 @@
#pragma once
#include <athena/Types.hpp>
#include "athena/Types.hpp"
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(32) CVector3d
{
namespace zeus {
class CVector3d {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR32();
CVector3d() { zeroOut(); }
zeus::simd<double> mSimd;
CVector3d() : mSimd(0.0) {}
template <typename T>
CVector3d(const simd<T>& s) : mSimd(s) {}
#if __AVX__
CVector3d(const __m256d& mVec256)
{
this->mVec256 = mVec256;
v[3] = 0.0;
}
#elif __SSE__
CVector3d(const __m128d mVec128[2])
{
this->mVec128[0] = mVec128[0];
this->mVec128[1] = mVec128[1];
v[3] = 0.0;
}
#endif
#if ZE_ATHENA_TYPES
CVector3d(const atVec3d& vec)
{
#if __AVX__
mVec256 = vec.mVec256;
#elif __SSE__
mVec128[0] = vec.mVec128[0];
mVec128[1] = vec.mVec128[1];
#else
x = v[0], y = v[1], z = v[2], v[3] = 0.0f;
#endif
}
CVector3d(const atVec3d& vec) : mSimd(vec.simd) {}
#endif
explicit CVector3d(double xyz) { splat(xyz); }
explicit CVector3d(double xyz) : mSimd(xyz) {}
CVector3d(const CVector3f& vec)
{
#if __AVX__
mVec256 = _mm256_cvtps_pd(vec.mVec128);
#elif __SSE__
mVec128[0] = _mm_cvtps_pd(vec.mVec128);
v[2] = vec[2];
#else
v[0] = vec[0];
v[1] = vec[1];
v[2] = vec[2];
v[3] = 0.0;
#endif
}
CVector3d(const CVector3f& vec) : mSimd(vec.mSimd) {}
CVector3d(double x, double y, double z)
{
#if __AVX__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0;
#endif
}
CVector3d(double x, double y, double z) : mSimd(x, y, z) {}
CVector3f asCVector3f()
{
#if __AVX__
return CVector3f(_mm256_cvtpd_ps(mVec256));
#else
return CVector3f(float(x), float(y), float(z));
#endif
}
CVector3f asCVector3f() {
return mSimd;
}
double magSquared() const
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
}
double magSquared() const {
return mSimd.dot3(mSimd);
}
double magnitude() const { return sqrt(magSquared()); }
inline CVector3d cross(const CVector3d& rhs) const
{
return {y * rhs.z - z * rhs.y,
z * rhs.x - x * rhs.z,
x * rhs.y - y * rhs.x};
}
double magnitude() const {
return sqrt(magSquared());
}
double dot(const CVector3d& rhs) const
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
}
CVector3d cross(const CVector3d& rhs) const {
return {y() * rhs.z() - z() * rhs.y(),
z() * rhs.x() - x() * rhs.z(),
x() * rhs.y() - y() * rhs.x()};
}
CVector3d asNormalized()
{
double mag = magnitude();
mag = 1.0 / mag;
return {x * mag, y * mag, z * mag};
}
double dot(const CVector3d& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
void splat(double xyz)
{
#if __AVX__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0;
#endif
}
CVector3d asNormalized() {
double mag = magnitude();
mag = 1.0 / mag;
return mSimd * zeus::simd<double>(mag);
}
void zeroOut()
{
*this = skZero;
}
void splat(double xyz) {
mSimd = zeus::simd<double>(xyz);
}
inline CVector3d operator+(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_add_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_add_pd(mVec128[0], rhs.mVec128[0]),
_mm_add_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
}
inline CVector3d operator-(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_sub_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_sub_pd(mVec128[0], rhs.mVec128[0]),
_mm_sub_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
}
inline CVector3d operator*(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_mul_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_mul_pd(mVec128[0], rhs.mVec128[0]),
_mm_mul_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
}
inline CVector3d operator/(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_div_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_div_pd(mVec128[0], rhs.mVec128[0]),
_mm_div_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
}
void zeroOut() {
*this = skZero;
}
inline double& operator[](size_t idx) { assert(idx < 3); return v[idx]; }
inline const double& operator[](size_t idx) const { assert(idx < 3); return v[idx]; }
CVector3d operator+(const CVector3d& rhs) const {
return mSimd + rhs.mSimd;
}
union {
struct
{
double x, y, z;
};
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
};
CVector3d operator-(const CVector3d& rhs) const {
return mSimd - rhs.mSimd;
}
static const CVector3d skZero;
CVector3d operator*(const CVector3d& rhs) const {
return mSimd * rhs.mSimd;
}
CVector3d operator/(const CVector3d& rhs) const {
return mSimd / rhs.mSimd;
}
zeus::simd<double>::reference operator[](size_t idx) {
assert(idx < 3);
return mSimd[idx];
}
double operator[](size_t idx) const {
assert(idx < 3);
return mSimd[idx];
}
double x() const { return mSimd[0]; }
double y() const { return mSimd[1]; }
double z() const { return mSimd[2]; }
simd<double>::reference x() { return mSimd[0]; }
simd<double>::reference y() { return mSimd[1]; }
simd<double>::reference z() { return mSimd[2]; }
static const CVector3d skZero;
};
static inline CVector3d operator+(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_add_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_add_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_add_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs + rhs.x, lhs + rhs.y, lhs + rhs.z};
#endif
static inline CVector3d operator+(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) + rhs.mSimd;
}
static inline CVector3d operator-(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_sub_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_sub_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_sub_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs - rhs.x, lhs - rhs.y, lhs - rhs.z};
#endif
static inline CVector3d operator-(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) - rhs.mSimd;
}
static inline CVector3d operator*(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_mul_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_mul_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_mul_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs * rhs.x, lhs * rhs.y, lhs * rhs.z};
#endif
static inline CVector3d operator*(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) * rhs.mSimd;
}
static inline CVector3d operator/(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) / rhs.mSimd;
}
static inline CVector3d operator/(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_div_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_div_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_div_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs.x / rhs.x, lhs.y / rhs.y, lhs.z / rhs.z};
#endif
}
}

View File

@ -3,440 +3,278 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "zeus/CVector2f.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#include "athena/IStreamReader.hpp"
#endif
namespace zeus
{
namespace zeus {
class CVector3d;
class alignas(16) CVector3f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(12)));
#endif
class CVector3f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
zeus::simd<float> mSimd;
CVector3f() : mSimd(0.f) {}
union {
struct
{
float x, y, z;
};
float v[4];
#if __SSE__
__m128 mVec128;
#elif __GEKKO_PS__
ps128_t mVec128;
#endif
};
inline CVector3f() { zeroOut(); }
#if __SSE__ || __GEKKO_PS__
CVector3f(const __m128& mVec128) : mVec128(mVec128) { v[3] = 0.0f; }
#endif
template <typename T>
CVector3f(const simd<T>& s) : mSimd(s) {}
#if ZE_ATHENA_TYPES
CVector3f(const atVec3f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], v[3] = 0.0f;
}
CVector3f(const atVec3f& vec) : mSimd(vec.simd) {}
operator atVec3f&() {
return *reinterpret_cast<atVec3f*>(this);
}
operator const atVec3f&() const {
return *reinterpret_cast<const atVec3f*>(this);
}
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = 0.0f;
mSimd.copy_from(f);
}
static CVector3f ReadBig(athena::io::IStreamReader& input) {
CVector3f ret;
ret.readBig(input);
return ret;
}
#endif
operator atVec3f&()
{
return *reinterpret_cast<atVec3f*>(v);
}
operator const atVec3f&() const
{
return *reinterpret_cast<const atVec3f*>(v);
CVector3f(const CVector3d& vec);
explicit CVector3f(float xyz) : mSimd(xyz) {}
void assign(float x, float y, float z) {
mSimd = zeus::simd<float>(x, y, z);
}
CVector3f(float x, float y, float z) : mSimd(x, y, z) {}
CVector3f(const float* floats) : mSimd(floats[0], floats[1], floats[2]) {}
CVector3f(const CVector2f& other) {
mSimd = other.mSimd;
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
CVector2f toVec2f() const {
return CVector2f(mSimd);
}
bool operator==(const CVector3f& rhs) const {
return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1] && mSimd[2] == rhs.mSimd[2];
}
bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); }
CVector3f operator+(const CVector3f& rhs) const {
return mSimd + rhs.mSimd;
}
CVector3f operator-(const CVector3f& rhs) const {
return mSimd - rhs.mSimd;
}
CVector3f operator-() const {
return -mSimd;
}
CVector3f operator*(const CVector3f& rhs) const {
return mSimd * rhs.mSimd;
}
CVector3f operator/(const CVector3f& rhs) const {
return mSimd / rhs.mSimd;
}
CVector3f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
}
CVector3f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
}
CVector3f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
CVector3f operator/(float val) const {
float ooval = 1.f / val;
return mSimd * zeus::simd<float>(ooval);
}
const CVector3f& operator+=(const CVector3f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
const CVector3f& operator-=(const CVector3f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
const CVector3f& operator*=(const CVector3f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CVector3f& operator/=(const CVector3f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
void normalize() {
float mag = 1.f / magnitude();
*this *= CVector3f(mag);
}
CVector3f normalized() const {
float mag = 1.f / magnitude();
return *this * mag;
}
CVector3f cross(const CVector3f& rhs) const {
return CVector3f(y() * rhs.z() - z() * rhs.y(),
z() * rhs.x() - x() * rhs.z(),
x() * rhs.y() - y() * rhs.x());
}
float dot(const CVector3f& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot3(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
bool isNotInf() const {
return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z()));
}
bool isMagnitudeSafe() const {
return isNotInf() && magSquared() >= 9.9999994e-29;
}
void zeroOut() {
*this = CVector3f::skZero;
}
void splat(float xyz) {
mSimd = zeus::simd<float>(xyz);
}
static float getAngleDiff(const CVector3f& a, const CVector3f& b);
static CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
static CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) {
return lerp(a, b, t).normalized();
}
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON || std::fabs(z()) >= FLT_EPSILON;
}
bool isZero() const {
return magSquared() <= FLT_EPSILON;
}
void scaleToLength(float newLength) {
float length = magSquared();
if (length < FLT_EPSILON) {
mSimd[0] = newLength, mSimd[1] = 0.f, mSimd[2] = 0.f;
return;
}
void readBig(athena::io::IStreamReader& input)
{
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
v[3] = 0.0f;
}
length = std::sqrt(length);
float scalar = newLength / length;
*this *= CVector3f(scalar);
}
static CVector3f ReadBig(athena::io::IStreamReader& input)
{
CVector3f ret;
ret.readBig(input);
return ret;
}
#endif
CVector3f scaledToLength(float newLength) const {
CVector3f v = *this;
v.scaleToLength(newLength);
return v;
}
CVector3f(const CVector3d& vec);
bool isEqu(const CVector3f& other, float epsilon = FLT_EPSILON) {
const CVector3f diffVec = other - *this;
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon && diffVec.z() <= epsilon);
}
explicit CVector3f(float xyz) { splat(xyz); }
void assign(float x, float y, float z)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0f;
}
CVector3f(float x, float y, float z) { assign(x, y, z); }
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 3);
return mSimd[idx];
}
CVector3f(const float* floats)
{
#if __SSE__
mVec128 = _mm_loadu_ps(floats);
#else
x = floats[0];
y = floats[1];
z = floats[2];
#endif
v[3] = 0.0f;
}
float operator[](size_t idx) const {
assert(idx < 3);
return mSimd[idx];
}
CVector3f(const CVector2f& other)
{
x = other.x;
y = other.y;
z = 0.0f;
v[3] = 0.0f;
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
inline CVector2f toVec2f() const
{
#if __SSE__
return CVector2f(mVec128);
#else
return CVector2f(x, y);
#endif
}
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
inline bool operator==(const CVector3f& rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z); }
inline bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); }
inline CVector3f operator+(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_add_ps(mVec128, rhs.mVec128));
#elif __GEKKO_PS__
return CVector3f(__mm_gekko_add_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
}
inline CVector3f operator-(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
}
inline CVector3f operator-() const
{
#if __SSE__
return CVector3f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_neg_ps(mVec128));
#else
return CVector3f(-x, -y, -z);
#endif
}
inline CVector3f operator*(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
}
inline CVector3f operator/(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
}
inline CVector3f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, 0.0f}};
return CVector3f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector3f(x + val, y + val, z + val);
#endif
}
inline CVector3f operator-(float val) const
{
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_sub_ps(mVec128, splat.mVec128));
#else
return CVector3f(x - val, y - val, z - val);
#endif
}
inline CVector3f operator*(float val) const
{
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * val, y * val, z * val);
#endif
}
inline CVector3f operator/(float val) const
{
float ooval = 1.f / val;
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{ooval, ooval, ooval, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * ooval, y * ooval, z * ooval);
#endif
}
inline const CVector3f& operator+=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#elif __GEKKO_PS__
mVec128 = _mm_gekko_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
#endif
return *this;
}
inline const CVector3f& operator-=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
#endif
return *this;
}
inline const CVector3f& operator*=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
#endif
return *this;
}
inline const CVector3f& operator/=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
#endif
return *this;
}
static const CVector3f skOne;
static const CVector3f skNegOne;
static const CVector3f skZero;
static const CVector3f skForward;
static const CVector3f skBack;
static const CVector3f skLeft;
static const CVector3f skRight;
static const CVector3f skUp;
static const CVector3f skDown;
static const CVector3f skRadToDegVec;
static const CVector3f skDegToRadVec;
inline void normalize()
{
float mag = 1.f / magnitude();
*this *= CVector3f(mag);
}
inline CVector3f normalized() const
{
float mag = 1.f / magnitude();
return *this * mag;
}
inline CVector3f cross(const CVector3f& rhs) const
{
return CVector3f(y * rhs.z - z * rhs.y,
z * rhs.x - x * rhs.z,
x * rhs.y - y * rhs.x);
}
static CVector3f radToDeg(const CVector3f& rad) { return rad * skRadToDegVec; }
inline float dot(const CVector3f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
inline bool isNotInf() const
{
return !(std::isinf(x) || std::isinf(y) || std::isinf(z));
}
inline bool isMagnitudeSafe() const
{
return isNotInf() && magSquared() >= 9.9999994e-29;
}
inline void zeroOut()
{
*this = CVector3f::skZero;
}
inline void splat(float xyz)
{
#if __SSE__
TVectorUnion splat = {{xyz, xyz, xyz, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0f;
#endif
}
static float getAngleDiff(const CVector3f& a, const CVector3f& b);
static inline CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) { return (a + (b - a) * t); }
static inline CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) { return lerp(a, b, t).normalized(); }
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y) || std::isinf(z))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON;
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }
inline void scaleToLength(float newLength)
{
float length = magSquared();
if (length < 1.1920929e-7f)
{
x = newLength, y = 0.f, z = 0.f;
return;
}
length = std::sqrt(length);
float scalar = newLength / length;
*this *= CVector3f(scalar);
}
inline CVector3f scaledToLength(float newLength) const
{
CVector3f v = *this;
v.scaleToLength(newLength);
return v;
}
inline bool isEqu(const CVector3f& other, float epsilon = 1.1920929e-7f)
{
const CVector3f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon);
}
inline float& operator[](size_t idx) { assert(idx < 3); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 3); return (&x)[idx]; }
static const CVector3f skOne;
static const CVector3f skNegOne;
static const CVector3f skZero;
static const CVector3f skForward;
static const CVector3f skBack;
static const CVector3f skLeft;
static const CVector3f skRight;
static const CVector3f skUp;
static const CVector3f skDown;
static const CVector3f skRadToDegVec;
static const CVector3f skDegToRadVec;
static CVector3f radToDeg(const CVector3f& rad) { return rad * skRadToDegVec; }
static CVector3f degToRad(const CVector3f& deg) { return deg * skDegToRadVec; }
static CVector3f degToRad(const CVector3f& deg) { return deg * skDegToRadVec; }
};
static inline CVector3f operator+(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs + rhs.x, lhs + rhs.y, lhs + rhs.z);
#endif
static inline CVector3f operator+(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
}
static inline CVector3f operator-(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs - rhs.x, lhs - rhs.y, lhs - rhs.z);
#endif
static inline CVector3f operator-(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
}
static inline CVector3f operator*(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
#endif
static inline CVector3f operator*(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
}
static inline CVector3f operator/(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs / rhs.x, lhs / rhs.y, lhs / rhs.z);
#endif
static inline CVector3f operator/(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
}
}

View File

@ -1,420 +1,261 @@
#pragma once
#include "Global.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#include "athena/IStreamReader.hpp"
#endif
#include "zeus/Math.hpp"
#include <cfloat>
#include <cassert>
namespace zeus
{
namespace zeus {
class CColor;
class alignas(16) CVector4f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(16)));
#endif
class CVector4f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
union {
struct
{
float x, y, z, w;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
zeus::simd<float> mSimd;
CVector4f() : mSimd(0.f) {}
template <typename T>
CVector4f(const simd<T>& s) : mSimd(s) {}
inline CVector4f() { zeroOut(); }
#if __SSE__
CVector4f(const __m128& mVec128) : mVec128(mVec128) {}
#endif
#if ZE_ATHENA_TYPES
CVector4f(const atVec4f& vec)
#if __SSE__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], w = vec.vec[3];
}
CVector4f(const atVec4f& vec) : mSimd(vec.simd) {}
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
}
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
}
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
}
#endif
operator atVec4f&()
{
return *reinterpret_cast<atVec4f*>(v);
}
operator const atVec4f&() const
{
return *reinterpret_cast<const atVec4f*>(v);
}
explicit CVector4f(float xyzw) : mSimd(xyzw) {}
void readBig(athena::io::IStreamReader& input)
{
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
w = input.readFloatBig();
}
#endif
void assign(float x, float y, float z, float w) {
mSimd = simd<float>(x, y, z, w);
}
explicit CVector4f(float xyzw) { splat(xyzw); }
void assign(float x, float y, float z, float w)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = w;
}
CVector4f(float x, float y, float z, float w) { assign(x, y, z, w); }
CVector4f(const CColor& other);
CVector4f(float x, float y, float z, float w) : mSimd(x, y, z, w) {}
CVector4f(const CVector3f& other, float wIn = 1.f)
{
#if __SSE__
mVec128 = other.mVec128;
#else
x = other.x;
y = other.y;
z = other.z;
#endif
w = wIn;
}
CVector4f(const CColor& other);
static CVector4f ToClip(const zeus::CVector3f& v, float w)
{
return CVector4f(v * w, w);
}
CVector4f(const CVector3f& other, float wIn = 1.f) : mSimd(other.mSimd) {
mSimd[3] = wIn;
}
inline CVector3f toVec3f() const
{
#if __SSE__
return CVector3f(mVec128);
#else
return CVector3f(x, y, z);
#endif
}
static CVector4f ToClip(const zeus::CVector3f& v, float w) {
return CVector4f(v * w, w);
}
CVector4f& operator=(const CColor& other);
inline bool operator==(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpeq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w);
#endif
}
inline bool operator!=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpneq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return !(*this == rhs);
#endif
}
inline bool operator<(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x < rhs.x || y < rhs.y || z < rhs.z || w < rhs.w);
#endif
}
inline bool operator<=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x <= rhs.x || y <= rhs.y || z <= rhs.z || w <= rhs.w);
#endif
}
inline bool operator>(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x > rhs.x || y > rhs.y || z > rhs.z || w > rhs.w);
#endif
}
inline bool operator>=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x >= rhs.x || y >= rhs.y || z >= rhs.z || w >= rhs.w);
#endif
}
inline CVector4f operator+(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
#endif
}
inline CVector4f operator-(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
#endif
}
inline CVector4f operator-() const
{
#if __SSE__
return CVector4f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector4f(-x, -y, -z, -w);
#endif
}
inline CVector4f operator*(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
#endif
}
inline CVector4f operator/(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
#endif
}
inline CVector4f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector4f(x + val, y + val, z + val, w + val);
#endif
}
inline CVector4f operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector4f(x - val, y - val, z - val, w - val);
#endif
}
inline CVector4f operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * val, y * val, z * val, w * val);
#endif
}
inline CVector4f operator/(float val) const
{
float ooval = 1.f / val;
#if __SSE__
TVectorUnion splat = {{ooval, ooval, ooval, ooval}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * ooval, y * ooval, z * ooval, w * ooval);
#endif
}
inline const CVector4f& operator+=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
w += rhs.w;
#endif
return *this;
}
inline const CVector4f& operator-=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
w -= rhs.w;
#endif
return *this;
}
inline const CVector4f& operator*=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
w *= rhs.w;
#endif
return *this;
}
inline const CVector4f& operator/=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
w /= rhs.w;
#endif
return *this;
}
inline void normalize()
{
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector4f(mag);
}
inline CVector4f normalized() const
{
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
CVector3f toVec3f() const {
return CVector3f(mSimd);
}
inline float dot(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z + w * w;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
CVector4f& operator=(const CColor& other);
inline void zeroOut()
{
*this = CVector4f::skZero;
}
bool operator==(const CVector4f& rhs) const {
auto eq_mask = mSimd == rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline void splat(float xyzw)
{
#if __SSE__
TVectorUnion splat = {{xyzw, xyzw, xyzw, xyzw}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = xyzw;
#endif
}
bool operator!=(const CVector4f& rhs) const {
auto eq_mask = mSimd != rhs.mSimd;
return eq_mask[0] || eq_mask[1] || eq_mask[2] || eq_mask[3];
}
static inline CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) { return (a + (b - a) * t); }
static inline CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) { return lerp(a, b, t).normalized(); }
bool operator<(const CVector4f& rhs) const {
auto eq_mask = mSimd < rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
bool operator<=(const CVector4f& rhs) const {
auto eq_mask = mSimd <= rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON || std::fabs(w) >= FLT_EPSILON;
}
bool operator>(const CVector4f& rhs) const {
auto eq_mask = mSimd > rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool isEqu(const CVector4f& other, float epsilon = 1.1920929e-7f)
{
const CVector4f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon && diffVec.w <= epsilon);
}
bool operator>=(const CVector4f& rhs) const {
auto eq_mask = mSimd >= rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline float& operator[](size_t idx) { assert(idx < 4); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&x)[idx]; }
CVector4f operator+(const CVector4f& rhs) const {
return mSimd + rhs.mSimd;
}
static const CVector4f skOne;
static const CVector4f skNegOne;
static const CVector4f skZero;
CVector4f operator-(const CVector4f& rhs) const {
return mSimd - rhs.mSimd;
}
CVector4f operator-() const {
return -mSimd;
}
CVector4f operator*(const CVector4f& rhs) const {
return mSimd * rhs.mSimd;
}
CVector4f operator/(const CVector4f& rhs) const {
return mSimd / rhs.mSimd;
}
CVector4f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
}
CVector4f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
}
CVector4f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
CVector4f operator/(float val) const {
float ooval = 1.f / val;
return mSimd * zeus::simd<float>(ooval);
}
const CVector4f& operator+=(const CVector4f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
const CVector4f& operator-=(const CVector4f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
const CVector4f& operator*=(const CVector4f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CVector4f& operator/=(const CVector4f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector4f(mag);
}
CVector4f normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
float dot(const CVector4f& rhs) const {
return mSimd.dot4(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot4(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
void zeroOut() {
*this = CVector4f::skZero;
}
void splat(float xyzw) {
mSimd = zeus::simd<float>(xyzw);
}
static CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
static CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) {
return lerp(a, b, t).normalized();
}
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()) || std::isinf(w()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON ||
std::fabs(z()) >= FLT_EPSILON || std::fabs(w()) >= FLT_EPSILON;
}
bool isEqu(const CVector4f& other, float epsilon = FLT_EPSILON) {
const CVector4f diffVec = other - *this;
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon &&
diffVec.z() <= epsilon && diffVec.w() <= epsilon);
}
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float w() const { return mSimd[3]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference w() { return mSimd[3]; }
static const CVector4f skOne;
static const CVector4f skNegOne;
static const CVector4f skZero;
};
static inline CVector4f operator+(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs + rhs.x, lhs + rhs.y, lhs + rhs.z, lhs + rhs.w);
#endif
static CVector4f operator+(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
}
static inline CVector4f operator-(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs - rhs.x, lhs - rhs.y, lhs - rhs.z, lhs - rhs.w);
#endif
static CVector4f operator-(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
}
static inline CVector4f operator*(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs * rhs.x, lhs * rhs.y, lhs * rhs.z, lhs * rhs.w);
#endif
static CVector4f operator*(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
}
static CVector4f operator/(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
}
static inline CVector4f operator/(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs / rhs.x, lhs / rhs.y, lhs / rhs.z, lhs / rhs.w);
#endif
}
}

View File

@ -1,61 +1,19 @@
#pragma once
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
#endif
#if __SSE__
#include <immintrin.h>
#ifndef _MSC_VER
#include <mm_malloc.h>
#endif
#define zeAlloc(sz, align) _mm_malloc(sz, align)
#define zeFree(ptr) _mm_free(ptr)
#elif GEKKO
#include <ps_intrins.h>
#define zeAlloc(sz, align) _ps_malloc(sz, align)
#define zeFree(ptr) _ps_free(ptr)
#endif
#if __SSE__ || __GEKKO_PS__
#define ZE_DECLARE_ALIGNED_ALLOCATOR() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#if ZE_ATHENA_TYPES
#include "athena/IStreamReader.hpp"
#include "athena/simd/simd.hpp"
#else
#define ZE_DECLARE_ALIGNED_ALLOCATOR() void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() void __unused__()
#include "simd/simd.hpp"
#endif
#if __SSE__
#define ZE_SHUFFLE(x, y, z, w) ((w) << 6 | (z) << 4 | (y) << 2 | (x))
#define ze_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
#define ze_splat3_ps(_a, _i) ze_pshufd_ps((_a), ZE_SHUFFLE(_i, _i, _i, 3))
#define ze_splat_ps(_a, _i) ze_pshufd_ps((_a), ZE_SHUFFLE(_i, _i, _i, _i))
#if _WIN32
#define zeCastiTo128f(a) (_mm_castsi128_ps(a))
#else
#define zeCastiTo128f(a) ((__m128)(a))
#endif
#elif __GEKKO_PS__
namespace zeus {
#if ZE_ATHENA_TYPES
template<typename T> using simd = athena::simd<T>;
using simd_floats = athena::simd_floats;
using simd_doubles = athena::simd_doubles;
#endif
}
inline int rotr(int x, int n) { return ((x >> n) | (x << (32 - n))); }
inline int rotl(int x, int n) { return ((x << n) | (x >> (32 - n))); }

View File

@ -1,6 +1,7 @@
#pragma once
#include <cfloat>
#undef min
#undef max
@ -26,8 +27,7 @@
#include <cmath>
#include <algorithm>
namespace zeus
{
namespace zeus {
#if _MSC_VER
#if defined(_M_IX86)
@ -43,115 +43,131 @@ namespace zeus
#endif
#endif
struct CPUInfo
{
const char cpuBrand[48] = {0};
const char cpuVendor[32] = {0};
struct CPUInfo {
const char cpuBrand[48] = {0};
const char cpuVendor[32] = {0};
#if ZEUS_ARCH_X86_64 || ZEUS_ARCH_X86
const bool isIntel = false;
const bool SSE1 = false;
const bool SSE2 = false;
const bool SSE3 = false;
const bool SSSE3 = false;
const bool SSE41 = false;
const bool SSE42 = false;
const bool SSE4a = false;
const bool AESNI = false;
const bool AVX = false;
const bool AVX2 = false;
const bool isIntel = false;
const bool SSE1 = false;
const bool SSE2 = false;
const bool SSE3 = false;
const bool SSSE3 = false;
const bool SSE41 = false;
const bool SSE42 = false;
const bool SSE4a = false;
const bool AESNI = false;
const bool AVX = false;
const bool AVX2 = false;
#endif
};
/**
* Detects CPU capabilities and returns true if SSE4.1 or SSE4.2 is available
*/
void detectCPU();
const CPUInfo& cpuFeatures();
std::pair<bool, const CPUInfo&> validateCPU();
void getCpuInfo(int eax, int regs[4]);
void getCpuInfoEx(int eax, int ecx, int regs[4]);
class CVector3f;
class CVector2f;
class CTransform;
template <typename T>
inline constexpr T min(const T& a, const T& b)
{
return a < b ? a : b;
template<typename T>
inline constexpr T min(const T& a, const T& b) {
return a < b ? a : b;
}
template <typename T>
inline constexpr T max(const T& a, const T& b)
{
return a > b ? a : b;
}
template <> CVector3f min(const CVector3f& a, const CVector3f& b);
template <> CVector3f max(const CVector3f& a, const CVector3f& b);
template <typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b)
{
return max<T>(a, min<T>(b, val));
template<typename T>
inline constexpr T max(const T& a, const T& b) {
return a > b ? a : b;
}
template<>
CVector3f min(const CVector3f& a, const CVector3f& b);
template<>
CVector3f max(const CVector3f& a, const CVector3f& b);
template<typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b) {
return max<T>(a, min<T>(b, val));
}
inline constexpr float radToDeg(float rad) { return rad * (180.f / M_PIF); }
inline constexpr float degToRad(float deg) { return deg * (M_PIF / 180.f); }
inline constexpr double radToDeg(double rad) { return rad * (180.0 / M_PI); }
inline constexpr double degToRad(double deg) { return deg * (M_PI / 180.0); }
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary);
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t);
// Since round(double) doesn't exist in some <cmath> implementations
// we'll define our own
inline double round(double val) { return (val < 0.0 ? std::ceil(val - 0.5) : std::ceil(val + 0.5)); }
inline double powD(float a, float b) { return std::exp(b * std::log(a)); }
inline double invSqrtD(double val) { return 1.0 / std::sqrt(val); }
inline float invSqrtF(float val) { return float(1.0 / std::sqrt(val)); }
int floorPowerOfTwo(int x);
int ceilingPowerOfTwo(int x);
template <typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x)
{
template<typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x) {
#if __GNUC__ >= 4
return __builtin_popcountll(x);
return __builtin_popcountll(x);
#else
const U m1 = U(0x5555555555555555); // binary: 0101...
const U m2 = U(0x3333333333333333); // binary: 00110011..
const U m4 = U(0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ...
const U h01 = U(0x0101010101010101); // the sum of 256 to the power of 0,1,2,3...
const U m1 = U(0x5555555555555555); // binary: 0101...
const U m2 = U(0x3333333333333333); // binary: 00110011..
const U m4 = U(0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ...
const U h01 = U(0x0101010101010101); // the sum of 256 to the power of 0,1,2,3...
x -= (x >> 1) & m1; // put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; // put count of each 8 bits into those 8 bits
return (x * h01) >> ((sizeof(U) - 1) * 8); // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
x -= (x >> 1) & m1; // put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; // put count of each 8 bits into those 8 bits
return (x * h01) >> ((sizeof(U) - 1) * 8); // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
#endif
}
template <typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e)
{
return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
template<typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) {
return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
}
bool close_enough(const CVector3f &a, const CVector3f &b, float epsilon = 0.000099999997f);
bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon = 0.000099999997f);
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f);
inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752)
{
return std::fabs(a - b) < epsilon;
inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752) {
return std::fabs(a - b) < epsilon;
}
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752)
{
return std::fabs(a - b) < epsilon;
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) {
return std::fabs(a - b) < epsilon;
}
}

View File

@ -1,22 +0,0 @@
#pragma once
namespace zeus
{
typedef union {
float v[4];
#if __SSE__
__m128 mVec128;
#endif
} TVectorUnion;
typedef union {
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
} TDblVectorUnion;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
#pragma once
#define _ZEUS_SIMD_INCLUDED
namespace zeus::_simd { using namespace std; }
#include "parallelism_v2_simd.hpp"
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
#endif
#if __AVX__
#include "simd_avx.hpp"
#elif __SSE__
#include "simd_sse.hpp"
#else
namespace simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = fixed_size<4>; };
template<> struct zeus_native<double> { using type = fixed_size<4>; };
}
#endif
namespace zeus {
template<typename T> using simd = _simd::simd<T,
typename _simd::simd_abi::zeus_native<T>::type>;
template<typename T>
using simd_values = _simd::simd_data<simd<T>>;
using simd_floats = simd_values<float>;
using simd_doubles = simd_values<double>;
}

View File

@ -0,0 +1,188 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_avx.hpp must not be included directly. Include simd.hpp instead.
#endif
#include "simd_sse.hpp"
#include <immintrin.h>
namespace zeus::_simd {
// __m256d storage for AVX
template<>
class __simd_storage<double, m256d_abi> {
public:
using storage_type = __m256d;
storage_type __storage_;
double __get(size_t __index) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), __storage_);
return sse_data[__index];
}
void __set(size_t __index, double __val) noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), __storage_);
sse_data[__index] = __val;
__storage_ = _mm256_load_pd(sse_data.data());
}
void __set4(double a, double b, double c, double d) noexcept {
__storage_ = _mm256_set_pd(d, c, b, a);
}
void __broadcast(double __val) noexcept {
__storage_ = _mm256_set1_pd(__val);
}
double __dot2(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
return sse_data[0] + sse_data[1];
}
double __dot3(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2];
}
double __dot4(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2] + sse_data[3];
}
void __copy_from(const simd_data<simd<double, m256d_abi>>& __buffer) noexcept {
__storage_ = _mm256_load_pd(__buffer.data());
}
void __copy_to(simd_data<simd<double, m256d_abi>>& __buffer) const noexcept {
_mm256_store_pd(__buffer.data(), __storage_);
}
__simd_storage() = default;
explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
__storage_ = _mm256_cvtps_pd(other.__storage_);
}
explicit __simd_storage(const storage_type& s) : __storage_(s) {}
const storage_type& __native() const { return __storage_; }
};
// __m256d mask storage for AVX
template<>
class __simd_mask_storage<double, m256d_abi> : public __simd_storage<double, m256d_abi> {
public:
bool __get(size_t __index) const noexcept {
alignas(32) uint64_t sse_data[4];
_mm256_store_pd(reinterpret_cast<double*>(sse_data), __storage_);
return sse_data[__index] != 0;
}
void __set(size_t __index, bool __val) noexcept {
alignas(32) uint64_t sse_data[4];
_mm256_store_pd(reinterpret_cast<double*>(sse_data), __storage_);
sse_data[__index] = __val ? UINT64_MAX : 0;
__storage_ = _mm256_load_pd(reinterpret_cast<double*>(sse_data));
}
};
template <>
inline simd<double, m256d_abi> simd<double, m256d_abi>::operator-() const {
return _mm256_xor_pd(__s_.__storage_, _mm256_set1_pd(-0.0));
}
inline simd<double, m256d_abi>
operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>&
operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>::mask_type
operator==(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_EQ_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator!=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_NEQ_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GE_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator<=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LE_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GT_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator<(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LT_OQ);
return ret;
}
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m256d_abi>& other) {
__storage_ = _mm256_cvtpd_ps(other.__storage_);
}
namespace simd_abi {
template<> struct zeus_native<double> { using type = m256d_abi; };
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -0,0 +1,455 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_sse.hpp must not be included directly. Include simd.hpp instead.
#endif
#include <xmmintrin.h>
#if __SSE4_1__
#include <smmintrin.h>
#endif
namespace zeus::_simd {
// __m128 ABI
using m128_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 1), 4>;
// __m128d ABI
using m128d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 2), 4>;
#ifdef __AVX__
// __m256d ABI
using m256d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 3), 4>;
#endif
template <>
class __simd_storage<double, m128d_abi>;
#ifdef __AVX__
template <>
class __simd_storage<double, m256d_abi>;
#endif
// __m128 storage for SSE2+
template <>
class __simd_storage<float, m128_abi> {
public:
using storage_type = __m128;
storage_type __storage_;
float __get(size_t __index) const noexcept {
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), __storage_);
return sse_data[__index];
}
void __set(size_t __index, float __val) noexcept {
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), __storage_);
sse_data[__index] = __val;
__storage_ = _mm_load_ps(sse_data.data());
}
void __set4(float a, float b, float c, float d) noexcept {
__storage_ = _mm_set_ps(d, c, b, a);
}
void __broadcast(float __val) noexcept {
__storage_ = _mm_set1_ps(__val);
}
float __dot2(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
_mm_store_ss(&ret, _mm_dp_ps(__storage_, other.__storage_, 0x3F));
return ret;
#else
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), _mm_mul_ps(__storage_, other.__storage_));
return sse_data[0] + sse_data[1];
#endif
}
float __dot3(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
_mm_store_ss(&ret, _mm_dp_ps(__storage_, other.__storage_, 0x7F));
return ret;
#else
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), _mm_mul_ps(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2];
#endif
}
float __dot4(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
_mm_store_ss(&ret, _mm_dp_ps(__storage_, other.__storage_, 0xFF));
return ret;
#else
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), _mm_mul_ps(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2] + sse_data[3];
#endif
}
template<int x, int y, int z, int w>
__simd_storage __shuffle() const noexcept {
__simd_storage s;
s.__storage_ = _mm_shuffle_ps(__storage_, __storage_, _MM_SHUFFLE(w, z, y, x));
return s;
}
void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
__storage_ = _mm_load_ps(__buffer.data());
}
void __copy_to(simd_data<simd<float, m128_abi>>& __buffer) const noexcept {
_mm_store_ps(__buffer.data(), __storage_);
}
__simd_storage() = default;
explicit __simd_storage(const __simd_storage<double, m128d_abi>& other);
#ifdef __AVX__
explicit __simd_storage(const __simd_storage<double, m256d_abi>& other);
#endif
explicit __simd_storage(const storage_type& s) : __storage_(s) {}
const storage_type& __native() const { return __storage_; }
};
// __m128 mask storage for SSE2+
template <>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi>
{
public:
bool __get(size_t __index) const noexcept {
alignas(16) uint32_t sse_data[4];
_mm_store_ps(reinterpret_cast<float*>(sse_data), __storage_);
return sse_data[__index] != 0;
}
void __set(size_t __index, bool __val) noexcept {
alignas(16) uint32_t sse_data[4];
_mm_store_ps(reinterpret_cast<float*>(sse_data), __storage_);
sse_data[__index] = __val ? UINT32_MAX : 0;
__storage_ = _mm_load_ps(reinterpret_cast<float*>(sse_data));
}
};
template <>
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
return _mm_xor_ps(__s_.__storage_, _mm_set1_ps(-0.f));
}
inline simd<float, m128_abi>
operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>&
operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>::mask_type
operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpeq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpneq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpge_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmple_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpgt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmplt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
// __m128d storage for SSE2+
template <>
class __simd_storage<double, m128d_abi> {
public:
using storage_type = std::array<__m128d, 2>;
storage_type __storage_;
double __get(size_t __index) const noexcept {
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), __storage_[__index / 2]);
return sse_data[__index % 2];
}
void __set(size_t __index, double __val) noexcept {
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), __storage_[__index / 2]);
sse_data[__index % 2] = __val;
__storage_[__index / 2] = _mm_load_pd(sse_data.data());
}
void __set4(double a, double b, double c, double d) noexcept {
__storage_[0] = _mm_set_pd(b, a);
__storage_[1] = _mm_set_pd(d, c);
}
void __broadcast(double __val) noexcept {
for (int i = 0; i < 2; ++i)
__storage_[i] = _mm_set1_pd(__val);
}
double __dot2(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
double ret;
_mm_store_sd(&ret, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
return ret;
#else
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
return sse_data[0] + sse_data[1];
#endif
}
double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
double ret;
_mm_store_sd(&ret, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
alignas(16) std::array<double, 2> sse_data2;
_mm_store_pd(sse_data2.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
return ret + sse_data2[0];
#else
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
alignas(16) std::array<double, 2> sse_data2;
_mm_store_pd(sse_data2.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
return sse_data[0] + sse_data[1] + sse_data2[0];
#endif
}
double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
double ret;
_mm_store_sd(&ret, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
double ret2;
_mm_store_sd(&ret2, _mm_dp_pd(__storage_[1], other.__storage_[1], 0x3F));
return ret + ret2;
#else
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
alignas(16) std::array<double, 2> sse_data2;
_mm_store_pd(sse_data2.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
return sse_data[0] + sse_data[1] + sse_data2[0] + sse_data2[1];
#endif
}
void __copy_from(const simd_data<simd<double, m128d_abi>>& __buffer) noexcept {
__storage_[0] = _mm_load_pd(__buffer.data());
__storage_[1] = _mm_load_pd(__buffer.data() + 2);
}
void __copy_to(simd_data<simd<double, m128d_abi>>& __buffer) const noexcept {
_mm_store_pd(__buffer.data(), __storage_[0]);
_mm_store_pd(__buffer.data() + 2, __storage_[1]);
}
__simd_storage() = default;
explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
__storage_[0] = _mm_cvtps_pd(other.__storage_);
__storage_[1] = _mm_cvtps_pd(_mm_movehl_ps(other.__storage_, other.__storage_));
}
explicit __simd_storage(const storage_type& s) : __storage_(s) {}
const storage_type& __native() const { return __storage_; }
};
// __m128d mask storage for SSE2+
template <>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi>
{
public:
bool __get(size_t __index) const noexcept {
alignas(16) uint64_t sse_data[2];
_mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
return sse_data[__index] != 0;
}
void __set(size_t __index, bool __val) noexcept {
alignas(16) uint64_t sse_data[2];
_mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
sse_data[__index % 2] = __val ? UINT64_MAX : 0;
__storage_[__index / 2] = _mm_load_pd(reinterpret_cast<double*>(sse_data));
}
};
template <>
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_xor_pd(__s_.__storage_[i], _mm_set1_pd(-0.0));
return ret;
}
inline simd<double, m128d_abi>
operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>&
operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>::mask_type
operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpeq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpneq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpge_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmple_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpgt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmplt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m128d_abi>& other) {
__storage_ = _mm_movelh_ps(_mm_cvtpd_ps(other.__storage_[0]), _mm_cvtpd_ps(other.__storage_[1]));
}
namespace simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = m128_abi; };
#ifndef __AVX__
template<> struct zeus_native<double> { using type = m128d_abi; };
#endif
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -3,7 +3,6 @@
namespace zeus
{
const CAABox CAABox::skInvertedBox = CAABox();
const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero);
}

View File

@ -1,8 +1,7 @@
#include "zeus/CColor.hpp"
#include "zeus/CVector4f.hpp"
namespace zeus
{
namespace zeus {
const CColor CColor::skRed(Comp32(0xFF0000FFul));
const CColor CColor::skBlack(Comp32(0x000000FFul));
const CColor CColor::skBlue(Comp32(0x0000FFFFul));
@ -14,133 +13,106 @@ const CColor CColor::skYellow(Comp32(0xFFFF00FFul));
const CColor CColor::skWhite(Comp32(0xFFFFFFFFul));
const CColor CColor::skClear(Comp32(0x00000000ul));
float hueToRgb(float p, float q, float t)
{
if (t < 0.0f)
t += 1.0f;
if (t > 1.0f)
t -= 1.0f;
if (t < 1.f / 6.f)
return p + (q - p) * 6.f * t;
if (t < 1.f / 2.f)
return q;
if (t < 2.f / 3.f)
return p + (q - p) * (2.f / 3.f - t) * 6.f;
return p;
float hueToRgb(float p, float q, float t) {
if (t < 0.0f)
t += 1.0f;
if (t > 1.0f)
t -= 1.0f;
if (t < 1.f / 6.f)
return p + (q - p) * 6.f * t;
if (t < 1.f / 2.f)
return q;
if (t < 2.f / 3.f)
return p + (q - p) * (2.f / 3.f - t) * 6.f;
return p;
}
CColor::CColor(const CVector4f& other)
{
r = other.x;
g = other.y;
b = other.z;
a = other.w;
void CColor::fromHSV(float h, float s, float v, float _a) {
int i = int(h * 6.f);
float f = h * 6.f - i;
float p = v * (1.f - s);
float q = v * (1.f - f * s);
float t = v * (1.f - (1.f - f) * s);
simd_floats fo;
switch (i % 6) {
case 0:
fo[0] = v, fo[1] = t, fo[2] = p;
break;
case 1:
fo[0] = q, fo[1] = v, fo[2] = p;
break;
case 2:
fo[0] = p, fo[1] = v, fo[2] = t;
break;
case 3:
fo[0] = p, fo[1] = q, fo[2] = v;
break;
case 4:
fo[0] = t, fo[1] = p, fo[2] = v;
break;
case 5:
fo[0] = v, fo[1] = p, fo[2] = q;
break;
default:
break;
}
fo[3] = _a;
mSimd.copy_from(fo);
}
CColor& CColor::operator=(const CVector4f& other)
{
r = other.x;
g = other.y;
b = other.z;
a = other.w;
void CColor::toHSV(float& h, float& s, float& v) const {
float min = std::min(r(), std::min(g(), b()));
float max = std::max(r(), std::max(g(), b()));
v = max;
return *this;
float delta = max - min;
s = max == 0.f ? 0.f : delta / max;
if (max == min)
h = 0.f;
else {
if (max == r())
h = (g() - b()) / delta + (g() < b() ? 6.f : 0.f);
else if (max == g())
h = (b() - r()) / delta + 2.f;
else if (max == b())
h = (r() - g()) / delta + 4.f;
h /= 6.f;
}
}
void CColor::fromHSV(float h, float s, float v, float _a)
{
int i = int(h * 6);
float f = h * 6 - i;
float p = v * (1 - s);
float q = v * (1 - f * s);
float t = v * (1 - (1 - f) * s);
float _r, _g, _b;
switch (i % 6)
{
case 0:
_r = v, _g = t, _b = p;
break;
case 1:
_r = q, _g = v, _b = p;
break;
case 2:
_r = p, _g = v, _b = t;
break;
case 3:
_r = p, _g = q, _b = v;
break;
case 4:
_r = t, _g = p, _b = v;
break;
case 5:
_r = v, _g = p, _b = q;
break;
}
r = _r;
g = _g;
b = _b;
a = _a;
void CColor::fromHSL(float h, float s, float l, float _a) {
if (s == 0.0f) {
mSimd = simd<float>(l);
} else {
const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
const float p = 2.f * l - q;
r() = hueToRgb(p, q, h + 1.f / 3.f);
g() = hueToRgb(p, q, h);
b() = hueToRgb(p, q, h - 1.f / 3.f);
}
a() = _a;
}
void CColor::toHSV(float& h, float& s, float& v) const
{
float min = std::min(r, std::min(g, b));
float max = std::max(r, std::max(g, b));
v = max;
void CColor::toHSL(float& h, float& s, float& l) const {
const float min = std::min(r(), std::min(g(), b()));
const float max = std::max(r(), std::max(g(), b()));
const float d = max - min;
float delta = max - min;
s = max == 0 ? 0 : delta / max;
if (max == min)
h = s = 0.f;
else {
s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
if (max == r())
h = (g() - b()) / d + (g() < b() ? 6.f : 0.f);
else if (max == g())
h = (b() - r()) / d + 2.f;
else if (max == b())
h = (r() - g()) / d + 4.f;
if (max == min)
h = 0;
else
{
if (max == r)
h = (g - b) / delta + (g < b ? 6 : 0);
else if (max == g)
h = (b - r) / delta + 2;
else if (max == b)
h = (r - g) / delta + 4;
h /= 6;
}
}
void CColor::fromHSL(float h, float s, float l, float _a)
{
if (s == 0.0f)
r = g = b = l;
else
{
const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
const float p = 2 * l - q;
r = hueToRgb(p, q, h + 1.f / 3);
g = hueToRgb(p, q, h);
b = hueToRgb(p, q, h - 1.f / 3);
}
a = _a;
}
void CColor::toHSL(float& h, float& s, float& l)
{
const float min = std::min(r, std::min(g, b));
const float max = std::max(r, std::max(g, b));
const float d = max - min;
if (max == min)
h = s = 0;
else
{
s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
if (max == r)
h = (g - b) / d + (g < b ? 6.f : 0.f);
else if (max == g)
h = (b - r) / d + 2.f;
else if (max == b)
h = (r - g) / d + 4.f;
h /= 6;
}
h /= 6.f;
}
}
}

View File

@ -10,29 +10,29 @@ CEulerAngles::CEulerAngles(const CQuaternion& quat)
float t0 = 0.f;
if (quatDot > 0.f)
t0 = 2.f / quatDot;
double t1 = 1.0 - (t0 * quat.x * quat.x + t0 * quat.z * quat.z);
double t2 = t0 * quat.y * quat.x - t0 * quat.z * quat.w;
double t1 = 1.0 - (t0 * quat.x() * quat.x() + t0 * quat.z() * quat.z());
double t2 = t0 * quat.y() * quat.x() - t0 * quat.z() * quat.w();
double t3 = t1 * t1 + t2 * t2;
double t4 = 0.0;
if (t3 > 0.0)
t4 = std::sqrt(t3);
double t5 = t0 * quat.z * quat.y + t0 * quat.x * quat.w;
double t5 = t0 * quat.z() * quat.y() + t0 * quat.x() * quat.w();
if (std::abs(t4) > 0.00001)
{
x = -std::atan2(-t5, t4);
y = -std::atan2(t0 * quat.z * quat.x - t0 * quat.y * quat.w,
1.0 - (t0 * quat.x * quat.x + t0 * quat.y * quat.y));
z = -std::atan2(t2, t1);
x() = -std::atan2(-t5, t4);
y() = -std::atan2(t0 * quat.z() * quat.x() - t0 * quat.y() * quat.w(),
1.0 - (t0 * quat.x() * quat.x() + t0 * quat.y() * quat.y()));
z() = -std::atan2(t2, t1);
}
else
{
x = -std::atan2(-t5, t4);
y = -std::atan2(-(t0 * quat.z * quat.x + t0 * quat.y * quat.w),
1.0 - (t0 * quat.y * quat.y + t0 * quat.z * quat.z));
z = 0.f;
x() = -std::atan2(-t5, t4);
y() = -std::atan2(-(t0 * quat.z() * quat.x() + t0 * quat.y() * quat.w()),
1.0 - (t0 * quat.y() * quat.y() + t0 * quat.z() * quat.z()));
z() = 0.f;
}
}
@ -58,15 +58,15 @@ CEulerAngles::CEulerAngles(const CTransform& xf)
if (std::fabs(f1) >= 0.00001)
{
x = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
x() = -std::atan2(-xf.basis[1][2], f1);
y() = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z() = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
}
else
{
x = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z = 0.f;
x() = -std::atan2(-xf.basis[1][2], f1);
y() = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z() = 0.f;
}
}

View File

@ -1,138 +1,88 @@
#include "zeus/CFrustum.hpp"
namespace zeus
{
namespace zeus {
void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection)
{
CMatrix4f mvp = projection * viewMtx;
CMatrix4f mvp_rm = mvp.transposed();
void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection) {
CMatrix4f mvp = projection * viewMtx;
CMatrix4f mvp_rm = mvp.transposed();
#if __SSE__
/* Left */
planes[0].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[0].mSimd;
/* Left */
planes[0].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128);
/* Right */
planes[1].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[0].mSimd;
/* Right */
planes[1].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128);
/* Bottom */
planes[2].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[1].mSimd;
/* Bottom */
planes[2].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128);
/* Top */
planes[3].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[1].mSimd;
/* Top */
planes[3].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128);
/* Near */
planes[4].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[2].mSimd;
/* Near */
planes[4].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128);
/* Far */
planes[5].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[2].mSimd;
/* Far */
planes[5].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128);
planes[0].normalize();
planes[1].normalize();
planes[2].normalize();
planes[3].normalize();
planes[4].normalize();
planes[5].normalize();
#else
/* Left */
planes[0].a = mvp.m[0][0] + mvp.m[3][0];
planes[0].b = mvp.m[0][1] + mvp.m[3][1];
planes[0].c = mvp.m[0][2] + mvp.m[3][2];
planes[0].d = mvp.m[0][3] + mvp.m[3][3];
/* Right */
planes[1].a = -mvp.m[0][0] + mvp.m[3][0];
planes[1].b = -mvp.m[0][1] + mvp.m[3][1];
planes[1].c = -mvp.m[0][2] + mvp.m[3][2];
planes[1].d = -mvp.m[0][3] + mvp.m[3][3];
/* Bottom */
planes[2].a = mvp.m[1][0] + mvp.m[3][0];
planes[2].b = mvp.m[1][1] + mvp.m[3][1];
planes[2].c = mvp.m[1][2] + mvp.m[3][2];
planes[2].d = mvp.m[1][3] + mvp.m[3][3];
/* Top */
planes[3].a = -mvp.m[1][0] + mvp.m[3][0];
planes[3].b = -mvp.m[1][1] + mvp.m[3][1];
planes[3].c = -mvp.m[1][2] + mvp.m[3][2];
planes[3].d = -mvp.m[1][3] + mvp.m[3][3];
/* Near */
planes[4].a = mvp.m[2][0] + mvp.m[3][0];
planes[4].b = mvp.m[2][1] + mvp.m[3][1];
planes[4].c = mvp.m[2][2] + mvp.m[3][2];
planes[4].d = mvp.m[2][3] + mvp.m[3][3];
/* Far */
planes[5].a = -mvp.m[2][0] + mvp.m[3][0];
planes[5].b = -mvp.m[2][1] + mvp.m[3][1];
planes[5].c = -mvp.m[2][2] + mvp.m[3][2];
planes[5].d = -mvp.m[2][3] + mvp.m[3][3];
#endif
planes[0].normalize();
planes[1].normalize();
planes[2].normalize();
planes[3].normalize();
planes[4].normalize();
planes[5].normalize();
valid = true;
valid = true;
}
void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection)
{
zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]);
zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed());
zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin);
void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection) {
zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]);
zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed());
zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin);
updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix());
updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix());
}
bool CFrustum::aabbFrustumTest(const CAABox& aabb) const
{
if (!valid)
return true;
CVector3f center = aabb.center();
CVector3f extents = aabb.extents();
for (uint32_t i = 0; i < 6; ++i)
{
const CPlane& plane = planes[i];
float m = plane.vec.dot(center) + plane.d;
float n = extents.dot({std::fabs(plane.a), std::fabs(plane.b), std::fabs(plane.c)});
if (m + n < 0)
return false;
}
bool CFrustum::aabbFrustumTest(const CAABox& aabb) const {
if (!valid)
return true;
CVector3f center = aabb.center();
CVector3f extents = aabb.extents();
for (uint32_t i = 0; i < 6; ++i) {
const CPlane& plane = planes[i];
float m = plane.normal().dot(center) + plane.d();
float n = extents.dot({std::fabs(plane.x()), std::fabs(plane.y()), std::fabs(plane.z())});
if (m + n < 0.f)
return false;
}
return true;
}
bool CFrustum::sphereFrustumTest(const CSphere& sphere) const
{
if (!valid)
return true;
for (uint32_t i = 0 ; i<6 ; ++i)
{
float dadot = planes[i].vec.dot(sphere.position);
if ((dadot + planes[i].d + sphere.radius) < 0)
return false;
}
bool CFrustum::sphereFrustumTest(const CSphere& sphere) const {
if (!valid)
return true;
for (uint32_t i = 0; i < 6; ++i) {
float dadot = planes[i].normal().dot(sphere.position);
if ((dadot + planes[i].d() + sphere.radius) < 0.f)
return false;
}
return true;
}
bool CFrustum::pointFrustumTest(const CVector3f& point) const
{
if (!valid)
return true;
for (uint32_t i = 0 ; i<6 ; ++i)
{
float dadot = planes[i].vec.dot(point);
if ((dadot + planes[i].d) < 0)
return false;
}
bool CFrustum::pointFrustumTest(const CVector3f& point) const {
if (!valid)
return true;
for (uint32_t i = 0; i < 6; ++i) {
float dadot = planes[i].normal().dot(point);
if ((dadot + planes[i].d()) < 0.f)
return false;
}
return true;
}
}

View File

@ -2,104 +2,113 @@
#include "zeus/CQuaternion.hpp"
#include "zeus/Global.hpp"
namespace zeus
{
namespace zeus {
const CMatrix3f CMatrix3f::skIdentityMatrix3f = CMatrix3f();
CMatrix3f::CMatrix3f(const CQuaternion& quat)
{
CQuaternion nq = quat.normalized();
float x2 = nq.x * nq.x;
float y2 = nq.y * nq.y;
float z2 = nq.z * nq.z;
CMatrix3f::CMatrix3f(const CQuaternion& quat) {
CQuaternion nq = quat.normalized();
float x2 = nq.x() * nq.x();
float y2 = nq.y() * nq.y();
float z2 = nq.z() * nq.z();
m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2;
m[1][0] = 2.0 * nq.x * nq.y - 2.0 * nq.z * nq.w;
m[2][0] = 2.0 * nq.x * nq.z + 2.0 * nq.y * nq.w;
m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2;
m[1][0] = 2.0 * nq.x() * nq.y() - 2.0 * nq.z() * nq.w();
m[2][0] = 2.0 * nq.x() * nq.z() + 2.0 * nq.y() * nq.w();
m[0][1] = 2.0 * nq.x * nq.y + 2.0 * nq.z * nq.w;
m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2;
m[2][1] = 2.0 * nq.y * nq.z - 2.0 * nq.x * nq.w;
m[0][1] = 2.0 * nq.x() * nq.y() + 2.0 * nq.z() * nq.w();
m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2;
m[2][1] = 2.0 * nq.y() * nq.z() - 2.0 * nq.x() * nq.w();
m[0][2] = 2.0 * nq.x * nq.z - 2.0 * nq.y * nq.w;
m[1][2] = 2.0 * nq.y * nq.z + 2.0 * nq.x * nq.w;
m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2;
m[0][3] = 0.0f;
m[1][3] = 0.0f;
m[2][3] = 0.0f;
m[0][2] = 2.0 * nq.x() * nq.z() - 2.0 * nq.y() * nq.w();
m[1][2] = 2.0 * nq.y() * nq.z() + 2.0 * nq.x() * nq.w();
m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2;
}
void CMatrix3f::transpose()
{
void CMatrix3f::transpose() {
#if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
vec[0].mVec128 = _mm_movelh_ps(T0, T2);
vec[1].mVec128 = _mm_movehl_ps(T2, T0);
vec[2].mVec128 = _mm_movelh_ps(T1, T3);
__m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
m[0].mSimd = _mm_movelh_ps(T0, T2);
m[1].mSimd = _mm_movehl_ps(T2, T0);
m[2].mSimd = _mm_movelh_ps(T1, T3);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
m[0].mSimd = T0.val[0];
m[1].mSimd = T0.val[1];
m[2].mSimd = T1.val[0];
#else
float tmp;
float tmp;
tmp = m[0][1];
m[0][1] = m[1][0];
m[1][0] = tmp;
tmp = m[0][1];
m[0][1] = m[1][0];
m[1][0] = tmp;
tmp = m[0][2];
m[0][2] = m[2][0];
m[2][0] = tmp;
tmp = m[0][2];
m[0][2] = m[2][0];
m[2][0] = tmp;
tmp = m[1][2];
m[1][2] = m[2][1];
m[2][1] = tmp;
tmp = m[1][2];
m[1][2] = m[2][1];
m[2][1] = tmp;
#endif
}
CMatrix3f CMatrix3f::transposed() const
{
CMatrix3f CMatrix3f::transposed() const {
#if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
__m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
return CMatrix3f(T0.val[0], T0.val[1], T1.val[0]);
#else
CMatrix3f ret(*this);
float tmp;
CMatrix3f ret(*this);
float tmp;
tmp = ret.m[0][1];
ret.m[0][1] = ret.m[1][0];
ret.m[1][0] = tmp;
tmp = ret.m[0][1];
ret.m[0][1] = ret.m[1][0];
ret.m[1][0] = tmp;
tmp = m[0][2];
ret.m[0][2] = ret.m[2][0];
ret.m[2][0] = tmp;
tmp = m[0][2];
ret.m[0][2] = ret.m[2][0];
ret.m[2][0] = tmp;
tmp = m[1][2];
ret.m[1][2] = ret.m[2][1];
ret.m[2][1] = tmp;
tmp = m[1][2];
ret.m[1][2] = ret.m[2][1];
ret.m[2][1] = tmp;
return ret;
return ret;
#endif
}
CMatrix3f CMatrix3f::inverted() const
{
float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] -
m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0];
CMatrix3f CMatrix3f::inverted() const {
float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] -
m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0];
if (det == 0.0)
return CMatrix3f();
if (det == 0.0)
return CMatrix3f();
det = 1.0f / det;
return CMatrix3f((m[1][1] * m[2][2] - m[1][2] * m[2][1]) * det, -(m[1][0] * m[2][2] - m[1][2] * m[2][0]) * det,
(m[1][0] * m[2][1] - m[1][1] * m[2][0]) * det, -(m[0][1] * m[2][2] - m[0][2] * m[2][1]) * det,
(m[0][0] * m[2][2] - m[0][2] * m[2][0]) * det, -(m[0][0] * m[2][1] - m[0][1] * m[2][0]) * det,
(m[0][1] * m[1][2] - m[0][2] * m[1][1]) * det, -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * det,
(m[0][0] * m[1][1] - m[0][1] * m[1][0]) * det);
det = 1.0f / det;
return CMatrix3f((m[1][1] * m[2][2] - m[1][2] * m[2][1]) * det, -(m[1][0] * m[2][2] - m[1][2] * m[2][0]) * det,
(m[1][0] * m[2][1] - m[1][1] * m[2][0]) * det, -(m[0][1] * m[2][2] - m[0][2] * m[2][1]) * det,
(m[0][0] * m[2][2] - m[0][2] * m[2][0]) * det, -(m[0][0] * m[2][1] - m[0][1] * m[2][0]) * det,
(m[0][1] * m[1][2] - m[0][2] * m[1][1]) * det, -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * det,
(m[0][0] * m[1][1] - m[0][1] * m[1][0]) * det);
}
}

View File

@ -9,14 +9,25 @@ CMatrix4f CMatrix4f::transposed() const
{
CMatrix4f ret;
#if __SSE__
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);
ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2);
ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0);
ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3);
ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1);
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), m[3].mSimd.native());
__m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), m[3].mSimd.native());
ret.m[0].mSimd = _mm_movelh_ps(T0, T2);
ret.m[1].mSimd = _mm_movehl_ps(T2, T0);
ret.m[2].mSimd = _mm_movelh_ps(T1, T3);
ret.m[3].mSimd = _mm_movehl_ps(T3, T1);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
ret.m[0].mSimd = T0.val[0];
ret.m[1].mSimd = T0.val[1];
ret.m[2].mSimd = T1.val[0];
ret.m[3].mSimd = T1.val[1];
#else
ret.m[0][0] = m[0][0];
ret.m[1][0] = m[0][1];

View File

@ -1,139 +1,140 @@
#include "zeus/COBBox.hpp"
namespace zeus
{
namespace zeus {
CAABox COBBox::calculateAABox(const CTransform& worldXf) const
{
CAABox ret = CAABox::skInvertedBox;
CAABox COBBox::calculateAABox(const CTransform& worldXf) const {
CAABox ret = CAABox::skInvertedBox;
CTransform trans = worldXf * transform;
static const CVector3f basis[8] = {{1.f, 1.f, 1.f}, {1.f, 1.f, -1.f}, {1.f, -1.f, 1.f}, {1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f}, {-1.f, -1.f, 1.f}, {-1.f, 1.f, -1.f}, {-1.f, 1.f, 1.f}};
CVector3f p = extents * basis[0];
ret.accumulateBounds(trans * p);
p = extents * basis[1];
ret.accumulateBounds(trans * p);
p = extents * basis[2];
ret.accumulateBounds(trans * p);
p = extents * basis[3];
ret.accumulateBounds(trans * p);
p = extents * basis[4];
ret.accumulateBounds(trans * p);
p = extents * basis[5];
ret.accumulateBounds(trans * p);
p = extents * basis[6];
ret.accumulateBounds(trans * p);
p = extents * basis[7];
ret.accumulateBounds(trans * p);
CTransform trans = worldXf * transform;
static const CVector3f basis[8] = {{1.f, 1.f, 1.f},
{1.f, 1.f, -1.f},
{1.f, -1.f, 1.f},
{1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f},
{-1.f, -1.f, 1.f},
{-1.f, 1.f, -1.f},
{-1.f, 1.f, 1.f}};
CVector3f p = extents * basis[0];
ret.accumulateBounds(trans * p);
p = extents * basis[1];
ret.accumulateBounds(trans * p);
p = extents * basis[2];
ret.accumulateBounds(trans * p);
p = extents * basis[3];
ret.accumulateBounds(trans * p);
p = extents * basis[4];
ret.accumulateBounds(trans * p);
p = extents * basis[5];
ret.accumulateBounds(trans * p);
p = extents * basis[6];
ret.accumulateBounds(trans * p);
p = extents * basis[7];
ret.accumulateBounds(trans * p);
return ret;
return ret;
}
bool COBBox::OBBIntersectsBox(const COBBox& other) const
{
CVector3f v = other.transform.origin - transform.origin;
CVector3f T = CVector3f(v.dot(transform.basis[0]),
v.dot(transform.basis[1]),
v.dot(transform.basis[2]));
bool COBBox::OBBIntersectsBox(const COBBox& other) const {
CVector3f v = other.transform.origin - transform.origin;
CVector3f T = CVector3f(v.dot(transform.basis[0]),
v.dot(transform.basis[1]),
v.dot(transform.basis[2]));
CMatrix3f R;
CMatrix3f R;
float ra, rb, t;
for (int i = 0; i < 3; ++i)
for (int k = 0; k < 3; ++k)
R[i][k] = transform.basis[i].dot(other.transform.basis[k]);
for (int i = 0; i < 3; ++i)
{
ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) +
(other.extents[2] * std::fabs(R[i][2]));
t = std::fabs(T[i]);
if (t > (ra + rb + FLT_EPSILON))
return false;
}
float ra, rb, t;
for (int i = 0; i < 3; ++i)
for (int k = 0; k < 3; ++k)
{
ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k]));
rb = other.extents[k];
R[i][k] = transform.basis[i].dot(other.transform.basis[k]);
t = std::fabs(T[0] * R[0][k] + T[1] * R[1][k] + T[2] * R[2][k]);
for (int i = 0; i < 3; ++i) {
ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) +
(other.extents[2] * std::fabs(R[i][2]));
t = std::fabs(T[i]);
if (t > (ra + rb + FLT_EPSILON))
return false;
}
/* A0 x B0 */
ra = (extents[1] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[1][0]));
rb = (other.extents[1] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][1]));
t = std::fabs((T[2] * R[1][0]) - (T[1] * R[2][0]));
if (t > (ra + rb + FLT_EPSILON))
return false;
return false;
}
for (int k = 0; k < 3; ++k) {
ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k]));
rb = other.extents[k];
t = std::fabs(T[0] * R[0][k] + T[1] * R[1][k] + T[2] * R[2][k]);
/* A0 x B1 */
ra = (extents[1] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[1][1]));
rb = (other.extents[0] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][0]));
t = std::fabs((T[2] * R[1][1]) - (T[1] * R[2][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
return false;
}
/* A0 x B2 */
ra = (extents[1] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[1][2]));
rb = (other.extents[0] * std::fabs(R[0][1])) + (other.extents[1] * std::fabs(R[0][0]));
t = std::fabs((T[2] * R[1][2]) - (T[1] * R[2][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A0 x B0 */
ra = (extents[1] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[1][0]));
rb = (other.extents[1] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][1]));
t = std::fabs((T[2] * R[1][0]) - (T[1] * R[2][0]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A1 x B0 */
ra = (extents[0] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[0][0]));
rb = (other.extents[1] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][1]));
t = std::fabs((T[0] * R[2][0]) - (T[2] * R[0][0]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A0 x B1 */
ra = (extents[1] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[1][1]));
rb = (other.extents[0] * std::fabs(R[0][2])) + (other.extents[2] * std::fabs(R[0][0]));
t = std::fabs((T[2] * R[1][1]) - (T[1] * R[2][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A1 x B1 */
ra = (extents[0] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[0][1]));
rb = (other.extents[0] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][0]));
t = std::fabs((T[0] * R[2][1]) - (T[2] * R[0][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A0 x B2 */
ra = (extents[1] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[1][2]));
rb = (other.extents[0] * std::fabs(R[0][1])) + (other.extents[1] * std::fabs(R[0][0]));
t = std::fabs((T[2] * R[1][2]) - (T[1] * R[2][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A1 x B2 */
ra = (extents[0] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[0][2]));
rb = (other.extents[0] * std::fabs(R[1][1])) + (other.extents[1] * std::fabs(R[1][0]));
t = std::fabs((T[0] * R[2][2]) - (T[2] * R[0][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A1 x B0 */
ra = (extents[0] * std::fabs(R[2][0])) + (extents[2] * std::fabs(R[0][0]));
rb = (other.extents[1] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][1]));
t = std::fabs((T[0] * R[2][0]) - (T[2] * R[0][0]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A2 x B0 */
ra = (extents[0] * std::fabs(R[1][0])) + (extents[1] * std::fabs(R[0][0]));
rb = (other.extents[1] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][1]));
t = std::fabs((T[1] * R[0][0]) - (T[0] * R[1][0]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A1 x B1 */
ra = (extents[0] * std::fabs(R[2][1])) + (extents[2] * std::fabs(R[0][1]));
rb = (other.extents[0] * std::fabs(R[1][2])) + (other.extents[2] * std::fabs(R[1][0]));
t = std::fabs((T[0] * R[2][1]) - (T[2] * R[0][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A2 x B1 */
ra = (extents[0] * std::fabs(R[1][1])) + (extents[1] * std::fabs(R[0][1]));
rb = (other.extents[0] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][0]));
t = std::fabs((T[1] * R[0][1]) - (T[0] * R[1][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A1 x B2 */
ra = (extents[0] * std::fabs(R[2][2])) + (extents[2] * std::fabs(R[0][2]));
rb = (other.extents[0] * std::fabs(R[1][1])) + (other.extents[1] * std::fabs(R[1][0]));
t = std::fabs((T[0] * R[2][2]) - (T[2] * R[0][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A2 x B2 */
ra = (extents[0] * std::fabs(R[1][2])) + (extents[1] * std::fabs(R[0][2]));
rb = (other.extents[0] * std::fabs(R[2][1])) + (other.extents[1] * std::fabs(R[2][0]));
t = std::fabs((T[1] * R[0][2]) - (T[0] * R[1][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A2 x B0 */
ra = (extents[0] * std::fabs(R[1][0])) + (extents[1] * std::fabs(R[0][0]));
rb = (other.extents[1] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][1]));
t = std::fabs((T[1] * R[0][0]) - (T[0] * R[1][0]));
if (t > (ra + rb + FLT_EPSILON))
return false;
return true;
/* A2 x B1 */
ra = (extents[0] * std::fabs(R[1][1])) + (extents[1] * std::fabs(R[0][1]));
rb = (other.extents[0] * std::fabs(R[2][2])) + (other.extents[2] * std::fabs(R[2][0]));
t = std::fabs((T[1] * R[0][1]) - (T[0] * R[1][1]));
if (t > (ra + rb + FLT_EPSILON))
return false;
/* A2 x B2 */
ra = (extents[0] * std::fabs(R[1][2])) + (extents[1] * std::fabs(R[0][2]));
rb = (other.extents[0] * std::fabs(R[2][1])) + (other.extents[1] * std::fabs(R[2][0]));
t = std::fabs((T[1] * R[0][2]) - (T[0] * R[1][2]));
if (t > (ra + rb + FLT_EPSILON))
return false;
return true;
}
}

View File

@ -1,18 +1,16 @@
#include "zeus/CPlane.hpp"
namespace zeus
{
namespace zeus {
bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const
{
zeus::CVector3f delta = to - from;
if (std::fabs(delta.normalized().dot(vec)) < 0.01f)
return false;
float tmp = -pointToPlaneDist(from) / delta.dot(vec);
if (tmp < -0.f || tmp > 1.0001f)
return false;
point = delta * tmp + from;
return true;
bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const {
zeus::CVector3f delta = to - from;
if (std::fabs(delta.normalized().dot(normal())) < 0.01f)
return false;
float tmp = -pointToPlaneDist(from) / delta.dot(normal());
if (tmp < -0.f || tmp > 1.0001f)
return false;
point = delta * tmp + from;
return true;
}
}

View File

@ -2,72 +2,67 @@
#include "zeus/Math.hpp"
#include <cassert>
namespace zeus
{
void CProjection::_updateCachedMatrix()
{
assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective);
if (m_projType == EProjType::Orthographic)
{
float tmp;
namespace zeus {
void CProjection::_updateCachedMatrix() {
assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective);
if (m_projType == EProjType::Orthographic) {
float tmp;
tmp = 1.0f / (m_ortho.right - m_ortho.left);
m_mtx.m[0][0] = 2.0f * tmp;
m_mtx.m[1][0] = 0.0f;
m_mtx.m[2][0] = 0.0f;
m_mtx.m[3][0] = -(m_ortho.right + m_ortho.left) * tmp;
tmp = 1.0f / (m_ortho.right - m_ortho.left);
m_mtx.m[0][0] = 2.0f * tmp;
m_mtx.m[1][0] = 0.0f;
m_mtx.m[2][0] = 0.0f;
m_mtx.m[3][0] = -(m_ortho.right + m_ortho.left) * tmp;
tmp = 1.0f / (m_ortho.top - m_ortho.bottom);
m_mtx.m[0][1] = 0.0f;
m_mtx.m[1][1] = 2.0f * tmp;
m_mtx.m[2][1] = 0.0f;
m_mtx.m[3][1] = -(m_ortho.top + m_ortho.bottom) * tmp;
tmp = 1.0f / (m_ortho.top - m_ortho.bottom);
m_mtx.m[0][1] = 0.0f;
m_mtx.m[1][1] = 2.0f * tmp;
m_mtx.m[2][1] = 0.0f;
m_mtx.m[3][1] = -(m_ortho.top + m_ortho.bottom) * tmp;
tmp = 1.0f / (m_ortho.zfar - m_ortho.znear);
m_mtx.m[0][2] = 0.0f;
m_mtx.m[1][2] = 0.0f;
m_mtx.m[2][2] = -tmp;
m_mtx.m[3][2] = -m_ortho.zfar * tmp;
tmp = 1.0f / (m_ortho.zfar - m_ortho.znear);
m_mtx.m[0][2] = 0.0f;
m_mtx.m[1][2] = 0.0f;
m_mtx.m[2][2] = -tmp;
m_mtx.m[3][2] = -m_ortho.zfar * tmp;
m_mtx.m[0][3] = 0.0f;
m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = 0.0f;
m_mtx.m[3][3] = 1.0f;
}
else if (m_projType == EProjType::Perspective)
{
float tfov = std::tan(m_persp.fov * 0.5f);
float top = m_persp.znear * tfov;
float bottom = -top;
float right = m_persp.aspect * m_persp.znear * tfov;
float left = -right;
m_mtx.m[0][3] = 0.0f;
m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = 0.0f;
m_mtx.m[3][3] = 1.0f;
} else if (m_projType == EProjType::Perspective) {
float tfov = std::tan(m_persp.fov * 0.5f);
float top = m_persp.znear * tfov;
float bottom = -top;
float right = m_persp.aspect * m_persp.znear * tfov;
float left = -right;
float rml = right - left;
float rpl = right + left;
float tmb = top - bottom;
float tpb = top + bottom;
float fpn = m_persp.zfar + m_persp.znear;
float fmn = m_persp.zfar - m_persp.znear;
float rml = right - left;
float rpl = right + left;
float tmb = top - bottom;
float tpb = top + bottom;
float fpn = m_persp.zfar + m_persp.znear;
float fmn = m_persp.zfar - m_persp.znear;
m_mtx.m[0][0] = 2.f * m_persp.znear / rml;
m_mtx.m[1][0] = 0.0f;
m_mtx.m[2][0] = rpl / rml;
m_mtx.m[3][0] = 0.0f;
m_mtx.m[0][0] = 2.f * m_persp.znear / rml;
m_mtx.m[1][0] = 0.0f;
m_mtx.m[2][0] = rpl / rml;
m_mtx.m[3][0] = 0.0f;
m_mtx.m[0][1] = 0.0f;
m_mtx.m[1][1] = 2.f * m_persp.znear / tmb;
m_mtx.m[2][1] = tpb / tmb;
m_mtx.m[3][1] = 0.0f;
m_mtx.m[0][1] = 0.0f;
m_mtx.m[1][1] = 2.f * m_persp.znear / tmb;
m_mtx.m[2][1] = tpb / tmb;
m_mtx.m[3][1] = 0.0f;
m_mtx.m[0][2] = 0.0f;
m_mtx.m[1][2] = 0.0f;
m_mtx.m[2][2] = -fpn / fmn;
m_mtx.m[3][2] = -2.f * m_persp.zfar * m_persp.znear / fmn;
m_mtx.m[0][2] = 0.0f;
m_mtx.m[1][2] = 0.0f;
m_mtx.m[2][2] = -fpn / fmn;
m_mtx.m[3][2] = -2.f * m_persp.zfar * m_persp.znear / fmn;
m_mtx.m[0][3] = 0.0f;
m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = -1.0f;
m_mtx.m[3][3] = 0.0f;
}
m_mtx.m[0][3] = 0.0f;
m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = -1.0f;
m_mtx.m[3][3] = 0.0f;
}
}
}

View File

@ -1,399 +1,330 @@
#include "zeus/CQuaternion.hpp"
#include "zeus/Math.hpp"
namespace zeus
{
namespace zeus {
const CQuaternion CQuaternion::skNoRotation;
CQuaternion::CQuaternion(const CMatrix3f& mat)
{
float trace = mat[0][0] + mat[1][1] + mat[2][2];
if (trace >= 0.f)
{
float st = std::sqrt(trace + 1.0f);
float s = 0.5f / st;
w = 0.5f * st;
x = (mat[1][2] - mat[2][1]) * s;
y = (mat[2][0] - mat[0][2]) * s;
z = (mat[0][1] - mat[1][0]) * s;
}
else
{
int idx = 0;
if (mat[1][1] > mat[0][0])
{
idx = 1;
if (mat[2][2] > mat[1][1])
idx = 2;
}
else if (mat[2][2] > mat[0][0])
{
idx = 2;
}
switch (idx)
{
case 0:
{
float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f);
float s = 0.5f / st;
w = (mat[1][2] - mat[2][1]) * s;
x = 0.5f * st;
y = (mat[1][0] + mat[0][1]) * s;
z = (mat[2][0] + mat[0][2]) * s;
break;
}
case 1:
{
float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f);
float s = 0.5f / st;
w = (mat[2][0] - mat[0][2]) * s;
x = (mat[1][0] + mat[0][1]) * s;
y = 0.5f * st;
z = (mat[2][1] + mat[1][2]) * s;
break;
}
case 2:
{
float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f);
float s = 0.5f / st;
w = (mat[0][1] - mat[1][0]) * s;
x = (mat[2][0] + mat[0][2]) * s;
y = (mat[2][1] + mat[1][2]) * s;
z = 0.5f * st;
break;
}
default:
w = 0.f;
x = 0.f;
y = 0.f;
z = 0.f;
break;
}
}
}
void CQuaternion::fromVector3f(const CVector3f& vec)
{
float cosX = std::cos(0.5f * vec.x);
float cosY = std::cos(0.5f * vec.y);
float cosZ = std::cos(0.5f * vec.z);
float sinX = std::sin(0.5f * vec.x);
float sinY = std::sin(0.5f * vec.y);
float sinZ = std::sin(0.5f * vec.z);
w = cosZ * cosY * cosX + sinZ * sinY * sinX;
x = cosZ * cosY * sinX - sinZ * sinY * cosX;
y = cosZ * sinY * cosX + sinZ * cosY * sinX;
z = sinZ * cosY * cosX - cosZ * sinY * sinX;
}
CQuaternion& CQuaternion::operator=(const CQuaternion& q)
{
#if __SSE__
mVec128 = q.mVec128;
#else
w = q.w;
x = q.x;
y = q.y;
z = q.z;
#endif
return *this;
}
CQuaternion CQuaternion::operator+(const CQuaternion& q) const { return CQuaternion(w + q.w, x + q.x, y + q.y, z + q.z); }
CQuaternion CQuaternion::operator-(const CQuaternion& q) const { return CQuaternion(w - q.w, x - q.x, y - q.y, z - q.z); }
CQuaternion CQuaternion::operator*(const CQuaternion& q) const
{
return CQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
}
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
{
return CNUQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
}
CQuaternion CQuaternion::operator/(const CQuaternion& q) const
{
CQuaternion p(q);
p.invert();
return *this * p;
}
CQuaternion CQuaternion::operator*(float scale) const { return CQuaternion(w * scale, x * scale, y * scale, z * scale); }
CNUQuaternion CNUQuaternion::operator*(float scale) const { return CNUQuaternion(w * scale, x * scale, y * scale, z * scale); }
CQuaternion CQuaternion::operator/(float scale) const { return CQuaternion(w / scale, x / scale, y / scale, z / scale); }
CQuaternion CQuaternion::operator-() const { return CQuaternion(-w, -x, -y, -z); }
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q)
{
w += q.w;
x += q.x;
y += q.y;
z += q.z;
return *this;
}
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q)
{
w += q.w;
x += q.x;
y += q.y;
z += q.z;
return *this;
}
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q)
{
w -= q.w;
x -= q.x;
y -= q.y;
z -= q.z;
return *this;
}
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q)
{
CQuaternion orig = *this;
w = orig.w * q.w - CVector3f(orig.x, orig.y, orig.z).dot({q.x, q.y, q.z});
x = orig.y * q.z - orig.z * q.y + orig.w * q.x + orig.x * q.w;
y = orig.z * q.x - orig.x * q.z + orig.w * q.y + orig.y * q.w;
z = orig.x * q.y - orig.y * q.x + orig.w * q.z + orig.z * q.w;
return *this;
}
const CQuaternion& CQuaternion::operator*=(float scale)
{
w *= scale;
x *= scale;
y *= scale;
z *= scale;
return *this;
}
const CQuaternion& CQuaternion::operator/=(float scale)
{
w /= scale;
x /= scale;
y /= scale;
z /= scale;
return *this;
}
void CQuaternion::invert()
{
x = -x;
y = -y;
z = -z;
}
CQuaternion CQuaternion::inverse() const { return CQuaternion(w, -x, -y, -z); }
CQuaternion CQuaternion::log() const
{
float a = std::acos(w);
float sina = std::sin(a);
CQuaternion ret;
ret.w = 0.f;
if (sina > 0.f)
{
ret.x = a * x / sina;
ret.y = a * y / sina;
ret.z = a * z / sina;
}
else
{
ret.x = 0.f;
ret.y = 0.f;
ret.z = 0.f;
CQuaternion::CQuaternion(const CMatrix3f& mat) {
float trace = mat[0][0] + mat[1][1] + mat[2][2];
if (trace >= 0.f) {
float st = std::sqrt(trace + 1.0f);
float s = 0.5f / st;
w() = 0.5f * st;
x() = (mat[1][2] - mat[2][1]) * s;
y() = (mat[2][0] - mat[0][2]) * s;
z() = (mat[0][1] - mat[1][0]) * s;
} else {
int idx = 0;
if (mat[1][1] > mat[0][0]) {
idx = 1;
if (mat[2][2] > mat[1][1])
idx = 2;
} else if (mat[2][2] > mat[0][0]) {
idx = 2;
}
return ret;
switch (idx) {
case 0: {
float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f);
float s = 0.5f / st;
w() = (mat[1][2] - mat[2][1]) * s;
x() = 0.5f * st;
y() = (mat[1][0] + mat[0][1]) * s;
z() = (mat[2][0] + mat[0][2]) * s;
break;
}
case 1: {
float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f);
float s = 0.5f / st;
w() = (mat[2][0] - mat[0][2]) * s;
x() = (mat[1][0] + mat[0][1]) * s;
y() = 0.5f * st;
z() = (mat[2][1] + mat[1][2]) * s;
break;
}
case 2: {
float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f);
float s = 0.5f / st;
w() = (mat[0][1] - mat[1][0]) * s;
x() = (mat[2][0] + mat[0][2]) * s;
y() = (mat[2][1] + mat[1][2]) * s;
z() = 0.5f * st;
break;
}
default:
w() = 0.f;
x() = 0.f;
y() = 0.f;
z() = 0.f;
break;
}
}
}
CQuaternion CQuaternion::exp() const
{
float a = (CVector3f(x, y, z).magnitude());
float sina = std::sin(a);
float cosa = std::cos(a);
CQuaternion ret;
void CQuaternion::fromVector3f(const CVector3f& vec) {
float cosX = std::cos(0.5f * vec.x());
float cosY = std::cos(0.5f * vec.y());
float cosZ = std::cos(0.5f * vec.z());
ret.w = cosa;
if (a > 0.f)
{
ret.x = sina * x / a;
ret.y = sina * y / a;
ret.z = sina * z / a;
}
else
{
ret.x = 0.f;
ret.y = 0.f;
ret.z = 0.f;
}
float sinX = std::sin(0.5f * vec.x());
float sinY = std::sin(0.5f * vec.y());
float sinZ = std::sin(0.5f * vec.z());
return ret;
simd_floats f;
f[0] = cosZ * cosY * cosX + sinZ * sinY * sinX;
f[1] = cosZ * cosY * sinX - sinZ * sinY * cosX;
f[2] = cosZ * sinY * cosX + sinZ * cosY * sinX;
f[3] = sinZ * cosY * cosX - cosZ * sinY * sinX;
mSimd.copy_from(f);
}
CQuaternion& CQuaternion::operator=(const CQuaternion& q) {
mSimd = q.mSimd;
return *this;
}
CQuaternion CQuaternion::operator+(const CQuaternion& q) const {
return mSimd + q.mSimd;
}
CQuaternion CQuaternion::operator-(const CQuaternion& q) const {
return mSimd - q.mSimd;
}
CQuaternion CQuaternion::operator*(const CQuaternion& q) const {
return CQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w(),
z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w(),
x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w());
}
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const {
return CNUQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w(),
z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w(),
x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w());
}
CQuaternion CQuaternion::operator/(const CQuaternion& q) const {
CQuaternion p(q);
p.invert();
return *this * p;
}
CQuaternion CQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
}
CNUQuaternion CNUQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
}
CQuaternion CQuaternion::operator/(float scale) const {
return mSimd / simd<float>(scale);
}
CQuaternion CQuaternion::operator-() const { return -mSimd; }
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q) {
mSimd += q.mSimd;
return *this;
}
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q) {
mSimd += q.mSimd;
return *this;
}
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q) {
mSimd -= q.mSimd;
return *this;
}
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q) {
CQuaternion orig = *this;
w() = orig.w() * q.w() - CVector3f(orig.x(), orig.y(), orig.z()).dot({q.x(), q.y(), q.z()});
x() = orig.y() * q.z() - orig.z() * q.y() + orig.w() * q.x() + orig.x() * q.w();
y() = orig.z() * q.x() - orig.x() * q.z() + orig.w() * q.y() + orig.y() * q.w();
z() = orig.x() * q.y() - orig.y() * q.x() + orig.w() * q.z() + orig.z() * q.w();
return *this;
}
const CQuaternion& CQuaternion::operator*=(float scale) {
mSimd *= simd<float>(scale);
return *this;
}
const CQuaternion& CQuaternion::operator/=(float scale) {
mSimd /= simd<float>(scale);
return *this;
}
static const simd<float> InvertQuat(1.f, -1.f, -1.f, -1.f);
void CQuaternion::invert() {
mSimd *= InvertQuat;
}
CQuaternion CQuaternion::inverse() const { return mSimd * InvertQuat; }
CQuaternion CQuaternion::log() const {
float a = std::acos(w());
float sina = std::sin(a);
CQuaternion ret;
if (sina > 0.f)
ret = a * *this / sina;
else
ret = simd<float>(0.f);
ret.w() = 0.f;
return ret;
}
CQuaternion CQuaternion::exp() const {
float a = (CVector3f(mSimd.shuffle<1, 2, 3, 3>()).magnitude());
float sina = std::sin(a);
float cosa = std::cos(a);
CQuaternion ret;
if (a > 0.f)
ret = sina * *this / a;
else
ret = simd<float>(0.f);
ret.w() = cosa;
return ret;
}
CQuaternion CQuaternion::lerp(const CQuaternion& a, const CQuaternion& b, double t) { return (a + t * (b - a)); }
CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) { return lerp(a, b, t).normalized(); }
CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t)
{
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CQuaternion ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0 - t) * theta);
ret.x = float((a.x * s0 + b.x * s1) * d);
ret.y = float((a.y * s0 + b.y * s1) * d);
ret.z = float((a.z * s0 + b.z * s1) * d);
ret.w = float((a.w * s0 + b.w * s1) * d);
return ret;
}
return a;
CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) {
return lerp(a, b, t).normalized();
}
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1)
{
CVector3f v0N = v0;
CVector3f v1N = v1;
CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t) {
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
if (!v0N.isZero())
v0N.normalize();
if (!v1N.isZero())
v1N.normalize();
CQuaternion ret;
CVector3f cross = v0N.cross(v1N);
float mag = std::sqrt(a.dot(a) * b.dot(b));
if (cross.magSquared() < 0.001f)
{
if (v0N.dot(v1N) > 0.f)
return CQuaternion::skNoRotation;
if (cross.canBeNormalized())
return CQuaternion(0.0f, cross.normalized());
return CQuaternion::skNoRotation;
}
else
{
float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f);
return CQuaternion(0.5f * w, cross * (1.f / w));
}
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0 - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a;
}
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1) {
CVector3f v0N = v0;
CVector3f v1N = v1;
if (!v0N.isZero())
v0N.normalize();
if (!v1N.isZero())
v1N.normalize();
CVector3f cross = v0N.cross(v1N);
if (cross.magSquared() < 0.001f) {
if (v0N.dot(v1N) > 0.f)
return CQuaternion::skNoRotation;
if (cross.canBeNormalized())
return CQuaternion(0.0f, cross.normalized());
return CQuaternion::skNoRotation;
} else {
float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f);
return CQuaternion(0.5f * w, cross * (1.f / w));
}
}
CQuaternion CQuaternion::clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle)
{
CQuaternion arc = shortestRotationArc(v0, v1);
if (angle >= 2.f * std::acos(arc.w))
return arc;
const zeus::CRelAngle& angle) {
CQuaternion arc = shortestRotationArc(v0, v1);
if (angle >= 2.f * std::acos(arc.w()))
return arc;
return fromAxisAngle(arc.getImaginary(), angle);
return fromAxisAngle(arc.getImaginary(), angle);
}
CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t)
{
return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t) {
return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
}
CQuaternion operator+(float lhs, const CQuaternion& rhs)
{
return CQuaternion(lhs + rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CQuaternion operator+(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) + rhs.mSimd;
}
CQuaternion operator-(float lhs, const CQuaternion& rhs)
{
return CQuaternion(lhs - rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CQuaternion operator-(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) - rhs.mSimd;
}
CQuaternion operator*(float lhs, const CQuaternion& rhs)
{
return CQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CQuaternion operator*(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) * rhs.mSimd;
}
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs)
{
return CNUQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) {
return simd<float>(lhs) * rhs.mSimd;
}
CQuaternion CQuaternion::buildEquivalent() const
{
float tmp = std::acos(clamp(-1.f, w, 1.f)) * 2.0;
if (std::fabs(tmp) < 1.0e-7)
return {-1.f, 0.f, 0.f, 0.f};
else
return CQuaternion::fromAxisAngle(CUnitVector3f(x, y, z), tmp + 2.0 * M_PI);
CQuaternion CQuaternion::buildEquivalent() const {
float tmp = std::acos(clamp(-1.f, w(), 1.f)) * 2.f;
if (std::fabs(tmp) < 1.0e-7)
return {-1.f, 0.f, 0.f, 0.f};
else
return CQuaternion::fromAxisAngle(CUnitVector3f(mSimd.shuffle<1, 2, 3, 3>()), tmp + 2.0 * M_PI);
}
CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other)
{
return std::acos(zeus::clamp(-1.f, dot(other), 1.f));
CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other) {
return std::acos(zeus::clamp(-1.f, dot(other), 1.f));
}
CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng)
{
CQuaternion q = skNoRotation;
zeus::CVector3f destNoZ = dest;
zeus::CVector3f sourceNoZ = source;
destNoZ.z = 0.f;
sourceNoZ.z = 0.f;
zeus::CVector3f tmp;
if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f)
{
sourceNoZ.normalize();
destNoZ.normalize();
CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng) {
CQuaternion q = skNoRotation;
zeus::CVector3f destNoZ = dest;
zeus::CVector3f sourceNoZ = source;
destNoZ.z() = 0.f;
sourceNoZ.z() = 0.f;
zeus::CVector3f tmp;
if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f) {
sourceNoZ.normalize();
destNoZ.normalize();
float angleBetween =
normalize_angle(std::atan2(destNoZ.x, destNoZ.y) - std::atan2(sourceNoZ.x, sourceNoZ.y));
float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians());
CQuaternion tmpQ;
tmpQ.rotateZ(-realAngle);
q = tmpQ;
tmp = q.transform(sourceNoZ);
}
else if (sourceNoZ.magSquared() > 0.0001f)
tmp = sourceNoZ.normalized();
else if (destNoZ.magSquared() > 0.0001f)
tmp = destNoZ.normalized();
else
return skNoRotation;
float angleBetween =
normalize_angle(std::atan2(destNoZ.x(), destNoZ.y()) - std::atan2(sourceNoZ.x(), sourceNoZ.y()));
float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians());
CQuaternion tmpQ;
tmpQ.rotateZ(-realAngle);
q = tmpQ;
tmp = q.transform(sourceNoZ);
} else if (sourceNoZ.magSquared() > 0.0001f)
tmp = sourceNoZ.normalized();
else if (destNoZ.magSquared() > 0.0001f)
tmp = destNoZ.normalized();
else
return skNoRotation;
float realAngle =
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z) - std::acos(source.z)), maxAng.asRadians());
return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q;
float realAngle =
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z()) - std::acos(source.z())), maxAng.asRadians());
return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q;
}
}

View File

@ -1,69 +1,65 @@
#include "zeus/CTransform.hpp"
namespace zeus
{
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec)
{
CTransform result;
double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
namespace zeus {
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec) {
CTransform result;
double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
ti = eulerVec[0];
tj = eulerVec[1];
th = eulerVec[2];
ti = eulerVec[0];
tj = eulerVec[1];
th = eulerVec[2];
ci = std::cos(ti);
cj = std::cos(tj);
ch = std::cos(th);
si = std::sin(ti);
sj = std::sin(tj);
sh = std::sin(th);
ci = std::cos(ti);
cj = std::cos(tj);
ch = std::cos(th);
si = std::sin(ti);
sj = std::sin(tj);
sh = std::sin(th);
cc = ci * ch;
cs = ci * sh;
sc = si * ch;
ss = si * sh;
cc = ci * ch;
cs = ci * sh;
sc = si * ch;
ss = si * sh;
result.basis.m[0][0] = float(cj * ch);
result.basis.m[1][0] = float(sj * sc - cs);
result.basis.m[2][0] = float(sj * cc + ss);
result.basis.m[0][1] = float(cj * sh);
result.basis.m[1][1] = float(sj * ss + cc);
result.basis.m[2][1] = float(sj * cs - sc);
result.basis.m[0][2] = float(-sj);
result.basis.m[1][2] = float(cj * si);
result.basis.m[2][2] = float(cj * ci);
result.basis.m[0][0] = float(cj * ch);
result.basis.m[1][0] = float(sj * sc - cs);
result.basis.m[2][0] = float(sj * cc + ss);
result.basis.m[0][1] = float(cj * sh);
result.basis.m[1][1] = float(sj * ss + cc);
result.basis.m[2][1] = float(sj * cs - sc);
result.basis.m[0][2] = float(-sj);
result.basis.m[1][2] = float(cj * si);
result.basis.m[2][2] = float(cj * ci);
return result;
return result;
}
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle)
{
CTransform result;
CVector3f axisN = axis.normalized();
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle) {
CTransform result;
CVector3f axisN = axis.normalized();
float c = std::cos(angle);
float s = std::sin(angle);
float t = 1.f - c;
float c = std::cos(angle);
float s = std::sin(angle);
float t = 1.f - c;
result.basis.m[0][0] = t * axisN.v[0] * axisN.v[0] + c;
result.basis.m[1][0] = t * axisN.v[0] * axisN.v[1] - axisN.v[2] * s;
result.basis.m[2][0] = t * axisN.v[0] * axisN.v[2] + axisN.v[1] * s;
result.basis.m[0][0] = t * axisN[0] * axisN[0] + c;
result.basis.m[1][0] = t * axisN[0] * axisN[1] - axisN[2] * s;
result.basis.m[2][0] = t * axisN[0] * axisN[2] + axisN[1] * s;
result.basis.m[0][1] = t * axisN.v[0] * axisN.v[1] + axisN.v[2] * s;
result.basis.m[1][1] = t * axisN.v[1] * axisN.v[1] + c;
result.basis.m[2][1] = t * axisN.v[1] * axisN.v[2] - axisN.v[0] * s;
result.basis.m[0][1] = t * axisN[0] * axisN[1] + axisN[2] * s;
result.basis.m[1][1] = t * axisN[1] * axisN[1] + c;
result.basis.m[2][1] = t * axisN[1] * axisN[2] - axisN[0] * s;
result.basis.m[0][2] = t * axisN.v[0] * axisN.v[2] - axisN.v[1] * s;
result.basis.m[1][2] = t * axisN.v[1] * axisN.v[2] + axisN.v[0] * s;
result.basis.m[2][2] = t * axisN.v[2] * axisN.v[2] + c;
result.basis.m[0][2] = t * axisN[0] * axisN[2] - axisN[1] * s;
result.basis.m[1][2] = t * axisN[1] * axisN[2] + axisN[0] * s;
result.basis.m[2][2] = t * axisN[2] * axisN[2] + c;
return result;
return result;
}
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin)
{
CTransform ret = CTransformFromEditorEuler(eulerVec);
ret.origin = origin;
return ret;
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin) {
CTransform ret = CTransformFromEditorEuler(eulerVec);
ret.origin = origin;
return ret;
}
}

View File

@ -4,50 +4,46 @@
#include <cassert>
#include "zeus/Math.hpp"
namespace zeus
{
namespace zeus {
const CVector2f CVector2f::skOne = CVector2f(1.0);
const CVector2f CVector2f::skNegOne = CVector2f(-1.0);
const CVector2f CVector2f::skZero(0.f, 0.f);
float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b)
{
float mag1 = a.magnitude();
float mag2 = b.magnitude();
float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b) {
float mag1 = a.magnitude();
float mag2 = b.magnitude();
if (!mag1 || !mag2)
return 0;
if (!mag1 || !mag2)
return 0;
float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2));
return theta;
float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2));
return theta;
}
CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t)
{
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CVector2f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0f - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) {
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CVector2f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
const double s1 = std::sin(sign * t * theta);
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0f - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a;
}
}

View File

@ -5,8 +5,7 @@
#include <cassert>
#include "zeus/Math.hpp"
namespace zeus
{
namespace zeus {
const CVector3f CVector3f::skOne(1.f);
const CVector3f CVector3f::skNegOne(-1.f);
const CVector3f CVector3f::skZero;
@ -20,59 +19,44 @@ const CVector3f CVector3f::skRadToDegVec(180.0f / M_PIF);
const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f);
const CVector3d CVector3d::skZero(0.0, 0.0, 0.0);
CVector3f::CVector3f(const CVector3d& vec)
{
#if __SSE__
mVec128 = _mm_cvtpd_ps(vec.mVec128[0]);
v[2] = vec.v[2];
#else
v[0] = vec.v[0];
v[1] = vec.v[1];
v[2] = vec.v[2];
#endif
CVector3f::CVector3f(const CVector3d& vec) : mSimd(vec.mSimd) {}
float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) {
float mag1 = a.magnitude();
float mag2 = b.magnitude();
if (!mag1 || !mag2)
return 0.f;
float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2));
return theta;
}
float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b)
{
float mag1 = a.magnitude();
float mag2 = b.magnitude();
if (!mag1 || !mag2)
return 0.f;
float dot = a.dot(b);
float theta = std::acos(dot / (mag1 * mag2));
return theta;
}
CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
{
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CVector3f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = acos(sign * prod);
const double s1 = sin(sign * t * theta);
const double d = 1.0 / sin(theta);
const double s0 = sin((1.0 - t) * theta);
ret.x = (float)(a.x * s0 + b.x * s1) * d;
ret.y = (float)(a.y * s0 + b.y * s1) * d;
ret.z = (float)(a.z * s0 + b.z * s1) * d;
return ret;
}
CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t) {
if (t <= 0.0f)
return a;
if (t >= 1.0f)
return b;
CVector3f ret;
float mag = std::sqrt(a.dot(a) * b.dot(b));
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = acos(sign * prod);
const double s1 = sin(sign * t * theta);
const double d = 1.0 / sin(theta);
const double s0 = sin((1.0 - t) * theta);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a;
}
}

View File

@ -1,19 +1,13 @@
#include "zeus/CVector4f.hpp"
#include "zeus/CColor.hpp"
namespace zeus
{
namespace zeus {
const CVector4f CVector4f::skZero(0.f, 0.f, 0.f, 0.f);
CVector4f::CVector4f(const zeus::CColor& other) : x(other.r), y(other.g), z(other.b), w(other.a) {}
CVector4f::CVector4f(const zeus::CColor& other) : mSimd(other.mSimd) {}
CVector4f& CVector4f::operator=(const CColor& other)
{
x = other.r;
y = other.g;
z = other.b;
w = other.a;
return *this;
CVector4f& CVector4f::operator=(const CColor& other) {
mSimd = other.mSimd;
return *this;
}
}

View File

@ -2,312 +2,292 @@
#include "zeus/CTransform.hpp"
#include "zeus/CVector3f.hpp"
#include "zeus/CVector2f.hpp"
#if _WIN32
#include <intrin.h>
#else
#include <cpuid.h>
#endif
namespace zeus
{
namespace zeus {
static bool isCPUInit = false;
static CPUInfo g_cpuFeatures = {};
static CPUInfo g_missingFeatures = {};
void getCpuInfo(int eax, int regs[4])
{
void getCpuInfo(int eax, int regs[4]) {
#if !GEKKO
#if _WIN32
__cpuid(regs, eax);
__cpuid(regs, eax);
#else
__cpuid(eax, regs[0], regs[1], regs[2], regs[3]);
__cpuid(eax, regs[0], regs[1], regs[2], regs[3]);
#endif
#endif
}
void getCpuInfoEx(int eax, int ecx, int regs[4])
{
void getCpuInfoEx(int eax, int ecx, int regs[4]) {
#if !GEKKO
#if _WIN32
__cpuidex(regs, eax, ecx);
__cpuidex(regs, eax, ecx);
#else
__cpuid_count(eax, ecx, regs[0], regs[1], regs[2], regs[3]);
__cpuid_count(eax, ecx, regs[0], regs[1], regs[2], regs[3]);
#endif
#endif
}
void detectCPU()
{
void detectCPU() {
#if !GEKKO
if (isCPUInit)
return;
if (isCPUInit)
return;
int regs[4];
getCpuInfo(0, regs);
int highestFeature = regs[0];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = regs[2];
getCpuInfo(0x80000000, regs);
if (regs[0] >= 0x80000004)
{
for (unsigned int i = 0x80000002; i <= 0x80000004; i++)
{
getCpuInfo(i, regs);
// Interpret CPU brand string and cache information.
if (i == 0x80000002)
memcpy((char*)g_cpuFeatures.cpuBrand, regs, sizeof(regs));
else if (i == 0x80000003)
memcpy((char*)g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
else if (i == 0x80000004)
memcpy((char*)g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
}
int regs[4];
getCpuInfo(0, regs);
int highestFeature = regs[0];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 8) = regs[2];
getCpuInfo(0x80000000, regs);
if (regs[0] >= 0x80000004) {
for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
getCpuInfo(i, regs);
// Interpret CPU brand string and cache information.
if (i == 0x80000002)
memcpy((char*) g_cpuFeatures.cpuBrand, regs, sizeof(regs));
else if (i == 0x80000003)
memcpy((char*) g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
else if (i == 0x80000004)
memcpy((char*) g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
}
}
if (highestFeature >= 1)
{
getCpuInfo(1, regs);
memset((bool*)&g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*)&g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
}
if (highestFeature >= 1) {
getCpuInfo(1, regs);
memset((bool*) &g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*) &g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*) &g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
}
if (highestFeature >= 7)
{
getCpuInfoEx(7, 0, regs);
memset((bool*)&g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
}
if (highestFeature >= 7) {
getCpuInfoEx(7, 0, regs);
memset((bool*) &g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
}
isCPUInit = true;
isCPUInit = true;
#endif
}
const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; }
const CPUInfo& cpuFeatures() {
detectCPU();
return g_cpuFeatures;
}
std::pair<bool, const CPUInfo&> validateCPU()
{
detectCPU();
bool ret = true;
std::pair<bool, const CPUInfo&> validateCPU() {
detectCPU();
bool ret = true;
#if __AVX2__
if (!g_cpuFeatures.AVX2)
{
*(bool*) &g_missingFeatures.AVX2 = true;
ret = false;
}
if (!g_cpuFeatures.AVX2) {
*(bool*) &g_missingFeatures.AVX2 = true;
ret = false;
}
#endif
#if __AVX__
if (!g_cpuFeatures.AVX)
{
*(bool*) &g_missingFeatures.AVX = true;
ret = false;
}
if (!g_cpuFeatures.AVX) {
*(bool*) &g_missingFeatures.AVX = true;
ret = false;
}
#endif
#if __SSE4A__
if (!g_cpuFeatures.SSE4a)
{
*(bool*) &g_missingFeatures.SSE4a = true;
ret = false;
}
if (!g_cpuFeatures.SSE4a)
{
*(bool*) &g_missingFeatures.SSE4a = true;
ret = false;
}
#endif
#if __SSE4_2__
if (!g_cpuFeatures.SSE42)
{
*(bool*) &g_missingFeatures.SSE42 = true;
ret = false;
}
if (!g_cpuFeatures.SSE42) {
*(bool*) &g_missingFeatures.SSE42 = true;
ret = false;
}
#endif
#if __SSE4_1__
if (!g_cpuFeatures.SSE41)
{
*(bool*) &g_missingFeatures.SSE41 = true;
ret = false;
}
if (!g_cpuFeatures.SSE41) {
*(bool*) &g_missingFeatures.SSE41 = true;
ret = false;
}
#endif
#if __SSSE3__
if (!g_cpuFeatures.SSSE3)
{
*(bool*) &g_missingFeatures.SSSE3 = true;
ret = false;
}
if (!g_cpuFeatures.SSSE3) {
*(bool*) &g_missingFeatures.SSSE3 = true;
ret = false;
}
#endif
#if __SSE3__
if (!g_cpuFeatures.SSE3)
{
*(bool*) &g_missingFeatures.SSE3 = true;
ret = false;
}
if (!g_cpuFeatures.SSE3) {
*(bool*) &g_missingFeatures.SSE3 = true;
ret = false;
}
#endif
#if __SSE2__
if (!g_cpuFeatures.SSE2)
{
*(bool*) &g_missingFeatures.SSE2 = true;
ret = false;
}
if (!g_cpuFeatures.SSE2) {
*(bool*) &g_missingFeatures.SSE2 = true;
ret = false;
}
#endif
#if __SSE__
if (!g_cpuFeatures.SSE1)
{
*(bool*) &g_missingFeatures.SSE1 = true;
ret = false;
}
if (!g_cpuFeatures.SSE1) {
*(bool*) &g_missingFeatures.SSE1 = true;
ret = false;
}
#endif
return {ret, g_missingFeatures};
return {ret, g_missingFeatures};
}
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up)
{
CVector3f vLook, vRight, vUp;
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up) {
CVector3f vLook, vRight, vUp;
vLook = lookPos - pos;
if (vLook.magnitude() <= FLT_EPSILON)
vLook = {0.f, 1.f, 0.f};
else
vLook.normalize();
vLook = lookPos - pos;
if (vLook.magnitude() <= FLT_EPSILON)
vLook = {0.f, 1.f, 0.f};
else
vLook.normalize();
vUp = up - vLook * clamp(-1.f, up.dot(vLook), 1.f);
vUp = up - vLook * clamp(-1.f, up.dot(vLook), 1.f);
if (vUp.magnitude() <= FLT_EPSILON) {
vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z();
if (vUp.magnitude() <= FLT_EPSILON)
{
vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z;
if (vUp.magnitude() <= FLT_EPSILON)
vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y;
}
vUp.normalize();
vRight = vLook.cross(vUp);
vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y();
}
vUp.normalize();
vRight = vLook.cross(vUp);
CMatrix3f rmBasis(vRight, vLook, vUp);
return CTransform(rmBasis, pos);
CMatrix3f rmBasis(vRight, vLook, vUp);
return CTransform(rmBasis, pos);
}
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b,
const CVector3f& c, const CVector3f& d, float t)
{
const float omt = 1.f - t;
return ((a * omt + b * t) * omt + (b * omt + c * t) * t) * omt +
((b * omt + c * t) * omt + (c * omt + d * t) * t) * t;
const CVector3f& c, const CVector3f& d, float t) {
const float omt = 1.f - t;
return ((a * omt + b * t) * omt + (b * omt + c * t) * t) * omt +
((b * omt + c * t) * omt + (c * omt + d * t) * t) * t;
}
int floorPowerOfTwo(int x)
{
if (x == 0)
return 0;
/*
* we want to ensure that we always get the previous power,
* but if we have values like 256, we'll always get the same value,
* x-1 ensures that we always get the previous power.
*/
x = (x - 1) | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x - (x >> 1);
int floorPowerOfTwo(int x) {
if (x == 0)
return 0;
/*
* we want to ensure that we always get the previous power,
* but if we have values like 256, we'll always get the same value,
* x-1 ensures that we always get the previous power.
*/
x = (x - 1) | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x - (x >> 1);
}
int ceilingPowerOfTwo(int x)
{
if (x == 0)
return 0;
int ceilingPowerOfTwo(int x) {
if (x == 0)
return 0;
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return x;
return x;
}
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
{
if (t <= 0.0f)
return b;
if (t >= 1.0f)
return c;
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t) {
if (t <= 0.0f)
return b;
if (t >= 1.0f)
return c;
const float t2 = t * t;
const float t3 = t2 * t;
const float t2 = t * t;
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) + c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
}
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
{
if (t <= 0.0f)
return b;
if (t >= 1.0f)
return c;
CVector3f
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
if (t <= 0.0f)
return b;
if (t >= 1.0f)
return c;
const float t2 = t * t;
const float t3 = t2 * t;
const float t2 = t * t;
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) + c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
}
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
{
if (t >= 0.0f)
return b;
if (t <= 1.0f)
return c;
CVector3f
getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
if (t >= 0.0f)
return b;
if (t <= 1.0f)
return c;
CVector3f cb = c - b;
if (!cb.canBeNormalized())
return b;
CVector3f ab = a - b;
if (!ab.canBeNormalized())
ab = CVector3f(0, 1, 0);
CVector3f bVelocity = cb.normalized() - ab.normalized();
if (bVelocity.canBeNormalized())
bVelocity.normalize();
CVector3f dc = d - c;
if (!dc.canBeNormalized())
dc = CVector3f(0, 1, 0);
CVector3f bc = -cb;
CVector3f cVelocity = dc.normalized() - bc.normalized();
if (cVelocity.canBeNormalized())
cVelocity.normalize();
const float cbDistance = cb.magnitude();
return zeus::getCatmullRomSplinePoint(b, c, bVelocity * cbDistance, cVelocity * cbDistance, t);
CVector3f cb = c - b;
if (!cb.canBeNormalized())
return b;
CVector3f ab = a - b;
if (!ab.canBeNormalized())
ab = CVector3f(0, 1, 0);
CVector3f bVelocity = cb.normalized() - ab.normalized();
if (bVelocity.canBeNormalized())
bVelocity.normalize();
CVector3f dc = d - c;
if (!dc.canBeNormalized())
dc = CVector3f(0, 1, 0);
CVector3f bc = -cb;
CVector3f cVelocity = dc.normalized() - bc.normalized();
if (cVelocity.canBeNormalized())
cVelocity.normalize();
const float cbDistance = cb.magnitude();
return zeus::getCatmullRomSplinePoint(b, c, bVelocity * cbDistance, cVelocity * cbDistance, t);
}
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary)
{
return bary.x * p0 + bary.y * p1 + bary.z * p2;
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary) {
return bary.x() * p0 + bary.y() * p1 + bary.z() * p2;
}
bool close_enough(const CVector3f& a, const CVector3f &b, float epsilon)
{
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon && std::fabs(a.z - b.z) < epsilon)
return true;
return false;
bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon) {
return std::fabs(a.x() - b.x()) < epsilon &&
std::fabs(a.y() - b.y()) < epsilon &&
std::fabs(a.z() - b.z()) < epsilon;
}
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon)
{
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon)
return true;
return false;
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) {
return std::fabs(a.x() - b.x()) < epsilon && std::fabs(a.y() - b.y()) < epsilon;
}
template <> CVector3f min(const CVector3f& a, const CVector3f& b)
{
return {min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)};
template<>
CVector3f min(const CVector3f& a, const CVector3f& b) {
return {min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z())};
}
template <> CVector3f max(const CVector3f& a, const CVector3f& b)
{
return {max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)};
template<>
CVector3f max(const CVector3f& a, const CVector3f& b) {
return {max(a.x(), b.x()), max(a.y(), b.y()), max(a.z(), b.z())};
}
}

View File

@ -30,6 +30,9 @@ int main()
CAABox test2{{-100, -100, -100}, {100, 100, 100}};
CAABox test3{{-50, -50, -50}, {50, 50, 50}};
CAABox test4{{-50, -50, -105}, {50, 50, 105}};
CVector2f point2(-90, 67);
CVector2f point3(-90, 67);
CVector3f point4 = point2 + point3;
CVector3f point(-90, 67, -105);
test.closestPointAlongVector(point);
CVector3d(100, -100, -200);
@ -72,7 +75,7 @@ int main()
ctest1.fromHSV(0, 255 / 255.f, .5);
float h, s, v;
ctest1.toHSV(h, s, v);
std::cout << (int)ctest1.r << " " << (int)ctest1.g << " " << (int)ctest1.b << " " << (int)ctest1.a << std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a / 255.f) << std::endl;
std::cout << (int)ctest1.r() << " " << (int)ctest1.g() << " " << (int)ctest1.b() << " " << (int)ctest1.a() << std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a() / 255.f) << std::endl;
return 0;
}