SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:16:50 -10:00
parent d881e58f62
commit e8dfecbb6e
49 changed files with 6047 additions and 4721 deletions

View File

@ -1,5 +1,5 @@
---
IndentWidth: 4
IndentWidth: 2
ColumnLimit: 128
UseTab: Never
---

View File

@ -40,7 +40,6 @@ add_library(zeus
include/zeus/CColor.hpp
include/zeus/Global.hpp
include/zeus/zeus.hpp
include/zeus/TVectorUnion.hpp
include/zeus/CVector2i.hpp
include/zeus/CVector2f.hpp
include/zeus/CVector3f.hpp
@ -56,7 +55,11 @@ add_library(zeus
include/zeus/CSphere.hpp
include/zeus/CUnitVector.hpp
include/zeus/CMRay.hpp
include/zeus/CEulerAngles.hpp)
include/zeus/CEulerAngles.hpp
include/zeus/simd/simd.hpp
include/zeus/simd/simd_sse.hpp
include/zeus/simd/simd_avx.hpp
include/zeus/simd/parallelism_v2_simd.hpp)
add_subdirectory(test)

View File

@ -6,412 +6,367 @@
#include "zeus/CLineSeg.hpp"
#include "zeus/CSphere.hpp"
#include "zeus/Math.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
class alignas(16) CAABox
{
namespace zeus {
class CAABox {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
// Identifies one of the twelve edges of the box. The endpoints for each id
// are defined by getEdge(): the Z* edges vary only in z, the X* edges only
// in x, and the Y* edges only in y.
enum class EBoxEdgeId {
Z0,
X0,
Z1,
X1,
Z2,
X2,
Z3,
X3,
Y0,
Y1,
Y2,
Y3
};
enum class EBoxEdgeId
{
Z0,
X0,
Z1,
X1,
Z2,
X2,
Z3,
X3,
Y0,
Y1,
Y2,
Y3
};
enum class EBoxFaceID {
};
enum class EBoxFaceID
{
};
static const CAABox skInvertedBox;
static const CAABox skNullBox;
static const CAABox skInvertedBox;
static const CAABox skNullBox;
CVector3f min;
CVector3f max;
CVector3f min;
CVector3f max;
// set default AABox to insane inverse min/max to allow for accumulation
CAABox() : CAABox(1e16f, -1e16f) {}
// set default AABox to insane inverse min/max to allow for accumulation
CAABox() : CAABox(1e16f, -1e16f) {}
CAABox(const CVector3f& min, const CVector3f& max) : min(min), max(max) {}
CAABox(const CVector3f& min, const CVector3f& max) : min(min), max(max) {}
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ) {
}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ)
{
}
#if ZE_ATHENA_TYPES
inline void readBoundingBoxBig(athena::io::IStreamReader& in)
{
min.readBig(in);
max.readBig(in);
}
static inline CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in)
{
CAABox ret;
ret.readBoundingBoxBig(in);
return ret;
}
void readBoundingBoxBig(athena::io::IStreamReader& in) {
min.readBig(in);
max.readBig(in);
}
static CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in) {
CAABox ret;
ret.readBoundingBoxBig(in);
return ret;
}
#endif
float distanceFromPointSquared(const CVector3f& other) const
{
float dist = 0;
for (int i = 0; i < 3; i++)
{
if (other[i] < min[i])
{
const float tmp = (min[i] - other[i]);
dist += tmp * tmp;
}
else if (other[i] > max[i])
{
const float tmp = (other[i] - max[i]);
dist += tmp * tmp;
}
}
return dist;
float distanceFromPointSquared(const CVector3f& other) const {
float dist = 0;
for (int i = 0; i < 3; i++) {
if (other[i] < min[i]) {
const float tmp = (min[i] - other[i]);
dist += tmp * tmp;
} else if (other[i] > max[i]) {
const float tmp = (other[i] - max[i]);
dist += tmp * tmp;
}
}
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
return dist;
}
inline bool intersects(const CAABox& other) const
{
bool x1 = (max[0] >= other.min[0]);
bool x2 = (min[0] <= other.max[0]);
bool y1 = (max[1] >= other.min[1]);
bool y2 = (min[1] <= other.max[1]);
bool z1 = (max[2] >= other.min[2]);
bool z2 = (min[2] <= other.max[2]);
return x1 && x2 && y1 && y2 && z1 && z2;
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
// Returns true when this box and `other` overlap (touching counts) on every
// one of the three axes.
bool intersects(const CAABox& other) const {
  for (int axis = 0; axis < 3; ++axis) {
    const bool reachesOther = max[axis] >= other.min[axis];
    const bool otherReaches = min[axis] <= other.max[axis];
    // A single separated axis is enough to rule out an intersection.
    if (!reachesOther || !otherReaches)
      return false;
  }
  return true;
}
// Returns true when the squared distance from the sphere's center to this box
// does not exceed the squared sphere radius.
bool intersects(const CSphere& other) const {
  const float radiusSq = other.radius * other.radius;
  return distanceFromPointSquared(other.position) <= radiusSq;
}
// Returns the distance from the sphere's center to this box when that
// distance is strictly less than the sphere radius, otherwise -1.
float intersectionRadius(const CSphere& other) const {
  const float centerDist = distanceFromPoint(other.position);
  if (centerDist < other.radius)
    return centerDist;
  return -1.f;
}
CAABox booleanIntersection(const CAABox& other) const {
CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i) {
if (min[i] <= other.min[i] && max[i] >= other.max[i]) {
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
} else if (other.min[i] <= min[i] && other.max[i] >= max[i]) {
minVec[i] = min[i];
maxVec[i] = max[i];
} else if (other.min[i] <= min[i] && other.max[i] >= min[i]) {
minVec[i] = min[i];
maxVec[i] = other.max[i];
} else if (other.min[i] <= max[i] && other.max[i] >= max[i]) {
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
}
bool intersects(const CSphere& other) const
{
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
return {minVec, maxVec};
}
// Returns true when this box lies entirely within `other` (shared faces
// count as inside).
bool inside(const CAABox& other) const {
  for (int axis = 0; axis < 3; ++axis) {
    const bool contained = min[axis] >= other.min[axis] && max[axis] <= other.max[axis];
    if (!contained)
      return false;
  }
  return true;
}
// Tests the box against `plane`. For each axis the corner component is taken
// from `max` when the matching plane component is non-negative and from `min`
// otherwise; the result is whether normal . corner + d is >= 0.
bool insidePlane(const CPlane& plane) const {
  CVector3f vmax;
  vmax[0] = (plane.x() >= 0.f) ? max[0] : min[0]; /* X axis */
  vmax[1] = (plane.y() >= 0.f) ? max[1] : min[1]; /* Y axis */
  vmax[2] = (plane.z() >= 0.f) ? max[2] : min[2]; /* Z axis */
  const float signedDist = plane.normal().dot(vmax) + plane.d();
  return signedDist >= 0.f;
}
CVector3f center() const { return (min + max) * 0.5f; }
CVector3f extents() const { return (max - min) * 0.5f; }
// Returns the product of the box's extents along x, y and z.
float volume() const {
  const auto size = max - min;
  const float width = size.x();
  const float depth = size.y();
  const float height = size.z();
  return width * depth * height;
}
// Returns the line segment for the requested box edge, built from the
// corresponding pair of box corners. Any id not listed explicitly falls
// through to the Z0 edge because `default` shares that case.
CLineSeg getEdge(EBoxEdgeId id) const {
switch (id) {
case EBoxEdgeId::Z0:
default:
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), min.y(), min.z()});
case EBoxEdgeId::X0:
return CLineSeg({min.x(), min.y(), min.z()}, {max.x(), min.y(), min.z()});
case EBoxEdgeId::Z1:
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), min.y(), max.z()});
case EBoxEdgeId::X1:
return CLineSeg({max.x(), min.y(), max.z()}, {min.x(), min.y(), max.z()});
case EBoxEdgeId::Z2:
return CLineSeg({max.x(), max.y(), max.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::X2:
return CLineSeg({max.x(), max.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Z3:
return CLineSeg({min.x(), max.y(), min.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::X3:
return CLineSeg({min.x(), max.y(), max.z()}, {max.x(), max.y(), max.z()});
// The Y* edges connect the min-y face to the max-y face.
case EBoxEdgeId::Y0:
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::Y1:
return CLineSeg({min.x(), min.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Y2:
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::Y3:
return CLineSeg({max.x(), min.y(), max.z()}, {max.x(), max.y(), max.z()});
}
}
// Returns the axis-aligned box that bounds this box after all eight of its
// corners have been transformed by `xfrm`.
CAABox getTransformedAABox(const CTransform& xfrm) const {
  // A default-constructed box is inverted, so the first corner initializes it.
  CAABox bounds;
  for (int corner = 0; corner < 8; ++corner) {
    const CVector3f moved = xfrm * getPoint(corner);
    bounds.accumulateBounds(moved);
  }
  return bounds;
}
// Grows this box, where needed, so that it contains `point`.
void accumulateBounds(const CVector3f& point) {
  const float px = point.x();
  const float py = point.y();
  const float pz = point.z();

  // Pull the lower corner down to the point...
  if (px < min.x())
    min.x() = px;
  if (py < min.y())
    min.y() = py;
  if (pz < min.z())
    min.z() = pz;

  // ...and push the upper corner up to it.
  if (px > max.x())
    max.x() = px;
  if (py > max.y())
    max.y() = py;
  if (pz > max.z())
    max.z() = pz;
}
void accumulateBounds(const CAABox& other) {
accumulateBounds(other.min);
accumulateBounds(other.max);
}
// Returns true when `other` lies within the box on all three axes
// (points on the boundary count as inside).
bool pointInside(const CVector3f& other) const {
  const bool inX = min.x() <= other.x() && other.x() <= max.x();
  const bool inY = min.y() <= other.y() && other.y() <= max.y();
  const bool inZ = min.z() <= other.z() && other.z() <= max.z();
  return inX && inY && inZ;
}
// Returns the box corner built by taking, per axis, `min` when the matching
// component of `other` is non-negative and `max` otherwise.
CVector3f closestPointAlongVector(const CVector3f& other) const {
  const float cx = (other.x() >= 0.f) ? min.x() : max.x();
  const float cy = (other.y() >= 0.f) ? min.y() : max.y();
  const float cz = (other.z() >= 0.f) ? min.z() : max.z();
  return {cx, cy, cz};
}
// Returns the box corner built by taking, per axis, `max` when the matching
// component of `other` is non-negative and `min` otherwise.
CVector3f furthestPointAlongVector(const CVector3f& other) const {
  const float fx = (other.x() >= 0.f) ? max.x() : min.x();
  const float fy = (other.y() >= 0.f) ? max.y() : min.y();
  const float fz = (other.z() >= 0.f) ? max.z() : min.z();
  return {fx, fy, fz};
}
float distanceBetween(const CAABox& other) {
int intersects = 0;
if (max.x() >= other.min.x() && min.x() <= other.max.x())
intersects |= 0x1;
if (max.y() >= other.min.y() && min.y() <= other.max.y())
intersects |= 0x2;
if (max.z() >= other.min.z() && min.z() <= other.max.z())
intersects |= 0x4;
float minX, maxX;
if (max.x() < other.min.x()) {
minX = max.x();
maxX = other.min.x();
} else {
minX = min.x();
maxX = other.max.x();
}
float intersectionRadius(const CSphere& other) const
{
float dist = distanceFromPoint(other.position);
return (dist < other.radius) ? dist : -1.f;
float minY, maxY;
if (max.y() < other.min.y()) {
minY = max.y();
maxY = other.min.y();
} else {
minY = min.y();
maxY = other.max.y();
}
inline CAABox booleanIntersection(const CAABox& other) const
{
CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i)
{
if (min[i] <= other.min[i] && max[i] >= other.max[i])
{
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= max[i])
{
minVec[i] = min[i];
maxVec[i] = max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= min[i])
{
minVec[i] = min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= max[i] && other.max[i] >= max[i])
{
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
}
return {minVec, maxVec};
float minZ, maxZ;
if (max.z() < other.min.z()) {
minZ = max.z();
maxZ = other.min.z();
} else {
minZ = min.z();
maxZ = other.max.z();
}
inline bool inside(const CAABox& other) const
{
bool x = min[0] >= other.min[0] && max[0] <= other.max[0];
bool y = min[1] >= other.min[1] && max[1] <= other.max[1];
bool z = min[2] >= other.min[2] && max[2] <= other.max[2];
return x && y && z;
switch (intersects) {
case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1:
return zeus::CVector2f(maxY - minY, maxZ - minZ).magnitude();
case 2:
return zeus::CVector2f(maxX - minX, maxZ - minZ).magnitude();
case 3:
return std::fabs(maxZ - minZ);
case 4:
return zeus::CVector2f(maxX - minX, maxY - minY).magnitude();
case 5:
return std::fabs(maxY - minY);
case 6:
return std::fabs(maxX - minX);
case 7:
default:
return 0.f;
}
}
inline bool insidePlane(const CPlane& plane) const
{
CVector3f vmax;
/* X axis */
if (plane.a >= 0)
vmax[0] = max[0];
else
vmax[0] = min[0];
/* Y axis */
if (plane.b >= 0)
vmax[1] = max[1];
else
vmax[1] = min[1];
/* Z axis */
if (plane.c >= 0)
vmax[2] = max[2];
else
vmax[2] = min[2];
return plane.vec.dot(vmax) + plane.d >= 0.f;
}
// Returns one of the box corners selected by the low three bits of `point`:
// bit 0 picks the x component, bit 1 the y component and bit 2 the z
// component; a clear bit reads from `min`, a set bit from `max`.
CVector3f getPoint(const int point) const {
// NOTE(review): indexes `&min` as a two-element array, which assumes `max`
// is laid out immediately after `min` in the class - confirm this holds.
const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x(), vecs[(point & 2) != 0].y(), vecs[(point & 4) != 0].z());
}
CVector3f center() const { return (min + max) * 0.5f; }
// Returns a copy of `vec` with each component passed through clamp() against
// the box's min/max for that axis.
CVector3f clampToBox(const CVector3f& vec) const {
  const float cx = clamp(min.x(), float(vec.x()), max.x());
  const float cy = clamp(min.y(), float(vec.y()), max.y());
  const float cz = clamp(min.z(), float(vec.z()), max.z());
  return CVector3f(cx, cy, cz);
}
CVector3f extents() const { return (max - min) * 0.5f; }
// Splits this box at its x midpoint: `negX` receives the half from min.x to
// the midpoint and `posX` the half from the midpoint to max.x. The y and z
// ranges are copied unchanged into both outputs.
void splitX(CAABox& negX, CAABox& posX) const {
// Computed before any output is written.
float midX = (max.x() - min.x()) * .5f + min.x();
posX.max = max;
posX.min = min;
posX.min.x() = midX;
negX.max = max;
negX.max.x() = midX;
negX.min = min;
}
float volume() const { return (max.x - min.x) * (max.y - min.y) * (max.z - min.z); }
// Splits this box at its y midpoint: `negY` receives the half from min.y to
// the midpoint and `posY` the half from the midpoint to max.y. The x and z
// ranges are copied unchanged into both outputs.
void splitY(CAABox& negY, CAABox& posY) const {
// Computed before any output is written.
float midY = (max.y() - min.y()) * .5f + min.y();
posY.max = max;
posY.min = min;
posY.min.y() = midY;
negY.max = max;
negY.max.y() = midY;
negY.min = min;
}
inline CLineSeg getEdge(EBoxEdgeId id) const
{
switch (id)
{
case EBoxEdgeId::Z0:
default:
return CLineSeg({min.x, min.y, max.z}, {min.x, min.y, min.z});
case EBoxEdgeId::X0:
return CLineSeg({min.x, min.y, min.z}, {max.x, min.y, min.z});
case EBoxEdgeId::Z1:
return CLineSeg({max.x, min.y, min.z}, {max.x, min.y, max.z});
case EBoxEdgeId::X1:
return CLineSeg({max.x, min.y, max.z}, {min.x, min.y, max.z});
case EBoxEdgeId::Z2:
return CLineSeg({max.x, max.y, max.z}, {max.x, max.y, min.z});
case EBoxEdgeId::X2:
return CLineSeg({max.x, max.y, min.z}, {min.x, max.y, min.z});
case EBoxEdgeId::Z3:
return CLineSeg({min.x, max.y, min.z}, {min.x, max.y, max.z});
case EBoxEdgeId::X3:
return CLineSeg({min.x, max.y, max.z}, {max.x, max.y, max.z});
case EBoxEdgeId::Y0:
return CLineSeg({min.x, min.y, max.z}, {min.x, max.y, max.z});
case EBoxEdgeId::Y1:
return CLineSeg({min.x, min.y, min.z}, {min.x, max.y, min.z});
case EBoxEdgeId::Y2:
return CLineSeg({max.x, min.y, min.z}, {max.x, max.y, min.z});
case EBoxEdgeId::Y3:
return CLineSeg({max.x, min.y, max.z}, {max.x, max.y, max.z});
}
}
// Splits this box at its z midpoint: `negZ` receives the half from min.z to
// the midpoint and `posZ` the half from the midpoint to max.z. The x and y
// ranges are copied unchanged into both outputs.
void splitZ(CAABox& negZ, CAABox& posZ) const {
// Computed before any output is written.
float midZ = (max.z() - min.z()) * .5f + min.z();
posZ.max = max;
posZ.min = min;
posZ.min.z() = midZ;
negZ.max = max;
negZ.max.z() = midZ;
negZ.min = min;
}
inline CAABox getTransformedAABox(const CTransform& xfrm) const
{
CAABox box;
CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point);
point = xfrm * getPoint(1);
box.accumulateBounds(point);
point = xfrm * getPoint(2);
box.accumulateBounds(point);
point = xfrm * getPoint(3);
box.accumulateBounds(point);
point = xfrm * getPoint(4);
box.accumulateBounds(point);
point = xfrm * getPoint(5);
box.accumulateBounds(point);
point = xfrm * getPoint(6);
box.accumulateBounds(point);
point = xfrm * getPoint(7);
box.accumulateBounds(point);
return box;
}
bool invalid() { return (max.x() < min.x() || max.y() < min.y() || max.z() < min.z()); }
inline void accumulateBounds(const CVector3f& point)
{
if (min.x > point.x)
min.x = point.x;
if (min.y > point.y)
min.y = point.y;
if (min.z > point.z)
min.z = point.z;
if (max.x < point.x)
max.x = point.x;
if (max.y < point.y)
max.y = point.y;
if (max.z < point.z)
max.z = point.z;
}
inline void accumulateBounds(const CAABox& other)
{
accumulateBounds(other.min);
accumulateBounds(other.max);
}
inline bool pointInside(const CVector3f& other) const
{
return (min.x <= other.x && other.x <= max.x &&
min.y <= other.y && other.y <= max.y &&
min.z <= other.z && other.z <= max.z);
}
inline CVector3f closestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? min.x : max.x),
(other.y >= 0.f ? min.y : max.y),
(other.z >= 0.f ? min.z : max.z)};
}
inline CVector3f furthestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? max.x : min.x),
(other.y >= 0.f ? max.y : min.y),
(other.z >= 0.f ? max.z : min.z)};
}
inline float distanceBetween(const CAABox& other)
{
int intersects = 0;
if (max.x >= other.min.x && min.x <= other.max.x)
intersects |= 0x1;
if (max.y >= other.min.y && min.y <= other.max.y)
intersects |= 0x2;
if (max.z >= other.min.z && min.z <= other.max.z)
intersects |= 0x4;
float minX, maxX;
if (max.x < other.min.x)
{
minX = max.x;
maxX = other.min.x;
}
else
{
minX = min.x;
maxX = other.max.x;
}
float minY, maxY;
if (max.y < other.min.y)
{
minY = max.y;
maxY = other.min.y;
}
else
{
minY = min.y;
maxY = other.max.y;
}
float minZ, maxZ;
if (max.z < other.min.z)
{
minZ = max.z;
maxZ = other.min.z;
}
else
{
minZ = min.z;
maxZ = other.max.z;
}
switch (intersects)
{
case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1:
return zeus::CVector2f(maxY - minY, maxZ - minZ).magnitude();
case 2:
return zeus::CVector2f(maxX - minX, maxZ - minZ).magnitude();
case 3:
return std::fabs(maxZ - minZ);
case 4:
return zeus::CVector2f(maxX - minX, maxY - minY).magnitude();
case 5:
return std::fabs(maxY - minY);
case 6:
return std::fabs(maxX - minX);
case 7:
default:
return 0.f;
}
}
inline CVector3f getPoint(const int point) const
{
const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x, vecs[(point & 2) != 0].y, vecs[(point & 4) != 0].z);
}
// Returns a copy of `vec` with each component clamped to the box's range on
// that axis. The previous version called clamp() but discarded its return
// value, so the input was returned unmodified; the results are now stored.
// Also declared const, since the box itself is not modified.
inline CVector3f clampToBox(const CVector3f& vec) const
{
    CVector3f ret = vec;
    ret.x = clamp(min.x, ret.x, max.x);
    ret.y = clamp(min.y, ret.y, max.y);
    ret.z = clamp(min.z, ret.z, max.z);
    return ret;
}
inline void splitX(CAABox& negX, CAABox& posX) const
{
float midX = (max.x - min.x) * .5f + min.x;
posX.max = max;
posX.min = min;
posX.min.x = midX;
negX.max = max;
negX.max.x = midX;
negX.min = min;
}
inline void splitY(CAABox& negY, CAABox& posY) const
{
float midY = (max.y - min.y) * .5f + min.y;
posY.max = max;
posY.min = min;
posY.min.y = midY;
negY.max = max;
negY.max.y = midY;
negY.min = min;
}
inline void splitZ(CAABox& negZ, CAABox& posZ) const
{
float midZ = (max.z - min.z) * .5f + min.z;
posZ.max = max;
posZ.min = min;
posZ.min.z = midZ;
negZ.max = max;
negZ.max.z = midZ;
negZ.min = min;
}
inline bool invalid() { return (max.x < min.x || max.y < min.y || max.z < min.z); }
inline float operator[](size_t idx) const
{
assert(idx < 6);
if (idx < 3)
return min[idx];
else
return max[idx-3];
}
// Flat read access to the six box scalars: indices 0-2 are min.x/y/z and
// indices 3-5 are max.x/y/z. Asserts that idx < 6.
float operator[](size_t idx) const {
  assert(idx < 6);
  return (idx < 3) ? min[idx] : max[idx - 3];
}
};
inline bool operator==(const CAABox& left, const CAABox& right)
{
return (left.min == right.min && left.max == right.max);
inline bool operator==(const CAABox& left, const CAABox& right) {
return (left.min == right.min && left.max == right.max);
}
inline bool operator!=(const CAABox& left, const CAABox& right)
{
return (left.min != right.min || left.max != right.max);
inline bool operator!=(const CAABox& left, const CAABox& right) {
return (left.min != right.min || left.max != right.max);
}
}

View File

@ -4,22 +4,15 @@
#include "zeus/CVector3f.hpp"
#include "CUnitVector.hpp"
namespace zeus
{
struct alignas(16) CAxisAngle : CVector3f
{
ZE_DECLARE_ALIGNED_ALLOCATOR();
CAxisAngle() = default;
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
float angle() const { return magnitude(); }
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
namespace zeus {
// Axis-angle rotation stored as a single CVector3f: when built from an axis
// and an angle the stored vector is `angle * axis`, and angle() reports the
// magnitude of the stored vector.
struct CAxisAngle : CVector3f {
CAxisAngle() = default;
// Stores the three components directly.
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
// Scales the unit axis by the angle.
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
// Adopts an existing vector as-is.
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
// Magnitude of the stored vector.
float angle() const { return magnitude(); }
// The stored vector, viewed as a plain CVector3f.
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
};
}

View File

@ -2,11 +2,15 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "CVector4f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/FileReader.hpp>
#include <athena/FileWriter.hpp>
#include "athena/FileReader.hpp"
#include "athena/FileWriter.hpp"
#endif
#include <iostream>
#include <cassert>
@ -20,410 +24,315 @@
#define COLOR(rgba) rgba
#endif
namespace zeus
{
namespace zeus {
typedef uint8_t Comp8;
typedef uint32_t Comp32;
constexpr float OneOver255 = 1.f / 255.f;
typedef union {
struct
{
Comp8 r, g, b, a;
};
Comp32 rgba;
struct {
Comp8 r, g, b, a;
};
Comp32 rgba;
} RGBA32;
class CVector4f;
class alignas(16) CColor
{
class CColor {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
simd<float> mSimd;
static const CColor skRed;
static const CColor skBlack;
static const CColor skBlue;
static const CColor skGreen;
static const CColor skGrey;
static const CColor skOrange;
static const CColor skPurple;
static const CColor skYellow;
static const CColor skWhite;
static const CColor skClear;
static const CColor skRed;
static const CColor skBlack;
static const CColor skBlue;
static const CColor skGreen;
static const CColor skGrey;
static const CColor skOrange;
static const CColor skPurple;
static const CColor skYellow;
static const CColor skWhite;
static const CColor skClear;
CColor() : mSimd(1.f) {}
#if __SSE__
CColor(const __m128& mVec128) : mVec128(mVec128) {}
#endif
CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor() : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor(float r, float g, float b, float a = 1.0f)
{
v[0] = r;
v[1] = g;
v[2] = b;
v[3] = a;
}
#if ZE_ATHENA_TYPES
CColor(const atVec4f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
r = vec.vec[0], g = vec.vec[1], b = vec.vec[2], a = vec.vec[3];
}
#endif
#endif
CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other);
CColor& operator=(const CVector4f& other);
CColor(float r, float g, float b, float a = 1.0f) : mSimd(r, g, b, a) {}
#if ZE_ATHENA_TYPES
static inline CColor ReadRGBABig(athena::io::IStreamReader& reader)
{
CColor ret;
ret.readRGBABig(reader);
return ret;
}
CColor(const atVec4f& vec) : mSimd(vec.simd) {}
inline void readRGBABig(athena::io::IStreamReader& reader)
{
r = reader.readFloatBig();
g = reader.readFloatBig();
b = reader.readFloatBig();
a = reader.readFloatBig();
}
inline void readBGRABig(athena::io::IStreamReader& reader)
{
b = reader.readFloatBig();
g = reader.readFloatBig();
r = reader.readFloatBig();
a = reader.readFloatBig();
}
inline void writeRGBABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(r);
writer.writeFloatBig(g);
writer.writeFloatBig(b);
writer.writeFloatBig(a);
}
inline void writeBGRABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(b);
writer.writeFloatBig(g);
writer.writeFloatBig(r);
writer.writeFloatBig(a);
}
inline void writeRGBA8(athena::io::IStreamWriter& writer) const
{
writer.writeUByte(this->r * 255);
writer.writeUByte(this->g * 255);
writer.writeUByte(this->b * 255);
writer.writeUByte(this->a * 255);
}
#endif
inline bool operator==(const CColor& rhs) const { return (r == rhs.r && g == rhs.g && b == rhs.b && a == rhs.a); }
inline bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
inline CColor operator+(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CColor(r + rhs.r, g + rhs.g, b + rhs.b, a + rhs.a);
#endif
}
inline CColor operator-(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CColor(r - rhs.r, g - rhs.g, b - rhs.b, a - rhs.a);
#endif
}
inline CColor operator*(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CColor(r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a);
#endif
}
inline CColor operator/(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CColor(r / rhs.r, g / rhs.g, b / rhs.b, a / rhs.a);
#endif
}
inline CColor operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_add_ps(mVec128, splat.mVec128));
#else
return CColor(r + val, g + val, b + val, a + val);
#endif
}
inline CColor operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CColor(r - val, g - val, b - val, a - val);
#endif
}
inline CColor operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CColor(r * val, g * val, b * val, a * val);
#endif
}
inline CColor operator/(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_div_ps(mVec128, splat.mVec128));
#else
return CColor(r / val, g / val, b / val, a / val);
#endif
}
inline const CColor& operator+=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
r += rhs.r;
g += rhs.g;
b += rhs.b;
a += rhs.a;
#endif
return *this;
}
inline const CColor& operator-=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
r -= rhs.r;
g -= rhs.g;
b -= rhs.b;
a -= rhs.a;
#endif
return *this;
}
inline const CColor& operator*=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
r *= rhs.r;
g *= rhs.g;
b *= rhs.b;
a *= rhs.a;
#endif
return *this;
}
inline const CColor& operator/=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
r /= rhs.r;
g /= rhs.g;
b /= rhs.b;
a /= rhs.a;
#endif
return *this;
}
inline void normalize()
{
float mag = magnitude();
mag = 1.f / mag;
*this *= mag;
}
inline CColor normalized() const
{
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other) : mSimd(other.mSimd) {}
template <typename T>
CColor(const simd<T>& s) : mSimd(s) {}
CColor& operator=(const CVector4f& other) {
mSimd = other.mSimd;
return *this;
}
#if ZE_ATHENA_TYPES
static CColor ReadRGBABig(athena::io::IStreamReader& reader) {
CColor ret;
ret.readRGBABig(reader);
return ret;
}
// Reads four big-endian floats from `reader` and stores them, in stream
// order, as the r, g, b and a channels.
void readRGBABig(athena::io::IStreamReader& reader) {
  simd_floats channels;
  for (int i = 0; i < 4; ++i)
    channels[i] = reader.readFloatBig();
  mSimd.copy_from(channels);
}
// Reads four big-endian floats from `reader` that are stored in b, g, r, a
// order and places them into the r, g, b, a channel slots.
void readBGRABig(athena::io::IStreamReader& reader) {
  // Destination slot for each successive float in the stream.
  const int slots[4] = {2, 1, 0, 3};
  simd_floats channels;
  for (int slot : slots)
    channels[slot] = reader.readFloatBig();
  mSimd.copy_from(channels);
}
// Writes the r, g, b and a channels, in that order, to `writer` as
// big-endian floats.
void writeRGBABig(athena::io::IStreamWriter& writer) const {
  simd_floats channels(mSimd);
  for (int i = 0; i < 4; ++i)
    writer.writeFloatBig(channels[i]);
}
// Writes the channels to `writer` as big-endian floats in b, g, r, a order.
void writeBGRABig(athena::io::IStreamWriter& writer) const {
  // Source slot for each successive float written to the stream.
  const int slots[4] = {2, 1, 0, 3};
  simd_floats channels(mSimd);
  for (int slot : slots)
    writer.writeFloatBig(channels[slot]);
}
// Writes the r, g, b and a channels to `writer` as one unsigned byte each,
// obtained by scaling the float channel by 255 and converting to atUint8.
void writeRGBA8(athena::io::IStreamWriter& writer) const {
  simd_floats channels(mSimd);
  for (int i = 0; i < 4; ++i)
    writer.writeUByte(atUint8(channels[i] * 255));
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return r * r + g * g + b * b + a * a;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
static inline CColor lerp(const CColor& a, const CColor& b, float t) { return (a + (b - a) * t); }
static inline CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
inline float& operator[](const size_t& idx) { assert(idx < 4); return (&r)[idx]; }
inline const float& operator[](const size_t& idx) const { assert(idx < 4); return (&r)[idx]; }
inline void splat(float rgb, float a)
{
#if __SSE__
TVectorUnion splat = {{rgb, rgb, rgb, a}};
mVec128 = splat.mVec128;
#else
v[0] = rgb;
v[1] = rgb;
v[2] = rgb;
v[3] = a;
#endif
}
inline float rgbDot(const CColor& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif
}
// Exact, channel-by-channel equality of two colors (no tolerance).
bool operator==(const CColor& rhs) const {
  if (!(r() == rhs.r()))
    return false;
  if (!(g() == rhs.g()))
    return false;
  if (!(b() == rhs.b()))
    return false;
  return a() == rhs.a();
}
union {
struct
{
float r, g, b, a;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
void fromRGBA8(Comp8 r, Comp8 g, Comp8 b, Comp8 a)
{
this->r = r * OneOver255;
this->g = g * OneOver255;
this->b = b * OneOver255;
this->a = a * OneOver255;
}
CColor operator+(const CColor& rhs) const {
return mSimd + rhs.mSimd;
}
// Sets this color from a packed 32-bit value. The value is passed through
// the COLOR() macro, split into its four bytes via the RGBA32 union, and
// handed to fromRGBA8().
// The scratch union is an ordinary local: the previous function-local
// `static` made every call share one object, which is a data race when
// colors are decoded from more than one thread and bought nothing.
void fromRGBA32(Comp32 rgba)
{
    RGBA32 tmp;
    tmp.rgba = COLOR(rgba);
    fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
}
CColor operator-(const CColor& rhs) const {
return mSimd - rhs.mSimd;
}
/*!
 * \brief Converts a CColor to RGBA8
 *
 * Each float channel is scaled by 255 and converted to Comp8. Declared const
 * because the color itself is only read.
 *
 * \param r receives the red channel
 * \param g receives the green channel
 * \param b receives the blue channel
 * \param a receives the alpha channel
 */
void toRGBA8(Comp8& r, Comp8& g, Comp8& b, Comp8& a) const
{
    // `this->` is required: the output parameters shadow the members.
    r = this->r * 255;
    g = this->g * 255;
    b = this->b * 255;
    a = this->a * 255;
}
CColor operator*(const CColor& rhs) const {
return mSimd * rhs.mSimd;
}
/**
* @brief Assigns rgba from hsv
* @param h[0-1] The hue percentage of the color.
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void fromHSV(float h, float s, float v, float _a = 1.0);
CColor operator/(const CColor& rhs) const {
return mSimd / rhs.mSimd;
}
/**
* @brief Converts rgba to hsv
* @param h[0-1] The hue percentage of the color.
* @param s[0-1] The saturation percentage of the color.
* @param v[0-1] The value percentage of the color.
* @param a[0-1] The alpha percentage of the color.
*/
void toHSV(float& h, float& s, float& v) const;
CColor operator+(float val) const {
return mSimd + simd<float>(val);
}
void fromHSL(float h, float s, float l, float _a = 1.0);
CColor operator-(float val) const {
return mSimd - simd<float>(val);
}
void toHSL(float& h, float& s, float& l);
CColor operator*(float val) const {
return mSimd * simd<float>(val);
}
// Returns a gray color whose r, g and b are all the root-mean-square of this
// color's r, g and b, with alpha carried over unchanged. Declared const
// because this color is only read.
CColor toGrayscale() const { return {std::sqrt((r * r + g * g + b * b) / 3), a}; }
CColor operator/(float val) const {
return mSimd / simd<float>(val);
}
/**
* @brief Clamps to GPU-safe RGBA values [0,1]
*/
void Clamp()
{
this->r = std::min(1.f, std::max(0.f, this->r));
this->g = std::min(1.f, std::max(0.f, this->g));
this->b = std::min(1.f, std::max(0.f, this->b));
this->a = std::min(1.f, std::max(0.f, this->a));
}
const CColor& operator+=(const CColor& rhs) {
mSimd += rhs.mSimd;
return *this;
}
const CColor& operator-=(const CColor& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
const CColor& operator*=(const CColor& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CColor& operator/=(const CColor& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
const CColor& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
const CColor& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
const CColor& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
const CColor& operator/=(float rhs) {
mSimd /= simd<float>(rhs);
return *this;
}
// Scales all four channels in place by the reciprocal of magnitude().
void normalize() {
  const float invMag = 1.f / magnitude();
  *this *= invMag;
}
// Returns a copy of this color with all four channels scaled by the
// reciprocal of magnitude().
CColor normalized() const {
  const float invMag = 1.f / magnitude();
  return *this * invMag;
}
float magSquared() const {
return mSimd.dot4(mSimd);
}
float magnitude() const { return std::sqrt(magSquared()); }
// Linear blend of two colors across all four channels:
// (1 - t) * a + b * t, so t = 0 yields `a` and t = 1 yields `b`.
static CColor lerp(const CColor& a, const CColor& b, float t) {
  const zeus::simd<float> weightA(1.f - t);
  const zeus::simd<float> weightB(t);
  return weightA * a.mSimd + b.mSimd * weightB;
}
static CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
simd<float>::reference operator[](const size_t& idx) {
assert(idx < 4);
return mSimd[idx];
}