mirror of https://github.com/AxioDL/zeus.git
New code style refactor
This commit is contained in:
@ -1,6 +1,6 @@
IndentWidth: 2
ColumnLimit: 128
BasedOnStyle: LLVM
ColumnLimit: 120
UseTab: Never
Language: Cpp
@ -8,7 +8,6 @@ DerivePointerAlignment: false
PointerAlignment: Left
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
BreakBeforeBraces: Allman
IndentCaseLabels: false
AllowShortBlocksOnASingleLine: true
AlignOperands: true
@ -16,7 +15,6 @@ AlignTrailingComments: true
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BreakConstructorInitializersBeforeComma: true
BreakStringLiterals: true
AlwaysBreakAfterReturnType: None
AlwaysBreakAfterDefinitionReturnType: None
AllowShortFunctionsOnASingleLine: All
@ -25,6 +23,6 @@ NamespaceIndentation: None
BinPackArguments: true
BinPackParameters: true
SortIncludes: false
AccessModifierOffset: -4
AccessModifierOffset: -2
ConstructorInitializerIndentWidth: 0
ConstructorInitializerAllOnOneLineOrOnePerLine: true
@ -16,23 +16,9 @@
namespace zeus {
class CAABox {
enum class EBoxEdgeId {
enum class EBoxEdgeId { Z0, X0, Z1, X1, Z2, X2, Z3, X3, Y0, Y1, Y2, Y3 };
enum class EBoxFaceID {
enum class EBoxFaceID {};
static const CAABox skInvertedBox;
static const CAABox skNullBox;
@ -48,8 +34,7 @@ public:
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ) {
: min(minX, minY, minZ), max(maxX, maxY, maxZ) {}
@ -233,20 +218,17 @@ public:
bool pointInside(const CVector3f& other) const {
return (min.x() <= other.x() && other.x() <= max.x() &&
min.y() <= other.y() && other.y() <= max.y() &&
return (min.x() <= other.x() && other.x() <= max.x() && min.y() <= other.y() && other.y() <= max.y() &&
min.z() <= other.z() && other.z() <= max.z());
CVector3f closestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? min.x() : max.x()),
(other.y() >= 0.f ? min.y() : max.y()),
return {(other.x() >= 0.f ? min.x() : max.x()), (other.y() >= 0.f ? min.y() : max.y()),
(other.z() >= 0.f ? min.z() : max.z())};
CVector3f furthestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? max.x() : min.x()),
(other.y() >= 0.f ? max.y() : min.y()),
return {(other.x() >= 0.f ? max.x() : min.x()), (other.y() >= 0.f ? max.y() : min.y()),
(other.z() >= 0.f ? max.z() : min.z())};
@ -368,5 +350,4 @@ inline bool operator==(const CAABox& left, const CAABox& right) {
inline bool operator!=(const CAABox& left, const CAABox& right) {
return (left.min != right.min || left.max != right.max);
} // namespace zeus
@ -14,5 +14,4 @@ struct CAxisAngle : CVector3f {
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
} // namespace zeus
@ -19,7 +19,8 @@
#define COLOR(rgba) \
(unsigned)(((rgba)&0x000000FF) << 24 | ((rgba)&0x0000FF00) << 8 | ((rgba)&0x00FF0000) >> 8 | ((rgba)&0xFF000000) >> 24)
(unsigned)(((rgba)&0x000000FF) << 24 | ((rgba)&0x0000FF00) << 8 | ((rgba)&0x00FF0000) >> 8 | \
((rgba)&0xFF000000) >> 24)
#define COLOR(rgba) rgba
@ -136,37 +137,21 @@ public:
bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
CColor operator+(const CColor& rhs) const {
return mSimd + rhs.mSimd;
CColor operator+(const CColor& rhs) const { return mSimd + rhs.mSimd; }
CColor operator-(const CColor& rhs) const {
return mSimd - rhs.mSimd;
CColor operator-(const CColor& rhs) const { return mSimd - rhs.mSimd; }
CColor operator*(const CColor& rhs) const {
return mSimd * rhs.mSimd;
CColor operator*(const CColor& rhs) const { return mSimd * rhs.mSimd; }
CColor operator/(const CColor& rhs) const {
return mSimd / rhs.mSimd;
CColor operator/(const CColor& rhs) const { return mSimd / rhs.mSimd; }
CColor operator+(float val) const {
return mSimd + simd<float>(val);
CColor operator+(float val) const { return mSimd + simd<float>(val); }
CColor operator-(float val) const {
return mSimd - simd<float>(val);
CColor operator-(float val) const { return mSimd - simd<float>(val); }
CColor operator*(float val) const {
return mSimd * simd<float>(val);
CColor operator*(float val) const { return mSimd * simd<float>(val); }
CColor operator/(float val) const {
return mSimd / simd<float>(val);
CColor operator/(float val) const { return mSimd / simd<float>(val); }
const CColor& operator+=(const CColor& rhs) {
mSimd += rhs.mSimd;
@ -220,9 +205,7 @@ public:
return *this * mag;
float magSquared() const {
return mSimd.dot4(mSimd);
float magSquared() const { return mSimd.dot4(mSimd); }
float magnitude() const { return std::sqrt(magSquared()); }
@ -247,9 +230,7 @@ public:
mSimd[3] = a;
float rgbDot(const CColor& rhs) const {
return mSimd.dot3(rhs.mSimd);
float rgbDot(const CColor& rhs) const { return mSimd.dot3(rhs.mSimd); }
void fromRGBA8(const Comp8 ri, const Comp8 gi, const Comp8 bi, const Comp8 ai) {
mSimd = simd<float>(ri * OneOver255, gi * OneOver255, bi * OneOver255, ai * OneOver255);
@ -320,19 +301,11 @@ public:
simd<float>::reference a() { return mSimd[3]; }
static inline CColor operator+(float lhs, const CColor& rhs) {
return simd<float>(lhs) + rhs.mSimd;
static inline CColor operator+(float lhs, const CColor& rhs) { return simd<float>(lhs) + rhs.mSimd; }
static inline CColor operator-(float lhs, const CColor& rhs) {
return simd<float>(lhs) - rhs.mSimd;
static inline CColor operator-(float lhs, const CColor& rhs) { return simd<float>(lhs) - rhs.mSimd; }
static inline CColor operator*(float lhs, const CColor& rhs) {
return simd<float>(lhs) * rhs.mSimd;
static inline CColor operator*(float lhs, const CColor& rhs) { return simd<float>(lhs) * rhs.mSimd; }
static inline CColor operator/(float lhs, const CColor& rhs) {
return simd<float>(lhs) / rhs.mSimd;
static inline CColor operator/(float lhs, const CColor& rhs) { return simd<float>(lhs) / rhs.mSimd; }
} // namespace zeus
@ -12,5 +12,4 @@ public:
CEulerAngles(const CTransform& xf);
} // namespace zeus
@ -16,4 +16,4 @@ public:
bool sphereFrustumTest(const CSphere& sphere) const;
bool pointFrustumTest(const CVector3f& point) const;
} // namespace zeus
@ -11,5 +11,4 @@ public:
CVector3f origin;
CVector3f dir;
} // namespace zeus
@ -18,5 +18,4 @@ public:
CVector3f xc_dir;
CVector3f x18_end;
} // namespace zeus
@ -30,5 +30,4 @@ struct CMRay {
float invLength; // x28
CVector3f dir; // x2c
} // namespace zeus
@ -11,7 +11,6 @@ class CQuaternion;
class CMatrix3f {
explicit CMatrix3f(bool zero = false) {
m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
@ -23,12 +22,8 @@ public:
CMatrix3f(float m00, float m01, float m02,
float m10, float m11, float m12,
float m20, float m21, float m22)
: m{{m00, m10, m20},
{m01, m11, m21},
{m02, m12, m22}} {}
CMatrix3f(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22)
: m{{m00, m10, m20}, {m01, m11, m21}, {m02, m12, m22}} {}
CMatrix3f(const CVector3f& scaleVec) {
m[0] = simd<float>(0.f);
@ -99,8 +94,7 @@ public:
CVector3f operator*(const CVector3f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() + m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>();
@ -147,31 +141,26 @@ public:
static CMatrix3f RotateX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
simd<float>{0.f, cosT, sinT, 0.f},
return CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f}, simd<float>{0.f, cosT, sinT, 0.f},
simd<float>{0.f, -sinT, cosT, 0.f});
static CMatrix3f RotateY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
simd<float>{0.f, 1.f, 0.f, 0.f},
return CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f}, simd<float>{0.f, 1.f, 0.f, 0.f},
simd<float>{sinT, 0.f, cosT, 0.f});
static CMatrix3f RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
return CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f}, simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f});
float determinant() const {
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) +
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
return m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) + m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
@ -181,10 +170,8 @@ public:
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs) {
simd<float> v[3];
for (int i = 0; i < 3; ++i)
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() + lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>();
return CMatrix3f(v[0], v[1], v[2]);
} // namespace zeus
@ -18,14 +18,9 @@ public:
CMatrix4f(float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23,
float m30, float m31, float m32, float m33)
: m{{m00, m10, m20, m30},
{m01, m11, m21, m31},
{m02, m12, m22, m32},
{m03, m13, m23, m33}} {}
CMatrix4f(float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20,
float m21, float m22, float m23, float m30, float m31, float m32, float m33)
: m{{m00, m10, m20, m30}, {m01, m11, m21, m31}, {m02, m12, m22, m32}, {m03, m13, m23, m33}} {}
CMatrix4f(const CVector3f& scaleVec) {
m[0][0] = scaleVec[0];
@ -71,10 +66,8 @@ public:
CVector4f operator*(const CVector4f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>() +
m[3].mSimd * other.mSimd.shuffle<3, 3, 3, 3>();
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() + m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>() + m[3].mSimd * other.mSimd.shuffle<3, 3, 3, 3>();
CVector4f& operator[](size_t i) {
@ -106,11 +99,8 @@ public:
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs) {
simd<float> v[4];
for (int i = 0; i < 4; ++i)
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>() +
lhs.m[3].mSimd * rhs[i].mSimd.shuffle<3, 3, 3, 3>();
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() + lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>() + lhs.m[3].mSimd * rhs[i].mSimd.shuffle<3, 3, 3, 3>();
return CMatrix4f(v[0], v[1], v[2], v[3]);
} // namespace zeus
@ -42,9 +42,6 @@ public:
bool OBBIntersectsBox(const COBBox& other) const;
bool AABoxIntersectsBox(const CAABox& other) {
return OBBIntersectsBox(FromAABox(other, CTransform::Identity()));
bool AABoxIntersectsBox(const CAABox& other) { return OBBIntersectsBox(FromAABox(other, CTransform::Identity())); }
} // namespace zeus
@ -36,9 +36,7 @@ public:
mSimd[3] = nd * mag;
float pointToPlaneDist(const CVector3f& pos) const {
return pos.dot(normal()) - d();
float pointToPlaneDist(const CVector3f& pos) const { return pos.dot(normal()) - d(); }
bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const;
@ -66,5 +64,4 @@ public:
zeus::simd<float> mSimd;
} // namespace zeus
@ -7,11 +7,7 @@
#include <cmath>
namespace zeus {
enum class EProjType {
None = 0,
Orthographic = 1,
Perspective = 2
enum class EProjType { None = 0, Orthographic = 1, Perspective = 2 };
class SProjOrtho {
@ -19,16 +15,14 @@ public:
explicit SProjOrtho(float p_top = 1.0f, float p_bottom = -1.0f, float p_left = -1.0f, float p_right = 1.0f,
float p_near = 1.0f, float p_far = -1.0f)
: top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far) {
: top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far) {}
struct SProjPersp {
float fov, aspect, znear, zfar;
SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) {
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) {}
extern const SProjOrtho kOrthoIdentity;
@ -101,12 +95,10 @@ protected:
/* Projection intermediate */
union {
#ifdef _MSC_VER
struct {
SProjOrtho m_ortho;
struct {
SProjPersp m_persp;
@ -118,5 +110,4 @@ protected:
/* Cached projection matrix */
CMatrix4f m_mtx;
} // namespace zeus
@ -37,9 +37,7 @@ public:
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
mSimd[0] = wi;
CQuaternion(float wi, const CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) { mSimd[0] = wi; }
template <typename T>
CQuaternion(const simd<T>& s) : mSimd(s) {}
@ -57,13 +55,9 @@ public:
CQuaternion(const atVec4f& vec) : mSimd(vec.simd) {}
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
operator atVec4f&() { return *reinterpret_cast<atVec4f*>(this); }
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
operator const atVec4f&() const { return *reinterpret_cast<const atVec4f*>(this); }
@ -158,9 +152,7 @@ public:
CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
float dot(const CQuaternion& rhs) const {
return mSimd.dot4(rhs.mSimd);
float dot(const CQuaternion& rhs) const { return mSimd.dot4(rhs.mSimd); }
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t);
@ -177,7 +169,7 @@ public:
float roll() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[1] * f[2] + f[0] * f[3]), f[0] * f[0] + f[1] * f[1] - f[2] * f[2] - f[3] * f[3]);
return std::asin(-2.f * (f[1] * f[3] - f[0] * f[2]));
float pitch() const {
@ -187,7 +179,7 @@ public:
float yaw() const {
simd_floats f(mSimd);
return std::asin(-2.f * (f[1] * f[3] - f[0] * f[2]));
return std::atan2(2.f * (f[1] * f[2] + f[0] * f[3]), f[0] * f[0] + f[1] * f[1] - f[2] * f[2] - f[3] * f[3]);
CQuaternion buildEquivalent() const;
@ -238,9 +230,7 @@ public:
CNUQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CNUQuaternion(float win, const zeus::CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
w() = win;
CNUQuaternion(float win, const zeus::CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) { w() = win; }
CNUQuaternion(const CQuaternion& other) : mSimd(other.mSimd) {}
@ -307,4 +297,4 @@ CQuaternion operator-(float lhs, const CQuaternion& rhs);
CQuaternion operator*(float lhs, const CQuaternion& rhs);
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs);
} // namespace zeus
@ -26,5 +26,4 @@ public:
CVector2f position;
CVector2f size;
} // namespace zeus
@ -91,5 +91,4 @@ struct CRelAngle {
return *this;
} // namespace zeus
@ -17,5 +17,4 @@ public:
CVector3f position;
float radius;
} // namespace zeus
@ -13,14 +13,12 @@ class CTransform {
CTransform() : basis(false) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero)
: basis(basis), origin(offset) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero) : basis(basis), origin(offset) {}
CTransform(const atVec4f* mtx)
: basis(mtx[0], mtx[1], mtx[2])
, origin(mtx[0].simd[3], mtx[1].simd[3], mtx[2].simd[3]) {}
: basis(mtx[0], mtx[1], mtx[2]), origin(mtx[0].simd[3], mtx[1].simd[3], mtx[2].simd[3]) {}
void read34RowMajor(athena::io::IStreamReader& r) {
atVec4f r0 = r.readVec4fBig();
@ -37,13 +35,9 @@ public:
CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3)
: basis(c0, c1, c2), origin(c3) {}
static CTransform Identity() {
return CTransform(CMatrix3f::skIdentityMatrix3f);
static CTransform Identity() { return CTransform(CMatrix3f::skIdentityMatrix3f); }
bool operator==(const CTransform& other) const {
return origin == other.origin && basis == other.basis;
bool operator==(const CTransform& other) const { return origin == other.origin && basis == other.basis; }
CTransform operator*(const CTransform& rhs) const {
return CTransform(basis * rhs.basis, origin + (basis * rhs.origin));
@ -54,57 +48,44 @@ public:
return CTransform(inv, inv * -origin);
static CTransform Translate(const CVector3f& position) {
return {CMatrix3f::skIdentityMatrix3f, position};
static CTransform Translate(const CVector3f& position) { return {CMatrix3f::skIdentityMatrix3f, position}; }
static CTransform Translate(float x, float y, float z) {
return Translate({x, y, z});
static CTransform Translate(float x, float y, float z) { return Translate({x, y, z}); }
CTransform operator+(const CVector3f& other) {
return CTransform(basis, origin + other);
CTransform operator+(const CVector3f& other) { return CTransform(basis, origin + other); }
CTransform& operator+=(const CVector3f& other) {
origin += other;
return *this;
CTransform operator-(const CVector3f& other) {
return CTransform(basis, origin - other);
CTransform operator-(const CVector3f& other) { return CTransform(basis, origin - other); }
CTransform& operator-=(const CVector3f& other) {
origin -= other;
return *this;
zeus::CVector3f rotate(const CVector3f& vec) const {
return basis * vec;
zeus::CVector3f rotate(const CVector3f& vec) const { return basis * vec; }
static CTransform RotateX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
simd<float>{0.f, cosT, sinT, 0.f},
return CTransform(CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f}, simd<float>{0.f, cosT, sinT, 0.f},
simd<float>{0.f, -sinT, cosT, 0.f}));
static CTransform RotateY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
simd<float>{0.f, 1.f, 0.f, 0.f},
return CTransform(CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f}, simd<float>{0.f, 1.f, 0.f, 0.f},
simd<float>{sinT, 0.f, cosT, 0.f}));
static CTransform RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
return CTransform(CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f}, simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f}));
@ -163,20 +144,17 @@ public:
static CTransform Scale(const CVector3f& factor) {
return CTransform(CMatrix3f(simd<float>{factor.x(), 0.f, 0.f, 0.f},
simd<float>{0.f, factor.y(), 0.f, 0.f},
return CTransform(CMatrix3f(simd<float>{factor.x(), 0.f, 0.f, 0.f}, simd<float>{0.f, factor.y(), 0.f, 0.f},
simd<float>{0.f, 0.f, factor.z(), 0.f}));
static CTransform Scale(float x, float y, float z) {
return CTransform(CMatrix3f(simd<float>{x, 0.f, 0.f, 0.f},
simd<float>{0.f, y, 0.f, 0.f},
simd<float>{0.f, 0.f, z, 0.f}));
return CTransform(
CMatrix3f(simd<float>{x, 0.f, 0.f, 0.f}, simd<float>{0.f, y, 0.f, 0.f}, simd<float>{0.f, 0.f, z, 0.f}));
static CTransform Scale(float factor) {
return CTransform(CMatrix3f(simd<float>{factor, 0.f, 0.f, 0.f},
simd<float>{0.f, factor, 0.f, 0.f},
return CTransform(CMatrix3f(simd<float>{factor, 0.f, 0.f, 0.f}, simd<float>{0.f, factor, 0.f, 0.f},
simd<float>{0.f, 0.f, factor, 0.f}));
@ -192,26 +170,18 @@ public:
return ret;
void setRotation(const CMatrix3f& mat) {
basis = mat;
void setRotation(const CMatrix3f& mat) { basis = mat; }
void setRotation(const CTransform& xfrm) {
void setRotation(const CTransform& xfrm) { setRotation(xfrm.basis); }
* @brief buildMatrix3f Returns the stored matrix
* buildMatrix3f is here for compliance with Retro's Math API
* @return The Matrix (Neo, you are the one)
const CMatrix3f& buildMatrix3f() const {
return basis;
const CMatrix3f& buildMatrix3f() const { return basis; }
CVector3f operator*(const CVector3f& other) const {
return origin + basis * other;
CVector3f operator*(const CVector3f& other) const { return origin + basis * other; }
CMatrix4f toMatrix4f() const {
CMatrix4f ret(basis[0], basis[1], basis[2], origin);
@ -222,17 +192,11 @@ public:
return ret;
CVector3f upVector() const {
return basis.m[2];
CVector3f upVector() const { return basis.m[2]; }
CVector3f frontVector() const {
return basis.m[1];
CVector3f frontVector() const { return basis.m[1]; }
CVector3f rightVector() const {
return basis.m[0];
CVector3f rightVector() const { return basis.m[0]; }
void orthonormalize() {
@ -242,14 +206,13 @@ public:
void printMatrix() const {
printf("%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n",
basis[0][0], basis[1][0], basis[2][0], origin[0],
basis[0][1], basis[1][1], basis[2][1], origin[1],
basis[0][2], basis[1][2], basis[2][2], origin[2],
0.f, 0.f, 0.f, 1.f);
basis[0][0], basis[1][0], basis[2][0], origin[0], basis[0][1], basis[1][1], basis[2][1], origin[1], basis[0][2],
basis[1][2], basis[2][2], origin[2], 0.f, 0.f, 0.f, 1.f);
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) {
@ -269,9 +232,7 @@ public:
CVector3f origin;
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) {
return CTransform(CMatrix3f(scale));
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) { return CTransform(CMatrix3f(scale)); }
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec);
@ -280,5 +241,4 @@ CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle);
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp);
} // namespace zeus
@ -17,4 +17,4 @@ public:
} // namespace zeus
@ -18,13 +18,9 @@ public:
CVector2f(const atVec2f& vec) : mSimd(vec.simd) {}
operator atVec2f&() {
return *reinterpret_cast<atVec2f*>(this);
operator atVec2f&() { return *reinterpret_cast<atVec2f*>(this); }
operator const atVec2f&() const {
return *reinterpret_cast<const atVec2f*>(this);
operator const atVec2f&() const { return *reinterpret_cast<const atVec2f*>(this); }
void readBig(athena::io::IStreamReader& input) {
mSimd[0] = input.readFloatBig();
@ -52,61 +48,33 @@ public:
CVector2f(float x, float y) { assign(x, y); }
bool operator==(const CVector2f& rhs) const {
return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1];
bool operator==(const CVector2f& rhs) const { return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1]; }
bool operator!=(const CVector2f& rhs) const {
return mSimd[0] != rhs.mSimd[0] || mSimd[1] != rhs.mSimd[1];
bool operator!=(const CVector2f& rhs) const { return mSimd[0] != rhs.mSimd[0] || mSimd[1] != rhs.mSimd[1]; }
bool operator<(const CVector2f& rhs) const {
return mSimd[0] < rhs.mSimd[0] && mSimd[1] < rhs.mSimd[1];
bool operator<(const CVector2f& rhs) const { return mSimd[0] < rhs.mSimd[0] && mSimd[1] < rhs.mSimd[1]; }
bool operator<=(const CVector2f& rhs) const {
return mSimd[0] <= rhs.mSimd[0] && mSimd[1] <= rhs.mSimd[1];
bool operator<=(const CVector2f& rhs) const { return mSimd[0] <= rhs.mSimd[0] && mSimd[1] <= rhs.mSimd[1]; }
bool operator>(const CVector2f& rhs) const {
return mSimd[0] > rhs.mSimd[0] && mSimd[1] > rhs.mSimd[1];
bool operator>(const CVector2f& rhs) const { return mSimd[0] > rhs.mSimd[0] && mSimd[1] > rhs.mSimd[1]; }
bool operator>=(const CVector2f& rhs) const {
return mSimd[0] >= rhs.mSimd[0] && mSimd[1] >= rhs.mSimd[1];
bool operator>=(const CVector2f& rhs) const { return mSimd[0] >= rhs.mSimd[0] && mSimd[1] >= rhs.mSimd[1]; }
CVector2f operator+(const CVector2f& rhs) const {
return mSimd + rhs.mSimd;
CVector2f operator+(const CVector2f& rhs) const { return mSimd + rhs.mSimd; }
CVector2f operator-(const CVector2f& rhs) const {
return mSimd - rhs.mSimd;
CVector2f operator-(const CVector2f& rhs) const { return mSimd - rhs.mSimd; }
CVector2f operator-() const {
return -mSimd;
CVector2f operator-() const { return -mSimd; }
CVector2f operator*(const CVector2f& rhs) const {
return mSimd * rhs.mSimd;
CVector2f operator*(const CVector2f& rhs) const { return mSimd * rhs.mSimd; }
CVector2f operator/(const CVector2f& rhs) const {
return mSimd / rhs.mSimd;
CVector2f operator/(const CVector2f& rhs) const { return mSimd / rhs.mSimd; }
CVector2f operator+(float val) const {
return mSimd + simd<float>(val);
CVector2f operator+(float val) const { return mSimd + simd<float>(val); }
CVector2f operator-(float val) const {
return mSimd - simd<float>(val);
CVector2f operator-(float val) const { return mSimd - simd<float>(val); }
CVector2f operator*(float val) const {
return mSimd * simd<float>(val);
CVector2f operator*(float val) const { return mSimd * simd<float>(val); }
CVector2f operator/(float val) const {
float ooval = 1.f / val;
@ -170,25 +138,15 @@ public:
float cross(const CVector2f& rhs) const { return (x() * rhs.y()) - (y() * rhs.x()); }
float dot(const CVector2f& rhs) const {
return mSimd.dot2(rhs.mSimd);
float dot(const CVector2f& rhs) const { return mSimd.dot2(rhs.mSimd); }
float magSquared() const {
return mSimd.dot2(mSimd);
float magSquared() const { return mSimd.dot2(mSimd); }
float magnitude() const {
return std::sqrt(magSquared());
float magnitude() const { return std::sqrt(magSquared()); }
void zeroOut() {
*this = CVector2f::skZero;
void zeroOut() { *this = CVector2f::skZero; }
void splat(float xy) {
mSimd = zeus::simd<float>(xy);
void splat(float xy) { mSimd = zeus::simd<float>(xy); }
static float getAngleDiff(const CVector2f& a, const CVector2f& b);
@ -196,15 +154,11 @@ public:
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
static CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) {
return lerp(a, b, t).normalized();
static CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) { return lerp(a, b, t).normalized(); }
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()))
@ -212,9 +166,7 @@ public:
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
bool isZero() const {
return magSquared() <= FLT_EPSILON;
bool isZero() const { return magSquared() <= FLT_EPSILON; }
bool isEqu(const CVector2f& other, float epsilon = FLT_EPSILON) {
const CVector2f diffVec = other - *this;
@ -242,20 +194,11 @@ public:
static const CVector2f skZero;
static inline CVector2f operator+(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
static inline CVector2f operator+(float lhs, const CVector2f& rhs) { return zeus::simd<float>(lhs) + rhs.mSimd; }
static inline CVector2f operator-(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
static inline CVector2f operator-(float lhs, const CVector2f& rhs) { return zeus::simd<float>(lhs) - rhs.mSimd; }
static inline CVector2f operator*(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
static inline CVector2f operator/(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
static inline CVector2f operator*(float lhs, const CVector2f& rhs) { return zeus::simd<float>(lhs) * rhs.mSimd; }
static inline CVector2f operator/(float lhs, const CVector2f& rhs) { return zeus::simd<float>(lhs) / rhs.mSimd; }
} // namespace zeus
@ -29,33 +29,18 @@ public:
CVector2f toVec2f() const { return CVector2f(x, y); }
CVector2i operator+(const CVector2i& val) const {
return CVector2i(x + val.x, y + val.y);
CVector2i operator+(const CVector2i& val) const { return CVector2i(x + val.x, y + val.y); }
CVector2i operator-(const CVector2i& val) const {
return CVector2i(x - val.x, y - val.y);
CVector2i operator-(const CVector2i& val) const { return CVector2i(x - val.x, y - val.y); }
CVector2i operator*(const CVector2i& val) const {
return CVector2i(x * val.x, y * val.y);
CVector2i operator*(const CVector2i& val) const { return CVector2i(x * val.x, y * val.y); }
CVector2i operator/(const CVector2i& val) const {
return CVector2i(x / val.x, y / val.y);
CVector2i operator/(const CVector2i& val) const { return CVector2i(x / val.x, y / val.y); }
bool operator==(const CVector2i& other) const {
return x == other.x && y == other.y;
bool operator==(const CVector2i& other) const { return x == other.x && y == other.y; }
bool operator!=(const CVector2i& other) const {
return x != other.x || y != other.y;
bool operator!=(const CVector2i& other) const { return x != other.x || y != other.y; }
CVector2i operator*(int val) const {
return CVector2i(x * val, y * val);
CVector2i operator*(int val) const { return CVector2i(x * val, y * val); }
} // namespace zeus
@ -25,27 +25,17 @@ public:
CVector3d(double x, double y, double z) : mSimd(x, y, z) {}
CVector3f asCVector3f() {
return mSimd;
CVector3f asCVector3f() { return mSimd; }
double magSquared() const {
return mSimd.dot3(mSimd);
double magSquared() const { return mSimd.dot3(mSimd); }
double magnitude() const {
return sqrt(magSquared());
double magnitude() const { return sqrt(magSquared()); }
CVector3d cross(const CVector3d& rhs) const {
return {y() * rhs.z() - z() * rhs.y(),
z() * rhs.x() - x() * rhs.z(),
x() * rhs.y() - y() * rhs.x()};
return {y() * rhs.z() - z() * rhs.y(), z() * rhs.x() - x() * rhs.z(), x() * rhs.y() - y() * rhs.x()};
double dot(const CVector3d& rhs) const {
return mSimd.dot3(rhs.mSimd);
double dot(const CVector3d& rhs) const { return mSimd.dot3(rhs.mSimd); }
CVector3d asNormalized() {
double mag = magnitude();
@ -53,29 +43,17 @@ public:
return mSimd * zeus::simd<double>(mag);
void splat(double xyz) {
mSimd = zeus::simd<double>(xyz);
void splat(double xyz) { mSimd = zeus::simd<double>(xyz); }
void zeroOut() {
*this = skZero;
void zeroOut() { *this = skZero; }
CVector3d operator+(const CVector3d& rhs) const {
return mSimd + rhs.mSimd;
CVector3d operator+(const CVector3d& rhs) const { return mSimd + rhs.mSimd; }
CVector3d operator-(const CVector3d& rhs) const {
return mSimd - rhs.mSimd;
CVector3d operator-(const CVector3d& rhs) const { return mSimd - rhs.mSimd; }
CVector3d operator*(const CVector3d& rhs) const {
return mSimd * rhs.mSimd;
CVector3d operator*(const CVector3d& rhs) const { return mSimd * rhs.mSimd; }
CVector3d operator/(const CVector3d& rhs) const {
return mSimd / rhs.mSimd;
CVector3d operator/(const CVector3d& rhs) const { return mSimd / rhs.mSimd; }
zeus::simd<double>::reference operator[](size_t idx) {
assert(idx < 3);
@ -98,21 +76,12 @@ public:
static const CVector3d skZero;
static inline CVector3d operator+(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) + rhs.mSimd;
static inline CVector3d operator+(double lhs, const CVector3d& rhs) { return zeus::simd<double>(lhs) + rhs.mSimd; }
static inline CVector3d operator-(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) - rhs.mSimd;
static inline CVector3d operator-(double lhs, const CVector3d& rhs) { return zeus::simd<double>(lhs) - rhs.mSimd; }
static inline CVector3d operator*(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) * rhs.mSimd;
static inline CVector3d operator*(double lhs, const CVector3d& rhs) { return zeus::simd<double>(lhs) * rhs.mSimd; }
static inline CVector3d operator/(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) / rhs.mSimd;
static inline CVector3d operator/(double lhs, const CVector3d& rhs) { return zeus::simd<double>(lhs) / rhs.mSimd; }
} // namespace zeus
@ -23,13 +23,9 @@ public:
CVector3f(const atVec3f& vec) : mSimd(vec.simd) {}
operator atVec3f&() {
return *reinterpret_cast<atVec3f*>(this);
operator atVec3f&() { return *reinterpret_cast<atVec3f*>(this); }
operator const atVec3f&() const {
return *reinterpret_cast<const atVec3f*>(this);
operator const atVec3f&() const { return *reinterpret_cast<const atVec3f*>(this); }
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
@ -52,9 +48,7 @@ public:
// Constructs the SIMD storage from a single scalar; explicit so a float never converts silently.
explicit CVector3f(float xyz) : mSimd(xyz) {}
// Overwrites the vector with the three given components.
void assign(float x, float y, float z) {
mSimd = zeus::simd<float>(x, y, z);
void assign(float x, float y, float z) { mSimd = zeus::simd<float>(x, y, z); }
// Constructs the vector from three components.
CVector3f(float x, float y, float z) : mSimd(x, y, z) {}
@ -66,9 +60,7 @@ public:
mSimd[3] = 0.0f;
// Converts to a CVector2f by handing the SIMD storage to CVector2f's constructor.
CVector2f toVec2f() const {
return CVector2f(mSimd);
CVector2f toVec2f() const { return CVector2f(mSimd); }
// Exact equality on lanes 0..2 only; lane 3 of the storage is not compared.
bool operator==(const CVector3f& rhs) const {
return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1] && mSimd[2] == rhs.mSimd[2];
@ -76,37 +68,21 @@ public:
// Negation of operator==.
bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); }
// Vector + vector: adds the two SIMD storages.
CVector3f operator+(const CVector3f& rhs) const {
return mSimd + rhs.mSimd;
CVector3f operator+(const CVector3f& rhs) const { return mSimd + rhs.mSimd; }
// Vector - vector.
CVector3f operator-(const CVector3f& rhs) const {
return mSimd - rhs.mSimd;
CVector3f operator-(const CVector3f& rhs) const { return mSimd - rhs.mSimd; }
// Unary minus: negates the SIMD storage.
CVector3f operator-() const {
return -mSimd;
CVector3f operator-() const { return -mSimd; }
// Vector * vector: multiplies the two SIMD storages and returns a vector (see dot()/cross() for those products).
CVector3f operator*(const CVector3f& rhs) const {
return mSimd * rhs.mSimd;
CVector3f operator*(const CVector3f& rhs) const { return mSimd * rhs.mSimd; }
// Vector / vector; no zero check is performed here.
CVector3f operator/(const CVector3f& rhs) const {
return mSimd / rhs.mSimd;
CVector3f operator/(const CVector3f& rhs) const { return mSimd / rhs.mSimd; }
// Vector + scalar: the scalar is converted to a zeus::simd<float> first.
CVector3f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
CVector3f operator+(float val) const { return mSimd + zeus::simd<float>(val); }
// Vector - scalar.
CVector3f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
CVector3f operator-(float val) const { return mSimd - zeus::simd<float>(val); }
// Vector * scalar.
CVector3f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
CVector3f operator*(float val) const { return mSimd * zeus::simd<float>(val); }
CVector3f operator/(float val) const {
float ooval = 1.f / val;
@ -144,38 +120,22 @@ public:
// Cross product this x rhs, written out per component:
// (y*rz - z*ry, z*rx - x*rz, x*ry - y*rx).
CVector3f cross(const CVector3f& rhs) const {
return CVector3f(y() * rhs.z() - z() * rhs.y(),
z() * rhs.x() - x() * rhs.z(),
x() * rhs.y() - y() * rhs.x());
return CVector3f(y() * rhs.z() - z() * rhs.y(), z() * rhs.x() - x() * rhs.z(), x() * rhs.y() - y() * rhs.x());
// Dot product, delegated to the SIMD type's dot3().
float dot(const CVector3f& rhs) const {
return mSimd.dot3(rhs.mSimd);
float dot(const CVector3f& rhs) const { return mSimd.dot3(rhs.mSimd); }
// Squared length: dot3 of the storage with itself (no square root taken).
float magSquared() const {
return mSimd.dot3(mSimd);
float magSquared() const { return mSimd.dot3(mSimd); }
// Length: square root of magSquared().
float magnitude() const {
return std::sqrt(magSquared());
float magnitude() const { return std::sqrt(magSquared()); }
// True when none of x, y, z is infinite. NaN components are not rejected by this check.
bool isNotInf() const {
return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z()));
bool isNotInf() const { return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z())); }
// True when no component is infinite and the squared length is at least 9.9999994e-29.
// The threshold is a double literal, so the float result of magSquared() is promoted for the comparison.
bool isMagnitudeSafe() const {
return isNotInf() && magSquared() >= 9.9999994e-29;
bool isMagnitudeSafe() const { return isNotInf() && magSquared() >= 9.9999994e-29; }
// Resets this vector by assigning the shared constant CVector3f::skZero.
void zeroOut() {
*this = CVector3f::skZero;
void zeroOut() { *this = CVector3f::skZero; }
// Replaces the storage with a zeus::simd<float> constructed from the single scalar xyz.
void splat(float xyz) {
mSimd = zeus::simd<float>(xyz);
void splat(float xyz) { mSimd = zeus::simd<float>(xyz); }
// Declared here, defined elsewhere. Presumably the angle between a and b — units not visible here, confirm.
static float getAngleDiff(const CVector3f& a, const CVector3f& b);
@ -183,15 +143,11 @@ public:
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
// Normalized linear interpolation: lerp(a, b, t) followed by normalized().
static CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) {
return lerp(a, b, t).normalized();
static CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) { return lerp(a, b, t).normalized(); }
// Declared here, defined elsewhere (presumably spherical interpolation — confirm against the definition).
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
// True when the squared length is within 0.01 of 1 (the tolerance applies to the squared magnitude).
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()))
@ -199,9 +155,7 @@ public:
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON || std::fabs(z()) >= FLT_EPSILON;
// Treats the vector as zero when its squared length is at most FLT_EPSILON.
bool isZero() const {
return magSquared() <= FLT_EPSILON;
bool isZero() const { return magSquared() <= FLT_EPSILON; }
void scaleToLength(float newLength) {
float length = magSquared();
@ -261,21 +215,12 @@ public:
// Multiplies deg by the constant vector skDegToRadVec.
static CVector3f degToRad(const CVector3f& deg) { return deg * skDegToRadVec; }
// Scalar-on-the-left overloads for CVector3f: the scalar is converted to a zeus::simd<float> and combined
// with rhs.mSimd.
// lhs + rhs
static inline CVector3f operator+(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
static inline CVector3f operator+(float lhs, const CVector3f& rhs) { return zeus::simd<float>(lhs) + rhs.mSimd; }
// lhs - rhs (the scalar is the minuend)
static inline CVector3f operator-(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
static inline CVector3f operator-(float lhs, const CVector3f& rhs) { return zeus::simd<float>(lhs) - rhs.mSimd; }
// lhs * rhs
static inline CVector3f operator*(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
static inline CVector3f operator*(float lhs, const CVector3f& rhs) { return zeus::simd<float>(lhs) * rhs.mSimd; }
// lhs / rhs (the scalar is the dividend); no zero check is performed here.
static inline CVector3f operator/(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
static inline CVector3f operator/(float lhs, const CVector3f& rhs) { return zeus::simd<float>(lhs) / rhs.mSimd; }
} // namespace zeus
@ -29,13 +29,9 @@ public:
// Builds a CVector4f from an athena atVec4f by copying its `simd` member.
CVector4f(const atVec4f& vec) : mSimd(vec.simd) {}
// Views this vector as a mutable atVec4f without copying.
// NOTE(review): the reinterpret_cast presumes CVector4f and atVec4f have compatible layouts — confirm.
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
operator atVec4f&() { return *reinterpret_cast<atVec4f*>(this); }
// Const counterpart of the conversion above.
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
operator const atVec4f&() const { return *reinterpret_cast<const atVec4f*>(this); }
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
@ -50,25 +46,17 @@ public:
// Constructs the SIMD storage from a single scalar; explicit so a float never converts silently.
explicit CVector4f(float xyzw) : mSimd(xyzw) {}
// Overwrites the vector with the four given components.
void assign(float x, float y, float z, float w) {
mSimd = simd<float>(x, y, z, w);
void assign(float x, float y, float z, float w) { mSimd = simd<float>(x, y, z, w); }
// Constructs the vector from four components.
CVector4f(float x, float y, float z, float w) : mSimd(x, y, z, w) {}
// Conversion from CColor; declared here, defined elsewhere.
CVector4f(const CColor& other);
// Extends a CVector3f to four components: copies its storage, then sets lane 3 to wIn (default 1).
CVector4f(const CVector3f& other, float wIn = 1.f) : mSimd(other.mSimd) {
mSimd[3] = wIn;
CVector4f(const CVector3f& other, float wIn = 1.f) : mSimd(other.mSimd) { mSimd[3] = wIn; }
// Builds (v * w, w): the xyz part is scaled by w and w is stored in the fourth component.
static CVector4f ToClip(const zeus::CVector3f& v, float w) {
return CVector4f(v * w, w);
static CVector4f ToClip(const zeus::CVector3f& v, float w) { return CVector4f(v * w, w); }
// Converts to a CVector3f by handing the SIMD storage to CVector3f's constructor.
CVector3f toVec3f() const {
return CVector3f(mSimd);
CVector3f toVec3f() const { return CVector3f(mSimd); }
// Assignment from CColor; declared here, defined elsewhere.
CVector4f& operator=(const CColor& other);
@ -102,37 +90,21 @@ public:
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
// Vector + vector: adds the two SIMD storages.
CVector4f operator+(const CVector4f& rhs) const {
return mSimd + rhs.mSimd;
CVector4f operator+(const CVector4f& rhs) const { return mSimd + rhs.mSimd; }
// Vector - vector.
CVector4f operator-(const CVector4f& rhs) const {
return mSimd - rhs.mSimd;
CVector4f operator-(const CVector4f& rhs) const { return mSimd - rhs.mSimd; }
// Unary minus: negates the SIMD storage.
CVector4f operator-() const {
return -mSimd;
CVector4f operator-() const { return -mSimd; }
// Vector * vector: multiplies the two SIMD storages and returns a vector (see dot() for the scalar product).
CVector4f operator*(const CVector4f& rhs) const {
return mSimd * rhs.mSimd;
CVector4f operator*(const CVector4f& rhs) const { return mSimd * rhs.mSimd; }
// Vector / vector; no zero check is performed here.
CVector4f operator/(const CVector4f& rhs) const {
return mSimd / rhs.mSimd;
CVector4f operator/(const CVector4f& rhs) const { return mSimd / rhs.mSimd; }
// Vector + scalar: the scalar is converted to a zeus::simd<float> first.
CVector4f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
CVector4f operator+(float val) const { return mSimd + zeus::simd<float>(val); }
// Vector - scalar.
CVector4f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
CVector4f operator-(float val) const { return mSimd - zeus::simd<float>(val); }
// Vector * scalar.
CVector4f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
CVector4f operator*(float val) const { return mSimd * zeus::simd<float>(val); }
CVector4f operator/(float val) const {
float ooval = 1.f / val;
@ -171,49 +143,34 @@ public:
return *this * mag;
// Dot product, delegated to the SIMD type's dot4().
float dot(const CVector4f& rhs) const {
return mSimd.dot4(rhs.mSimd);
float dot(const CVector4f& rhs) const { return mSimd.dot4(rhs.mSimd); }
// Squared length: dot4 of the storage with itself (no square root taken).
float magSquared() const {
return mSimd.dot4(mSimd);
float magSquared() const { return mSimd.dot4(mSimd); }
// Length: square root of magSquared().
float magnitude() const {
return std::sqrt(magSquared());
float magnitude() const { return std::sqrt(magSquared()); }
// Resets this vector by assigning the shared constant CVector4f::skZero.
void zeroOut() {
*this = CVector4f::skZero;
void zeroOut() { *this = CVector4f::skZero; }
// Replaces the storage with a zeus::simd<float> constructed from the single scalar xyzw.
void splat(float xyzw) {
mSimd = zeus::simd<float>(xyzw);
void splat(float xyzw) { mSimd = zeus::simd<float>(xyzw); }
// Linear interpolation (1 - t) * a + t * b; t is not clamped.
static CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
// Normalized linear interpolation: lerp(a, b, t) followed by normalized().
static CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) {
return lerp(a, b, t).normalized();
static CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) { return lerp(a, b, t).normalized(); }
// True when the squared length is within 0.01 of 1 (the tolerance applies to the squared magnitude).
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
// Returns false if any component is infinite; otherwise true when at least one component has an absolute
// value of FLT_EPSILON or more. NaN components are not rejected by the isinf check.
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()) || std::isinf(w()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON ||
std::fabs(z()) >= FLT_EPSILON || std::fabs(w()) >= FLT_EPSILON;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON || std::fabs(z()) >= FLT_EPSILON ||
std::fabs(w()) >= FLT_EPSILON;
// Approximate equality: true when every component of `other` is within `epsilon` of the matching component
// of this vector.
//
// @param other   vector to compare against.
// @param epsilon maximum allowed absolute per-component difference (defaults to FLT_EPSILON).
// @return true when |other[i] - this[i]| <= epsilon for x, y, z and w.
//
// The absolute value matters: comparing the signed difference against epsilon would accept any `other` whose
// components are far *below* this vector's, because a large negative difference is still <= epsilon.
bool isEqu(const CVector4f& other, float epsilon = FLT_EPSILON) {
  const CVector4f diffVec = other - *this;
  return std::fabs(diffVec.x()) <= epsilon && std::fabs(diffVec.y()) <= epsilon &&
         std::fabs(diffVec.z()) <= epsilon && std::fabs(diffVec.w()) <= epsilon;
}
zeus::simd<float>::reference operator[](size_t idx) {
@ -241,21 +198,12 @@ public:
static const CVector4f skZero;
// Scalar-on-the-left overloads for CVector4f: the scalar is converted to a zeus::simd<float> and combined
// with rhs.mSimd.
// lhs + rhs
static CVector4f operator+(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
static CVector4f operator+(float lhs, const CVector4f& rhs) { return zeus::simd<float>(lhs) + rhs.mSimd; }
// lhs - rhs (the scalar is the minuend)
static CVector4f operator-(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
static CVector4f operator-(float lhs, const CVector4f& rhs) { return zeus::simd<float>(lhs) - rhs.mSimd; }
// lhs * rhs
static CVector4f operator*(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
static CVector4f operator*(float lhs, const CVector4f& rhs) { return zeus::simd<float>(lhs) * rhs.mSimd; }
// lhs / rhs (the scalar is the dividend); no zero check is performed here.
static CVector4f operator/(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
static CVector4f operator/(float lhs, const CVector4f& rhs) { return zeus::simd<float>(lhs) / rhs.mSimd; }
} // namespace zeus
@ -9,12 +9,12 @@
namespace zeus {
// Re-exports athena's SIMD vocabulary into namespace zeus so the math types can write zeus::simd<T>.
template<typename T> using simd = athena::simd<T>;
template <typename T>
using simd = athena::simd<T>;
// Aliases for athena's simd_floats / simd_doubles helper types.
using simd_floats = athena::simd_floats;
using simd_doubles = athena::simd_doubles;
} // namespace zeus
// Rotates the 32-bit pattern of x right by n bit positions and returns it as an int.
//
// The rotation is done on an unsigned copy so the vacated high bits are filled from the low bits; shifting the
// signed value directly sign-fills for negative x and yields the wrong pattern. The count is reduced modulo 32,
// which keeps n == 0 (and any multiple of 32) from shifting by the full width, which is undefined.
// Assumes int is 32 bits wide, as the original formula did.
inline int rotr(int x, int n) {
  const unsigned bits = static_cast<unsigned>(x);
  const unsigned count = static_cast<unsigned>(n) & 31u;
  return static_cast<int>((bits >> count) | (bits << ((32u - count) & 31u)));
}
// Rotates the 32-bit pattern of x left by n bit positions and returns it as an int.
//
// The rotation is done on an unsigned copy: right-shifting a negative signed value sign-fills, which would OR
// stray one-bits into the result, and left-shifting into the sign bit is not portable on signed types. The count
// is reduced modulo 32, which keeps n == 0 (and any multiple of 32) from shifting by the full width.
// Assumes int is 32 bits wide, as the original formula did.
inline int rotl(int x, int n) {
  const unsigned bits = static_cast<unsigned>(x);
  const unsigned count = static_cast<unsigned>(n) & 31u;
  return static_cast<int>((bits << count) | (bits >> ((32u - count) & 31u)));
}
@ -80,23 +80,23 @@ class CVector2f;
class CTransform;
// Generic minimum: returns a copy of a when a < b, otherwise a copy of b (so b is returned on ties).
template<typename T>
template <typename T>
inline constexpr T min(const T& a, const T& b) {
return a < b ? a : b;
// Generic maximum: returns a copy of a when a > b, otherwise a copy of b (so b is returned on ties).
template<typename T>
template <typename T>
inline constexpr T max(const T& a, const T& b) {
return a > b ? a : b;
// Explicit specializations of min/max for CVector3f; declared here, defined elsewhere.
// Presumably per-component rather than using operator< on the whole vector — confirm against the definition.
template <>
CVector3f min(const CVector3f& a, const CVector3f& b);
template <>
CVector3f max(const CVector3f& a, const CVector3f& b);
// Clamps val to [a, b]: takes min(b, val) first, then max(a, that).
// Note the argument order (lower bound, value, upper bound). When a > b the result is a.
template<typename T>
template <typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b) {
return max<T>(a, min<T>(b, val));
@ -115,8 +115,8 @@ CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f
// Catmull-Rom spline evaluators; declared here, defined elsewhere.
// Scalar overload: four control values a..d and parameter t.
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t);
// Vector overload: same parameters with CVector3f control points.
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t);
// "Round" variant taking the same control points; how it differs is not visible here.
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t);
@ -135,7 +135,7 @@ int floorPowerOfTwo(int x);
int ceilingPowerOfTwo(int x);
// Counts set bits in an integral (non-enum) value; enabled only when U is integral and not an enum.
// On GCC-compatible compilers (__GNUC__ >= 4) this forwards to __builtin_popcountll.
// NOTE(review): x is converted to unsigned long long for the builtin, so a negative value of a narrower signed
// type would be sign-extended and its extension bits counted too — confirm whether callers pass signed values.
template<typename U>
template <typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x) {
#if __GNUC__ >= 4
return __builtin_popcountll(x);
@ -152,12 +152,11 @@ typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, i
// Enum overload: casts e to its underlying integer type and forwards to the integral PopCount.
template<typename E>
template <typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) {
return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
// Approximate-equality tests for vectors; declared here, defined elsewhere.
// The default epsilon is 0.000099999997f; how it is applied is not visible here.
bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon = 0.000099999997f);
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f);
@ -169,5 +168,4 @@ inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378
// True when |a - b| is strictly less than epsilon (default 0.000009999999747378752).
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) {
return std::fabs(a - b) < epsilon;
} // namespace zeus
@ -688,21 +688,16 @@ class __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>> {
// Element accessors of the scalar (single-element) storage: __storage_ is addressed as a one-element array.
// __index is not range-checked; only index 0 refers to the stored value.
_Tp __get(size_t __index) const noexcept { return (&__storage_)[__index]; };
// Writes __val to the element at __index.
void __set(size_t __index, _Tp __val) noexcept {
(&__storage_)[__index] = __val;
void __set(size_t __index, _Tp __val) noexcept { (&__storage_)[__index] = __val; }
// Largest power of two not exceeding __val, computed recursively: `(__val - 1) & __val` clears the lowest set
// bit, and the recursion stops once at most one bit remains. An input of 0 returns 0.
constexpr size_t __floor_pow_of_2(size_t __val) {
return ((__val - 1) & __val) == 0 ? __val
: __floor_pow_of_2((__val - 1) & __val);
return ((__val - 1) & __val) == 0 ? __val : __floor_pow_of_2((__val - 1) & __val);
// Smallest power of two not less than __val: 1 maps to 1, otherwise twice the floor power of two of __val - 1.
// NOTE(review): __val == 0 makes __val - 1 wrap to the maximum size_t before the shift — confirm that 0 is
// never passed.
constexpr size_t __ceil_pow_of_2(size_t __val) {
return __val == 1 ? 1 : __floor_pow_of_2(__val - 1) << 1;
constexpr size_t __ceil_pow_of_2(size_t __val) { return __val == 1 ? 1 : __floor_pow_of_2(__val - 1) << 1; }
template <class _Tp, size_t __bytes>
struct __vec_ext_traits {
@ -715,8 +710,7 @@ struct __vec_ext_traits {
template <> \
struct __vec_ext_traits<_TYPE, sizeof(_TYPE) * _NUM_ELEMENT> { \
using type = \
_TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT))); \
using type = _TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT))); \
@ -777,8 +771,7 @@ _LIBCPP_SPECIALIZE_VEC_EXT_32(long double);
// Storage specialization for the _VecExt kind: the elements live in a single value whose type is looked up
// through __vec_ext_traits for _Tp and a byte size of sizeof(_Tp) * __num_element.
template <class _Tp, int __num_element>
class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> {
using _StorageType =
typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type;
using _StorageType = typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type;
// The stored value itself.
_StorageType __storage_;
@ -790,9 +783,7 @@ class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> {
// Element accessors of the vector-extension storage: subscript __storage_ directly; __index is not checked.
_Tp __get(size_t __index) const noexcept { return __storage_[__index]; };
// Writes __val to the element at __index.
void __set(size_t __index, _Tp __val) noexcept {
__storage_[__index] = __val;
void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; }
@ -810,8 +801,7 @@ class __simd_reference {
// A __simd_reference is a proxy for one element: it remembers the owning storage and the element's index.
__simd_storage<_Tp, _Abi>* __ptr_;
size_t __index_;
// Binds the proxy to element __index of *__ptr.
__simd_reference(__simd_storage<_Tp, _Abi>* __ptr, size_t __index)
: __ptr_(__ptr), __index_(__index) {}
__simd_reference(__simd_storage<_Tp, _Abi>* __ptr, size_t __index) : __ptr_(__ptr), __index_(__index) {}
// Copying a proxy copies the (storage, index) pair, not the element value.
__simd_reference(const __simd_reference&) = default;
@ -826,9 +816,7 @@ public:
return *this;
// Pre-increment, usable on rvalue proxies only (&&-qualified): reads the element, adds 1 and assigns the result
// back through the proxy's assignment operator.
__simd_reference operator++() && {
return std::move(*this) = __ptr_->__get(__index_) + 1;
__simd_reference operator++() && { return std::move(*this) = __ptr_->__get(__index_) + 1; }
_Vp operator++(int) && {
auto __val = __ptr_->__get(__index_);
@ -836,9 +824,7 @@ public:
return __val;
// Pre-decrement, usable on rvalue proxies only (&&-qualified): reads the element, subtracts 1 and assigns the
// result back through the proxy's assignment operator.
__simd_reference operator--() && {
return std::move(*this) = __ptr_->__get(__index_) - 1;
__simd_reference operator--() && { return std::move(*this) = __ptr_->__get(__index_) - 1; }
_Vp operator--(int) && {
auto __val = __ptr_->__get(__index_);
@ -846,69 +832,37 @@ public:
return __val;
// Compound assignments on the referenced element. Each one is &&-qualified, reads the current element with
// __get, applies the binary operator with __value and stores the result via the proxy's assignment operator.
// element += __value
__simd_reference operator+=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) + __value;
__simd_reference operator+=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) + __value; }
// element -= __value
__simd_reference operator-=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) - __value;
__simd_reference operator-=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) - __value; }
// element *= __value
__simd_reference operator*=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) * __value;
__simd_reference operator*=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) * __value; }
// element /= __value
__simd_reference operator/=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) / __value;
__simd_reference operator/=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) / __value; }
// element %= __value
__simd_reference operator%=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) % __value;
__simd_reference operator%=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) % __value; }
// element >>= __value
__simd_reference operator>>=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) >> __value;
__simd_reference operator>>=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) >> __value; }
// element <<= __value
__simd_reference operator<<=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) << __value;
__simd_reference operator<<=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) << __value; }
// element &= __value
__simd_reference operator&=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) & __value;
__simd_reference operator&=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) & __value; }
// element |= __value
__simd_reference operator|=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) | __value;
__simd_reference operator|=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) | __value; }
// element ^= __value
__simd_reference operator^=(_Vp __value) && {
return std::move(*this) = __ptr_->__get(__index_) ^ __value;
__simd_reference operator^=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) ^ __value; }
// Comparisons of the referenced element against a plain value. Each reads the element with __get and compares
// it to __value; none of them modifies the storage.
bool operator<(_Vp __value) const {
return __ptr_->__get(__index_) < __value;
bool operator<(_Vp __value) const { return __ptr_->__get(__index_) < __value; }
bool operator<=(_Vp __value) const {
return __ptr_->__get(__index_) <= __value;
bool operator<=(_Vp __value) const { return __ptr_->__get(__index_) <= __value; }
bool operator>(_Vp __value) const {
return __ptr_->__get(__index_) > __value;
bool operator>(_Vp __value) const { return __ptr_->__get(__index_) > __value; }
bool operator>=(_Vp __value) const {
return __ptr_->__get(__index_) >= __value;
bool operator>=(_Vp __value) const { return __ptr_->__get(__index_) >= __value; }
bool operator==(_Vp __value) const {
return __ptr_->__get(__index_) == __value;
bool operator==(_Vp __value) const { return __ptr_->__get(__index_) == __value; }
bool operator!=(_Vp __value) const {
return __ptr_->__get(__index_) != __value;
bool operator!=(_Vp __value) const { return __ptr_->__get(__index_) != __value; }
template <class _Tp, class _Abi>
@ -922,8 +876,7 @@ class __simd_mask_reference {
// A __simd_mask_reference is a proxy for one mask element: it remembers the owning mask storage and the index.
__simd_mask_storage<_Tp, _Abi>* __ptr_;
size_t __index_;
// Binds the proxy to element __index of *__ptr.
__simd_mask_reference(__simd_mask_storage<_Tp, _Abi>* __ptr, size_t __index)
: __ptr_(__ptr), __index_(__index) {}
__simd_mask_reference(__simd_mask_storage<_Tp, _Abi>* __ptr, size_t __index) : __ptr_(__ptr), __index_(__index) {}
// Copying a proxy copies the (storage, index) pair, not the element value.
__simd_mask_reference(const __simd_mask_reference&) = default;
@ -940,8 +893,7 @@ public:
// Overload selected when `_To{std::declval<_From>()}` is well-formed. Brace-initialization rejects narrowing
// conversions, so this overload drops out of overload resolution when _From -> _To would narrow.
// The return type is the type of `true`, i.e. bool, and the function returns true.
template <class _To, class _From>
constexpr decltype(_To{std::declval<_From>()}, true)
__is_non_narrowing_convertible_impl(_From) {
constexpr decltype(_To{std::declval<_From>()}, true) __is_non_narrowing_convertible_impl(_From) {
return true;
@ -951,17 +903,13 @@ constexpr bool __is_non_narrowing_convertible_impl(...) {
// Compile-time test "can _From be converted to _To without narrowing?", split into two overloads by enable_if.
// Both types arithmetic: defer to __is_non_narrowing_convertible_impl with a value-initialized _From.
template <class _From, class _To>
constexpr typename std::enable_if<std::is_arithmetic<_To>::value &&
constexpr typename std::enable_if<std::is_arithmetic<_To>::value && std::is_arithmetic<_From>::value, bool>::type
__is_non_narrowing_arithmetic_convertible() {
return __is_non_narrowing_convertible_impl<_To>(_From{});
// At least one type is not arithmetic: the answer is always false.
template <class _From, class _To>
constexpr typename std::enable_if<!(std::is_arithmetic<_To>::value &&
constexpr typename std::enable_if<!(std::is_arithmetic<_To>::value && std::is_arithmetic<_From>::value), bool>::type
__is_non_narrowing_arithmetic_convertible() {
return false;
@ -983,11 +931,11 @@ struct __nodeduce {
// True for element types usable in a simd: arithmetic, not const, not volatile, and not bool.
template <class _Tp>
constexpr bool __vectorizable() {
return std::is_arithmetic<_Tp>::value && !std::is_const<_Tp>::value &&
!std::is_volatile<_Tp>::value && !std::is_same<_Tp, bool>::value;
return std::is_arithmetic<_Tp>::value && !std::is_const<_Tp>::value && !std::is_volatile<_Tp>::value &&
!std::is_same<_Tp, bool>::value;
} // namespace zeus::_simd
namespace zeus::_simd::simd_abi {
// ABI tag for a simd holding exactly one element in scalar storage.
using scalar = __simd_abi<_StorageKind::_Scalar, 1>;
@ -1006,11 +954,10 @@ template <class _Tp>
// `native` ABI tag: sized so that the elements fill 16 bytes (16 / sizeof(_Tp) elements).
// Two definitions are visible here, one backed by _VecExt storage and one by fixed_size; the preprocessor
// condition that presumably selects between them lies outside this excerpt.
using native = __simd_abi<_StorageKind::_VecExt, 16 / sizeof(_Tp)>;
template <class _Tp>
using native =
fixed_size<_Tp, 16 / sizeof(_Tp)>;
using native = fixed_size<_Tp, 16 / sizeof(_Tp)>;
} // namespace zeus::_simd::simd_abi
namespace zeus::_simd {
template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
@ -1032,8 +979,7 @@ template <class _Tp>
// is_abi_tag<T>: false in general ...
struct is_abi_tag : std::integral_constant<bool, false> {};
// ... and true for every __simd_abi<kind, N> instantiation.
template <_StorageKind __kind, int _Np>
struct is_abi_tag<__simd_abi<__kind, _Np>>
: std::integral_constant<bool, true> {};
struct is_abi_tag<__simd_abi<__kind, _Np>> : std::integral_constant<bool, true> {};
// is_simd<T>: primary template, false unless specialized.
template <class _Tp>
struct is_simd : std::integral_constant<bool, false> {};
@ -1045,23 +991,19 @@ template <class _Tp>
// is_simd_mask<T>: false in general ...
struct is_simd_mask : std::integral_constant<bool, false> {};
// ... and true for every simd_mask<T, Abi>.
template <class _Tp, class _Abi>
struct is_simd_mask<simd_mask<_Tp, _Abi>> : std::integral_constant<bool, true> {
struct is_simd_mask<simd_mask<_Tp, _Abi>> : std::integral_constant<bool, true> {};
// is_simd_flag_type<T>: false in general; true only for the three alignment tags specialized below.
template <class _Tp>
struct is_simd_flag_type : std::integral_constant<bool, false> {};
// element_aligned_tag
template <>
struct is_simd_flag_type<element_aligned_tag>
: std::integral_constant<bool, true> {};
struct is_simd_flag_type<element_aligned_tag> : std::integral_constant<bool, true> {};
// vector_aligned_tag
template <>
struct is_simd_flag_type<vector_aligned_tag>
: std::integral_constant<bool, true> {};
struct is_simd_flag_type<vector_aligned_tag> : std::integral_constant<bool, true> {};
// overaligned_tag<N> for any N
template <size_t _Align>
struct is_simd_flag_type<overaligned_tag<_Align>>
: std::integral_constant<bool, true> {};
struct is_simd_flag_type<overaligned_tag<_Align>> : std::integral_constant<bool, true> {};
// Variable-template shorthand for is_abi_tag<T>::value.
template <class _Tp>
inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
@ -1070,8 +1012,7 @@ inline constexpr bool is_simd_v = is_simd<_Tp>::value;
// Variable-template shorthand for is_simd_mask<T>::value.
template <class _Tp>
inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
// Variable-template shorthand for is_simd_flag_type<T>::value.
template <class _Tp>
inline constexpr bool is_simd_flag_type_v =
inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
template <class _Tp, size_t _Np>
struct abi_for_size {
using type = simd_abi::fixed_size<_Np>;
@ -1083,11 +1024,8 @@ template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
// simd_size<T, Abi>: number of elements of a simd with the given element type and ABI tag.
struct simd_size;
// For __simd_abi<kind, N> the size is N. The static_assert rejects non-arithmetic element types and
// (possibly const-qualified) bool.
template <class _Tp, _StorageKind __kind, int _Np>
struct simd_size<_Tp, __simd_abi<__kind, _Np>>
: std::integral_constant<size_t, _Np> {
std::is_arithmetic<_Tp>::value &&
!std::is_same<typename std::remove_const<_Tp>::type, bool>::value,
struct simd_size<_Tp, __simd_abi<__kind, _Np>> : std::integral_constant<size_t, _Np> {
static_assert(std::is_arithmetic<_Tp>::value && !std::is_same<typename std::remove_const<_Tp>::type, bool>::value,
"Element type should be vectorizable");
@ -1099,8 +1037,7 @@ template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
// Variable-template shorthand for simd_size<T, Abi>::value.
inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
// Variable-template shorthand for memory_alignment<T, U>::value; U defaults to T's value_type.
template <class _Tp, class _Up = typename _Tp::value_type>
inline constexpr size_t memory_alignment_v =
memory_alignment<_Tp, _Up>::value;
inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
// class template simd [simd.class]
template <class _Tp>
@ -1125,50 +1062,41 @@ struct __static_simd_cast_traits {
// static_simd_cast to an explicit simd<T, NewAbi> target: __apply only takes part in overload resolution when
// the source and target simd have the same number of elements. Declared here, defined elsewhere.
template <class _Tp, class _NewAbi>
struct __static_simd_cast_traits<simd<_Tp, _NewAbi>> {
template <class _Up, class _Abi>
static typename std::enable_if<simd<_Up, _Abi>::size() ==
simd<_Tp, _NewAbi>::size(),
simd<_Tp, _NewAbi>>::type
static typename std::enable_if<simd<_Up, _Abi>::size() == simd<_Tp, _NewAbi>::size(), simd<_Tp, _NewAbi>>::type
__apply(const simd<_Up, _Abi>& __v);
template <class _Tp>
// simd_cast to a plain element type T: keeps the source ABI and is only enabled when the element conversion
// U -> T is non-narrowing. Declared here, defined elsewhere.
struct __simd_cast_traits {
template <class _Up, class _Abi>
static typename std::enable_if<
__is_non_narrowing_arithmetic_convertible<_Up, _Tp>(),
simd<_Tp, _Abi>>::type
static typename std::enable_if<__is_non_narrowing_arithmetic_convertible<_Up, _Tp>(), simd<_Tp, _Abi>>::type
__apply(const simd<_Up, _Abi>& __v);
// simd_cast to an explicit simd<T, NewAbi> target: enabled only when the element conversion is non-narrowing
// and the source and target have the same number of elements. Declared here, defined elsewhere.
template <class _Tp, class _NewAbi>
struct __simd_cast_traits<simd<_Tp, _NewAbi>> {
template <class _Up, class _Abi>
static typename std::enable_if<
__is_non_narrowing_arithmetic_convertible<_Up, _Tp>() &&
static typename std::enable_if<__is_non_narrowing_arithmetic_convertible<_Up, _Tp>() &&
simd<_Up, _Abi>::size() == simd<_Tp, _NewAbi>::size(),
simd<_Tp, _NewAbi>>::type
__apply(const simd<_Up, _Abi>& __v);
// Value-preserving cast: forwards to __simd_cast_traits<T>::__apply; the trailing return type takes this
// function out of overload resolution when no matching __apply exists.
template <class _Tp, class _Up, class _Abi>
auto simd_cast(const simd<_Up, _Abi>& __v)
-> decltype(__simd_cast_traits<_Tp>::__apply(__v)) {
auto simd_cast(const simd<_Up, _Abi>& __v) -> decltype(__simd_cast_traits<_Tp>::__apply(__v)) {
return __simd_cast_traits<_Tp>::__apply(__v);
// Forwards to __static_simd_cast_traits<T>::__apply; the trailing return type takes this function out of
// overload resolution when no matching __apply exists.
template <class _Tp, class _Up, class _Abi>
auto static_simd_cast(const simd<_Up, _Abi>& __v)
-> decltype(__static_simd_cast_traits<_Tp>::__apply(__v)) {
auto static_simd_cast(const simd<_Up, _Abi>& __v) -> decltype(__static_simd_cast_traits<_Tp>::__apply(__v)) {
return __static_simd_cast_traits<_Tp>::__apply(__v);
// ABI conversion declarations (no definitions in this excerpt).
// simd -> fixed_size_simd with the same element count.
template <class _Tp, class _Abi>
fixed_size_simd<_Tp, simd_size<_Tp, _Abi>::value>
to_fixed_size(const simd<_Tp, _Abi>&) noexcept;
fixed_size_simd<_Tp, simd_size<_Tp, _Abi>::value> to_fixed_size(const simd<_Tp, _Abi>&) noexcept;
// simd_mask -> fixed_size_simd_mask with the same element count.
template <class _Tp, class _Abi>
fixed_size_simd_mask<_Tp, simd_size<_Tp, _Abi>::value>
to_fixed_size(const simd_mask<_Tp, _Abi>&) noexcept;
fixed_size_simd_mask<_Tp, simd_size<_Tp, _Abi>::value> to_fixed_size(const simd_mask<_Tp, _Abi>&) noexcept;
// fixed_size_simd -> native_simd.
template <class _Tp, size_t _Np>
native_simd<_Tp> to_native(const fixed_size_simd<_Tp, _Np>&) noexcept;
@ -1186,26 +1114,21 @@ template <size_t... __sizes, class _Tp, class _Abi>
// split / concat declarations (no definitions in this excerpt).
// split<sizes...>(simd): a tuple holding one simd per requested size.
tuple<simd<_Tp, abi_for_size_t<_Tp, __sizes>>...> split(const simd<_Tp, _Abi>&);
// split<sizes...>(simd_mask): the mask counterpart.
template <size_t... __sizes, class _Tp, class _Abi>
tuple<simd_mask<_Tp, abi_for_size_t<_Tp, __sizes>>...>
split(const simd_mask<_Tp, _Abi>&);
tuple<simd_mask<_Tp, abi_for_size_t<_Tp, __sizes>>...> split(const simd_mask<_Tp, _Abi>&);
// split<SimdType>(simd): an array of (input size / SimdType::size()) values of _SimdType.
template <class _SimdType, class _Abi>
array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value /
array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value / _SimdType::size()>
split(const simd<typename _SimdType::value_type, _Abi>&);
// split<SimdType>(simd_mask): same array shape for a mask argument.
template <class _SimdType, class _Abi>
array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value /
array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value / _SimdType::size()>
split(const simd_mask<typename _SimdType::value_type, _Abi>&);
// concat(simd...): one simd whose size is the sum of the argument sizes.
template <class _Tp, class... _Abis>
simd<_Tp, abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>>
concat(const simd<_Tp, _Abis>&...);
simd<_Tp, abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>> concat(const simd<_Tp, _Abis>&...);
// concat(simd_mask...): one simd_mask whose size is the sum of the argument sizes.
template <class _Tp, class... _Abis>
abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>>
simd_mask<_Tp, abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>>
concat(const simd_mask<_Tp, _Abis>&...);
// reductions [simd.mask.reductions]
@ -1239,23 +1162,20 @@ class where_expression;
// masked assignment [simd.mask.where]
// where(mask, value) declarations: each returns a (const_)where_expression pairing the mask with the value.
// No definitions appear in this excerpt.
// Mutable simd.
template <class _Tp, class _Abi>
where_expression<simd_mask<_Tp, _Abi>, simd<_Tp, _Abi>>
where(const typename simd<_Tp, _Abi>::mask_type&, simd<_Tp, _Abi>&) noexcept;
where_expression<simd_mask<_Tp, _Abi>, simd<_Tp, _Abi>> where(const typename simd<_Tp, _Abi>::mask_type&,
simd<_Tp, _Abi>&) noexcept;
// Const simd.
template <class _Tp, class _Abi>
const_where_expression<simd_mask<_Tp, _Abi>, const simd<_Tp, _Abi>>
where(const typename simd<_Tp, _Abi>::mask_type&,
const_where_expression<simd_mask<_Tp, _Abi>, const simd<_Tp, _Abi>> where(const typename simd<_Tp, _Abi>::mask_type&,
const simd<_Tp, _Abi>&) noexcept;
// Mutable simd_mask; the selector's type goes through __nodeduce so it does not take part in deduction.
template <class _Tp, class _Abi>
where_expression<simd_mask<_Tp, _Abi>, simd_mask<_Tp, _Abi>>
where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&,
simd_mask<_Tp, _Abi>&) noexcept;
where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&, simd_mask<_Tp, _Abi>&) noexcept;
// Const simd_mask.
template <class _Tp, class _Abi>
const_where_expression<simd_mask<_Tp, _Abi>, const simd_mask<_Tp, _Abi>>
where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&,
const simd_mask<_Tp, _Abi>&) noexcept;
where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&, const simd_mask<_Tp, _Abi>&) noexcept;
// Scalar form: a single bool selects a single value.
template <class _Tp>
where_expression<bool, _Tp> where(bool, _Tp&) noexcept;
@ -1268,45 +1188,37 @@ template <class _Tp, class _Abi, class _BinaryOp = std::plus<_Tp>>
// Reduction declarations (no definitions in this excerpt).
// reduce(simd, op): the binary operation defaults to std::plus<_Tp>.
_Tp reduce(const simd<_Tp, _Abi>&, _BinaryOp = _BinaryOp());
// Masked reduce with an explicit neutral element and binary operation.
template <class _MaskType, class _SimdType, class _BinaryOp>
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type neutral_element, _BinaryOp binary_op);
// Masked reduce overloads for the standard function objects (plus is the default; multiplies, bit_and, bit_or
// and bit_xor must be passed explicitly).
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&,
plus<typename _SimdType::value_type> binary_op = {});
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&,
multiplies<typename _SimdType::value_type> binary_op);
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&,
bit_and<typename _SimdType::value_type> binary_op);
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&,
bit_or<typename _SimdType::value_type> binary_op);
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&,
bit_xor<typename _SimdType::value_type> binary_op);
// hmin / hmax over a whole simd and over a masked selection; each returns a single value_type.
template <class _Tp, class _Abi>
_Tp hmin(const simd<_Tp, _Abi>&);
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
hmin(const const_where_expression<_MaskType, _SimdType>&);
typename _SimdType::value_type hmin(const const_where_expression<_MaskType, _SimdType>&);
template <class _Tp, class _Abi>
_Tp hmax(const simd<_Tp, _Abi>&);
template <class _MaskType, class _SimdType>
typename _SimdType::value_type
hmax(const const_where_expression<_MaskType, _SimdType>&);
typename _SimdType::value_type hmax(const const_where_expression<_MaskType, _SimdType>&);
// algorithms [simd.alg]
template <class _Tp, class _Abi>
@ -1316,12 +1228,10 @@ template <class _Tp, class _Abi>
// Algorithm declarations on whole simd values (no definitions in this excerpt).
// max of two simd values, returned as a simd.
simd<_Tp, _Abi> max(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;
// minmax returns a pair of simd values.
template <class _Tp, class _Abi>
std::pair<simd<_Tp, _Abi>, simd<_Tp, _Abi>>
minmax(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;
std::pair<simd<_Tp, _Abi>, simd<_Tp, _Abi>> minmax(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;
// clamp takes three simd values; the parameters are unnamed, so their roles are not visible here.
template <class _Tp, class _Abi>
simd<_Tp, _Abi> clamp(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&,
const simd<_Tp, _Abi>&);
simd<_Tp, _Abi> clamp(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&);
// [simd.whereexpr]
// TODO implement where expressions.
@ -1387,6 +1297,7 @@ public:
auto end() { return std::end(__data_); }
auto begin() const { return std::begin(__data_); }
auto end() const { return std::end(__data_); }
value_type __data_[_Simd::size()];
@ -1396,6 +1307,7 @@ template <class _Tp, class _Abi>
class simd {
template <class _Up, class _UAbi>
friend class simd;
using value_type = _Tp;
using reference = __simd_reference<_Tp, _Tp, _Abi>;
@ -1406,34 +1318,25 @@ public:
simd(const simd&) = default;
simd& operator=(const simd&) = default;
static constexpr size_t size() noexcept {
return simd_size<_Tp, _Abi>::value;
static constexpr size_t size() noexcept { return simd_size<_Tp, _Abi>::value; }
__simd_storage<_Tp, _Abi> __s_;
template <class _Up>
static constexpr bool __can_broadcast() {
return (std::is_arithmetic<_Up>::value &&
__is_non_narrowing_arithmetic_convertible<_Up, _Tp>()) ||
(!std::is_arithmetic<_Up>::value &&
std::is_convertible<_Up, _Tp>::value) ||
return (std::is_arithmetic<_Up>::value && __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()) ||
(!std::is_arithmetic<_Up>::value && std::is_convertible<_Up, _Tp>::value) ||
std::is_same<typename std::remove_const<_Up>::type, int>::value ||
(std::is_same<typename std::remove_const<_Up>::type,
unsigned int>::value &&
(std::is_same<typename std::remove_const<_Up>::type, unsigned int>::value && std::is_unsigned<_Tp>::value);
template <class _Generator, size_t... __indicies>
static constexpr decltype(
std::integral_constant<size_t, __indicies>())...),
std::forward_as_tuple(std::declval<_Generator>()(std::integral_constant<size_t, __indicies>())...), bool())
__can_generate(std::index_sequence<__indicies...>) {
return !__variadic_sum<bool>(
std::integral_constant<size_t, __indicies>()))>()...);
!__can_broadcast<decltype(std::declval<_Generator>()(std::integral_constant<size_t, __indicies>()))>()...);
template <class _Generator>
@ -1443,9 +1346,7 @@ private:
template <class _Generator, size_t... __indicies>
void __generator_init(_Generator&& __g, std::index_sequence<__indicies...>) {
int __not_used[]{((*this)[__indicies] =
__g(std::integral_constant<size_t, __indicies>()),
int __not_used[]{((*this)[__indicies] = __g(std::integral_constant<size_t, __indicies>()), 0)...};
@ -1466,8 +1367,8 @@ public:
// implicit type conversion constructor
template <class _Up, class _UAbi,
class = typename std::enable_if<std::is_constructible<
__simd_storage<_Tp, _Abi>, __simd_storage<_Up, _UAbi>>::value>>
class = typename std::enable_if<
std::is_constructible<__simd_storage<_Tp, _Abi>, __simd_storage<_Up, _UAbi>>::value>>
simd(const simd<_Up, _UAbi>& __v) : __s_(__v.__s_) {}
#if 0
@ -1489,28 +1390,19 @@ public:
simd(_Tp __rv) {
simd(_Tp __rv) { __s_.__broadcast(__rv); }
simd(_Tp a, _Tp b, _Tp c = {}, _Tp d = {}) {
__s_.__set4(a, b, c, d);
simd(_Tp a, _Tp b, _Tp c = {}, _Tp d = {}) { __s_.__set4(a, b, c, d); }
// generator constructor
template <class _Generator,
int = typename std::enable_if<
int = typename std::enable_if<__can_generate<_Generator>(std::make_index_sequence<size()>()), int>::type()>
explicit simd(_Generator&& __g) {
__generator_init(std::forward<_Generator>(__g), std::make_index_sequence<size()>());
// load constructor
template <
class _Up, class _Flags,
class = typename std::enable_if<__vectorizable<_Up>()>::type,
template <class _Up, class _Flags, class = typename std::enable_if<__vectorizable<_Up>()>::type,
class = typename std::enable_if<is_simd_flag_type<_Flags>::value>::type>
simd(const _Up* __buffer, _Flags) {
// TODO: optimize for overaligned flags
@ -1529,9 +1421,7 @@ public:
// loads [simd.load]
void copy_from(const simd_data<simd>& __buffer) {
void copy_from(const simd_data<simd>& __buffer) { __s_.__copy_from(__buffer); }
#if 0
// stores [simd.store]
@ -1546,9 +1436,7 @@ public:
// stores [simd.store]
void copy_to(simd_data<simd>& __buffer) const {
void copy_to(simd_data<simd>& __buffer) const { __s_.__copy_to(__buffer); }
// scalar access [simd.subscr]
reference operator[](size_t __i) { return reference(&__s_, __i); }
@ -1606,7 +1494,7 @@ public:
value_type dot3(const simd& other) const { return __s_.__dot3(other.__s_); }
value_type dot4(const simd& other) const { return __s_.__dot4(other.__s_); }
template<int x, int y, int z, int w>
template <int x, int y, int z, int w>
simd shuffle() const {
simd s;
s.__s_ = __s_.template __shuffle<x, y, z, w>();
@ -1624,9 +1512,7 @@ public:
using reference = __simd_mask_reference<_Tp, _Abi>;
using simd_type = simd<_Tp, _Abi>;
using abi_type = _Abi;
static constexpr size_t size() noexcept {
return simd_size<_Tp, _Abi>::value;
static constexpr size_t size() noexcept { return simd_size<_Tp, _Abi>::value; }
simd_mask() = default;
// broadcast constructor
@ -1683,12 +1569,15 @@ public:
template <class _Simd>
inline simd_data<_Simd>::simd_data(const _Simd& s) { s.copy_to(*this); }
inline simd_data<_Simd>::simd_data(const _Simd& s) {
template <class _Tp, int __num_element>
class __simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> {
using storage_type = std::array<_Tp, __num_element>;
storage_type __storage_;
@ -1700,34 +1589,26 @@ private:
// Returns a copy of the lane stored at __index. The index is forwarded straight to
// the underlying std::array subscript, so no bounds check is performed here.
_Tp __get(size_t __index) const noexcept { return __storage_[__index]; }
void __set(size_t __index, _Tp __val) noexcept {
__storage_[__index] = __val;
void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; }
std::enable_if_t<__num_element >= 4> __set4(float a, float b, float c, float d) noexcept {
__storage_[0] = a;
__storage_[1] = b;
__storage_[2] = c;
__storage_[3] = d;
void __broadcast(float __val) noexcept {
std::fill(__storage_.begin(), __storage_.end(), __val);
void __broadcast(float __val) noexcept { std::fill(__storage_.begin(), __storage_.end(), __val); }
std::enable_if_t<__num_element >= 2, _Tp> __dot2(const __simd_storage& other) const noexcept {
return __storage_[0] * other.__storage_[0] +
__storage_[1] * other.__storage_[1];
return __storage_[0] * other.__storage_[0] + __storage_[1] * other.__storage_[1];
std::enable_if_t<__num_element >= 3, _Tp> __dot3(const __simd_storage& other) const noexcept {
return __storage_[0] * other.__storage_[0] +
__storage_[1] * other.__storage_[1] +
return __storage_[0] * other.__storage_[0] + __storage_[1] * other.__storage_[1] +
__storage_[2] * other.__storage_[2];
std::enable_if_t<__num_element >= 4, _Tp> __dot4(const __simd_storage& other) const noexcept {
return __storage_[0] * other.__storage_[0] +
__storage_[1] * other.__storage_[1] +
__storage_[2] * other.__storage_[2] +
__storage_[3] * other.__storage_[3];
return __storage_[0] * other.__storage_[0] + __storage_[1] * other.__storage_[1] +
__storage_[2] * other.__storage_[2] + __storage_[3] * other.__storage_[3];
template<int x, int y, int z, int w>
template <int x, int y, int z, int w>
std::enable_if_t<__num_element >= 4, __simd_storage> __shuffle() const noexcept {
__simd_storage s;
s.__storage_[0] = __storage_[x];
@ -1756,13 +1637,10 @@ public:
template <class _Tp, int __num_element>
class __simd_mask_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> {
std::bitset<__num_element> __storage_;
bool __get(size_t __index) const noexcept {
return __storage_.test(__index);
void __set(size_t __index, bool __val) noexcept {
__storage_.set(__index, __val);
bool __get(size_t __index) const noexcept { return __storage_.test(__index); }
void __set(size_t __index, bool __val) noexcept { __storage_.set(__index, __val); }
} // namespace zeus::_simd
@ -1,6 +1,8 @@
#pragma once
namespace zeus::_simd { using namespace std; }
namespace zeus::_simd {
using namespace std;
#include "parallelism_v2_simd.hpp"
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
@ -11,16 +13,23 @@ namespace zeus::_simd { using namespace std; }
#include "simd_sse.hpp"
namespace simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = fixed_size<4>; };
template<> struct zeus_native<double> { using type = fixed_size<4>; };
template <typename T>
struct zeus_native {};
template <>
struct zeus_native<float> {
using type = fixed_size<4>;
template <>
struct zeus_native<double> {
using type = fixed_size<4>;
} // namespace simd_abi
namespace zeus {
template<typename T> using simd = _simd::simd<T,
typename _simd::simd_abi::zeus_native<T>::type>;
template<typename T>
template <typename T>
using simd = _simd::simd<T, typename _simd::simd_abi::zeus_native<T>::type>;
template <typename T>
using simd_values = _simd::simd_data<simd<T>>;
using simd_floats = simd_values<float>;
using simd_doubles = simd_values<double>;
} // namespace zeus
@ -6,7 +6,7 @@
#include <immintrin.h>
namespace zeus::_simd {
// __m256d storage for AVX
template <>
class __simd_storage<double, m256d_abi> {
using storage_type = __m256d;
@ -22,12 +22,8 @@ public:
sse_data[__index] = __val;
__storage_ = _mm256_load_pd(sse_data.data());
void __set4(double a, double b, double c, double d) noexcept {
__storage_ = _mm256_set_pd(d, c, b, a);
void __broadcast(double __val) noexcept {
__storage_ = _mm256_set1_pd(__val);
void __set4(double a, double b, double c, double d) noexcept { __storage_ = _mm256_set_pd(d, c, b, a); }
void __broadcast(double __val) noexcept { __storage_ = _mm256_set1_pd(__val); }
double __dot2(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
@ -61,7 +57,7 @@ public:
const storage_type& __native() const { return __storage_; }
// __m256d mask storage for AVX
template <>
class __simd_mask_storage<double, m256d_abi> : public __simd_storage<double, m256d_abi> {
bool __get(size_t __index) const noexcept {
@ -83,95 +79,87 @@ inline simd<double, m256d_abi> simd<double, m256d_abi>::operator-() const {
return _mm256_xor_pd(__s_.__storage_, _mm256_set1_pd(-0.0));
inline simd<double, m256d_abi>
operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi> operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<double, m256d_abi>
operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi> operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<double, m256d_abi>
operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi> operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<double, m256d_abi>
operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi> operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<double, m256d_abi>&
operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>& operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<double, m256d_abi>&
operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>& operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<double, m256d_abi>&
operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>& operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<double, m256d_abi>&
operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>& operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<double, m256d_abi>::mask_type
operator==(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>::mask_type operator==(const simd<double, m256d_abi>& a,
const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_EQ_OQ);
return ret;
inline simd<double, m256d_abi>::mask_type
operator!=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
// Lane-wise inequality for the AVX double vector; the comparison result is written
// directly into the mask's storage.
// The unordered predicate (_CMP_NEQ_UQ) is used so that a lane holding NaN compares
// as "not equal", matching scalar `!=`. The ordered form (_CMP_NEQ_OQ) would report
// false for such lanes.
inline simd<double, m256d_abi>::mask_type operator!=(const simd<double, m256d_abi>& a,
                                                     const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type ret;
  ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_NEQ_UQ);
  return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>::mask_type operator>=(const simd<double, m256d_abi>& a,
const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GE_OQ);
return ret;
inline simd<double, m256d_abi>::mask_type
operator<=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>::mask_type operator<=(const simd<double, m256d_abi>& a,
const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LE_OQ);
return ret;
inline simd<double, m256d_abi>::mask_type
operator>(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>::mask_type operator>(const simd<double, m256d_abi>& a,
const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GT_OQ);
return ret;
inline simd<double, m256d_abi>::mask_type
operator<(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
inline simd<double, m256d_abi>::mask_type operator<(const simd<double, m256d_abi>& a,
const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LT_OQ);
return ret;
@ -182,7 +170,10 @@ inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<doub
namespace simd_abi {
template<> struct zeus_native<double> { using type = m256d_abi; };
template <>
struct zeus_native<double> {
using type = m256d_abi;
} // namespace simd_abi
} // namespace zeus::_simd
@ -40,12 +40,8 @@ public:
sse_data[__index] = __val;
__storage_ = _mm_load_ps(sse_data.data());
void __set4(float a, float b, float c, float d) noexcept {
__storage_ = _mm_set_ps(d, c, b, a);
void __broadcast(float __val) noexcept {
__storage_ = _mm_set1_ps(__val);
void __set4(float a, float b, float c, float d) noexcept { __storage_ = _mm_set_ps(d, c, b, a); }
void __broadcast(float __val) noexcept { __storage_ = _mm_set1_ps(__val); }
float __dot2(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
@ -79,7 +75,7 @@ public:
return sse_data[0] + sse_data[1] + sse_data[2] + sse_data[3];
template<int x, int y, int z, int w>
template <int x, int y, int z, int w>
__simd_storage __shuffle() const noexcept {
__simd_storage s;
s.__storage_ = _mm_shuffle_ps(__storage_, __storage_, _MM_SHUFFLE(w, z, y, x));
@ -105,8 +101,7 @@ public:
// __m128 mask storage for SSE2+
template <>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi> {
bool __get(size_t __index) const noexcept {
alignas(16) uint32_t sse_data[4];
@ -126,95 +121,81 @@ inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
return _mm_xor_ps(__s_.__storage_, _mm_set1_ps(-0.f));
inline simd<float, m128_abi>
operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi> operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>
operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi> operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>
operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi> operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>
operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi> operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>&
operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>& operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<float, m128_abi>&
operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>& operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<float, m128_abi>&
operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>& operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<float, m128_abi>&
operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>& operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
inline simd<float, m128_abi>::mask_type
operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>::mask_type operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpeq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>::mask_type
operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>::mask_type operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpneq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>::mask_type
operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>::mask_type operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpge_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>::mask_type
operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>::mask_type operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmple_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>::mask_type
operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>::mask_type operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpgt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
inline simd<float, m128_abi>::mask_type
operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
inline simd<float, m128_abi>::mask_type operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmplt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
@ -308,8 +289,7 @@ public:
// __m128d mask storage for SSE2+
template <>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi> {
bool __get(size_t __index) const noexcept {
alignas(16) uint64_t sse_data[2];
@ -332,108 +312,100 @@ inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
return ret;
inline simd<double, m128d_abi>
operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi> operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>
operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi> operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>
operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi> operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>
operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi> operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>&
operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>& operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
inline simd<double, m128d_abi>&
operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>& operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
inline simd<double, m128d_abi>&
operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>& operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
inline simd<double, m128d_abi>&
operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>& operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
inline simd<double, m128d_abi>::mask_type
operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>::mask_type operator==(const simd<double, m128d_abi>& a,
const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpeq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>::mask_type
operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>::mask_type operator!=(const simd<double, m128d_abi>& a,
const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpneq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>::mask_type
operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>::mask_type operator>=(const simd<double, m128d_abi>& a,
const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpge_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>::mask_type
operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>::mask_type operator<=(const simd<double, m128d_abi>& a,
const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmple_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>::mask_type
operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>::mask_type operator>(const simd<double, m128d_abi>& a,
const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpgt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
inline simd<double, m128d_abi>::mask_type
operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
inline simd<double, m128d_abi>::mask_type operator<(const simd<double, m128d_abi>& a,
const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmplt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
@ -445,10 +417,17 @@ inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<doub
namespace simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = m128_abi; };
template <typename T>
struct zeus_native {};
template <>
struct zeus_native<float> {
using type = m128_abi;
#ifndef __AVX__
template<> struct zeus_native<double> { using type = m128d_abi; };
template <>
struct zeus_native<double> {
using type = m128d_abi;
} // namespace simd_abi
@ -23,4 +23,3 @@
#include "zeus/CColor.hpp"
#include "zeus/Global.hpp"
#include "zeus/Math.hpp"
@ -1,8 +1,7 @@
#include "zeus/CAABox.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
namespace zeus {
const CAABox CAABox::skInvertedBox = CAABox();
const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero);
} // namespace zeus
@ -1,6 +1,5 @@
#include "zeus/CAxisAngle.hpp"
namespace zeus
namespace zeus {
const CAxisAngle CAxisAngle::sIdentity = {};
@ -115,4 +115,4 @@ void CColor::toHSL(float& h, float& s, float& l) const {
h /= 6.f;
} // namespace zeus
@ -1,11 +1,9 @@
#include "zeus/CEulerAngles.hpp"
#include "zeus/CQuaternion.hpp"
namespace zeus
namespace zeus {
CEulerAngles::CEulerAngles(const CQuaternion& quat)
CEulerAngles::CEulerAngles(const CQuaternion& quat) {
float quatDot = quat.dot(quat);
float t0 = 0.f;
if (quatDot > 0.f)
@ -20,15 +18,12 @@ CEulerAngles::CEulerAngles(const CQuaternion& quat)
double t5 = t0 * quat.z() * quat.y() + t0 * quat.x() * quat.w();
if (std::abs(t4) > 0.00001)
if (std::abs(t4) > 0.00001) {
x() = -std::atan2(-t5, t4);
y() = -std::atan2(t0 * quat.z() * quat.x() - t0 * quat.y() * quat.w(),
1.0 - (t0 * quat.x() * quat.x() + t0 * quat.y() * quat.y()));
z() = -std::atan2(t2, t1);
} else {
x() = -std::atan2(-t5, t4);
y() = -std::atan2(-(t0 * quat.z() * quat.x() + t0 * quat.y() * quat.w()),
1.0 - (t0 * quat.y() * quat.y() + t0 * quat.z() * quat.z()));
@ -36,17 +31,14 @@ CEulerAngles::CEulerAngles(const CQuaternion& quat)
CEulerAngles::CEulerAngles(const CTransform& xf)
CEulerAngles::CEulerAngles(const CTransform& xf) {
float xyMagSq = xf.basis[1][1] * xf.basis[1][1] + xf.basis[1][0] * xf.basis[1][0];
float f1 = 0.f;
if (xyMagSq > 0.f)
if (xyMagSq > 0.f) {
f1 = 1.f / std::sqrt(xyMagSq);
float f0;
for (int i=0 ; i<4 ; ++i)
for (int i = 0; i < 4; ++i) {
f0 = f1 * f1;
f1 *= 0.5f;
f0 = 3.f - xyMagSq * f0;
@ -56,18 +48,15 @@ CEulerAngles::CEulerAngles(const CTransform& xf)
f1 = xyMagSq * f0;
if (std::fabs(f1) >= 0.00001)
if (std::fabs(f1) >= 0.00001) {
x() = -std::atan2(-xf.basis[1][2], f1);
y() = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z() = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
} else {
x() = -std::atan2(-xf.basis[1][2], f1);
y() = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z() = 0.f;
} // namespace zeus
@ -85,4 +85,4 @@ bool CFrustum::pointFrustumTest(const CVector3f& point) const {
return true;
} // namespace zeus
@ -35,11 +35,11 @@ void CMatrix3f::transpose() {
m[1].mSimd = _mm_movehl_ps(T2, T0);
m[2].mSimd = _mm_movelh_ps(T1, T3);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]);
float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]);
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
m[0].mSimd = T0.val[0];
m[1].mSimd = T0.val[1];
@ -70,11 +70,11 @@ CMatrix3f CMatrix3f::transposed() const {
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]);
float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]);
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
return CMatrix3f(T0.val[0], T0.val[1], T1.val[0]);
@ -111,4 +111,4 @@ CMatrix3f CMatrix3f::inverted() const {
(m[0][1] * m[1][2] - m[0][2] * m[1][1]) * det, -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * det,
(m[0][0] * m[1][1] - m[0][1] * m[1][0]) * det);
} // namespace zeus
@ -1,12 +1,10 @@
#include "zeus/CMatrix4f.hpp"
namespace zeus
namespace zeus {
const CMatrix4f CMatrix4f::skIdentityMatrix4f = CMatrix4f();
CMatrix4f CMatrix4f::transposed() const
CMatrix4f CMatrix4f::transposed() const {
CMatrix4f ret;
#if __SSE__
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
@ -18,11 +16,11 @@ CMatrix4f CMatrix4f::transposed() const
ret.m[2].mSimd = _mm_movelh_ps(T1, T3);
ret.m[3].mSimd = _mm_movehl_ps(T3, T1);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]);
float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]);
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
ret.m[0].mSimd = T0.val[0];
ret.m[1].mSimd = T0.val[1];
@ -51,4 +49,4 @@ CMatrix4f CMatrix4f::transposed() const
return ret;
} // namespace zeus
@ -6,14 +6,8 @@ CAABox COBBox::calculateAABox(const CTransform& worldXf) const {
CAABox ret = CAABox::skInvertedBox;
CTransform trans = worldXf * transform;
static const CVector3f basis[8] = {{1.f, 1.f, 1.f},
{1.f, 1.f, -1.f},
{1.f, -1.f, 1.f},
{1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f},
{-1.f, -1.f, 1.f},
{-1.f, 1.f, -1.f},
{-1.f, 1.f, 1.f}};
static const CVector3f basis[8] = {{1.f, 1.f, 1.f}, {1.f, 1.f, -1.f}, {1.f, -1.f, 1.f}, {1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f}, {-1.f, -1.f, 1.f}, {-1.f, 1.f, -1.f}, {-1.f, 1.f, 1.f}};
CVector3f p = extents * basis[0];
ret.accumulateBounds(trans * p);
p = extents * basis[1];
@ -36,9 +30,7 @@ CAABox COBBox::calculateAABox(const CTransform& worldXf) const {
bool COBBox::OBBIntersectsBox(const COBBox& other) const {
CVector3f v = other.transform.origin - transform.origin;
CVector3f T = CVector3f(v.dot(transform.basis[0]),
CVector3f T = CVector3f(v.dot(transform.basis[0]), v.dot(transform.basis[1]), v.dot(transform.basis[2]));
CMatrix3f R;
@ -50,8 +42,7 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const {
for (int i = 0; i < 3; ++i) {
ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) +
rb = (other.extents[0] * std::fabs(R[i][0])) + (other.extents[1] * std::fabs(R[i][1])) +
(other.extents[2] * std::fabs(R[i][2]));
t = std::fabs(T[i]);
@ -60,9 +51,7 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const {
for (int k = 0; k < 3; ++k) {
ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k]));
ra = (extents[0] * std::fabs(R[0][k])) + (extents[1] * std::fabs(R[1][k])) + (extents[2] * std::fabs(R[2][k]));
rb = other.extents[k];
t = std::fabs(T[0] * R[0][k] + T[1] * R[1][k] + T[2] * R[2][k]);
@ -137,4 +126,4 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const {
return true;
} // namespace zeus
@ -13,4 +13,4 @@ bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CV
return true;
} // namespace zeus
@ -65,4 +65,4 @@ void CProjection::_updateCachedMatrix() {
m_mtx.m[3][3] = 0.0f;
} // namespace zeus
@ -83,13 +83,9 @@ CQuaternion& CQuaternion::operator=(const CQuaternion& q) {
return *this;
CQuaternion CQuaternion::operator+(const CQuaternion& q) const {
return mSimd + q.mSimd;
// Returns a new quaternion built from the sum of this quaternion's SIMD storage and q's
// (mSimd + q.mSimd). Neither operand is modified.
CQuaternion CQuaternion::operator+(const CQuaternion& q) const { return mSimd + q.mSimd; }
CQuaternion CQuaternion::operator-(const CQuaternion& q) const {
return mSimd - q.mSimd;
// Returns a new quaternion built from this quaternion's SIMD storage minus q's
// (mSimd - q.mSimd). Neither operand is modified.
CQuaternion CQuaternion::operator-(const CQuaternion& q) const { return mSimd - q.mSimd; }
CQuaternion CQuaternion::operator*(const CQuaternion& q) const {
return CQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
@ -111,17 +107,11 @@ CQuaternion CQuaternion::operator/(const CQuaternion& q) const {
return *this * p;
CQuaternion CQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
// Returns a new quaternion built from mSimd multiplied by simd<float>(scale).
// NOTE(review): simd<float>(scale) presumably broadcasts scale to every lane, making this a
// uniform scale of all components - confirm against the simd<float> constructor.
CQuaternion CQuaternion::operator*(float scale) const { return mSimd * simd<float>(scale); }
CNUQuaternion CNUQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
// Returns a new CNUQuaternion built from mSimd multiplied by simd<float>(scale).
// NOTE(review): simd<float>(scale) presumably broadcasts scale to every lane - confirm against
// the simd<float> constructor.
CNUQuaternion CNUQuaternion::operator*(float scale) const { return mSimd * simd<float>(scale); }
CQuaternion CQuaternion::operator/(float scale) const {
return mSimd / simd<float>(scale);
// Returns a new quaternion built from mSimd divided by simd<float>(scale).
// No check for scale == 0 is performed here; the result in that case is whatever the
// underlying simd division produces.
CQuaternion CQuaternion::operator/(float scale) const { return mSimd / simd<float>(scale); }
// Unary minus: returns a new quaternion built from the negated SIMD storage (-mSimd).
// This object is left unchanged.
CQuaternion CQuaternion::operator-() const { return -mSimd; }
@ -163,9 +153,7 @@ const CQuaternion& CQuaternion::operator/=(float scale) {
static const simd<float> InvertQuat(1.f, -1.f, -1.f, -1.f);
void CQuaternion::invert() {
mSimd *= InvertQuat;
// In-place variant of inverse(): multiplies mSimd by the file-level constant
// InvertQuat (1, -1, -1, -1). No normalization or division by the magnitude happens here.
// NOTE(review): presumably this keeps w and negates x, y, z (the conjugate) - confirm the
// simd lane order matches (w, x, y, z).
void CQuaternion::invert() { mSimd *= InvertQuat; }
// Returns a new quaternion built from mSimd multiplied by the file-level constant
// InvertQuat (1, -1, -1, -1); this object is not modified. No normalization or division by
// the magnitude happens here.
// NOTE(review): presumably this is the conjugate, i.e. the true inverse only for unit
// quaternions - confirm the simd lane order matches (w, x, y, z).
CQuaternion CQuaternion::inverse() const { return mSimd * InvertQuat; }
@ -269,21 +257,13 @@ CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b,
return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
CQuaternion operator+(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) + rhs.mSimd;
// Scalar-on-the-left addition: returns a quaternion built from simd<float>(lhs) + rhs.mSimd.
// NOTE(review): simd<float>(lhs) presumably broadcasts lhs to every lane - confirm.
CQuaternion operator+(float lhs, const CQuaternion& rhs) { return simd<float>(lhs) + rhs.mSimd; }
CQuaternion operator-(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) - rhs.mSimd;
// Scalar-on-the-left subtraction: returns a quaternion built from simd<float>(lhs) - rhs.mSimd.
// Operand order matters: the quaternion's storage is subtracted from the scalar, not the reverse.
// NOTE(review): simd<float>(lhs) presumably broadcasts lhs to every lane - confirm.
CQuaternion operator-(float lhs, const CQuaternion& rhs) { return simd<float>(lhs) - rhs.mSimd; }
CQuaternion operator*(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) * rhs.mSimd;
// Scalar-on-the-left multiplication: returns a quaternion built from simd<float>(lhs) * rhs.mSimd.
// NOTE(review): simd<float>(lhs) presumably broadcasts lhs to every lane - confirm.
CQuaternion operator*(float lhs, const CQuaternion& rhs) { return simd<float>(lhs) * rhs.mSimd; }
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) {
return simd<float>(lhs) * rhs.mSimd;
// Scalar-on-the-left multiplication for CNUQuaternion: returns a CNUQuaternion built from
// simd<float>(lhs) * rhs.mSimd.
// NOTE(review): simd<float>(lhs) presumably broadcasts lhs to every lane - confirm.
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) { return simd<float>(lhs) * rhs.mSimd; }
CQuaternion CQuaternion::buildEquivalent() const {
float tmp = std::acos(clamp(-1.f, w(), 1.f)) * 2.f;
@ -322,9 +302,9 @@ CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f
return skNoRotation;
float realAngle =
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z()) - std::acos(source.z())), maxAng.asRadians());
float realAngle = zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z()) - std::acos(source.z())),
return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q;
} // namespace zeus
@ -62,4 +62,4 @@ CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f
ret.origin = origin;
return ret;
} // namespace zeus
@ -46,4 +46,4 @@ CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) {
return a;
} // namespace zeus
@ -59,4 +59,4 @@ CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t) {
return a;
} // namespace zeus
@ -10,4 +10,4 @@ CVector4f& CVector4f::operator=(const CColor& other) {
mSimd = other.mSimd;
return *this;
} // namespace zeus
@ -45,38 +45,38 @@ void detectCPU() {
int regs[4];
getCpuInfo(0, regs);
int highestFeature = regs[0];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 8) = regs[2];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = regs[2];
getCpuInfo(0x80000000, regs);
if (regs[0] >= 0x80000004) {
for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
getCpuInfo(i, regs);
// Interpret CPU brand string and cache information.
if (i == 0x80000002)
memcpy((char*) g_cpuFeatures.cpuBrand, regs, sizeof(regs));
memcpy((char*)g_cpuFeatures.cpuBrand, regs, sizeof(regs));
else if (i == 0x80000003)
memcpy((char*) g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
memcpy((char*)g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
else if (i == 0x80000004)
memcpy((char*) g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
memcpy((char*)g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
if (highestFeature >= 1) {
getCpuInfo(1, regs);
memset((bool*) &g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*) &g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*) &g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
memset((bool*)&g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*)&g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
if (highestFeature >= 7) {
getCpuInfoEx(7, 0, regs);
memset((bool*) &g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
isCPUInit = true;
@ -94,56 +94,55 @@ std::pair<bool, const CPUInfo&> validateCPU() {
#if __AVX2__
if (!g_cpuFeatures.AVX2) {
*(bool*) &g_missingFeatures.AVX2 = true;
*(bool*)&g_missingFeatures.AVX2 = true;
ret = false;
#if __AVX__
if (!g_cpuFeatures.AVX) {
*(bool*) &g_missingFeatures.AVX = true;
*(bool*)&g_missingFeatures.AVX = true;
ret = false;
#if __SSE4A__
if (!g_cpuFeatures.SSE4a)
*(bool*) &g_missingFeatures.SSE4a = true;
if (!g_cpuFeatures.SSE4a) {
*(bool*)&g_missingFeatures.SSE4a = true;
ret = false;
#if __SSE4_2__
if (!g_cpuFeatures.SSE42) {
*(bool*) &g_missingFeatures.SSE42 = true;
*(bool*)&g_missingFeatures.SSE42 = true;
ret = false;
#if __SSE4_1__
if (!g_cpuFeatures.SSE41) {
*(bool*) &g_missingFeatures.SSE41 = true;
*(bool*)&g_missingFeatures.SSE41 = true;
ret = false;
#if __SSSE3__
if (!g_cpuFeatures.SSSE3) {
*(bool*) &g_missingFeatures.SSSE3 = true;
*(bool*)&g_missingFeatures.SSSE3 = true;
ret = false;
#if __SSE3__
if (!g_cpuFeatures.SSE3) {
*(bool*) &g_missingFeatures.SSE3 = true;
*(bool*)&g_missingFeatures.SSE3 = true;
ret = false;
#if __SSE2__
if (!g_cpuFeatures.SSE2) {
*(bool*) &g_missingFeatures.SSE2 = true;
*(bool*)&g_missingFeatures.SSE2 = true;
ret = false;
#if __SSE__
if (!g_cpuFeatures.SSE1) {
*(bool*) &g_missingFeatures.SSE1 = true;
*(bool*)&g_missingFeatures.SSE1 = true;
ret = false;
@ -173,8 +172,7 @@ CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3
return CTransform(rmBasis, pos);
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b,
const CVector3f& c, const CVector3f& d, float t) {
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
const float omt = 1.f - t;
return ((a * omt + b * t) * omt + (b * omt + c * t) * t) * omt +
((b * omt + c * t) * omt + (c * omt + d * t) * t) * t;
@ -221,12 +219,11 @@ float getCatmullRomSplinePoint(float a, float b, float c, float d, float t) {
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) + d * (0.5f * t3 - 0.5f * t2));
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t) {
if (t <= 0.0f)
return b;
if (t >= 1.0f)
@ -236,12 +233,11 @@ getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) + d * (0.5f * t3 - 0.5f * t2));
getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t) {
if (t >= 0.0f)
return b;
if (t <= 1.0f)
@ -272,22 +268,20 @@ CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f&
bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon) {
return std::fabs(a.x() - b.x()) < epsilon &&
std::fabs(a.y() - b.y()) < epsilon &&
std::fabs(a.z() - b.z()) < epsilon;
return std::fabs(a.x() - b.x()) < epsilon && std::fabs(a.y() - b.y()) < epsilon && std::fabs(a.z() - b.z()) < epsilon;
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) {
return std::fabs(a.x() - b.x()) < epsilon && std::fabs(a.y() - b.y()) < epsilon;
template <>
CVector3f min(const CVector3f& a, const CVector3f& b) {
return {min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z())};
template <>
CVector3f max(const CVector3f& a, const CVector3f& b) {
return {max(a.x(), b.x()), max(a.y(), b.y()), max(a.z(), b.z())};
} // namespace zeus
@ -6,15 +6,13 @@
using namespace zeus;
union Color {
struct {
zeus::Comp8 r, g, b, a;
zeus::Comp32 rgba;
int main()
int main() {
assert(!CAABox({100, 100, 100}, {100, 100, 100}).invalid());
@ -75,7 +73,8 @@ int main()
ctest1.fromHSV(0, 255 / 255.f, .5);
float h, s, v;
ctest1.toHSV(h, s, v);
std::cout << (int)ctest1.r() << " " << (int)ctest1.g() << " " << (int)ctest1.b() << " " << (int)ctest1.a() << std::endl;
std::cout << (int)ctest1.r() << " " << (int)ctest1.g() << " " << (int)ctest1.b() << " " << (int)ctest1.a()
<< std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a() / 255.f) << std::endl;
return 0;
Reference in New Issue