SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:16:50 -10:00
parent d881e58f62
commit e8dfecbb6e
49 changed files with 6047 additions and 4721 deletions

View File

@ -1,5 +1,5 @@
---
IndentWidth: 4
IndentWidth: 2
ColumnLimit: 128
UseTab: Never
---

View File

@ -40,7 +40,6 @@ add_library(zeus
include/zeus/CColor.hpp
include/zeus/Global.hpp
include/zeus/zeus.hpp
include/zeus/TVectorUnion.hpp
include/zeus/CVector2i.hpp
include/zeus/CVector2f.hpp
include/zeus/CVector3f.hpp
@ -56,7 +55,11 @@ add_library(zeus
include/zeus/CSphere.hpp
include/zeus/CUnitVector.hpp
include/zeus/CMRay.hpp
include/zeus/CEulerAngles.hpp)
include/zeus/CEulerAngles.hpp
include/zeus/simd/simd.hpp
include/zeus/simd/simd_sse.hpp
include/zeus/simd/simd_avx.hpp
include/zeus/simd/parallelism_v2_simd.hpp)
add_subdirectory(test)

View File

@ -6,19 +6,17 @@
#include "zeus/CLineSeg.hpp"
#include "zeus/CSphere.hpp"
#include "zeus/Math.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
class alignas(16) CAABox
{
namespace zeus {
class CAABox {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
enum class EBoxEdgeId
{
enum class EBoxEdgeId {
Z0,
X0,
Z1,
@ -33,8 +31,7 @@ public:
Y3
};
enum class EBoxFaceID
{
enum class EBoxFaceID {
};
static const CAABox skInvertedBox;
@ -51,17 +48,17 @@ public:
CAABox(float min, float max) : min(CVector3f(min)), max(CVector3f(max)) {}
CAABox(float minX, float minY, float minZ, float maxX, float maxY, float maxZ)
: min(minX, minY, minZ), max(maxX, maxY, maxZ)
{
: min(minX, minY, minZ), max(maxX, maxY, maxZ) {
}
#if ZE_ATHENA_TYPES
inline void readBoundingBoxBig(athena::io::IStreamReader& in)
{
void readBoundingBoxBig(athena::io::IStreamReader& in) {
min.readBig(in);
max.readBig(in);
}
static inline CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in)
{
static CAABox ReadBoundingBoxBig(athena::io::IStreamReader& in) {
CAABox ret;
ret.readBoundingBoxBig(in);
return ret;
@ -69,18 +66,13 @@ public:
#endif
float distanceFromPointSquared(const CVector3f& other) const
{
float distanceFromPointSquared(const CVector3f& other) const {
float dist = 0;
for (int i = 0; i < 3; i++)
{
if (other[i] < min[i])
{
for (int i = 0; i < 3; i++) {
if (other[i] < min[i]) {
const float tmp = (min[i] - other[i]);
dist += tmp * tmp;
}
else if (other[i] > max[i])
{
} else if (other[i] > max[i]) {
const float tmp = (other[i] - max[i]);
dist += tmp * tmp;
}
@ -91,8 +83,7 @@ public:
float distanceFromPoint(const CVector3f& other) const { return std::sqrt(distanceFromPointSquared(other)); }
inline bool intersects(const CAABox& other) const
{
bool intersects(const CAABox& other) const {
bool x1 = (max[0] >= other.min[0]);
bool x2 = (min[0] <= other.max[0]);
bool y1 = (max[1] >= other.min[1]);
@ -102,41 +93,30 @@ public:
return x1 && x2 && y1 && y2 && z1 && z2;
}
bool intersects(const CSphere& other) const
{
bool intersects(const CSphere& other) const {
return distanceFromPointSquared(other.position) <= other.radius * other.radius;
}
float intersectionRadius(const CSphere& other) const
{
float intersectionRadius(const CSphere& other) const {
float dist = distanceFromPoint(other.position);
return (dist < other.radius) ? dist : -1.f;
}
inline CAABox booleanIntersection(const CAABox& other) const
{
CAABox booleanIntersection(const CAABox& other) const {
CVector3f minVec = CVector3f::skZero;
CVector3f maxVec = CVector3f::skZero;
for (int i = 0; i < 3; ++i)
{
if (min[i] <= other.min[i] && max[i] >= other.max[i])
{
for (int i = 0; i < 3; ++i) {
if (min[i] <= other.min[i] && max[i] >= other.max[i]) {
minVec[i] = other.min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= max[i])
{
} else if (other.min[i] <= min[i] && other.max[i] >= max[i]) {
minVec[i] = min[i];
maxVec[i] = max[i];
}
else if (other.min[i] <= min[i] && other.max[i] >= min[i])
{
} else if (other.min[i] <= min[i] && other.max[i] >= min[i]) {
minVec[i] = min[i];
maxVec[i] = other.max[i];
}
else if (other.min[i] <= max[i] && other.max[i] >= max[i])
{
} else if (other.min[i] <= max[i] && other.max[i] >= max[i]) {
minVec[i] = other.min[i];
maxVec[i] = max[i];
}
@ -145,75 +125,73 @@ public:
return {minVec, maxVec};
}
inline bool inside(const CAABox& other) const
{
bool inside(const CAABox& other) const {
bool x = min[0] >= other.min[0] && max[0] <= other.max[0];
bool y = min[1] >= other.min[1] && max[1] <= other.max[1];
bool z = min[2] >= other.min[2] && max[2] <= other.max[2];
return x && y && z;
}
inline bool insidePlane(const CPlane& plane) const
{
bool insidePlane(const CPlane& plane) const {
CVector3f vmax;
/* X axis */
if (plane.a >= 0)
if (plane.x() >= 0.f)
vmax[0] = max[0];
else
vmax[0] = min[0];
/* Y axis */
if (plane.b >= 0)
if (plane.y() >= 0.f)
vmax[1] = max[1];
else
vmax[1] = min[1];
/* Z axis */
if (plane.c >= 0)
if (plane.z() >= 0.f)
vmax[2] = max[2];
else
vmax[2] = min[2];
return plane.vec.dot(vmax) + plane.d >= 0.f;
return plane.normal().dot(vmax) + plane.d() >= 0.f;
}
CVector3f center() const { return (min + max) * 0.5f; }
CVector3f extents() const { return (max - min) * 0.5f; }
float volume() const { return (max.x - min.x) * (max.y - min.y) * (max.z - min.z); }
float volume() const {
auto delta = max - min;
return delta.x() * delta.y() * delta.z();
}
inline CLineSeg getEdge(EBoxEdgeId id) const
{
switch (id)
{
CLineSeg getEdge(EBoxEdgeId id) const {
switch (id) {
case EBoxEdgeId::Z0:
default:
return CLineSeg({min.x, min.y, max.z}, {min.x, min.y, min.z});
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), min.y(), min.z()});
case EBoxEdgeId::X0:
return CLineSeg({min.x, min.y, min.z}, {max.x, min.y, min.z});
return CLineSeg({min.x(), min.y(), min.z()}, {max.x(), min.y(), min.z()});
case EBoxEdgeId::Z1:
return CLineSeg({max.x, min.y, min.z}, {max.x, min.y, max.z});
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), min.y(), max.z()});
case EBoxEdgeId::X1:
return CLineSeg({max.x, min.y, max.z}, {min.x, min.y, max.z});
return CLineSeg({max.x(), min.y(), max.z()}, {min.x(), min.y(), max.z()});
case EBoxEdgeId::Z2:
return CLineSeg({max.x, max.y, max.z}, {max.x, max.y, min.z});
return CLineSeg({max.x(), max.y(), max.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::X2:
return CLineSeg({max.x, max.y, min.z}, {min.x, max.y, min.z});
return CLineSeg({max.x(), max.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Z3:
return CLineSeg({min.x, max.y, min.z}, {min.x, max.y, max.z});
return CLineSeg({min.x(), max.y(), min.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::X3:
return CLineSeg({min.x, max.y, max.z}, {max.x, max.y, max.z});
return CLineSeg({min.x(), max.y(), max.z()}, {max.x(), max.y(), max.z()});
case EBoxEdgeId::Y0:
return CLineSeg({min.x, min.y, max.z}, {min.x, max.y, max.z});
return CLineSeg({min.x(), min.y(), max.z()}, {min.x(), max.y(), max.z()});
case EBoxEdgeId::Y1:
return CLineSeg({min.x, min.y, min.z}, {min.x, max.y, min.z});
return CLineSeg({min.x(), min.y(), min.z()}, {min.x(), max.y(), min.z()});
case EBoxEdgeId::Y2:
return CLineSeg({max.x, min.y, min.z}, {max.x, max.y, min.z});
return CLineSeg({max.x(), min.y(), min.z()}, {max.x(), max.y(), min.z()});
case EBoxEdgeId::Y3:
return CLineSeg({max.x, min.y, max.z}, {max.x, max.y, max.z});
return CLineSeg({max.x(), min.y(), max.z()}, {max.x(), max.y(), max.z()});
}
}
inline CAABox getTransformedAABox(const CTransform& xfrm) const
{
CAABox getTransformedAABox(const CTransform& xfrm) const {
CAABox box;
CVector3f point = xfrm * getPoint(0);
box.accumulateBounds(point);
@ -234,97 +212,81 @@ public:
return box;
}
inline void accumulateBounds(const CVector3f& point)
{
if (min.x > point.x)
min.x = point.x;
if (min.y > point.y)
min.y = point.y;
if (min.z > point.z)
min.z = point.z;
if (max.x < point.x)
max.x = point.x;
if (max.y < point.y)
max.y = point.y;
if (max.z < point.z)
max.z = point.z;
void accumulateBounds(const CVector3f& point) {
if (min.x() > point.x())
min.x() = point.x();
if (min.y() > point.y())
min.y() = point.y();
if (min.z() > point.z())
min.z() = point.z();
if (max.x() < point.x())
max.x() = point.x();
if (max.y() < point.y())
max.y() = point.y();
if (max.z() < point.z())
max.z() = point.z();
}
inline void accumulateBounds(const CAABox& other)
{
void accumulateBounds(const CAABox& other) {
accumulateBounds(other.min);
accumulateBounds(other.max);
}
inline bool pointInside(const CVector3f& other) const
{
return (min.x <= other.x && other.x <= max.x &&
min.y <= other.y && other.y <= max.y &&
min.z <= other.z && other.z <= max.z);
bool pointInside(const CVector3f& other) const {
return (min.x() <= other.x() && other.x() <= max.x() &&
min.y() <= other.y() && other.y() <= max.y() &&
min.z() <= other.z() && other.z() <= max.z());
}
inline CVector3f closestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? min.x : max.x),
(other.y >= 0.f ? min.y : max.y),
(other.z >= 0.f ? min.z : max.z)};
CVector3f closestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? min.x() : max.x()),
(other.y() >= 0.f ? min.y() : max.y()),
(other.z() >= 0.f ? min.z() : max.z())};
}
inline CVector3f furthestPointAlongVector(const CVector3f& other) const
{
return {(other.x >= 0.f ? max.x : min.x),
(other.y >= 0.f ? max.y : min.y),
(other.z >= 0.f ? max.z : min.z)};
CVector3f furthestPointAlongVector(const CVector3f& other) const {
return {(other.x() >= 0.f ? max.x() : min.x()),
(other.y() >= 0.f ? max.y() : min.y()),
(other.z() >= 0.f ? max.z() : min.z())};
}
inline float distanceBetween(const CAABox& other)
{
float distanceBetween(const CAABox& other) {
int intersects = 0;
if (max.x >= other.min.x && min.x <= other.max.x)
if (max.x() >= other.min.x() && min.x() <= other.max.x())
intersects |= 0x1;
if (max.y >= other.min.y && min.y <= other.max.y)
if (max.y() >= other.min.y() && min.y() <= other.max.y())
intersects |= 0x2;
if (max.z >= other.min.z && min.z <= other.max.z)
if (max.z() >= other.min.z() && min.z() <= other.max.z())
intersects |= 0x4;
float minX, maxX;
if (max.x < other.min.x)
{
minX = max.x;
maxX = other.min.x;
}
else
{
minX = min.x;
maxX = other.max.x;
if (max.x() < other.min.x()) {
minX = max.x();
maxX = other.min.x();
} else {
minX = min.x();
maxX = other.max.x();
}
float minY, maxY;
if (max.y < other.min.y)
{
minY = max.y;
maxY = other.min.y;
}
else
{
minY = min.y;
maxY = other.max.y;
if (max.y() < other.min.y()) {
minY = max.y();
maxY = other.min.y();
} else {
minY = min.y();
maxY = other.max.y();
}
float minZ, maxZ;
if (max.z < other.min.z)
{
minZ = max.z;
maxZ = other.min.z;
}
else
{
minZ = min.z;
maxZ = other.max.z;
if (max.z() < other.min.z()) {
minZ = max.z();
maxZ = other.min.z();
} else {
minZ = min.z();
maxZ = other.max.z();
}
switch (intersects)
{
switch (intersects) {
case 0:
return zeus::CVector3f(maxX - minX, maxY - minY, maxZ - minZ).magnitude();
case 1:
@ -345,72 +307,65 @@ public:
}
}
inline CVector3f getPoint(const int point) const
{
CVector3f getPoint(const int point) const {
const CVector3f* vecs = &min;
return CVector3f(vecs[(point & 1) != 0].x, vecs[(point & 2) != 0].y, vecs[(point & 4) != 0].z);
return CVector3f(vecs[(point & 1) != 0].x(), vecs[(point & 2) != 0].y(), vecs[(point & 4) != 0].z());
}
inline CVector3f clampToBox(const CVector3f& vec)
{
CVector3f clampToBox(const CVector3f& vec) const {
CVector3f ret = vec;
clamp(min.x, ret.x, max.x);
clamp(min.y, ret.y, max.y);
clamp(min.z, ret.z, max.z);
ret.x() = clamp(min.x(), float(ret.x()), max.x());
ret.y() = clamp(min.y(), float(ret.y()), max.y());
ret.z() = clamp(min.z(), float(ret.z()), max.z());
return ret;
}
inline void splitX(CAABox& negX, CAABox& posX) const
{
float midX = (max.x - min.x) * .5f + min.x;
void splitX(CAABox& negX, CAABox& posX) const {
float midX = (max.x() - min.x()) * .5f + min.x();
posX.max = max;
posX.min = min;
posX.min.x = midX;
posX.min.x() = midX;
negX.max = max;
negX.max.x = midX;
negX.max.x() = midX;
negX.min = min;
}
inline void splitY(CAABox& negY, CAABox& posY) const
{
float midY = (max.y - min.y) * .5f + min.y;
void splitY(CAABox& negY, CAABox& posY) const {
float midY = (max.y() - min.y()) * .5f + min.y();
posY.max = max;
posY.min = min;
posY.min.y = midY;
posY.min.y() = midY;
negY.max = max;
negY.max.y = midY;
negY.max.y() = midY;
negY.min = min;
}
inline void splitZ(CAABox& negZ, CAABox& posZ) const
{
float midZ = (max.z - min.z) * .5f + min.z;
void splitZ(CAABox& negZ, CAABox& posZ) const {
float midZ = (max.z() - min.z()) * .5f + min.z();
posZ.max = max;
posZ.min = min;
posZ.min.z = midZ;
posZ.min.z() = midZ;
negZ.max = max;
negZ.max.z = midZ;
negZ.max.z() = midZ;
negZ.min = min;
}
inline bool invalid() { return (max.x < min.x || max.y < min.y || max.z < min.z); }
bool invalid() { return (max.x() < min.x() || max.y() < min.y() || max.z() < min.z()); }
inline float operator[](size_t idx) const
{
float operator[](size_t idx) const {
assert(idx < 6);
if (idx < 3)
return min[idx];
else
return max[idx-3];
return max[idx - 3];
}
};
inline bool operator==(const CAABox& left, const CAABox& right)
{
inline bool operator==(const CAABox& left, const CAABox& right) {
return (left.min == right.min && left.max == right.max);
}
inline bool operator!=(const CAABox& left, const CAABox& right)
{
inline bool operator!=(const CAABox& left, const CAABox& right) {
return (left.min != right.min || left.max != right.max);
}
}

View File

@ -4,21 +4,14 @@
#include "zeus/CVector3f.hpp"
#include "CUnitVector.hpp"
namespace zeus
{
struct alignas(16) CAxisAngle : CVector3f
{
ZE_DECLARE_ALIGNED_ALLOCATOR();
namespace zeus {
struct CAxisAngle : CVector3f {
CAxisAngle() = default;
CAxisAngle(float x, float y, float z) : CVector3f(x, y, z) {}
CAxisAngle(const CUnitVector3f& axis, float angle) : CVector3f(angle * axis) {}
CAxisAngle(const CVector3f& axisAngle) : CVector3f(axisAngle) {}
float angle() const { return magnitude(); }
const CVector3f& getVector() const { return *this; }
static const CAxisAngle sIdentity;
};
}

View File

@ -2,11 +2,15 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "CVector4f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/FileReader.hpp>
#include <athena/FileWriter.hpp>
#include "athena/FileReader.hpp"
#include "athena/FileWriter.hpp"
#endif
#include <iostream>
#include <cassert>
@ -20,15 +24,13 @@
#define COLOR(rgba) rgba
#endif
namespace zeus
{
namespace zeus {
typedef uint8_t Comp8;
typedef uint32_t Comp32;
constexpr float OneOver255 = 1.f / 255.f;
typedef union {
struct
{
struct {
Comp8 r, g, b, a;
};
Comp32 rgba;
@ -36,11 +38,9 @@ typedef union {
class CVector4f;
class alignas(16) CColor
{
class CColor {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
simd<float> mSimd;
static const CColor skRed;
static const CColor skBlack;
static const CColor skBlue;
@ -52,285 +52,210 @@ public:
static const CColor skWhite;
static const CColor skClear;
#if __SSE__
CColor(const __m128& mVec128) : mVec128(mVec128) {}
#endif
CColor() : mSimd(1.f) {}
CColor() : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
CColor(float rgb, float a = 1.0) { splat(rgb, a); }
CColor(float r, float g, float b, float a = 1.0f)
{
v[0] = r;
v[1] = g;
v[2] = b;
v[3] = a;
}
CColor(float r, float g, float b, float a = 1.0f) : mSimd(r, g, b, a) {}
#if ZE_ATHENA_TYPES
CColor(const atVec4f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
r = vec.vec[0], g = vec.vec[1], b = vec.vec[2], a = vec.vec[3];
}
#endif
CColor(const atVec4f& vec) : mSimd(vec.simd) {}
#endif
CColor(Comp32 rgba) { fromRGBA32(rgba); }
CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
CColor(const CVector4f& other);
CColor& operator=(const CVector4f& other);
CColor(const CVector4f& other) : mSimd(other.mSimd) {}
template <typename T>
CColor(const simd<T>& s) : mSimd(s) {}
CColor& operator=(const CVector4f& other) {
mSimd = other.mSimd;
return *this;
}
#if ZE_ATHENA_TYPES
static inline CColor ReadRGBABig(athena::io::IStreamReader& reader)
{
static CColor ReadRGBABig(athena::io::IStreamReader& reader) {
CColor ret;
ret.readRGBABig(reader);
return ret;
}
inline void readRGBABig(athena::io::IStreamReader& reader)
{
r = reader.readFloatBig();
g = reader.readFloatBig();
b = reader.readFloatBig();
a = reader.readFloatBig();
void readRGBABig(athena::io::IStreamReader& reader) {
simd_floats f;
f[0] = reader.readFloatBig();
f[1] = reader.readFloatBig();
f[2] = reader.readFloatBig();
f[3] = reader.readFloatBig();
mSimd.copy_from(f);
}
inline void readBGRABig(athena::io::IStreamReader& reader)
{
b = reader.readFloatBig();
g = reader.readFloatBig();
r = reader.readFloatBig();
a = reader.readFloatBig();
void readBGRABig(athena::io::IStreamReader& reader) {
simd_floats f;
f[2] = reader.readFloatBig();
f[1] = reader.readFloatBig();
f[0] = reader.readFloatBig();
f[3] = reader.readFloatBig();
mSimd.copy_from(f);
}
inline void writeRGBABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(r);
writer.writeFloatBig(g);
writer.writeFloatBig(b);
writer.writeFloatBig(a);
void writeRGBABig(athena::io::IStreamWriter& writer) const {
simd_floats f(mSimd);
writer.writeFloatBig(f[0]);
writer.writeFloatBig(f[1]);
writer.writeFloatBig(f[2]);
writer.writeFloatBig(f[3]);
}
inline void writeBGRABig(athena::io::IStreamWriter& writer) const
{
writer.writeFloatBig(b);
writer.writeFloatBig(g);
writer.writeFloatBig(r);
writer.writeFloatBig(a);
void writeBGRABig(athena::io::IStreamWriter& writer) const {
simd_floats f(mSimd);
writer.writeFloatBig(f[2]);
writer.writeFloatBig(f[1]);
writer.writeFloatBig(f[0]);
writer.writeFloatBig(f[3]);
}
inline void writeRGBA8(athena::io::IStreamWriter& writer) const
{
writer.writeUByte(this->r * 255);
writer.writeUByte(this->g * 255);
writer.writeUByte(this->b * 255);
writer.writeUByte(this->a * 255);
void writeRGBA8(athena::io::IStreamWriter& writer) const {
simd_floats f(mSimd);
writer.writeUByte(atUint8(f[0] * 255));
writer.writeUByte(atUint8(f[1] * 255));
writer.writeUByte(atUint8(f[2] * 255));
writer.writeUByte(atUint8(f[3] * 255));
}
#endif
inline bool operator==(const CColor& rhs) const { return (r == rhs.r && g == rhs.g && b == rhs.b && a == rhs.a); }
inline bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
inline CColor operator+(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CColor(r + rhs.r, g + rhs.g, b + rhs.b, a + rhs.a);
#endif
bool operator==(const CColor& rhs) const {
return (r() == rhs.r() && g() == rhs.g() && b() == rhs.b() && a() == rhs.a());
}
inline CColor operator-(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CColor(r - rhs.r, g - rhs.g, b - rhs.b, a - rhs.a);
#endif
bool operator!=(const CColor& rhs) const { return !(*this == rhs); }
CColor operator+(const CColor& rhs) const {
return mSimd + rhs.mSimd;
}
inline CColor operator*(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CColor(r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a);
#endif
CColor operator-(const CColor& rhs) const {
return mSimd - rhs.mSimd;
}
inline CColor operator/(const CColor& rhs) const
{
#if __SSE__
return CColor(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CColor(r / rhs.r, g / rhs.g, b / rhs.b, a / rhs.a);
#endif
CColor operator*(const CColor& rhs) const {
return mSimd * rhs.mSimd;
}
inline CColor operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_add_ps(mVec128, splat.mVec128));
#else
return CColor(r + val, g + val, b + val, a + val);
#endif
CColor operator/(const CColor& rhs) const {
return mSimd / rhs.mSimd;
}
inline CColor operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CColor(r - val, g - val, b - val, a - val);
#endif
CColor operator+(float val) const {
return mSimd + simd<float>(val);
}
inline CColor operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CColor(r * val, g * val, b * val, a * val);
#endif
CColor operator-(float val) const {
return mSimd - simd<float>(val);
}
inline CColor operator/(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CColor(_mm_div_ps(mVec128, splat.mVec128));
#else
return CColor(r / val, g / val, b / val, a / val);
#endif
CColor operator*(float val) const {
return mSimd * simd<float>(val);
}
inline const CColor& operator+=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
r += rhs.r;
g += rhs.g;
b += rhs.b;
a += rhs.a;
#endif
CColor operator/(float val) const {
return mSimd / simd<float>(val);
}
const CColor& operator+=(const CColor& rhs) {
mSimd += rhs.mSimd;
return *this;
}
inline const CColor& operator-=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
r -= rhs.r;
g -= rhs.g;
b -= rhs.b;
a -= rhs.a;
#endif
const CColor& operator-=(const CColor& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
inline const CColor& operator*=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
r *= rhs.r;
g *= rhs.g;
b *= rhs.b;
a *= rhs.a;
#endif
const CColor& operator*=(const CColor& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
inline const CColor& operator/=(const CColor& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
r /= rhs.r;
g /= rhs.g;
b /= rhs.b;
a /= rhs.a;
#endif
const CColor& operator/=(const CColor& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
inline void normalize()
{
const CColor& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
const CColor& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
const CColor& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
const CColor& operator/=(float rhs) {
mSimd /= simd<float>(rhs);
return *this;
}
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= mag;
}
inline CColor normalized() const
{
CColor normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return r * r + g * g + b * b + a * a;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
static inline CColor lerp(const CColor& a, const CColor& b, float t) { return (a + (b - a) * t); }
static inline CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
inline float& operator[](const size_t& idx) { assert(idx < 4); return (&r)[idx]; }
inline const float& operator[](const size_t& idx) const { assert(idx < 4); return (&r)[idx]; }
inline void splat(float rgb, float a)
{
#if __SSE__
TVectorUnion splat = {{rgb, rgb, rgb, a}};
mVec128 = splat.mVec128;
#else
v[0] = rgb;
v[1] = rgb;
v[2] = rgb;
v[3] = a;
#endif
float magSquared() const {
return mSimd.dot4(mSimd);
}
inline float rgbDot(const CColor& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (r * rhs.r) + (g * rhs.g) + (b * rhs.b);
#endif
float magnitude() const { return std::sqrt(magSquared()); }
static CColor lerp(const CColor& a, const CColor& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
union {
struct
{
float r, g, b, a;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
static CColor nlerp(const CColor& a, const CColor& b, float t) { return lerp(a, b, t).normalized(); }
void fromRGBA8(Comp8 r, Comp8 g, Comp8 b, Comp8 a)
{
this->r = r * OneOver255;
this->g = g * OneOver255;
this->b = b * OneOver255;
this->a = a * OneOver255;
simd<float>::reference operator[](const size_t& idx) {
assert(idx < 4);
return mSimd[idx];
}
void fromRGBA32(Comp32 rgba)
{
float operator[](const size_t& idx) const {
assert(idx < 4);
return mSimd[idx];
}
void splat(float rgb, float a) {
mSimd = simd<float>(rgb);
mSimd[3] = a;
}
float rgbDot(const CColor& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
void fromRGBA8(const Comp8 ri, const Comp8 gi, const Comp8 bi, const Comp8 ai) {
mSimd = simd<float>(ri * OneOver255, gi * OneOver255, bi * OneOver255, ai * OneOver255);
}
void fromRGBA32(Comp32 rgba) {
static RGBA32 tmp;
tmp.rgba = COLOR(rgba);
fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a);
@ -343,12 +268,11 @@ public:
* \param b
* \param a
*/
void toRGBA8(Comp8& r, Comp8& g, Comp8& b, Comp8& a)
{
r = this->r * 255;
g = this->g * 255;
b = this->b * 255;
a = this->a * 255;
void toRGBA8(Comp8& ro, Comp8& go, Comp8& bo, Comp8& ao) const {
ro = Comp8(r() * 255);
go = Comp8(g() * 255);
bo = Comp8(b() * 255);
ao = Comp8(a() * 255);
}
/**
@ -371,59 +295,44 @@ public:
void fromHSL(float h, float s, float l, float _a = 1.0);
void toHSL(float& h, float& s, float& l);
void toHSL(float& h, float& s, float& l) const;
CColor toGrayscale() { return {std::sqrt((r * r + g * g + b * b) / 3), a}; }
CColor toGrayscale() const { return {std::sqrt((r() * r() + g() * g() + b() * b()) / 3), a()}; }
/**
* @brief Clamps to GPU-safe RGBA values [0,1]
*/
void Clamp()
{
this->r = std::min(1.f, std::max(0.f, this->r));
this->g = std::min(1.f, std::max(0.f, this->g));
this->b = std::min(1.f, std::max(0.f, this->b));
this->a = std::min(1.f, std::max(0.f, this->a));
void Clamp() {
r() = std::min(1.f, std::max(0.f, float(r())));
g() = std::min(1.f, std::max(0.f, float(g())));
b() = std::min(1.f, std::max(0.f, float(b())));
a() = std::min(1.f, std::max(0.f, float(a())));
}
float r() const { return mSimd[0]; }
float g() const { return mSimd[1]; }
float b() const { return mSimd[2]; }
float a() const { return mSimd[3]; }
simd<float>::reference r() { return mSimd[0]; }
simd<float>::reference g() { return mSimd[1]; }
simd<float>::reference b() { return mSimd[2]; }
simd<float>::reference a() { return mSimd[3]; }
};
static inline CColor operator+(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs + rhs.r, lhs + rhs.g, lhs + rhs.b, lhs + rhs.a);
#endif
static inline CColor operator+(float lhs, const CColor& rhs) {
return simd<float>(lhs) + rhs.mSimd;
}
static inline CColor operator-(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs - rhs.r, lhs - rhs.g, lhs - rhs.b, lhs - rhs.a);
#endif
static inline CColor operator-(float lhs, const CColor& rhs) {
return simd<float>(lhs) - rhs.mSimd;
}
static inline CColor operator*(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs * rhs.r, lhs * rhs.g, lhs * rhs.b, lhs * rhs.a);
#endif
static inline CColor operator*(float lhs, const CColor& rhs) {
return simd<float>(lhs) * rhs.mSimd;
}
static inline CColor operator/(float lhs, const CColor& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CColor(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CColor(lhs / rhs.r, lhs / rhs.g, lhs / rhs.b, lhs / rhs.a);
#endif
static inline CColor operator/(float lhs, const CColor& rhs) {
return simd<float>(lhs) / rhs.mSimd;
}
}

View File

@ -2,12 +2,10 @@
#include "zeus/CVector3f.hpp"
namespace zeus
{
namespace zeus {
class CQuaternion;
class CEulerAngles : public CVector3f
{
class CEulerAngles : public CVector3f {
public:
CEulerAngles(float x, float y, float z) { assign(x, y, z); }
CEulerAngles(const CQuaternion& quat);

View File

@ -4,17 +4,14 @@
#include "zeus/CAABox.hpp"
#include "zeus/CProjection.hpp"
namespace zeus
{
class CFrustum
{
namespace zeus {
class CFrustum {
CPlane planes[6];
bool valid = false;
public:
void updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection);
void updatePlanes(const CTransform& viewPointMtx, const CProjection& projection);
bool aabbFrustumTest(const CAABox& aabb) const;
bool sphereFrustumTest(const CSphere& sphere) const;
bool pointFrustumTest(const CVector3f& point) const;

View File

@ -3,12 +3,11 @@
#include "Global.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class CLine
{
namespace zeus {
class CLine {
public:
CLine(const CVector3f& origin, const CVector3f& dir) : origin(origin), dir(dir) {}
CVector3f origin;
CVector3f dir;
};

View File

@ -3,15 +3,12 @@
#include "Global.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class CLineSeg
{
namespace zeus {
class CLineSeg {
public:
CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end)
{
CLineSeg(const CVector3f& start, const CVector3f& end) : x0_start(start), x18_end(end) {
CVector3f tmp = (end - start).normalized();
if (tmp.x != 0 || tmp.y != 0 || tmp.z != 0)
if (tmp.x() != 0.f || tmp.y() != 0.f || tmp.z() != 0.f)
xc_dir = tmp.normalized();
else
xc_dir = CVector3f::skZero;

View File

@ -1,28 +1,24 @@
#pragma once
#include "zeus/CVector3f.hpp"
#include "zeus/CTransform.hpp"
#include "zeus/Math.hpp"
namespace zeus
{
struct CMRay
{
namespace zeus {
struct CMRay {
CMRay(const CVector3f& start, const CVector3f& dirin, float len)
: start(start), length(len), invLength(1.f / len), dir(dirin)
{
: start(start), length(len), invLength(1.f / len), dir(dirin) {
end = start + (len * dirin);
delta = end - start;
}
CMRay(const CVector3f& start, const CVector3f& end, float len, float invLen)
: start(start), end(end), length(len), invLength(invLen)
{
: start(start), end(end), length(len), invLength(invLen) {
delta = end - start;
dir = invLen * delta;
}
CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const
{
CMRay getInvUnscaledTransformRay(const CTransform& xfrm) const {
const CTransform inv = xfrm.inverse();
return CMRay(inv * start, inv * end, length, invLength);
}

View File

@ -6,79 +6,68 @@
#include <cstring>
/* Column-major matrix class */
namespace zeus
{
namespace zeus {
class CQuaternion;
class alignas(16) CMatrix3f
{
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix3f(bool zero = false)
{
memset(m, 0, sizeof(m));
if (!zero)
{
class CMatrix3f {
public:
explicit CMatrix3f(bool zero = false) {
m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
m[2] = simd<float>(0.f);
if (!zero) {
m[0][0] = 1.0;
m[1][1] = 1.0;
m[2][2] = 1.0;
}
}
CMatrix3f(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22)
{
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02;
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12;
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22;
}
CMatrix3f(const CVector3f& scaleVec)
{
memset(m, 0, sizeof(m));
CMatrix3f(float m00, float m01, float m02,
float m10, float m11, float m12,
float m20, float m21, float m22)
: m{{m00, m10, m20},
{m01, m11, m21},
{m02, m12, m22}} {}
CMatrix3f(const CVector3f& scaleVec) {
m[0] = simd<float>(0.f);
m[1] = simd<float>(0.f);
m[2] = simd<float>(0.f);
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
}
CMatrix3f(float scale) : CMatrix3f(CVector3f(scale)) {}
CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2)
{
vec[0] = r0;
vec[1] = r1;
vec[2] = r2;
CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2) {
m[0] = r0;
m[1] = r1;
m[2] = r2;
}
CMatrix3f(const CMatrix3f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
CMatrix3f(const CMatrix3f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
}
#if __SSE__
CMatrix3f(const __m128& r0, const __m128& r1, const __m128& r2)
{
vec[0].mVec128 = r0;
vec[1].mVec128 = r1;
vec[2].mVec128 = r2;
CMatrix3f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2) {
m[0].mSimd = r0;
m[1].mSimd = r1;
m[2].mSimd = r2;
}
#endif
#if ZE_ATHENA_TYPES
CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2)
{
#if __SSE__
vec[0].mVec128 = r0.mVec128;
vec[1].mVec128 = r1.mVec128;
vec[2].mVec128 = r2.mVec128;
#else
vec[0].x = r0.vec[0];
vec[0].y = r0.vec[1];
vec[0].z = r0.vec[2];
vec[1].x = r1.vec[0];
vec[1].y = r1.vec[1];
vec[1].z = r1.vec[2];
vec[2].x = r2.vec[0];
vec[2].y = r2.vec[1];
vec[2].z = r2.vec[2];
#endif
CMatrix3f(const atVec4f& r0, const atVec4f& r1, const atVec4f& r2) {
m[0].mSimd = r0.simd;
m[1].mSimd = r1.simd;
m[2].mSimd = r2.simd;
}
void readBig(athena::io::IStreamReader& input)
{
void readBig(athena::io::IStreamReader& input) {
m[0][0] = input.readFloatBig();
m[1][0] = input.readFloatBig();
m[2][0] = input.readFloatBig();
@ -90,170 +79,112 @@ public:
m[2][2] = input.readFloatBig();
}
static CMatrix3f ReadBig(athena::io::IStreamReader& input)
{
static CMatrix3f ReadBig(athena::io::IStreamReader& input) {
CMatrix3f ret;
ret.readBig(input);
return ret;
}
#endif
CMatrix3f(const CVector3f& axis, float angle);
CMatrix3f(const CQuaternion& quat);
CMatrix3f(const TVectorUnion& r0, const TVectorUnion& r1, const TVectorUnion& r2)
{
#if __SSE__
vec[0].mVec128 = r0.mVec128;
vec[1].mVec128 = r1.mVec128;
vec[2].mVec128 = r2.mVec128;
#else
vec[0].x = r0.vec[0];
vec[0].y = r0.vec[1];
vec[0].z = r0.vec[2];
vec[1].x = r1.vec[0];
vec[1].y = r1.vec[1];
vec[1].z = r1.vec[2];
vec[2].x = r2.vec[0];
vec[2].y = r2.vec[1];
vec[2].z = r2.vec[2];
#endif
}
inline CMatrix3f& operator=(const CMatrix3f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
#endif
CMatrix3f(const CVector3f& axis, float angle);
CMatrix3f(const CQuaternion& quat);
CMatrix3f& operator=(const CMatrix3f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
return *this;
}
inline CVector3f operator*(const CVector3f& other) const
{
#if __SSE__
TVectorUnion res;
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)));
return CVector3f(res.mVec128);
#else
return CVector3f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2],
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2],
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2]);
#endif
CVector3f operator*(const CVector3f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>();
}
inline CVector3f& operator[](int i)
{
assert(0 <= i && i < 3);
return vec[i];
CVector3f& operator[](size_t i) {
assert(i < 3);
return m[i];
}
inline const CVector3f& operator[](int i) const
{
assert(0 <= i && i < 3);
return vec[i];
const CVector3f& operator[](size_t i) const {
assert(i < 3);
return m[i];
}
inline CMatrix3f orthonormalized() const
{
CMatrix3f orthonormalized() const {
CMatrix3f ret;
ret[0] = vec[0].normalized();
ret[2] = ret[0].cross(vec[1]);
ret[0] = m[0].normalized();
ret[2] = ret[0].cross(m[1]);
ret[2].normalize();
ret[1] = ret[2].cross(ret[0]);
return ret;
}
inline bool operator==(const CMatrix3f& other) const
{
return vec[0] == other.vec[0] && vec[1] == other.vec[1] && vec[2] == other.vec[2];
bool operator==(const CMatrix3f& other) const {
return m[0] == other.m[0] && m[1] == other.m[1] && m[2] == other.m[2];
}
static const CMatrix3f skIdentityMatrix3f;
void transpose();
void transposeSSE3();
CMatrix3f transposed() const;
CMatrix3f transposedSSE3() const;
inline void invert() { *this = inverted(); }
CMatrix3f transposed() const;
void invert() { *this = inverted(); }
CMatrix3f inverted() const;
void addScaledMatrix(const CMatrix3f& other, float scale)
{
void addScaledMatrix(const CMatrix3f& other, float scale) {
CVector3f scaleVec(scale);
vec[0] += other.vec[0] * scaleVec;
vec[1] += other.vec[1] * scaleVec;
vec[2] += other.vec[2] * scaleVec;
m[0] += other.m[0] * scaleVec;
m[1] += other.m[1] * scaleVec;
m[2] += other.m[2] * scaleVec;
}
static inline CMatrix3f RotateX(float theta)
{
static CMatrix3f RotateX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, cosT, sinT, 0.f}},
TVectorUnion{{0.f, -sinT, cosT, 0.f}});
return CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
simd<float>{0.f, cosT, sinT, 0.f},
simd<float>{0.f, -sinT, cosT, 0.f});
}
static inline CMatrix3f RotateY(float theta)
{
static CMatrix3f RotateY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}},
TVectorUnion{{sinT, 0.f, cosT, 0.f}});
return CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
simd<float>{0.f, 1.f, 0.f, 0.f},
simd<float>{sinT, 0.f, cosT, 0.f});
}
static inline CMatrix3f RotateZ(float theta)
{
static CMatrix3f RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}},
TVectorUnion{{-sinT, cosT, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, 1.f, 0.f}});
return CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f});
}
float determinant() const
{
float determinant() const {
return
m[1][0] * (m[2][1] * m[0][2] - m[0][1] * m[2][2]) +
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) +
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
}
union {
float m[3][4]; /* 4th row for union-alignment */
struct
{
CVector3f vec[3];
};
};
CVector3f m[3];
};
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs)
{
#if __SSE__
unsigned i;
TVectorUnion resVec[3];
for (i = 0; i < 3; ++i)
{
resVec[i].mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(lhs[0].mVec128, ze_splat_ps(rhs[i].mVec128, 0)),
_mm_mul_ps(lhs[1].mVec128, ze_splat_ps(rhs[i].mVec128, 1))),
_mm_mul_ps(lhs[2].mVec128, ze_splat_ps(rhs[i].mVec128, 2)));
resVec[i].v[3] = 0.0;
}
return CMatrix3f(resVec[0].mVec128, resVec[1].mVec128, resVec[2].mVec128);
#else
return CMatrix3f(lhs[0][0] * rhs[0][0] + lhs[1][0] * rhs[0][1] + lhs[2][0] * rhs[0][2],
lhs[0][0] * rhs[1][0] + lhs[1][0] * rhs[1][1] + lhs[2][0] * rhs[1][2],
lhs[0][0] * rhs[2][0] + lhs[1][0] * rhs[2][1] + lhs[2][0] * rhs[2][2],
lhs[0][1] * rhs[0][0] + lhs[1][1] * rhs[0][1] + lhs[2][1] * rhs[0][2],
lhs[0][1] * rhs[1][0] + lhs[1][1] * rhs[1][1] + lhs[2][1] * rhs[1][2],
lhs[0][1] * rhs[2][0] + lhs[1][1] * rhs[2][1] + lhs[2][1] * rhs[2][2],
lhs[0][2] * rhs[0][0] + lhs[1][2] * rhs[0][1] + lhs[2][2] * rhs[0][2],
lhs[0][2] * rhs[1][0] + lhs[1][2] * rhs[1][1] + lhs[2][2] * rhs[1][2],
lhs[0][2] * rhs[2][0] + lhs[1][2] * rhs[2][1] + lhs[2][2] * rhs[2][2]);
#endif
static inline CMatrix3f operator*(const CMatrix3f& lhs, const CMatrix3f& rhs) {
simd<float> v[3];
for (int i = 0; i < 3; ++i)
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>();
return CMatrix3f(v[0], v[1], v[2]);
}
}

View File

@ -1,176 +1,116 @@
#pragma once
#include "zeus/CMatrix3f.hpp"
#include "zeus/CVector4f.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(16) CMatrix4f
{
namespace zeus {
class CMatrix4f {
public:
static const CMatrix4f skIdentityMatrix4f;
ZE_DECLARE_ALIGNED_ALLOCATOR();
explicit CMatrix4f(bool zero = false)
{
memset(m, 0, sizeof(m));
if (!zero)
{
explicit CMatrix4f(bool zero = false) {
if (!zero) {
m[0][0] = 1.0;
m[1][1] = 1.0;
m[2][2] = 1.0;
m[3][3] = 1.0;
}
}
CMatrix4f(float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20, float m21,
float m22, float m23, float m30, float m31, float m32, float m33)
{
m[0][0] = m00, m[1][0] = m01, m[2][0] = m02, m[3][0] = m03;
m[0][1] = m10, m[1][1] = m11, m[2][1] = m12, m[3][1] = m13;
m[0][2] = m20, m[1][2] = m21, m[2][2] = m22, m[3][2] = m23;
m[0][3] = m30, m[1][3] = m31, m[2][3] = m32, m[3][3] = m33;
}
CMatrix4f(const CVector3f& scaleVec)
{
memset(m, 0, sizeof(m));
CMatrix4f(float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23,
float m30, float m31, float m32, float m33)
: m{{m00, m10, m20, m30},
{m01, m11, m21, m31},
{m02, m12, m22, m32},
{m03, m13, m23, m33}} {}
CMatrix4f(const CVector3f& scaleVec) {
m[0][0] = scaleVec[0];
m[1][1] = scaleVec[1];
m[2][2] = scaleVec[2];
m[3][3] = 1.0f;
}
CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3)
{
vec[0] = r0;
vec[1] = r1;
vec[2] = r2;
vec[3] = r3;
CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3) {
m[0] = r0;
m[1] = r1;
m[2] = r2;
m[3] = r3;
}
CMatrix4f(const CMatrix4f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = other.vec[3];
CMatrix4f(const CMatrix4f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = other.m[3];
}
#if __SSE__
CMatrix4f(const __m128& r0, const __m128& r1, const __m128& r2, const __m128& r3)
{
vec[0].mVec128 = r0;
vec[1].mVec128 = r1;
vec[2].mVec128 = r2;
vec[3].mVec128 = r3;
CMatrix4f(const simd<float>& r0, const simd<float>& r1, const simd<float>& r2, const simd<float>& r3) {
m[0].mSimd = r0;
m[1].mSimd = r1;
m[2].mSimd = r2;
m[3].mSimd = r3;
}
#endif
CMatrix4f(const CMatrix3f& other)
{
memset(m, 0, sizeof(m));
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = CVector4f(0, 0, 0, 1.0f);
CMatrix4f(const CMatrix3f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = CVector4f(0.f, 0.f, 0.f, 1.0f);
}
inline CMatrix4f& operator=(const CMatrix4f& other)
{
vec[0] = other.vec[0];
vec[1] = other.vec[1];
vec[2] = other.vec[2];
vec[3] = other.vec[3];
CMatrix4f& operator=(const CMatrix4f& other) {
m[0] = other.m[0];
m[1] = other.m[1];
m[2] = other.m[2];
m[3] = other.m[3];
return *this;
}
inline CVector4f operator*(const CVector4f& other) const
{
#if __SSE__
TVectorUnion res;
res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
_mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
_mm_add_ps(_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)),
_mm_mul_ps(vec[3].mVec128, ze_splat_ps(other.mVec128, 3))));
return CVector4f(res.mVec128);
#else
return CVector4f(m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2] + m[3][0] * other.v[3],
m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2] + m[3][1] * other.v[3],
m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2] + m[3][2] * other.v[3],
m[0][3] * other.v[0] + m[1][3] * other.v[1] + m[2][3] * other.v[2] + m[3][3] * other.v[3]);
#endif
CVector4f operator*(const CVector4f& other) const {
return m[0].mSimd * other.mSimd.shuffle<0, 0, 0, 0>() +
m[1].mSimd * other.mSimd.shuffle<1, 1, 1, 1>() +
m[2].mSimd * other.mSimd.shuffle<2, 2, 2, 2>() +
m[3].mSimd * other.mSimd.shuffle<3, 3, 3, 3>();
}
inline CVector4f& operator[](int i)
{
assert(0 <= i && i < 4);
return vec[i];
CVector4f& operator[](size_t i) {
assert(i < 4);
return m[i];
}
inline const CVector4f& operator[](int i) const
{
assert(0 <= i && i < 4);
return vec[i];
const CVector4f& operator[](size_t i) const {
assert(i < 4);
return m[i];
}
CMatrix4f transposed() const;
CMatrix4f transposedSSE3() const;
inline CVector3f multiplyOneOverW(const CVector3f& point) const
{
CVector3f multiplyOneOverW(const CVector3f& point) const {
CVector4f xfVec = *this * point;
return xfVec.toVec3f() / xfVec.w;
return xfVec.toVec3f() / xfVec.w();
}
inline CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const
{
CVector3f multiplyOneOverW(const CVector3f& point, float& wOut) const {
CVector4f xfVec = *this * point;
wOut = xfVec.w;
return xfVec.toVec3f() / xfVec.w;
wOut = xfVec.w();
return xfVec.toVec3f() / xfVec.w();
}
union {
float m[4][4];
struct
{
CVector4f vec[4];
};
};
CVector4f m[4];
};
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs)
{
CMatrix4f ret;
#if __SSE__
unsigned i;
for (i = 0; i < 4; ++i)
{
ret.vec[i].mVec128 = _mm_add_ps(
_mm_add_ps(_mm_add_ps(_mm_mul_ps(lhs.vec[0].mVec128,
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_mul_ps(lhs.vec[1].mVec128,
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(1, 1, 1, 1)))),
_mm_mul_ps(lhs.vec[2].mVec128,
_mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(2, 2, 2, 2)))),
_mm_mul_ps(lhs.vec[3].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(3, 3, 3, 3))));
}
#else
ret.m[0][0] = lhs.m[0][0] * rhs.m[0][0] + lhs.m[1][0] * rhs.m[0][1] + lhs.m[2][0] * rhs.m[0][2] + lhs.m[3][0] * rhs.m[0][3];
ret.m[1][0] = lhs.m[0][0] * rhs.m[1][0] + lhs.m[1][0] * rhs.m[1][1] + lhs.m[2][0] * rhs.m[1][2] + lhs.m[3][0] * rhs.m[1][3];
ret.m[2][0] = lhs.m[0][0] * rhs.m[2][0] + lhs.m[1][0] * rhs.m[2][1] + lhs.m[2][0] * rhs.m[2][2] + lhs.m[3][0] * rhs.m[2][3];
ret.m[3][0] = lhs.m[0][0] * rhs.m[3][0] + lhs.m[1][0] * rhs.m[3][1] + lhs.m[2][0] * rhs.m[3][2] + lhs.m[3][0] * rhs.m[3][3];
ret.m[0][1] = lhs.m[0][1] * rhs.m[0][0] + lhs.m[1][1] * rhs.m[0][1] + lhs.m[2][1] * rhs.m[0][2] + lhs.m[3][1] * rhs.m[0][3];
ret.m[1][1] = lhs.m[0][1] * rhs.m[1][0] + lhs.m[1][1] * rhs.m[1][1] + lhs.m[2][1] * rhs.m[1][2] + lhs.m[3][1] * rhs.m[1][3];
ret.m[2][1] = lhs.m[0][1] * rhs.m[2][0] + lhs.m[1][1] * rhs.m[2][1] + lhs.m[2][1] * rhs.m[2][2] + lhs.m[3][1] * rhs.m[2][3];
ret.m[3][1] = lhs.m[0][1] * rhs.m[3][0] + lhs.m[1][1] * rhs.m[3][1] + lhs.m[2][1] * rhs.m[3][2] + lhs.m[3][1] * rhs.m[3][3];
ret.m[0][2] = lhs.m[0][2] * rhs.m[0][0] + lhs.m[1][2] * rhs.m[0][1] + lhs.m[2][2] * rhs.m[0][2] + lhs.m[3][2] * rhs.m[0][3];
ret.m[1][2] = lhs.m[0][2] * rhs.m[1][0] + lhs.m[1][2] * rhs.m[1][1] + lhs.m[2][2] * rhs.m[1][2] + lhs.m[3][2] * rhs.m[1][3];
ret.m[2][2] = lhs.m[0][2] * rhs.m[2][0] + lhs.m[1][2] * rhs.m[2][1] + lhs.m[2][2] * rhs.m[2][2] + lhs.m[3][2] * rhs.m[2][3];
ret.m[3][2] = lhs.m[0][2] * rhs.m[3][0] + lhs.m[1][2] * rhs.m[3][1] + lhs.m[2][2] * rhs.m[3][2] + lhs.m[3][2] * rhs.m[3][3];
ret.m[0][3] = lhs.m[0][3] * rhs.m[0][0] + lhs.m[1][3] * rhs.m[0][1] + lhs.m[2][3] * rhs.m[0][2] + lhs.m[3][3] * rhs.m[0][3];
ret.m[1][3] = lhs.m[0][3] * rhs.m[1][0] + lhs.m[1][3] * rhs.m[1][1] + lhs.m[2][3] * rhs.m[1][2] + lhs.m[3][3] * rhs.m[1][3];
ret.m[2][3] = lhs.m[0][3] * rhs.m[2][0] + lhs.m[1][3] * rhs.m[2][1] + lhs.m[2][3] * rhs.m[2][2] + lhs.m[3][3] * rhs.m[2][3];
ret.m[3][3] = lhs.m[0][3] * rhs.m[3][0] + lhs.m[1][3] * rhs.m[3][1] + lhs.m[2][3] * rhs.m[2][2] + lhs.m[3][3] * rhs.m[3][3];
#endif
return ret;
static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs) {
simd<float> v[4];
for (int i = 0; i < 4; ++i)
v[i] = lhs.m[0].mSimd * rhs[i].mSimd.shuffle<0, 0, 0, 0>() +
lhs.m[1].mSimd * rhs[i].mSimd.shuffle<1, 1, 1, 1>() +
lhs.m[2].mSimd * rhs[i].mSimd.shuffle<2, 2, 2, 2>() +
lhs.m[3].mSimd * rhs[i].mSimd.shuffle<3, 3, 3, 3>();
return CMatrix4f(v[0], v[1], v[2], v[3]);
}
}

View File

@ -5,20 +5,17 @@
#include "zeus/CAABox.hpp"
#include "zeus/CMRay.hpp"
namespace zeus
{
class alignas(16) COBBox
{
namespace zeus {
class COBBox {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
#if ZE_ATHENA_TYPES
void readBig(athena::io::IStreamReader& in)
{
void readBig(athena::io::IStreamReader& in) {
transform.read34RowMajor(in);
extents.readBig(in);
}
static COBBox ReadBig(athena::io::IStreamReader& in)
{
static COBBox ReadBig(athena::io::IStreamReader& in) {
COBBox out;
out.readBig(in);
return out;
@ -29,7 +26,7 @@ public:
CTransform transform;
CVector3f extents;
COBBox() {}
COBBox() = default;
COBBox(const CAABox& aabb) : extents(aabb.extents()) { transform.origin = aabb.center(); }
@ -37,8 +34,7 @@ public:
CAABox calculateAABox(const CTransform& worldXf = CTransform()) const;
static COBBox FromAABox(const CAABox& box, const CTransform& xf)
{
static COBBox FromAABox(const CAABox& box, const CTransform& xf) {
const CVector3f extents = box.max - box.center();
const CTransform newXf = CTransform::Translate(box.center()) * xf;
return COBBox(newXf, extents);
@ -46,8 +42,7 @@ public:
bool OBBIntersectsBox(const COBBox& other) const;
bool AABoxIntersectsBox(const CAABox& other)
{
bool AABoxIntersectsBox(const CAABox& other) {
return OBBIntersectsBox(FromAABox(other, CTransform::Identity()));
}
};

View File

@ -4,72 +4,67 @@
#include "zeus/CVector3f.hpp"
#include "zeus/Math.hpp"
namespace zeus
{
class alignas(16) CPlane
{
namespace zeus {
class CPlane {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CPlane() : mSimd(1.0, 0.f, 0.f, 0.f) {}
inline CPlane() : a(1.f), b(0.f), c(0.f), d(0.f) {}
CPlane(float a, float b, float c, float d) : a(a), b(b), c(c), d(d) {}
CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c)
{
vec = (b - a).cross(c - a).normalized();
d = a.dot(vec);
CPlane(float a, float b, float c, float d) : mSimd(a, b, c, d) {}
CPlane(const CVector3f& a, const CVector3f& b, const CVector3f& c) {
mSimd = (b - a).cross(c - a).normalized().mSimd;
mSimd[3] = a.dot(normal());
}
CPlane(const CVector3f& point, float displacement)
{
#if __SSE__
mVec128 = point.mVec128;
#else
a = point[0];
b = point[1];
c = point[2];
#endif
d = displacement;
CPlane(const CVector3f& point, float displacement) {
mSimd = point.mSimd;
mSimd[3] = displacement;
}
float clipLineSegment(const CVector3f& a, const CVector3f& b)
{
float mag = (b-a).dot(vec);
float dis = (-(vec.y - d)) / mag;
float clipLineSegment(const CVector3f& a, const CVector3f& b) {
float mag = (b - a).dot(normal());
float dis = (-(y() - d())) / mag;
return clamp(0.0f, dis, 1.0f);
}
inline void normalize()
{
float nd = d;
float mag = vec.magnitude();
void normalize() {
float nd = d();
auto norm = normal();
float mag = norm.magnitude();
mag = 1.f / mag;
vec = vec * mag;
d = nd * mag;
mSimd = (norm * mag).mSimd;
mSimd[3] = nd * mag;
}
float pointToPlaneDist(const CVector3f& pos) const
{
return pos.dot(vec) - d;
float pointToPlaneDist(const CVector3f& pos) const {
return pos.dot(normal()) - d();
}
bool rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const;
const CVector3f& normal() const { return vec; }
CVector3f normal() const { return mSimd; }
inline float& operator[](size_t idx) { assert(idx < 4); return p[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return p[idx]; }
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
union {
struct
{
float a, b, c, d;
};
float p[4];
CVector3f vec;
#ifdef __SSE__
__m128 mVec128;
#endif
};
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float d() const { return mSimd[3]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference d() { return mSimd[3]; }
zeus::simd<float> mSimd;
};
}

View File

@ -6,56 +6,51 @@
#include <cstdio>
#include <cmath>
namespace zeus
{
enum class EProjType
{
namespace zeus {
enum class EProjType {
None = 0,
Orthographic = 1,
Perspective = 2
};
class SProjOrtho
{
class SProjOrtho {
public:
float top, bottom, left, right, znear, zfar;
explicit SProjOrtho(float p_top = 1.0f, float p_bottom = -1.0f, float p_left = -1.0f, float p_right = 1.0f,
float p_near = 1.0f, float p_far = -1.0f)
: top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far)
{
: top(p_top), bottom(p_bottom), left(p_left), right(p_right), znear(p_near), zfar(p_far) {
}
};
struct SProjPersp
{
struct SProjPersp {
float fov, aspect, znear, zfar;
SProjPersp(float p_fov = degToRad(55.0f), float p_aspect = 1.0f, float p_near = 0.1f, float p_far = 4096.f)
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far)
{
: fov(p_fov), aspect(p_aspect), znear(p_near), zfar(p_far) {
}
};
extern const SProjOrtho kOrthoIdentity;
class alignas(16) CProjection
{
class CProjection {
void _updateCachedMatrix();
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CProjection()
{
CProjection() {
m_projType = EProjType::Orthographic;
m_ortho = SProjOrtho();
m_mtx = CMatrix4f::skIdentityMatrix4f;
}
CProjection(const CProjection& other) { *this = other; }
CProjection(const SProjOrtho& ortho) { setOrtho(ortho); }
CProjection(const SProjPersp& persp) { setPersp(persp); }
inline CProjection& operator=(const CProjection& other)
{
if (this != &other)
{
CProjection& operator=(const CProjection& other) {
if (this != &other) {
m_projType = other.m_projType;
m_ortho = other.m_ortho;
m_mtx = other.m_mtx;
@ -63,40 +58,41 @@ public:
return *this;
}
inline void setOrtho(const SProjOrtho& ortho)
{
void setOrtho(const SProjOrtho& ortho) {
m_projType = EProjType::Orthographic;
m_ortho = ortho;
_updateCachedMatrix();
}
inline void setPersp(const SProjPersp& persp)
{
void setPersp(const SProjPersp& persp) {
m_projType = EProjType::Perspective;
m_persp = persp;
_updateCachedMatrix();
}
inline EProjType getType() const { return m_projType; }
inline const SProjOrtho& getOrtho() const
{
if (m_projType != EProjType::Orthographic)
{
EProjType getType() const { return m_projType; }
const SProjOrtho& getOrtho() const {
#ifndef NDEBUG
if (m_projType != EProjType::Orthographic) {
std::fprintf(stderr, "attempted to access orthographic structure of non-ortho projection");
std::abort();
}
#endif
return m_ortho;
}
inline const SProjPersp& getPersp() const
{
if (m_projType != EProjType::Perspective)
{
const SProjPersp& getPersp() const {
#ifndef NDEBUG
if (m_projType != EProjType::Perspective) {
std::fprintf(stderr, "attempted to access perspective structure of non-persp projection");
std::abort();
}
#endif
return m_persp;
}
inline const CMatrix4f& getCachedMatrix() const { return m_mtx; }
const CMatrix4f& getCachedMatrix() const { return m_mtx; }
protected:
/* Projection type */

View File

@ -8,15 +8,16 @@
#include "zeus/Math.hpp"
#include "zeus/CRelAngle.hpp"
#include "zeus/CTransform.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
namespace zeus {
static inline float normalize_angle(float angle)
{
static float normalize_angle(float angle) {
if (angle > M_PIF)
angle -= 2.f * M_PIF;
else if (angle < -M_PIF)
@ -28,91 +29,95 @@ static inline float normalize_angle(float angle)
class CNUQuaternion;
/** Unit quaternion, used for all quaternion arithmetic */
class alignas(16) CQuaternion
{
#if __atdna__ && ZE_ATHENA_TYPES
float clangVec __attribute__((__vector_size__(16)));
#endif
class CQuaternion {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {}
CQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CQuaternion(float xi, float yi, float zi) { fromVector3f(CVector3f(xi, yi, zi)); }
CQuaternion(float wi, const CVector3f& vec) : w(wi), x(vec.x), y(vec.y), z(vec.z) {}
#if ZE_ATHENA_TYPES
inline void readBig(athena::io::IStreamReader& input)
{
w = input.readFloatBig();
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
}
CQuaternion(const atVec4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec.vec[1];
y = vec.vec[2];
z = vec.vec[3];
w = vec.vec[0];
#endif
CQuaternion(float wi, const CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
mSimd[0] = wi;
}
operator atVec4f&()
{
return *reinterpret_cast<atVec4f*>(v);
template <typename T>
CQuaternion(const simd<T>& s) : mSimd(s) {}
#if ZE_ATHENA_TYPES
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
}
operator const atVec4f&() const
{
return *reinterpret_cast<const atVec4f*>(v);
CQuaternion(const atVec4f& vec) : mSimd(vec.simd) {}
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
}
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
}
#endif
CQuaternion(const CMatrix3f& mat);
CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
CQuaternion(const CVector4f& vec)
{
#if __SSE__
mVec128 = vec.mVec128;
#else
x = vec[1];
y = vec[2];
z = vec[3];
w = vec[0];
#endif
}
CQuaternion(const CVector3f& vecA, const CVector3f& vecB)
{
CQuaternion(const CVector3f& vec) { fromVector3f(vec); }
CQuaternion(const CVector4f& vec) : mSimd(vec.mSimd) {}
CQuaternion(const CVector3f& vecA, const CVector3f& vecB) {
CVector3f vecAN = vecA.normalized();
CVector3f vecBN = vecB.normalized();
CVector3f w = vecAN.cross(vecBN);
*this = CQuaternion(1.f + vecAN.dot(vecBN), w.x, w.y, w.z).normalized();
*this = CQuaternion(1.f + vecAN.dot(vecBN), w).normalized();
}
void fromVector3f(const CVector3f& vec);
CQuaternion& operator=(const CQuaternion& q);
CQuaternion operator+(const CQuaternion& q) const;
CQuaternion operator-(const CQuaternion& q) const;
CQuaternion operator*(const CQuaternion& q) const;
CQuaternion operator/(const CQuaternion& q) const;
CQuaternion operator*(float scale) const;
CQuaternion operator/(float scale) const;
CQuaternion operator-() const;
const CQuaternion& operator+=(const CQuaternion& q);
const CQuaternion& operator-=(const CQuaternion& q);
const CQuaternion& operator*=(const CQuaternion& q);
const CQuaternion& operator*=(float scale);
const CQuaternion& operator/=(float scale);
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return w * w + x * x + y * y + z * z; }
float magSquared() const { return mSimd.dot4(mSimd); }
void normalize() { *this /= magnitude(); }
CQuaternion normalized() const { return *this / magnitude(); }
void invert();
CQuaternion inverse() const;
/**
@ -121,27 +126,26 @@ public:
* @param angle The magnitude of the rotation in radians
* @return
*/
static inline CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{
static CQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
return CQuaternion(std::cos(angle / 2.f), axis * std::sin(angle / 2.f));
}
void rotateX(const CRelAngle& angle) { *this *= fromAxisAngle({1.0f, 0.0f, 0.0f}, angle); }
void rotateY(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 1.0f, 0.0f}, angle); }
void rotateZ(const CRelAngle& angle) { *this *= fromAxisAngle({0.0f, 0.0f, 1.0f}, angle); }
static inline CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v)
{
static CVector3f rotate(const CQuaternion& rotation, const CAxisAngle& v) {
CQuaternion q = rotation * v;
q *= rotation.inverse();
return {q.x, q.y, q.z};
return {q.mSimd.shuffle<1, 2, 3, 3>()};
}
static CQuaternion lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng);
CVector3f transform(const CVector3f& v) const
{
CVector3f transform(const CVector3f& v) const {
CQuaternion r(0.f, v);
return (*this * r * inverse()).getImaginary();
}
@ -150,57 +154,75 @@ public:
CQuaternion exp() const;
inline CTransform toTransform() const { return CTransform(CMatrix3f(*this)); }
inline CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
inline float dot(const CQuaternion& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
CTransform toTransform() const { return CTransform(CMatrix3f(*this)); }
CTransform toTransform(const zeus::CVector3f& origin) const { return CTransform(CMatrix3f(*this), origin); }
float dot(const CQuaternion& rhs) const {
return mSimd.dot4(rhs.mSimd);
}
static CQuaternion lerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion slerpShort(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion nlerp(const CQuaternion& a, const CQuaternion& b, double t);
static CQuaternion shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1);
static CQuaternion clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle);
inline float roll() const { return std::atan2(2.f * (x * y + w * z), w * w + x * x - y * y - z * z); }
float roll() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[1] * f[2] + f[0] * f[3]), f[0] * f[0] + f[1] * f[1] - f[2] * f[2] - f[3] * f[3]);
}
inline float pitch() const { return std::atan2(2.f * (y * z + w * x), w * w - x * x - y * y + z * z); }
float pitch() const {
simd_floats f(mSimd);
return std::atan2(2.f * (f[2] * f[3] + f[0] * f[1]), f[0] * f[0] - f[1] * f[1] - f[2] * f[2] + f[3] * f[3]);
}
inline float yaw() const { return std::asin(-2.f * (x * z - w * y)); }
float yaw() const {
simd_floats f(mSimd);
return std::asin(-2.f * (f[1] * f[3] - f[0] * f[2]));
}
CQuaternion buildEquivalent() const;
zeus::CVector3f getImaginary() const { return {x, y, z}; }
void setImaginary(const zeus::CVector3f& i) { x = i.x; y = i.y; z = i.z; }
zeus::CVector3f getImaginary() const { return mSimd.shuffle<1, 2, 3, 3>(); }
void setImaginary(const zeus::CVector3f& i) {
x() = i.x();
y() = i.y();
z() = i.z();
}
CRelAngle angleFrom(const zeus::CQuaternion& other);
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; }
simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
union
{
__m128 mVec128;
struct
{
float w, x, y, z;
};
float v[4];
};
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float w() const { return mSimd[0]; }
float x() const { return mSimd[1]; }
float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
static const CQuaternion skNoRotation;
@ -210,60 +232,79 @@ public:
/** Non-unit quaternion, no guarantee that it's normalized.
* Converting to CQuaternion will perform normalize operation.
*/
class alignas(16) CNUQuaternion
{
class CNUQuaternion {
public:
CNUQuaternion() : w(1.0f), x(0.0f), y(0.0f), z(0.0f) {}
CNUQuaternion(float wi, float xi, float yi, float zi) : w(wi), x(xi), y(yi), z(zi) {}
CNUQuaternion(float win, const zeus::CVector3f& vec) { w = win; x = vec.x; y = vec.y; z = vec.z; }
CNUQuaternion(const CQuaternion& other) { w = other.w; x = other.x; y = other.y; z = other.z; }
CNUQuaternion() : mSimd(1.f, 0.f, 0.f, 0.f) {}
CNUQuaternion(float wi, float xi, float yi, float zi) : mSimd(wi, xi, yi, zi) {}
CNUQuaternion(float win, const zeus::CVector3f& vec) : mSimd(vec.mSimd.shuffle<0, 0, 1, 2>()) {
w() = win;
}
CNUQuaternion(const CQuaternion& other) : mSimd(other.mSimd) {}
CNUQuaternion(const CMatrix3f& mtx) : CNUQuaternion(CQuaternion(mtx)) {}
static inline CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle)
{
CNUQuaternion(const simd<float>& s) : mSimd(s) {}
static CNUQuaternion fromAxisAngle(const CUnitVector3f& axis, const CRelAngle& angle) {
return CNUQuaternion(CQuaternion::fromAxisAngle(axis, angle));
}
float magnitude() const { return std::sqrt(magSquared()); }
float magSquared() const { return w * w + x * x + y * y + z * z; }
void normalize()
{
float magSquared() const { return mSimd.dot4(mSimd); }
void normalize() {
float magDiv = 1.f / magnitude();
w *= magDiv;
x *= magDiv;
y *= magDiv;
z *= magDiv;
mSimd *= magDiv;
}
CNUQuaternion normalized() const
{
CNUQuaternion normalized() const {
float magDiv = 1.f / magnitude();
return { w * magDiv, x * magDiv, y * magDiv, z * magDiv };
return mSimd * simd<float>(magDiv);
}
CNUQuaternion operator*(const CNUQuaternion& q) const;
CNUQuaternion operator*(float f) const;
const CNUQuaternion& operator+=(const CNUQuaternion& q);
inline float& operator[](size_t idx) { assert(idx < 4); return (&w)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&w)[idx]; }
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
union
{
__m128 mVec128;
struct
{
float w, x, y, z;
};
};
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float w() const { return mSimd[0]; }
float x() const { return mSimd[1]; }
float y() const { return mSimd[2]; }
float z() const { return mSimd[3]; }
simd<float>::reference w() { return mSimd[0]; }
simd<float>::reference x() { return mSimd[1]; }
simd<float>::reference y() { return mSimd[2]; }
simd<float>::reference z() { return mSimd[3]; }
simd<float> mSimd;
};
inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q)
{
inline CQuaternion CQuaternion::fromNUQuaternion(const CNUQuaternion& q) {
auto norm = q.normalized();
return { norm.w, norm.x, norm.y, norm.z };
return norm.mSimd;
}
CQuaternion operator+(float lhs, const CQuaternion& rhs);
CQuaternion operator-(float lhs, const CQuaternion& rhs);
CQuaternion operator*(float lhs, const CQuaternion& rhs);
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs);
}

View File

@ -1,28 +1,26 @@
#pragma once
#include "zeus/CVector2f.hpp"
namespace zeus
{
class CRectangle
{
namespace zeus {
class CRectangle {
public:
CRectangle() {}
CRectangle(float x, float y, float w, float h) : position(x, y), size(w, h) {}
inline bool contains(const CVector2f& point) const
{
if (point.x < position.x || point.x > position.x + size.x)
bool contains(const CVector2f& point) const {
if (point.x() < position.x() || point.x() > position.x() + size.x())
return false;
if (point.y < position.y || point.y > position.y + size.y)
if (point.y() < position.y() || point.y() > position.y() + size.y())
return false;
return true;
}
inline bool intersects(const CRectangle& rect) const
{
return !(position.x > rect.position.x + rect.size.x || rect.position.x > position.x + size.x ||
position.y > rect.position.y + rect.size.y || rect.position.y > position.y + size.y);
bool intersects(const CRectangle& rect) const {
return !(position.x() > rect.position.x() + rect.size.x() || rect.position.x() > position.x() + size.x() ||
position.y() > rect.position.y() + rect.size.y() || rect.position.y() > position.y() + size.y());
}
CVector2f position;

View File

@ -4,17 +4,14 @@
#include "zeus/Math.hpp"
#include <cmath>
namespace zeus
{
namespace zeus {
/**
* @brief The CRelAngle class represents relative angle in radians
*/
struct CRelAngle
{
struct CRelAngle {
float angle = 0.f;
static float MakeRelativeAngle(float angle)
{
static float MakeRelativeAngle(float angle) {
float absAngle = std::fabs(angle);
if (absAngle == 2.f * M_PIF)
return std::copysign(absAngle, angle);
@ -23,32 +20,76 @@ struct CRelAngle
}
CRelAngle() = default;
CRelAngle(float angle) : angle(MakeRelativeAngle(angle)) {}
CRelAngle& operator=(float ang) { angle = MakeRelativeAngle(ang); return *this; }
CRelAngle& operator=(const CRelAngle& ang) { angle = ang.angle; return *this; }
CRelAngle& operator=(float ang) {
angle = MakeRelativeAngle(ang);
return *this;
}
CRelAngle& operator=(const CRelAngle& ang) {
angle = ang.angle;
return *this;
}
float asDegrees() const { return radToDeg(angle); }
float asRadians() const { return angle; }
float arcCosine() const { return std::acos(angle); }
static CRelAngle FromDegrees(float angle)
{
static CRelAngle FromDegrees(float angle) {
CRelAngle ret;
ret.angle = MakeRelativeAngle(degToRad(angle));
return ret;
}
operator float() const { return angle; }
static CRelAngle FromRadians(float angle) { return CRelAngle(angle); }
bool operator <(const CRelAngle& other) const { return angle < other.angle; }
CRelAngle& operator +=(const CRelAngle& other) { angle = MakeRelativeAngle(angle + other.angle); return *this; }
CRelAngle& operator +=(float r) { angle = MakeRelativeAngle(angle + r); return *this; }
CRelAngle& operator -=(const CRelAngle& other) { angle = MakeRelativeAngle(angle - other.angle); return *this; }
CRelAngle& operator -=(float r) { angle = MakeRelativeAngle(angle - r); return *this; }
CRelAngle& operator *=(const CRelAngle& other) { angle = MakeRelativeAngle(angle * other.angle); return *this; }
CRelAngle& operator *=(float r) { angle = MakeRelativeAngle(angle * r); return *this; }
CRelAngle& operator /=(const CRelAngle& other) { angle = MakeRelativeAngle(angle / other.angle); return *this; }
CRelAngle& operator /=(float r) { angle = MakeRelativeAngle(angle / r); return *this; }
bool operator<(const CRelAngle& other) const { return angle < other.angle; }
CRelAngle& operator+=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle + other.angle);
return *this;
}
CRelAngle& operator+=(float r) {
angle = MakeRelativeAngle(angle + r);
return *this;
}
CRelAngle& operator-=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle - other.angle);
return *this;
}
CRelAngle& operator-=(float r) {
angle = MakeRelativeAngle(angle - r);
return *this;
}
CRelAngle& operator*=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle * other.angle);
return *this;
}
CRelAngle& operator*=(float r) {
angle = MakeRelativeAngle(angle * r);
return *this;
}
CRelAngle& operator/=(const CRelAngle& other) {
angle = MakeRelativeAngle(angle / other.angle);
return *this;
}
CRelAngle& operator/=(float r) {
angle = MakeRelativeAngle(angle / r);
return *this;
}
};
}

View File

@ -2,19 +2,14 @@
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(16) CSphere
{
namespace zeus {
class CSphere {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CSphere(const CVector3f& position, float radius) : position(position), radius(radius) {}
inline CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); }
CVector3f getSurfaceNormal(const CVector3f& coord) const { return (coord - position).normalized(); }
inline bool intersects(const CSphere& other)
{
bool intersects(const CSphere& other) {
float dist = (position - other.position).magnitude();
return dist < (radius + other.radius);
}

View File

@ -8,102 +8,107 @@
#include <cstdint>
#include <cstdio>
namespace zeus
{
class alignas(16) CTransform
{
namespace zeus {
class CTransform {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CTransform() : basis(false) {}
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero) : basis(basis), origin(offset) {}
#if ZE_ATHENA_TYPES
CTransform(const atVec4f* mtx) : basis(mtx[0], mtx[1], mtx[2]), origin(mtx[0].vec[3], mtx[1].vec[3], mtx[2].vec[3]) {}
void read34RowMajor(athena::io::IStreamReader& r)
{
CTransform(const CMatrix3f& basis, const CVector3f& offset = CVector3f::skZero)
: basis(basis), origin(offset) {}
#if ZE_ATHENA_TYPES
CTransform(const atVec4f* mtx)
: basis(mtx[0], mtx[1], mtx[2])
, origin(mtx[0].simd[3], mtx[1].simd[3], mtx[2].simd[3]) {}
void read34RowMajor(athena::io::IStreamReader& r) {
atVec4f r0 = r.readVec4fBig();
atVec4f r1 = r.readVec4fBig();
atVec4f r2 = r.readVec4fBig();
basis = CMatrix3f(r0, r1, r2);
basis.transpose();
origin = CVector3f(r0.vec[3], r1.vec[3], r2.vec[3]);
origin = CVector3f(r0.simd[3], r1.simd[3], r2.simd[3]);
}
#endif
/* Column constructor */
CTransform(const CVector3f& c0, const CVector3f& c1, const CVector3f& c2, const CVector3f& c3)
: basis(c0, c1, c2), origin(c3) {}
static inline CTransform Identity() { return CTransform(CMatrix3f::skIdentityMatrix3f); }
static CTransform Identity() {
return CTransform(CMatrix3f::skIdentityMatrix3f);
}
inline bool operator ==(const CTransform& other) const
{
bool operator==(const CTransform& other) const {
return origin == other.origin && basis == other.basis;
}
inline CTransform operator*(const CTransform& rhs) const
{
CTransform operator*(const CTransform& rhs) const {
return CTransform(basis * rhs.basis, origin + (basis * rhs.origin));
}
inline CTransform inverse() const
{
CTransform inverse() const {
CMatrix3f inv = basis.inverted();
return CTransform(inv, inv * -origin);
}
static inline CTransform Translate(const CVector3f& position) { return {CMatrix3f::skIdentityMatrix3f, position}; }
static CTransform Translate(const CVector3f& position) {
return {CMatrix3f::skIdentityMatrix3f, position};
}
static inline CTransform Translate(float x, float y, float z) { return Translate({x, y, z}); }
static CTransform Translate(float x, float y, float z) {
return Translate({x, y, z});
}
inline CTransform operator+(const CVector3f& other) { return CTransform(basis, origin + other); }
CTransform operator+(const CVector3f& other) {
return CTransform(basis, origin + other);
}
inline CTransform& operator+=(const CVector3f& other)
{
CTransform& operator+=(const CVector3f& other) {
origin += other;
return *this;
}
inline CTransform operator-(const CVector3f& other) { return CTransform(basis, origin - other); }
CTransform operator-(const CVector3f& other) {
return CTransform(basis, origin - other);
}
inline CTransform& operator-=(const CVector3f& other)
{
CTransform& operator-=(const CVector3f& other) {
origin -= other;
return *this;
}
inline zeus::CVector3f rotate(const CVector3f& vec) const { return basis * vec; }
static inline CTransform RotateX(float theta)
{
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{1.f, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, cosT, sinT, 0.f}},
TVectorUnion{{0.f, -sinT, cosT, 0.f}}));
zeus::CVector3f rotate(const CVector3f& vec) const {
return basis * vec;
}
static inline CTransform RotateY(float theta)
{
static CTransform RotateX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{cosT, 0.f, -sinT, 0.f}},
TVectorUnion{{0.f, 1.f, 0.f, 0.f}},
TVectorUnion{{sinT, 0.f, cosT, 0.f}}));
return CTransform(CMatrix3f(simd<float>{1.f, 0.f, 0.f, 0.f},
simd<float>{0.f, cosT, sinT, 0.f},
simd<float>{0.f, -sinT, cosT, 0.f}));
}
static inline CTransform RotateZ(float theta)
{
static CTransform RotateY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(TVectorUnion{{cosT, sinT, 0.f, 0.f}},
TVectorUnion{{-sinT, cosT, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, 1.f, 0.f}}));
return CTransform(CMatrix3f(simd<float>{cosT, 0.f, -sinT, 0.f},
simd<float>{0.f, 1.f, 0.f, 0.f},
simd<float>{sinT, 0.f, cosT, 0.f}));
}
inline void rotateLocalX(float theta)
{
static CTransform RotateZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
return CTransform(CMatrix3f(simd<float>{cosT, sinT, 0.f, 0.f},
simd<float>{-sinT, cosT, 0.f, 0.f},
simd<float>{0.f, 0.f, 1.f, 0.f}));
}
void rotateLocalX(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
@ -118,8 +123,7 @@ public:
basis[2] -= b1;
}
inline void rotateLocalY(float theta)
{
void rotateLocalY(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
@ -134,8 +138,7 @@ public:
basis[0] -= b2;
}
inline void rotateLocalZ(float theta)
{
void rotateLocalZ(float theta) {
float sinT = std::sin(theta);
float cosT = std::cos(theta);
@ -150,66 +153,67 @@ public:
basis[1] -= b0;
}
inline CVector3f transposeRotate(const CVector3f& in) const
{
CVector3f transposeRotate(const CVector3f& in) const {
return CVector3f(basis[0].dot(in), basis[1].dot(in), basis[2].dot(in));
}
inline void scaleBy(float factor)
{
void scaleBy(float factor) {
CTransform xfrm(CMatrix3f(CVector3f(factor, factor, factor)));
*this = *this * xfrm;
}
static inline CTransform Scale(const CVector3f& factor)
{
return CTransform(CMatrix3f(TVectorUnion{{factor.x, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, factor.y, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, factor.z, 0.f}}));
static CTransform Scale(const CVector3f& factor) {
return CTransform(CMatrix3f(simd<float>{factor.x(), 0.f, 0.f, 0.f},
simd<float>{0.f, factor.y(), 0.f, 0.f},
simd<float>{0.f, 0.f, factor.z(), 0.f}));
}
static inline CTransform Scale(float x, float y, float z)
{
return CTransform(
CMatrix3f(TVectorUnion{{x, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, y, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, z, 0.f}}));
static CTransform Scale(float x, float y, float z) {
return CTransform(CMatrix3f(simd<float>{x, 0.f, 0.f, 0.f},
simd<float>{0.f, y, 0.f, 0.f},
simd<float>{0.f, 0.f, z, 0.f}));
}
static inline CTransform Scale(float factor)
{
return CTransform(CMatrix3f(TVectorUnion{{factor, 0.f, 0.f, 0.f}},
TVectorUnion{{0.f, factor, 0.f, 0.f}},
TVectorUnion{{0.f, 0.f, factor, 0.f}}));
static CTransform Scale(float factor) {
return CTransform(CMatrix3f(simd<float>{factor, 0.f, 0.f, 0.f},
simd<float>{0.f, factor, 0.f, 0.f},
simd<float>{0.f, 0.f, factor, 0.f}));
}
inline CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const
{
CTransform multiplyIgnoreTranslation(const CTransform& xfrm) const {
CTransform ret;
ret.basis = basis * xfrm.basis;
return ret;
}
inline CTransform getRotation() const
{
CTransform getRotation() const {
CTransform ret = *this;
ret.origin.zeroOut();
return ret;
}
void setRotation(const CMatrix3f& mat) { basis = mat; }
void setRotation(const CTransform& xfrm) { setRotation(xfrm.basis); }
void setRotation(const CMatrix3f& mat) {
basis = mat;
}
void setRotation(const CTransform& xfrm) {
setRotation(xfrm.basis);
}
/**
* @brief buildMatrix3f Returns the stored matrix
* buildMatrix3f is here for compliance with Retro's Math API
* @return The Matrix (Neo, you are the one)
*/
inline const CMatrix3f& buildMatrix3f() const { return basis; }
const CMatrix3f& buildMatrix3f() const {
return basis;
}
inline CVector3f operator*(const CVector3f& other) const { return origin + basis * other; }
CVector3f operator*(const CVector3f& other) const {
return origin + basis * other;
}
inline CMatrix4f toMatrix4f() const
{
CMatrix4f toMatrix4f() const {
CMatrix4f ret(basis[0], basis[1], basis[2], origin);
ret[0][3] = 0.0f;
ret[1][3] = 0.0f;
@ -218,31 +222,26 @@ public:
return ret;
}
inline CVector3f upVector() const
{
return basis.vec[2];
CVector3f upVector() const {
return basis.m[2];
}
inline CVector3f frontVector() const
{
return basis.vec[1];
CVector3f frontVector() const {
return basis.m[1];
}
inline CVector3f rightVector() const
{
return basis.vec[0];
CVector3f rightVector() const {
return basis.m[0];
}
inline void orthonormalize()
{
void orthonormalize() {
basis[0].normalize();
basis[2] = basis[0].cross(basis[1]);
basis[2].normalize();
basis[1] = basis[2].cross(basis[0]);
}
void printMatrix() const
{
void printMatrix() const {
printf("%f %f %f %f\n"
"%f %f %f %f\n"
"%f %f %f %f\n"
@ -253,10 +252,9 @@ public:
0.f, 0.f, 0.f, 1.f);
}
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec)
{
static zeus::CTransform MakeRotationsBasedOnY(const CUnitVector3f& uVec) {
uint32_t i;
if (uVec.y < uVec.x || uVec.z < uVec.y || uVec.z < uVec.x)
if (uVec.y() < uVec.x() || uVec.z() < uVec.y() || uVec.z() < uVec.x())
i = 2;
else
i = 1;
@ -271,10 +269,16 @@ public:
CVector3f origin;
};
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) { return CTransform(CMatrix3f(scale)); }
static inline CTransform CTransformFromScaleVector(const CVector3f& scale) {
return CTransform(CMatrix3f(scale));
}
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec);
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin);
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle);
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up = CVector3f::skUp);
}

View File

@ -2,21 +2,17 @@
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(16) CUnitVector3f : public CVector3f
{
namespace zeus {
class CUnitVector3f : public CVector3f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
CUnitVector3f() : CVector3f(0.f, 1.f, 0.f) {}
CUnitVector3f() : CVector3f(0, 1, 0) {}
CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z)
{
CUnitVector3f(float x, float y, float z, bool doNormalize = true) : CVector3f(x, y, z) {
if (doNormalize && canBeNormalized())
normalize();
}
CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec)
{
CUnitVector3f(const CVector3f& vec, bool doNormalize = true) : CVector3f(vec) {
if (doNormalize && canBeNormalized())
normalize();
}

View File

@ -2,431 +2,260 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
#include "zeus/Math.hpp"
#include <cassert>
namespace zeus
{
class alignas(16) CVector2f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(8)));
#endif
namespace zeus {
class CVector2f {
public:
// ZE_DECLARE_ALIGNED_ALLOCATOR();
union {
struct
{
float x, y;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
simd<float> mSimd;
CVector2f() : mSimd(0.f) {}
template <typename T>
CVector2f(const simd<T>& s) : mSimd(s) {}
inline CVector2f() { zeroOut(); }
#if __SSE__
CVector2f(const __m128& mVec128) : mVec128(mVec128)
{
v[2] = 0.0f;
v[3] = 0.0f;
}
#endif
#if ZE_ATHENA_TYPES
CVector2f(const atVec2f& vec)
#if __SSE__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], v[2] = 0.0f, v[3] = 0.0f;
}
#endif
operator atVec2f&()
{
return *reinterpret_cast<atVec2f*>(v);
}
operator const atVec2f&() const
{
return *reinterpret_cast<const atVec2f*>(v);
CVector2f(const atVec2f& vec) : mSimd(vec.simd) {}
operator atVec2f&() {
return *reinterpret_cast<atVec2f*>(this);
}
void readBig(athena::io::IStreamReader& input)
{
x = input.readFloatBig();
y = input.readFloatBig();
v[2] = 0.0f;
v[3] = 0.0f;
operator const atVec2f&() const {
return *reinterpret_cast<const atVec2f*>(this);
}
static CVector2f ReadBig(athena::io::IStreamReader& input)
{
void readBig(athena::io::IStreamReader& input) {
mSimd[0] = input.readFloatBig();
mSimd[1] = input.readFloatBig();
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
static CVector2f ReadBig(athena::io::IStreamReader& input) {
CVector2f ret;
ret.readBig(input);
return ret;
}
#endif
explicit CVector2f(float xy) { splat(xy); }
void assign(float x, float y)
{
v[0] = x;
v[1] = y;
v[2] = 0.0f;
v[3] = 0.0f;
void assign(float x, float y) {
mSimd[0] = x;
mSimd[1] = y;
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
CVector2f(float x, float y) { assign(x, y); }
inline bool operator==(const CVector2f& rhs) const { return (x == rhs.x && y == rhs.y); }
inline bool operator!=(const CVector2f& rhs) const { return !(*this == rhs); }
inline bool operator<(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x < rhs.x || y < rhs.y);
#endif
}
inline bool operator<=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x <= rhs.x || y <= rhs.y);
#endif
}
inline bool operator>(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x > rhs.x || y > rhs.y);
#endif
}
inline bool operator>=(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0);
#else
return (x >= rhs.x || y >= rhs.y);
#endif
bool operator==(const CVector2f& rhs) const {
return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1];
}
inline CVector2f operator+(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x + rhs.x, y + rhs.y);
#endif
bool operator!=(const CVector2f& rhs) const {
return mSimd[0] != rhs.mSimd[0] || mSimd[1] != rhs.mSimd[1];
}
inline CVector2f operator-(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x - rhs.x, y - rhs.y);
#endif
bool operator<(const CVector2f& rhs) const {
return mSimd[0] < rhs.mSimd[0] && mSimd[1] < rhs.mSimd[1];
}
inline CVector2f operator-() const
{
#if __SSE__
return CVector2f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector2f(-x, -y);
#endif
bool operator<=(const CVector2f& rhs) const {
return mSimd[0] <= rhs.mSimd[0] && mSimd[1] <= rhs.mSimd[1];
}
inline CVector2f operator*(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x * rhs.x, y * rhs.y);
#endif
bool operator>(const CVector2f& rhs) const {
return mSimd[0] > rhs.mSimd[0] && mSimd[1] > rhs.mSimd[1];
}
inline CVector2f operator/(const CVector2f& rhs) const
{
#if __SSE__
return CVector2f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector2f(x / rhs.x, y / rhs.y);
#endif
bool operator>=(const CVector2f& rhs) const {
return mSimd[0] >= rhs.mSimd[0] && mSimd[1] >= rhs.mSimd[1];
}
inline CVector2f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector2f(x + val, y + val);
#endif
CVector2f operator+(const CVector2f& rhs) const {
return mSimd + rhs.mSimd;
}
inline CVector2f operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector2f(x - val, y - val);
#endif
CVector2f operator-(const CVector2f& rhs) const {
return mSimd - rhs.mSimd;
}
inline CVector2f operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * val, y * val);
#endif
CVector2f operator-() const {
return -mSimd;
}
inline CVector2f operator/(float val) const
{
CVector2f operator*(const CVector2f& rhs) const {
return mSimd * rhs.mSimd;
}
CVector2f operator/(const CVector2f& rhs) const {
return mSimd / rhs.mSimd;
}
CVector2f operator+(float val) const {
return mSimd + simd<float>(val);
}
CVector2f operator-(float val) const {
return mSimd - simd<float>(val);
}
CVector2f operator*(float val) const {
return mSimd * simd<float>(val);
}
CVector2f operator/(float val) const {
float ooval = 1.f / val;
#if __SSE__
TVectorUnion splat = {{ooval, ooval, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector2f(x * ooval, y * ooval);
#endif
return mSimd * simd<float>(ooval);
}
inline const CVector2f& operator+=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
#endif
const CVector2f& operator+=(const CVector2f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
inline const CVector2f& operator-=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
#endif
const CVector2f& operator-=(const CVector2f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
inline const CVector2f& operator*=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
#endif
const CVector2f& operator*=(const CVector2f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
inline const CVector2f& operator/=(const CVector2f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
#endif
const CVector2f& operator/=(const CVector2f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
inline const CVector2f& operator+=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_add_ps(mVec128, splat.mVec128);
#else
x += rhs;
y += rhs;
#endif
const CVector2f& operator+=(float rhs) {
mSimd += simd<float>(rhs);
return *this;
}
inline const CVector2f& operator-=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_sub_ps(mVec128, splat.mVec128);
#else
x -= rhs;
y -= rhs;
#endif
const CVector2f& operator-=(float rhs) {
mSimd -= simd<float>(rhs);
return *this;
}
inline const CVector2f& operator*=(float rhs)
{
#if __SSE__
TVectorUnion splat = {{rhs, rhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= rhs;
y *= rhs;
#endif
const CVector2f& operator*=(float rhs) {
mSimd *= simd<float>(rhs);
return *this;
}
inline const CVector2f& operator/=(float rhs)
{
const CVector2f& operator/=(float rhs) {
float oorhs = 1.f / rhs;
#if __SSE__
TVectorUnion splat = {{oorhs, oorhs, 0.f, 0.0f}};
mVec128 = _mm_mul_ps(mVec128, splat.mVec128);
#else
x *= oorhs;
y *= oorhs;
#endif
mSimd /= simd<float>(oorhs);
return *this;
}
inline void normalize()
{
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector2f(mag);
}
inline CVector2f normalized() const
{
CVector2f normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
inline CVector2f perpendicularVector() const { return {-y, x}; }
CVector2f perpendicularVector() const { return {-y(), x()}; }
inline float cross(const CVector2f& rhs) const { return (x * rhs.y) - (y * rhs.x); }
inline float dot(const CVector2f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1];
#endif
#else
return (x * rhs.x) + (y * rhs.y);
#endif
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x31);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1];
#endif
#else
return x * x + y * y;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
float cross(const CVector2f& rhs) const { return (x() * rhs.y()) - (y() * rhs.x()); }
inline void zeroOut()
{
float dot(const CVector2f& rhs) const {
return mSimd.dot2(rhs.mSimd);
}
float magSquared() const {
return mSimd.dot2(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
void zeroOut() {
*this = CVector2f::skZero;
}
inline void splat(float xy)
{
#if __SSE__
TVectorUnion splat = {{xy, xy, 0.0f, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xy;
v[1] = xy;
v[2] = 0.0f;
v[3] = 0.0f;
#endif
void splat(float xy) {
mSimd = zeus::simd<float>(xy);
}
static float getAngleDiff(const CVector2f& a, const CVector2f& b);
static inline CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) { return (a + (b - a) * t); }
static inline CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) { return lerp(a, b, t).normalized(); }
static CVector2f lerp(const CVector2f& a, const CVector2f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
static CVector2f nlerp(const CVector2f& a, const CVector2f& b, float t) {
return lerp(a, b, t).normalized();
}
static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y))
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON;
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }
bool isZero() const {
return magSquared() <= FLT_EPSILON;
}
inline bool isEqu(const CVector2f& other, float epsilon = 1.1920929e-7f)
{
bool isEqu(const CVector2f& other, float epsilon = FLT_EPSILON) {
const CVector2f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon);
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon);
}
inline float& operator[](size_t idx) { assert(idx < 2); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 2); return (&x)[idx]; }
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 2);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 2);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
static const CVector2f skOne;
static const CVector2f skNegOne;
static const CVector2f skZero;
};
static inline CVector2f operator+(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs + rhs.x, lhs + rhs.y);
#endif
static inline CVector2f operator+(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
}
static inline CVector2f operator-(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs - rhs.x, lhs - rhs.y);
#endif
static inline CVector2f operator-(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
}
static inline CVector2f operator*(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs * rhs.x, lhs * rhs.y);
#endif
static inline CVector2f operator*(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
}
static inline CVector2f operator/(float lhs, const CVector2f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, 0.0f, 0.0f}};
return CVector2f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector2f(lhs / rhs.x, lhs / rhs.y);
#endif
static inline CVector2f operator/(float lhs, const CVector2f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
}
}

View File

@ -5,54 +5,55 @@
#include "CVector2f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#endif
namespace zeus
{
namespace zeus {
class CVector2i
{
class CVector2i {
public:
union {
struct
{
struct {
int x, y;
};
int v[2];
};
CVector2i() = default;
CVector2i(int xin, int yin) : x(xin), y(yin) {}
CVector2i(const CVector2f& vec) : x(int(vec.x)), y(int(vec.y)) {}
CVector2i(const CVector2f& vec) : x(int(vec.x())), y(int(vec.y())) {}
CVector2f toVec2f() const { return CVector2f(x, y); }
inline CVector2i operator+(const CVector2i& val) const
{
CVector2i operator+(const CVector2i& val) const {
return CVector2i(x + val.x, y + val.y);
}
inline CVector2i operator-(const CVector2i& val) const
{
CVector2i operator-(const CVector2i& val) const {
return CVector2i(x - val.x, y - val.y);
}
inline CVector2i operator*(const CVector2i& val) const
{
CVector2i operator*(const CVector2i& val) const {
return CVector2i(x * val.x, y * val.y);
}
inline CVector2i operator/(const CVector2i& val) const
{
CVector2i operator/(const CVector2i& val) const {
return CVector2i(x / val.x, y / val.y);
}
inline bool operator==(const CVector2i& other) const
{
bool operator==(const CVector2i& other) const {
return x == other.x && y == other.y;
}
inline bool operator!=(const CVector2i& other) const
{
bool operator!=(const CVector2i& other) const {
return x != other.x || y != other.y;
}
inline CVector2i operator*(int val) const
{
CVector2i operator*(int val) const {
return CVector2i(x * val, y * val);
}
};

View File

@ -1,288 +1,118 @@
#pragma once
#include <athena/Types.hpp>
#include "athena/Types.hpp"
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp"
namespace zeus
{
class alignas(32) CVector3d
{
namespace zeus {
class CVector3d {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR32();
CVector3d() { zeroOut(); }
zeus::simd<double> mSimd;
CVector3d() : mSimd(0.0) {}
template <typename T>
CVector3d(const simd<T>& s) : mSimd(s) {}
#if __AVX__
CVector3d(const __m256d& mVec256)
{
this->mVec256 = mVec256;
v[3] = 0.0;
}
#elif __SSE__
CVector3d(const __m128d mVec128[2])
{
this->mVec128[0] = mVec128[0];
this->mVec128[1] = mVec128[1];
v[3] = 0.0;
}
#endif
#if ZE_ATHENA_TYPES
CVector3d(const atVec3d& vec)
{
#if __AVX__
mVec256 = vec.mVec256;
#elif __SSE__
mVec128[0] = vec.mVec128[0];
mVec128[1] = vec.mVec128[1];
#else
x = v[0], y = v[1], z = v[2], v[3] = 0.0f;
#endif
}
CVector3d(const atVec3d& vec) : mSimd(vec.simd) {}
#endif
explicit CVector3d(double xyz) { splat(xyz); }
explicit CVector3d(double xyz) : mSimd(xyz) {}
CVector3d(const CVector3f& vec)
{
#if __AVX__
mVec256 = _mm256_cvtps_pd(vec.mVec128);
#elif __SSE__
mVec128[0] = _mm_cvtps_pd(vec.mVec128);
v[2] = vec[2];
#else
v[0] = vec[0];
v[1] = vec[1];
v[2] = vec[2];
v[3] = 0.0;
#endif
CVector3d(const CVector3f& vec) : mSimd(vec.mSimd) {}
CVector3d(double x, double y, double z) : mSimd(x, y, z) {}
CVector3f asCVector3f() {
return mSimd;
}
CVector3d(double x, double y, double z)
{
#if __AVX__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat{{x, y, z, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0;
#endif
double magSquared() const {
return mSimd.dot3(mSimd);
}
CVector3f asCVector3f()
{
#if __AVX__
return CVector3f(_mm256_cvtpd_ps(mVec256));
#else
return CVector3f(float(x), float(y), float(z));
#endif
double magnitude() const {
return sqrt(magSquared());
}
double magSquared() const
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], mVec128[0], 0x31);
return result.v[0] + (v[2] * v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
CVector3d cross(const CVector3d& rhs) const {
return {y() * rhs.z() - z() * rhs.y(),
z() * rhs.x() - x() * rhs.z(),
x() * rhs.y() - y() * rhs.x()};
}
double magnitude() const { return sqrt(magSquared()); }
inline CVector3d cross(const CVector3d& rhs) const
{
return {y * rhs.z - z * rhs.y,
z * rhs.x - x * rhs.z,
x * rhs.y - y * rhs.x};
double dot(const CVector3d& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
double dot(const CVector3d& rhs) const
{
#if __SSE__
TDblVectorUnion result;
#if __SSE4_1__
result.mVec128[0] = _mm_dp_pd(mVec128[0], rhs.mVec128[0], 0x31);
return result.v[0] + (v[2] * rhs.v[2]);
#else
result.mVec128[0] = _mm_mul_pd(mVec128[0], rhs.mVec128[0]);
result.mVec128[1] = _mm_mul_pd(mVec128[1], rhs.mVec128[1]);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
}
CVector3d asNormalized()
{
CVector3d asNormalized() {
double mag = magnitude();
mag = 1.0 / mag;
return {x * mag, y * mag, z * mag};
return mSimd * zeus::simd<double>(mag);
}
void splat(double xyz)
{
#if __AVX__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec256 = splat.mVec256;
#elif __SSE__
TDblVectorUnion splat = {{xyz, xyz, xyz, 0.0}};
mVec128[0] = splat.mVec128[0];
mVec128[1] = splat.mVec128[1];
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0;
#endif
void splat(double xyz) {
mSimd = zeus::simd<double>(xyz);
}
void zeroOut()
{
void zeroOut() {
*this = skZero;
}
inline CVector3d operator+(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_add_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_add_pd(mVec128[0], rhs.mVec128[0]),
_mm_add_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
}
inline CVector3d operator-(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_sub_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_sub_pd(mVec128[0], rhs.mVec128[0]),
_mm_sub_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
}
inline CVector3d operator*(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_mul_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_mul_pd(mVec128[0], rhs.mVec128[0]),
_mm_mul_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
}
inline CVector3d operator/(const CVector3d& rhs) const
{
#if __AVX__
return _mm256_div_pd(mVec256, rhs.mVec256);
#elif __SSE__
const __m128d tmpVec128[2] = {_mm_div_pd(mVec128[0], rhs.mVec128[0]),
_mm_div_pd(mVec128[1], rhs.mVec128[1])};
return CVector3d(tmpVec128);
#else
return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
CVector3d operator+(const CVector3d& rhs) const {
return mSimd + rhs.mSimd;
}
inline double& operator[](size_t idx) { assert(idx < 3); return v[idx]; }
inline const double& operator[](size_t idx) const { assert(idx < 3); return v[idx]; }
CVector3d operator-(const CVector3d& rhs) const {
return mSimd - rhs.mSimd;
}
union {
struct
{
double x, y, z;
};
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
};
CVector3d operator*(const CVector3d& rhs) const {
return mSimd * rhs.mSimd;
}
CVector3d operator/(const CVector3d& rhs) const {
return mSimd / rhs.mSimd;
}
zeus::simd<double>::reference operator[](size_t idx) {
assert(idx < 3);
return mSimd[idx];
}
double operator[](size_t idx) const {
assert(idx < 3);
return mSimd[idx];
}
double x() const { return mSimd[0]; }
double y() const { return mSimd[1]; }
double z() const { return mSimd[2]; }
simd<double>::reference x() { return mSimd[0]; }
simd<double>::reference y() { return mSimd[1]; }
simd<double>::reference z() { return mSimd[2]; }
static const CVector3d skZero;
};
static inline CVector3d operator+(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_add_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_add_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_add_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs + rhs.x, lhs + rhs.y, lhs + rhs.z};
#endif
static inline CVector3d operator+(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) + rhs.mSimd;
}
static inline CVector3d operator-(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_sub_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_sub_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_sub_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs - rhs.x, lhs - rhs.y, lhs - rhs.z};
#endif
static inline CVector3d operator-(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) - rhs.mSimd;
}
static inline CVector3d operator*(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_mul_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_mul_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_mul_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs * rhs.x, lhs * rhs.y, lhs * rhs.z};
#endif
static inline CVector3d operator*(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) * rhs.mSimd;
}
static inline CVector3d operator/(double lhs, const CVector3d& rhs) {
return zeus::simd<double>(lhs) / rhs.mSimd;
}
static inline CVector3d operator/(double lhs, const CVector3d& rhs)
{
#if __AVX__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
return _mm256_div_pd(splat.mVec256, rhs.mVec256);
#elif __SSE__
TDblVectorUnion splat{{lhs, lhs, lhs, 0}};
splat.mVec128[0] = _mm_div_pd(splat.mVec128[0], rhs.mVec128[0]);
splat.mVec128[1] = _mm_div_pd(splat.mVec128[1], rhs.mVec128[1]);
return {splat.mVec128};
#else
return {lhs.x / rhs.x, lhs.y / rhs.y, lhs.z / rhs.z};
#endif
}
}

View File

@ -3,362 +3,210 @@
#include "Global.hpp"
#include "zeus/Math.hpp"
#include "zeus/CVector2f.hpp"
#include "TVectorUnion.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#include "athena/IStreamReader.hpp"
#endif
namespace zeus
{
namespace zeus {
class CVector3d;
class alignas(16) CVector3f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(12)));
#endif
class CVector3f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
zeus::simd<float> mSimd;
CVector3f() : mSimd(0.f) {}
union {
struct
{
float x, y, z;
};
float v[4];
#if __SSE__
__m128 mVec128;
#elif __GEKKO_PS__
ps128_t mVec128;
#endif
};
inline CVector3f() { zeroOut(); }
#if __SSE__ || __GEKKO_PS__
CVector3f(const __m128& mVec128) : mVec128(mVec128) { v[3] = 0.0f; }
#endif
template <typename T>
CVector3f(const simd<T>& s) : mSimd(s) {}
#if ZE_ATHENA_TYPES
CVector3f(const atVec3f& vec)
#if __SSE__ || __GEKKO_PS__
: mVec128(vec.mVec128)
{
}
#else
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], v[3] = 0.0f;
}
#endif
operator atVec3f&()
{
return *reinterpret_cast<atVec3f*>(v);
}
operator const atVec3f&() const
{
return *reinterpret_cast<const atVec3f*>(v);
CVector3f(const atVec3f& vec) : mSimd(vec.simd) {}
operator atVec3f&() {
return *reinterpret_cast<atVec3f*>(this);
}
void readBig(athena::io::IStreamReader& input)
{
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
v[3] = 0.0f;
operator const atVec3f&() const {
return *reinterpret_cast<const atVec3f*>(this);
}
static CVector3f ReadBig(athena::io::IStreamReader& input)
{
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = 0.0f;
mSimd.copy_from(f);
}
static CVector3f ReadBig(athena::io::IStreamReader& input) {
CVector3f ret;
ret.readBig(input);
return ret;
}
#endif
CVector3f(const CVector3d& vec);
explicit CVector3f(float xyz) { splat(xyz); }
void assign(float x, float y, float z)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = 0.0f;
}
CVector3f(float x, float y, float z) { assign(x, y, z); }
explicit CVector3f(float xyz) : mSimd(xyz) {}
CVector3f(const float* floats)
{
#if __SSE__
mVec128 = _mm_loadu_ps(floats);
#else
x = floats[0];
y = floats[1];
z = floats[2];
#endif
v[3] = 0.0f;
void assign(float x, float y, float z) {
mSimd = zeus::simd<float>(x, y, z);
}
CVector3f(const CVector2f& other)
{
x = other.x;
y = other.y;
z = 0.0f;
v[3] = 0.0f;
CVector3f(float x, float y, float z) : mSimd(x, y, z) {}
CVector3f(const float* floats) : mSimd(floats[0], floats[1], floats[2]) {}
CVector3f(const CVector2f& other) {
mSimd = other.mSimd;
mSimd[2] = 0.0f;
mSimd[3] = 0.0f;
}
inline CVector2f toVec2f() const
{
#if __SSE__
return CVector2f(mVec128);
#else
return CVector2f(x, y);
#endif
CVector2f toVec2f() const {
return CVector2f(mSimd);
}
inline bool operator==(const CVector3f& rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z); }
inline bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); }
inline CVector3f operator+(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_add_ps(mVec128, rhs.mVec128));
#elif __GEKKO_PS__
return CVector3f(__mm_gekko_add_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x + rhs.x, y + rhs.y, z + rhs.z);
#endif
bool operator==(const CVector3f& rhs) const {
return mSimd[0] == rhs.mSimd[0] && mSimd[1] == rhs.mSimd[1] && mSimd[2] == rhs.mSimd[2];
}
inline CVector3f operator-(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x - rhs.x, y - rhs.y, z - rhs.z);
#endif
bool operator!=(const CVector3f& rhs) const { return !(*this == rhs); }
CVector3f operator+(const CVector3f& rhs) const {
return mSimd + rhs.mSimd;
}
inline CVector3f operator-() const
{
#if __SSE__
return CVector3f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_neg_ps(mVec128));
#else
return CVector3f(-x, -y, -z);
#endif
CVector3f operator-(const CVector3f& rhs) const {
return mSimd - rhs.mSimd;
}
inline CVector3f operator*(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x * rhs.x, y * rhs.y, z * rhs.z);
#endif
CVector3f operator-() const {
return -mSimd;
}
inline CVector3f operator/(const CVector3f& rhs) const
{
#if __SSE__
return CVector3f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector3f(x / rhs.x, y / rhs.y, z / rhs.z);
#endif
CVector3f operator*(const CVector3f& rhs) const {
return mSimd * rhs.mSimd;
}
inline CVector3f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, 0.0f}};
return CVector3f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector3f(x + val, y + val, z + val);
#endif
CVector3f operator/(const CVector3f& rhs) const {
return mSimd / rhs.mSimd;
}
inline CVector3f operator-(float val) const
{
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_sub_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_sub_ps(mVec128, splat.mVec128));
#else
return CVector3f(x - val, y - val, z - val);
#endif
CVector3f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
}
inline CVector3f operator*(float val) const
{
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{val, val, val, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * val, y * val, z * val);
#endif
CVector3f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
}
inline CVector3f operator/(float val) const
{
CVector3f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
CVector3f operator/(float val) const {
float ooval = 1.f / val;
#if __SSE__ || __GEKKO_PS__
TVectorUnion splat = {{ooval, ooval, ooval, 0.0f}};
#endif
#if __SSE__
return CVector3f(_mm_mul_ps(mVec128, splat.mVec128));
#elif __GEKKO_PS__
return CVector3f(_mm_gekko_mul_ps(mVec128, splat.mVec128));
#else
return CVector3f(x * ooval, y * ooval, z * ooval);
#endif
return mSimd * zeus::simd<float>(ooval);
}
inline const CVector3f& operator+=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#elif __GEKKO_PS__
mVec128 = _mm_gekko_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
#endif
return *this;
}
inline const CVector3f& operator-=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
#endif
return *this;
}
inline const CVector3f& operator*=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
#endif
return *this;
}
inline const CVector3f& operator/=(const CVector3f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
#endif
const CVector3f& operator+=(const CVector3f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
inline void normalize()
{
const CVector3f& operator-=(const CVector3f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
const CVector3f& operator*=(const CVector3f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
const CVector3f& operator/=(const CVector3f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
void normalize() {
float mag = 1.f / magnitude();
*this *= CVector3f(mag);
}
inline CVector3f normalized() const
{
CVector3f normalized() const {
float mag = 1.f / magnitude();
return *this * mag;
}
inline CVector3f cross(const CVector3f& rhs) const
{
return CVector3f(y * rhs.z - z * rhs.y,
z * rhs.x - x * rhs.z,
x * rhs.y - y * rhs.x);
CVector3f cross(const CVector3f& rhs) const {
return CVector3f(y() * rhs.z() - z() * rhs.y(),
z() * rhs.x() - x() * rhs.z(),
x() * rhs.y() - y() * rhs.x());
}
inline float dot(const CVector3f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
#endif
float dot(const CVector3f& rhs) const {
return mSimd.dot3(rhs.mSimd);
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0x71);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z;
#endif
float magSquared() const {
return mSimd.dot3(mSimd);
}
inline float magnitude() const { return std::sqrt(magSquared()); }
inline bool isNotInf() const
{
return !(std::isinf(x) || std::isinf(y) || std::isinf(z));
float magnitude() const {
return std::sqrt(magSquared());
}
inline bool isMagnitudeSafe() const
{
bool isNotInf() const {
return !(std::isinf(x()) || std::isinf(y()) || std::isinf(z()));
}
bool isMagnitudeSafe() const {
return isNotInf() && magSquared() >= 9.9999994e-29;
}
inline void zeroOut()
{
void zeroOut() {
*this = CVector3f::skZero;
}
inline void splat(float xyz)
{
#if __SSE__
TVectorUnion splat = {{xyz, xyz, xyz, 0.0f}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = 0.0f;
#endif
void splat(float xyz) {
mSimd = zeus::simd<float>(xyz);
}
static float getAngleDiff(const CVector3f& a, const CVector3f& b);
static inline CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) { return (a + (b - a) * t); }
static inline CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) { return lerp(a, b, t).normalized(); }
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y) || std::isinf(z))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON;
static CVector3f lerp(const CVector3f& a, const CVector3f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
inline bool isZero() const { return magSquared() <= 1.1920929e-7f; }
static CVector3f nlerp(const CVector3f& a, const CVector3f& b, float t) {
return lerp(a, b, t).normalized();
}
inline void scaleToLength(float newLength)
{
static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()))
return false;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON || std::fabs(z()) >= FLT_EPSILON;
}
bool isZero() const {
return magSquared() <= FLT_EPSILON;
}
void scaleToLength(float newLength) {
float length = magSquared();
if (length < 1.1920929e-7f)
{
x = newLength, y = 0.f, z = 0.f;
if (length < FLT_EPSILON) {
mSimd[0] = newLength, mSimd[1] = 0.f, mSimd[2] = 0.f;
return;
}
@ -367,21 +215,34 @@ public:
*this *= CVector3f(scalar);
}
inline CVector3f scaledToLength(float newLength) const
{
CVector3f scaledToLength(float newLength) const {
CVector3f v = *this;
v.scaleToLength(newLength);
return v;
}
inline bool isEqu(const CVector3f& other, float epsilon = 1.1920929e-7f)
{
bool isEqu(const CVector3f& other, float epsilon = FLT_EPSILON) {
const CVector3f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon);
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon && diffVec.z() <= epsilon);
}
inline float& operator[](size_t idx) { assert(idx < 3); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 3); return (&x)[idx]; }
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 3);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 3);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
static const CVector3f skOne;
static const CVector3f skNegOne;
@ -396,47 +257,24 @@ public:
static const CVector3f skDegToRadVec;
static CVector3f radToDeg(const CVector3f& rad) { return rad * skRadToDegVec; }
static CVector3f degToRad(const CVector3f& deg) { return deg * skDegToRadVec; }
};
static inline CVector3f operator+(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs + rhs.x, lhs + rhs.y, lhs + rhs.z);
#endif
static inline CVector3f operator+(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
}
static inline CVector3f operator-(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs - rhs.x, lhs - rhs.y, lhs - rhs.z);
#endif
static inline CVector3f operator-(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
}
static inline CVector3f operator*(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
#endif
static inline CVector3f operator*(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
}
static inline CVector3f operator/(float lhs, const CVector3f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, 0.0f}};
return CVector3f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector3f(lhs / rhs.x, lhs / rhs.y, lhs / rhs.z);
#endif
static inline CVector3f operator/(float lhs, const CVector3f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
}
}

View File

@ -1,420 +1,261 @@
#pragma once
#include "Global.hpp"
#include "TVectorUnion.hpp"
#include "zeus/CVector3f.hpp"
#if ZE_ATHENA_TYPES
#include <athena/IStreamReader.hpp>
#include "athena/IStreamReader.hpp"
#endif
#include "zeus/Math.hpp"
#include <cfloat>
#include <cassert>
namespace zeus
{
namespace zeus {
class CColor;
class alignas(16) CVector4f
{
#if __atdna__
float clangVec __attribute__((__vector_size__(16)));
#endif
class CVector4f {
public:
ZE_DECLARE_ALIGNED_ALLOCATOR();
union {
struct
{
float x, y, z, w;
};
float v[4];
#if __SSE__
__m128 mVec128;
#endif
};
zeus::simd<float> mSimd;
CVector4f() : mSimd(0.f) {}
template <typename T>
CVector4f(const simd<T>& s) : mSimd(s) {}
inline CVector4f() { zeroOut(); }
#if __SSE__
CVector4f(const __m128& mVec128) : mVec128(mVec128) {}
#endif
#if ZE_ATHENA_TYPES
CVector4f(const atVec4f& vec)
#if __SSE__
: mVec128(vec.mVec128)
{
CVector4f(const atVec4f& vec) : mSimd(vec.simd) {}
operator atVec4f&() {
return *reinterpret_cast<atVec4f*>(this);
}
#else
{
x = vec.vec[0], y = vec.vec[1], z = vec.vec[2], w = vec.vec[3];
operator const atVec4f&() const {
return *reinterpret_cast<const atVec4f*>(this);
}
void readBig(athena::io::IStreamReader& input) {
simd_floats f;
f[0] = input.readFloatBig();
f[1] = input.readFloatBig();
f[2] = input.readFloatBig();
f[3] = input.readFloatBig();
mSimd.copy_from(f);
}
#endif
operator atVec4f&()
{
return *reinterpret_cast<atVec4f*>(v);
}
operator const atVec4f&() const
{
return *reinterpret_cast<const atVec4f*>(v);
explicit CVector4f(float xyzw) : mSimd(xyzw) {}
void assign(float x, float y, float z, float w) {
mSimd = simd<float>(x, y, z, w);
}
void readBig(athena::io::IStreamReader& input)
{
x = input.readFloatBig();
y = input.readFloatBig();
z = input.readFloatBig();
w = input.readFloatBig();
}
#endif
CVector4f(float x, float y, float z, float w) : mSimd(x, y, z, w) {}
explicit CVector4f(float xyzw) { splat(xyzw); }
void assign(float x, float y, float z, float w)
{
v[0] = x;
v[1] = y;
v[2] = z;
v[3] = w;
}
CVector4f(float x, float y, float z, float w) { assign(x, y, z, w); }
CVector4f(const CColor& other);
CVector4f(const CVector3f& other, float wIn = 1.f)
{
#if __SSE__
mVec128 = other.mVec128;
#else
x = other.x;
y = other.y;
z = other.z;
#endif
w = wIn;
CVector4f(const CVector3f& other, float wIn = 1.f) : mSimd(other.mSimd) {
mSimd[3] = wIn;
}
static CVector4f ToClip(const zeus::CVector3f& v, float w)
{
static CVector4f ToClip(const zeus::CVector3f& v, float w) {
return CVector4f(v * w, w);
}
inline CVector3f toVec3f() const
{
#if __SSE__
return CVector3f(mVec128);
#else
return CVector3f(x, y, z);
#endif
CVector3f toVec3f() const {
return CVector3f(mSimd);
}
CVector4f& operator=(const CColor& other);
inline bool operator==(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpeq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w);
#endif
bool operator==(const CVector4f& rhs) const {
auto eq_mask = mSimd == rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool operator!=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpneq_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0);
#else
return !(*this == rhs);
#endif
bool operator!=(const CVector4f& rhs) const {
auto eq_mask = mSimd != rhs.mSimd;
return eq_mask[0] || eq_mask[1] || eq_mask[2] || eq_mask[3];
}
inline bool operator<(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x < rhs.x || y < rhs.y || z < rhs.z || w < rhs.w);
#endif
bool operator<(const CVector4f& rhs) const {
auto eq_mask = mSimd < rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool operator<=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x <= rhs.x || y <= rhs.y || z <= rhs.z || w <= rhs.w);
#endif
bool operator<=(const CVector4f& rhs) const {
auto eq_mask = mSimd <= rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool operator>(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x > rhs.x || y > rhs.y || z > rhs.z || w > rhs.w);
#endif
bool operator>(const CVector4f& rhs) const {
auto eq_mask = mSimd > rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline bool operator>=(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion vec;
vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
#else
return (x >= rhs.x || y >= rhs.y || z >= rhs.z || w >= rhs.w);
#endif
bool operator>=(const CVector4f& rhs) const {
auto eq_mask = mSimd >= rhs.mSimd;
return eq_mask[0] && eq_mask[1] && eq_mask[2] && eq_mask[3];
}
inline CVector4f operator+(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_add_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
#endif
CVector4f operator+(const CVector4f& rhs) const {
return mSimd + rhs.mSimd;
}
inline CVector4f operator-(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_sub_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
#endif
CVector4f operator-(const CVector4f& rhs) const {
return mSimd - rhs.mSimd;
}
inline CVector4f operator-() const
{
#if __SSE__
return CVector4f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128));
#else
return CVector4f(-x, -y, -z, -w);
#endif
CVector4f operator-() const {
return -mSimd;
}
inline CVector4f operator*(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_mul_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
#endif
CVector4f operator*(const CVector4f& rhs) const {
return mSimd * rhs.mSimd;
}
inline CVector4f operator/(const CVector4f& rhs) const
{
#if __SSE__
return CVector4f(_mm_div_ps(mVec128, rhs.mVec128));
#else
return CVector4f(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
#endif
CVector4f operator/(const CVector4f& rhs) const {
return mSimd / rhs.mSimd;
}
inline CVector4f operator+(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_add_ps(mVec128, splat.mVec128));
#else
return CVector4f(x + val, y + val, z + val, w + val);
#endif
CVector4f operator+(float val) const {
return mSimd + zeus::simd<float>(val);
}
inline CVector4f operator-(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_sub_ps(mVec128, splat.mVec128));
#else
return CVector4f(x - val, y - val, z - val, w - val);
#endif
CVector4f operator-(float val) const {
return mSimd - zeus::simd<float>(val);
}
inline CVector4f operator*(float val) const
{
#if __SSE__
TVectorUnion splat = {{val, val, val, val}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * val, y * val, z * val, w * val);
#endif
CVector4f operator*(float val) const {
return mSimd * zeus::simd<float>(val);
}
inline CVector4f operator/(float val) const
{
CVector4f operator/(float val) const {
float ooval = 1.f / val;
#if __SSE__
TVectorUnion splat = {{ooval, ooval, ooval, ooval}};
return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
#else
return CVector4f(x * ooval, y * ooval, z * ooval, w * ooval);
#endif
return mSimd * zeus::simd<float>(ooval);
}
inline const CVector4f& operator+=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
#else
x += rhs.x;
y += rhs.y;
z += rhs.z;
w += rhs.w;
#endif
const CVector4f& operator+=(const CVector4f& rhs) {
mSimd += rhs.mSimd;
return *this;
}
inline const CVector4f& operator-=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
#else
x -= rhs.x;
y -= rhs.y;
z -= rhs.z;
w -= rhs.w;
#endif
const CVector4f& operator-=(const CVector4f& rhs) {
mSimd -= rhs.mSimd;
return *this;
}
inline const CVector4f& operator*=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
#else
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
w *= rhs.w;
#endif
const CVector4f& operator*=(const CVector4f& rhs) {
mSimd *= rhs.mSimd;
return *this;
}
inline const CVector4f& operator/=(const CVector4f& rhs)
{
#if __SSE__
mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
#else
x /= rhs.x;
y /= rhs.y;
z /= rhs.z;
w /= rhs.w;
#endif
const CVector4f& operator/=(const CVector4f& rhs) {
mSimd /= rhs.mSimd;
return *this;
}
inline void normalize()
{
void normalize() {
float mag = magnitude();
mag = 1.f / mag;
*this *= CVector4f(mag);
}
inline CVector4f normalized() const
{
CVector4f normalized() const {
float mag = magnitude();
mag = 1.f / mag;
return *this * mag;
}
inline float dot(const CVector4f& rhs) const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
return result.v[0] + result.v[1] + result.v[2] + result.v[3];
#endif
#else
return (x * rhs.x) + (y * rhs.y) + (z * rhs.z) + (w * rhs.w);
#endif
float dot(const CVector4f& rhs) const {
return mSimd.dot4(rhs.mSimd);
}
inline float magSquared() const
{
#if __SSE__
TVectorUnion result;
#if __SSE4_1__
result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
return result.v[0];
#else
result.mVec128 = _mm_mul_ps(mVec128, mVec128);
return result.v[0] + result.v[1] + result.v[2];
#endif
#else
return x * x + y * y + z * z + w * w;
#endif
}
inline float magnitude() const { return std::sqrt(magSquared()); }
inline void zeroOut()
{
float magSquared() const {
return mSimd.dot4(mSimd);
}
float magnitude() const {
return std::sqrt(magSquared());
}
void zeroOut() {
*this = CVector4f::skZero;
}
inline void splat(float xyzw)
{
#if __SSE__
TVectorUnion splat = {{xyzw, xyzw, xyzw, xyzw}};
mVec128 = splat.mVec128;
#else
v[0] = xyz;
v[1] = xyz;
v[2] = xyz;
v[3] = xyzw;
#endif
void splat(float xyzw) {
mSimd = zeus::simd<float>(xyzw);
}
static inline CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) { return (a + (b - a) * t); }
static inline CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) { return lerp(a, b, t).normalized(); }
static CVector4f lerp(const CVector4f& a, const CVector4f& b, float t) {
return zeus::simd<float>(1.f - t) * a.mSimd + b.mSimd * zeus::simd<float>(t);
}
inline bool isNormalized() const { return std::fabs(1.f - magSquared()) < 0.01f; }
static CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t) {
return lerp(a, b, t).normalized();
}
inline bool canBeNormalized() const
{
if (std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w))
bool isNormalized() const {
return std::fabs(1.f - magSquared()) < 0.01f;
}
bool canBeNormalized() const {
if (std::isinf(x()) || std::isinf(y()) || std::isinf(z()) || std::isinf(w()))
return false;
return std::fabs(x) >= FLT_EPSILON || std::fabs(y) >= FLT_EPSILON || std::fabs(z) >= FLT_EPSILON || std::fabs(w) >= FLT_EPSILON;
return std::fabs(x()) >= FLT_EPSILON || std::fabs(y()) >= FLT_EPSILON ||
std::fabs(z()) >= FLT_EPSILON || std::fabs(w()) >= FLT_EPSILON;
}
inline bool isEqu(const CVector4f& other, float epsilon = 1.1920929e-7f)
{
bool isEqu(const CVector4f& other, float epsilon = FLT_EPSILON) {
const CVector4f diffVec = other - *this;
return (diffVec.x <= epsilon && diffVec.y <= epsilon && diffVec.z <= epsilon && diffVec.w <= epsilon);
return (diffVec.x() <= epsilon && diffVec.y() <= epsilon &&
diffVec.z() <= epsilon && diffVec.w() <= epsilon);
}
inline float& operator[](size_t idx) { assert(idx < 4); return (&x)[idx]; }
inline const float& operator[](size_t idx) const { assert(idx < 4); return (&x)[idx]; }
zeus::simd<float>::reference operator[](size_t idx) {
assert(idx < 4);
return mSimd[idx];
}
float operator[](size_t idx) const {
assert(idx < 4);
return mSimd[idx];
}
float x() const { return mSimd[0]; }
float y() const { return mSimd[1]; }
float z() const { return mSimd[2]; }
float w() const { return mSimd[3]; }
simd<float>::reference x() { return mSimd[0]; }
simd<float>::reference y() { return mSimd[1]; }
simd<float>::reference z() { return mSimd[2]; }
simd<float>::reference w() { return mSimd[3]; }
static const CVector4f skOne;
static const CVector4f skNegOne;
static const CVector4f skZero;
};
static inline CVector4f operator+(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_add_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs + rhs.x, lhs + rhs.y, lhs + rhs.z, lhs + rhs.w);
#endif
static CVector4f operator+(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) + rhs.mSimd;
}
static inline CVector4f operator-(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_sub_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs - rhs.x, lhs - rhs.y, lhs - rhs.z, lhs - rhs.w);
#endif
static CVector4f operator-(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) - rhs.mSimd;
}
static inline CVector4f operator*(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_mul_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs * rhs.x, lhs * rhs.y, lhs * rhs.z, lhs * rhs.w);
#endif
static CVector4f operator*(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) * rhs.mSimd;
}
static CVector4f operator/(float lhs, const CVector4f& rhs) {
return zeus::simd<float>(lhs) / rhs.mSimd;
}
static inline CVector4f operator/(float lhs, const CVector4f& rhs)
{
#if __SSE__
TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
return CVector4f(_mm_div_ps(splat.mVec128, rhs.mVec128));
#else
return CVector4f(lhs / rhs.x, lhs / rhs.y, lhs / rhs.z, lhs / rhs.w);
#endif
}
}

View File

@ -1,61 +1,19 @@
#pragma once
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
#endif
#if __SSE__
#include <immintrin.h>
#ifndef _MSC_VER
#include <mm_malloc.h>
#endif
#define zeAlloc(sz, align) _mm_malloc(sz, align)
#define zeFree(ptr) _mm_free(ptr)
#elif GEKKO
#include <ps_intrins.h>
#define zeAlloc(sz, align) _ps_malloc(sz, align)
#define zeFree(ptr) _ps_free(ptr)
#endif
#if __SSE__ || __GEKKO_PS__
#define ZE_DECLARE_ALIGNED_ALLOCATOR() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 16); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() \
inline void* operator new(size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete(void* ptr) { zeFree(ptr); } \
inline void* operator new(size_t, void* ptr) { return ptr; } \
inline void operator delete(void*, void*) {} \
inline void* operator new[](size_t sizeInBytes) { return zeAlloc(sizeInBytes, 32); } \
inline void operator delete[](void* ptr) { zeFree(ptr); } \
inline void* operator new[](size_t, void* ptr) { return ptr; } \
inline void operator delete[](void*, void*) {} \
void __unused__()
#if ZE_ATHENA_TYPES
#include "athena/IStreamReader.hpp"
#include "athena/simd/simd.hpp"
#else
#define ZE_DECLARE_ALIGNED_ALLOCATOR() void __unused__()
#define ZE_DECLARE_ALIGNED_ALLOCATOR32() void __unused__()
#include "simd/simd.hpp"
#endif
#if __SSE__
#define ZE_SHUFFLE(x, y, z, w) ((w) << 6 | (z) << 4 | (y) << 2 | (x))
#define ze_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
#define ze_splat3_ps(_a, _i) ze_pshufd_ps((_a), ZE_SHUFFLE(_i, _i, _i, 3))
#define ze_splat_ps(_a, _i) ze_pshufd_ps((_a), ZE_SHUFFLE(_i, _i, _i, _i))
#if _WIN32
#define zeCastiTo128f(a) (_mm_castsi128_ps(a))
#else
#define zeCastiTo128f(a) ((__m128)(a))
#endif
#elif __GEKKO_PS__
namespace zeus {
#if ZE_ATHENA_TYPES
template<typename T> using simd = athena::simd<T>;
using simd_floats = athena::simd_floats;
using simd_doubles = athena::simd_doubles;
#endif
}
inline int rotr(int x, int n) { return ((x >> n) | (x << (32 - n))); }
inline int rotl(int x, int n) { return ((x << n) | (x >> (32 - n))); }

View File

@ -1,6 +1,7 @@
#pragma once
#include <cfloat>
#undef min
#undef max
@ -26,8 +27,7 @@
#include <cmath>
#include <algorithm>
namespace zeus
{
namespace zeus {
#if _MSC_VER
#if defined(_M_IX86)
@ -43,8 +43,7 @@ namespace zeus
#endif
#endif
struct CPUInfo
{
struct CPUInfo {
const char cpuBrand[48] = {0};
const char cpuVendor[32] = {0};
#if ZEUS_ARCH_X86_64 || ZEUS_ARCH_X86
@ -61,64 +60,83 @@ struct CPUInfo
const bool AVX2 = false;
#endif
};
/**
* Detects CPU capabilities and returns true if SSE4.1 or SSE4.2 is available
*/
void detectCPU();
const CPUInfo& cpuFeatures();
std::pair<bool, const CPUInfo&> validateCPU();
void getCpuInfo(int eax, int regs[4]);
void getCpuInfoEx(int eax, int ecx, int regs[4]);
class CVector3f;
class CVector2f;
class CTransform;
template <typename T>
inline constexpr T min(const T& a, const T& b)
{
template<typename T>
inline constexpr T min(const T& a, const T& b) {
return a < b ? a : b;
}
template <typename T>
inline constexpr T max(const T& a, const T& b)
{
template<typename T>
inline constexpr T max(const T& a, const T& b) {
return a > b ? a : b;
}
template <> CVector3f min(const CVector3f& a, const CVector3f& b);
template <> CVector3f max(const CVector3f& a, const CVector3f& b);
template <typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b)
{
template<>
CVector3f min(const CVector3f& a, const CVector3f& b);
template<>
CVector3f max(const CVector3f& a, const CVector3f& b);
template<typename T>
inline constexpr T clamp(const T& a, const T& val, const T& b) {
return max<T>(a, min<T>(b, val));
}
inline constexpr float radToDeg(float rad) { return rad * (180.f / M_PIF); }
inline constexpr float degToRad(float deg) { return deg * (M_PIF / 180.f); }
inline constexpr double radToDeg(double rad) { return rad * (180.0 / M_PI); }
inline constexpr double degToRad(double deg) { return deg * (M_PI / 180.0); }
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary);
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t);
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t);
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d,
float t);
// Since round(double) doesn't exist in some <cmath> implementations
// we'll define our own
inline double round(double val) { return (val < 0.0 ? std::ceil(val - 0.5) : std::ceil(val + 0.5)); }
inline double powD(float a, float b) { return std::exp(b * std::log(a)); }
inline double invSqrtD(double val) { return 1.0 / std::sqrt(val); }
inline float invSqrtF(float val) { return float(1.0 / std::sqrt(val)); }
int floorPowerOfTwo(int x);
int ceilingPowerOfTwo(int x);
template <typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x)
{
template<typename U>
typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, int>::type PopCount(U x) {
#if __GNUC__ >= 4
return __builtin_popcountll(x);
#else
@ -134,23 +152,21 @@ typename std::enable_if<!std::is_enum<U>::value && std::is_integral<U>::value, i
#endif
}
template <typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e)
{
template<typename E>
typename std::enable_if<std::is_enum<E>::value, int>::type PopCount(E e) {
return PopCount(static_cast<typename std::underlying_type<E>::type>(e));
}
bool close_enough(const CVector3f &a, const CVector3f &b, float epsilon = 0.000099999997f);
bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon = 0.000099999997f);
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon = 0.000099999997f);
inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752)
{
inline bool close_enough(float a, float b, double epsilon = 0.000009999999747378752) {
return std::fabs(a - b) < epsilon;
}
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752)
{
inline bool close_enough(double a, double b, double epsilon = 0.000009999999747378752) {
return std::fabs(a - b) < epsilon;
}
}

View File

@ -1,22 +0,0 @@
#pragma once
namespace zeus
{
typedef union {
float v[4];
#if __SSE__
__m128 mVec128;
#endif
} TVectorUnion;
typedef union {
double v[4];
#if __AVX__
__m256d mVec256;
#endif
#if __SSE__
__m128d mVec128[2];
#endif
} TDblVectorUnion;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
#pragma once
#define _ZEUS_SIMD_INCLUDED
namespace zeus::_simd { using namespace std; }
#include "parallelism_v2_simd.hpp"
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
#endif
#if __AVX__
#include "simd_avx.hpp"
#elif __SSE__
#include "simd_sse.hpp"
#else
namespace simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = fixed_size<4>; };
template<> struct zeus_native<double> { using type = fixed_size<4>; };
}
#endif
namespace zeus {
template<typename T> using simd = _simd::simd<T,
typename _simd::simd_abi::zeus_native<T>::type>;
template<typename T>
using simd_values = _simd::simd_data<simd<T>>;
using simd_floats = simd_values<float>;
using simd_doubles = simd_values<double>;
}

View File

@ -0,0 +1,188 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_avx.hpp must not be included directly. Include simd.hpp instead.
#endif
#include "simd_sse.hpp"
#include <immintrin.h>
namespace zeus::_simd {
// __m256d storage for AVX
template<>
class __simd_storage<double, m256d_abi> {
public:
using storage_type = __m256d;
storage_type __storage_;
double __get(size_t __index) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), __storage_);
return sse_data[__index];
}
void __set(size_t __index, double __val) noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), __storage_);
sse_data[__index] = __val;
__storage_ = _mm256_load_pd(sse_data.data());
}
void __set4(double a, double b, double c, double d) noexcept {
__storage_ = _mm256_set_pd(d, c, b, a);
}
void __broadcast(double __val) noexcept {
__storage_ = _mm256_set1_pd(__val);
}
double __dot2(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
return sse_data[0] + sse_data[1];
}
double __dot3(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2];
}
double __dot4(const __simd_storage<double, m256d_abi>& other) const noexcept {
alignas(32) std::array<double, 4> sse_data;
_mm256_store_pd(sse_data.data(), _mm256_mul_pd(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2] + sse_data[3];
}
void __copy_from(const simd_data<simd<double, m256d_abi>>& __buffer) noexcept {
__storage_ = _mm256_load_pd(__buffer.data());
}
void __copy_to(simd_data<simd<double, m256d_abi>>& __buffer) const noexcept {
_mm256_store_pd(__buffer.data(), __storage_);
}
__simd_storage() = default;
explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
__storage_ = _mm256_cvtps_pd(other.__storage_);
}
explicit __simd_storage(const storage_type& s) : __storage_(s) {}
const storage_type& __native() const { return __storage_; }
};
// __m256d mask storage for AVX
template<>
class __simd_mask_storage<double, m256d_abi> : public __simd_storage<double, m256d_abi> {
public:
bool __get(size_t __index) const noexcept {
alignas(32) uint64_t sse_data[4];
_mm256_store_pd(reinterpret_cast<double*>(sse_data), __storage_);
return sse_data[__index] != 0;
}
void __set(size_t __index, bool __val) noexcept {
alignas(32) uint64_t sse_data[4];
_mm256_store_pd(reinterpret_cast<double*>(sse_data), __storage_);
sse_data[__index] = __val ? UINT64_MAX : 0;
__storage_ = _mm256_load_pd(reinterpret_cast<double*>(sse_data));
}
};
template <>
inline simd<double, m256d_abi> simd<double, m256d_abi>::operator-() const {
return _mm256_xor_pd(__s_.__storage_, _mm256_set1_pd(-0.0));
}
inline simd<double, m256d_abi>
operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>
operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi> ret;
ret.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<double, m256d_abi>&
operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>&
operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
a.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<double, m256d_abi>::mask_type
operator==(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_EQ_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator!=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_NEQ_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GE_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator<=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LE_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator>(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GT_OQ);
return ret;
}
inline simd<double, m256d_abi>::mask_type
operator<(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
simd<double, m256d_abi>::mask_type ret;
ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LT_OQ);
return ret;
}
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m256d_abi>& other) {
__storage_ = _mm256_cvtpd_ps(other.__storage_);
}
namespace simd_abi {
template<> struct zeus_native<double> { using type = m256d_abi; };
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -0,0 +1,455 @@
#pragma once
#ifndef _ZEUS_SIMD_INCLUDED
#error simd_sse.hpp must not be included directly. Include simd.hpp instead.
#endif
#include <xmmintrin.h>
#if __SSE4_1__
#include <smmintrin.h>
#endif
namespace zeus::_simd {
// __m128 ABI
using m128_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 1), 4>;
// __m128d ABI
using m128d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 2), 4>;
#ifdef __AVX__
// __m256d ABI
using m256d_abi = __simd_abi<_StorageKind(int(_StorageKind::_VecExt) + 3), 4>;
#endif
template <>
class __simd_storage<double, m128d_abi>;
#ifdef __AVX__
template <>
class __simd_storage<double, m256d_abi>;
#endif
// __m128 storage for SSE2+
template <>
class __simd_storage<float, m128_abi> {
public:
using storage_type = __m128;
storage_type __storage_;
float __get(size_t __index) const noexcept {
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), __storage_);
return sse_data[__index];
}
void __set(size_t __index, float __val) noexcept {
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), __storage_);
sse_data[__index] = __val;
__storage_ = _mm_load_ps(sse_data.data());
}
void __set4(float a, float b, float c, float d) noexcept {
__storage_ = _mm_set_ps(d, c, b, a);
}
void __broadcast(float __val) noexcept {
__storage_ = _mm_set1_ps(__val);
}
float __dot2(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
_mm_store_ss(&ret, _mm_dp_ps(__storage_, other.__storage_, 0x3F));
return ret;
#else
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), _mm_mul_ps(__storage_, other.__storage_));
return sse_data[0] + sse_data[1];
#endif
}
float __dot3(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
_mm_store_ss(&ret, _mm_dp_ps(__storage_, other.__storage_, 0x7F));
return ret;
#else
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), _mm_mul_ps(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2];
#endif
}
float __dot4(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
float ret;
_mm_store_ss(&ret, _mm_dp_ps(__storage_, other.__storage_, 0xFF));
return ret;
#else
alignas(16) std::array<float, 4> sse_data;
_mm_store_ps(sse_data.data(), _mm_mul_ps(__storage_, other.__storage_));
return sse_data[0] + sse_data[1] + sse_data[2] + sse_data[3];
#endif
}
template<int x, int y, int z, int w>
__simd_storage __shuffle() const noexcept {
__simd_storage s;
s.__storage_ = _mm_shuffle_ps(__storage_, __storage_, _MM_SHUFFLE(w, z, y, x));
return s;
}
void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
__storage_ = _mm_load_ps(__buffer.data());
}
void __copy_to(simd_data<simd<float, m128_abi>>& __buffer) const noexcept {
_mm_store_ps(__buffer.data(), __storage_);
}
__simd_storage() = default;
explicit __simd_storage(const __simd_storage<double, m128d_abi>& other);
#ifdef __AVX__
explicit __simd_storage(const __simd_storage<double, m256d_abi>& other);
#endif
explicit __simd_storage(const storage_type& s) : __storage_(s) {}
const storage_type& __native() const { return __storage_; }
};
// __m128 mask storage for SSE2+
template <>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi>
{
public:
bool __get(size_t __index) const noexcept {
alignas(16) uint32_t sse_data[4];
_mm_store_ps(reinterpret_cast<float*>(sse_data), __storage_);
return sse_data[__index] != 0;
}
void __set(size_t __index, bool __val) noexcept {
alignas(16) uint32_t sse_data[4];
_mm_store_ps(reinterpret_cast<float*>(sse_data), __storage_);
sse_data[__index] = __val ? UINT32_MAX : 0;
__storage_ = _mm_load_ps(reinterpret_cast<float*>(sse_data));
}
};
template <>
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
return _mm_xor_ps(__s_.__storage_, _mm_set1_ps(-0.f));
}
inline simd<float, m128_abi>
operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>
operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi> ret;
ret.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>&
operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>&
operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
a.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
return a;
}
inline simd<float, m128_abi>::mask_type
operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpeq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpneq_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpge_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmple_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmpgt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
inline simd<float, m128_abi>::mask_type
operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
simd<float, m128_abi>::mask_type ret;
ret.__s_.__storage_ = _mm_cmplt_ps(a.__s_.__storage_, b.__s_.__storage_);
return ret;
}
// __m128d storage for SSE2+
template <>
class __simd_storage<double, m128d_abi> {
public:
using storage_type = std::array<__m128d, 2>;
storage_type __storage_;
double __get(size_t __index) const noexcept {
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), __storage_[__index / 2]);
return sse_data[__index % 2];
}
void __set(size_t __index, double __val) noexcept {
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), __storage_[__index / 2]);
sse_data[__index % 2] = __val;
__storage_[__index / 2] = _mm_load_pd(sse_data.data());
}
void __set4(double a, double b, double c, double d) noexcept {
__storage_[0] = _mm_set_pd(b, a);
__storage_[1] = _mm_set_pd(d, c);
}
void __broadcast(double __val) noexcept {
for (int i = 0; i < 2; ++i)
__storage_[i] = _mm_set1_pd(__val);
}
double __dot2(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
double ret;
_mm_store_sd(&ret, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
return ret;
#else
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
return sse_data[0] + sse_data[1];
#endif
}
double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
double ret;
_mm_store_sd(&ret, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
alignas(16) std::array<double, 2> sse_data2;
_mm_store_pd(sse_data2.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
return ret + sse_data2[0];
#else
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
alignas(16) std::array<double, 2> sse_data2;
_mm_store_pd(sse_data2.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
return sse_data[0] + sse_data[1] + sse_data2[0];
#endif
}
double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
double ret;
_mm_store_sd(&ret, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
double ret2;
_mm_store_sd(&ret2, _mm_dp_pd(__storage_[1], other.__storage_[1], 0x3F));
return ret + ret2;
#else
alignas(16) std::array<double, 2> sse_data;
_mm_store_pd(sse_data.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
alignas(16) std::array<double, 2> sse_data2;
_mm_store_pd(sse_data2.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
return sse_data[0] + sse_data[1] + sse_data2[0] + sse_data2[1];
#endif
}
void __copy_from(const simd_data<simd<double, m128d_abi>>& __buffer) noexcept {
__storage_[0] = _mm_load_pd(__buffer.data());
__storage_[1] = _mm_load_pd(__buffer.data() + 2);
}
void __copy_to(simd_data<simd<double, m128d_abi>>& __buffer) const noexcept {
_mm_store_pd(__buffer.data(), __storage_[0]);
_mm_store_pd(__buffer.data() + 2, __storage_[1]);
}
__simd_storage() = default;
explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
__storage_[0] = _mm_cvtps_pd(other.__storage_);
__storage_[1] = _mm_cvtps_pd(_mm_movehl_ps(other.__storage_, other.__storage_));
}
explicit __simd_storage(const storage_type& s) : __storage_(s) {}
const storage_type& __native() const { return __storage_; }
};
// __m128d mask storage for SSE2+
template <>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi>
{
public:
bool __get(size_t __index) const noexcept {
alignas(16) uint64_t sse_data[2];
_mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
return sse_data[__index] != 0;
}
void __set(size_t __index, bool __val) noexcept {
alignas(16) uint64_t sse_data[2];
_mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
sse_data[__index % 2] = __val ? UINT64_MAX : 0;
__storage_[__index / 2] = _mm_load_pd(reinterpret_cast<double*>(sse_data));
}
};
template <>
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_xor_pd(__s_.__storage_[i], _mm_set1_pd(-0.0));
return ret;
}
inline simd<double, m128d_abi>
operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>
operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi> ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>&
operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_add_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_sub_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_mul_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>&
operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
for (int i = 0; i < 2; ++i)
a.__s_.__storage_[i] = _mm_div_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return a;
}
inline simd<double, m128d_abi>::mask_type
operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpeq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpneq_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpge_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmple_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmpgt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline simd<double, m128d_abi>::mask_type
operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
simd<double, m128d_abi>::mask_type ret;
for (int i = 0; i < 2; ++i)
ret.__s_.__storage_[i] = _mm_cmplt_pd(a.__s_.__storage_[i], b.__s_.__storage_[i]);
return ret;
}
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m128d_abi>& other) {
__storage_ = _mm_movelh_ps(_mm_cvtpd_ps(other.__storage_[0]), _mm_cvtpd_ps(other.__storage_[1]));
}
namespace simd_abi {
template<typename T> struct zeus_native {};
template<> struct zeus_native<float> { using type = m128_abi; };
#ifndef __AVX__
template<> struct zeus_native<double> { using type = m128d_abi; };
#endif
} // namespace simd_abi
} // namespace zeus::_simd

View File

@ -3,7 +3,6 @@
namespace zeus
{
const CAABox CAABox::skInvertedBox = CAABox();
const CAABox CAABox::skNullBox = CAABox(CVector3f::skZero, CVector3f::skZero);
}

View File

@ -1,8 +1,7 @@
#include "zeus/CColor.hpp"
#include "zeus/CVector4f.hpp"
namespace zeus
{
namespace zeus {
const CColor CColor::skRed(Comp32(0xFF0000FFul));
const CColor CColor::skBlack(Comp32(0x000000FFul));
const CColor CColor::skBlue(Comp32(0x0000FFFFul));
@ -14,8 +13,7 @@ const CColor CColor::skYellow(Comp32(0xFFFF00FFul));
const CColor CColor::skWhite(Comp32(0xFFFFFFFFul));
const CColor CColor::skClear(Comp32(0x00000000ul));
float hueToRgb(float p, float q, float t)
{
float hueToRgb(float p, float q, float t) {
if (t < 0.0f)
t += 1.0f;
if (t > 1.0f)
@ -29,118 +27,92 @@ float hueToRgb(float p, float q, float t)
return p;
}
CColor::CColor(const CVector4f& other)
{
r = other.x;
g = other.y;
b = other.z;
a = other.w;
}
void CColor::fromHSV(float h, float s, float v, float _a) {
int i = int(h * 6.f);
float f = h * 6.f - i;
float p = v * (1.f - s);
float q = v * (1.f - f * s);
float t = v * (1.f - (1.f - f) * s);
simd_floats fo;
CColor& CColor::operator=(const CVector4f& other)
{
r = other.x;
g = other.y;
b = other.z;
a = other.w;
return *this;
}
void CColor::fromHSV(float h, float s, float v, float _a)
{
int i = int(h * 6);
float f = h * 6 - i;
float p = v * (1 - s);
float q = v * (1 - f * s);
float t = v * (1 - (1 - f) * s);
float _r, _g, _b;
switch (i % 6)
{
switch (i % 6) {
case 0:
_r = v, _g = t, _b = p;
fo[0] = v, fo[1] = t, fo[2] = p;
break;
case 1:
_r = q, _g = v, _b = p;
fo[0] = q, fo[1] = v, fo[2] = p;
break;
case 2:
_r = p, _g = v, _b = t;
fo[0] = p, fo[1] = v, fo[2] = t;
break;
case 3:
_r = p, _g = q, _b = v;
fo[0] = p, fo[1] = q, fo[2] = v;
break;
case 4:
_r = t, _g = p, _b = v;
fo[0] = t, fo[1] = p, fo[2] = v;
break;
case 5:
_r = v, _g = p, _b = q;
fo[0] = v, fo[1] = p, fo[2] = q;
break;
default:
break;
}
r = _r;
g = _g;
b = _b;
a = _a;
fo[3] = _a;
mSimd.copy_from(fo);
}
void CColor::toHSV(float& h, float& s, float& v) const
{
float min = std::min(r, std::min(g, b));
float max = std::max(r, std::max(g, b));
void CColor::toHSV(float& h, float& s, float& v) const {
float min = std::min(r(), std::min(g(), b()));
float max = std::max(r(), std::max(g(), b()));
v = max;
float delta = max - min;
s = max == 0 ? 0 : delta / max;
s = max == 0.f ? 0.f : delta / max;
if (max == min)
h = 0;
else
{
if (max == r)
h = (g - b) / delta + (g < b ? 6 : 0);
else if (max == g)
h = (b - r) / delta + 2;
else if (max == b)
h = (r - g) / delta + 4;
h /= 6;
h = 0.f;
else {
if (max == r())
h = (g() - b()) / delta + (g() < b() ? 6.f : 0.f);
else if (max == g())
h = (b() - r()) / delta + 2.f;
else if (max == b())
h = (r() - g()) / delta + 4.f;
h /= 6.f;
}
}
void CColor::fromHSL(float h, float s, float l, float _a)
{
if (s == 0.0f)
r = g = b = l;
else
{
void CColor::fromHSL(float h, float s, float l, float _a) {
if (s == 0.0f) {
mSimd = simd<float>(l);
} else {
const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
const float p = 2 * l - q;
r = hueToRgb(p, q, h + 1.f / 3);
g = hueToRgb(p, q, h);
b = hueToRgb(p, q, h - 1.f / 3);
const float p = 2.f * l - q;
r() = hueToRgb(p, q, h + 1.f / 3.f);
g() = hueToRgb(p, q, h);
b() = hueToRgb(p, q, h - 1.f / 3.f);
}
a = _a;
a() = _a;
}
void CColor::toHSL(float& h, float& s, float& l)
{
const float min = std::min(r, std::min(g, b));
const float max = std::max(r, std::max(g, b));
void CColor::toHSL(float& h, float& s, float& l) const {
const float min = std::min(r(), std::min(g(), b()));
const float max = std::max(r(), std::max(g(), b()));
const float d = max - min;
if (max == min)
h = s = 0;
else
{
h = s = 0.f;
else {
s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
if (max == r)
h = (g - b) / d + (g < b ? 6.f : 0.f);
else if (max == g)
h = (b - r) / d + 2.f;
else if (max == b)
h = (r - g) / d + 4.f;
if (max == r())
h = (g() - b()) / d + (g() < b() ? 6.f : 0.f);
else if (max == g())
h = (b() - r()) / d + 2.f;
else if (max == b())
h = (r() - g()) / d + 4.f;
h /= 6;
h /= 6.f;
}
}
}

View File

@ -10,29 +10,29 @@ CEulerAngles::CEulerAngles(const CQuaternion& quat)
float t0 = 0.f;
if (quatDot > 0.f)
t0 = 2.f / quatDot;
double t1 = 1.0 - (t0 * quat.x * quat.x + t0 * quat.z * quat.z);
double t2 = t0 * quat.y * quat.x - t0 * quat.z * quat.w;
double t1 = 1.0 - (t0 * quat.x() * quat.x() + t0 * quat.z() * quat.z());
double t2 = t0 * quat.y() * quat.x() - t0 * quat.z() * quat.w();
double t3 = t1 * t1 + t2 * t2;
double t4 = 0.0;
if (t3 > 0.0)
t4 = std::sqrt(t3);
double t5 = t0 * quat.z * quat.y + t0 * quat.x * quat.w;
double t5 = t0 * quat.z() * quat.y() + t0 * quat.x() * quat.w();
if (std::abs(t4) > 0.00001)
{
x = -std::atan2(-t5, t4);
y = -std::atan2(t0 * quat.z * quat.x - t0 * quat.y * quat.w,
1.0 - (t0 * quat.x * quat.x + t0 * quat.y * quat.y));
z = -std::atan2(t2, t1);
x() = -std::atan2(-t5, t4);
y() = -std::atan2(t0 * quat.z() * quat.x() - t0 * quat.y() * quat.w(),
1.0 - (t0 * quat.x() * quat.x() + t0 * quat.y() * quat.y()));
z() = -std::atan2(t2, t1);
}
else
{
x = -std::atan2(-t5, t4);
y = -std::atan2(-(t0 * quat.z * quat.x + t0 * quat.y * quat.w),
1.0 - (t0 * quat.y * quat.y + t0 * quat.z * quat.z));
z = 0.f;
x() = -std::atan2(-t5, t4);
y() = -std::atan2(-(t0 * quat.z() * quat.x() + t0 * quat.y() * quat.w()),
1.0 - (t0 * quat.y() * quat.y() + t0 * quat.z() * quat.z()));
z() = 0.f;
}
}
@ -58,15 +58,15 @@ CEulerAngles::CEulerAngles(const CTransform& xf)
if (std::fabs(f1) >= 0.00001)
{
x = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
x() = -std::atan2(-xf.basis[1][2], f1);
y() = -std::atan2(xf.basis[0][2], xf.basis[2][2]);
z() = -std::atan2(xf.basis[1][0], xf.basis[1][1]);
}
else
{
x = -std::atan2(-xf.basis[1][2], f1);
y = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z = 0.f;
x() = -std::atan2(-xf.basis[1][2], f1);
y() = -std::atan2(-xf.basis[2][0], xf.basis[0][0]);
z() = 0.f;
}
}

View File

@ -1,71 +1,28 @@
#include "zeus/CFrustum.hpp"
namespace zeus
{
namespace zeus {
void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection)
{
void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projection) {
CMatrix4f mvp = projection * viewMtx;
CMatrix4f mvp_rm = mvp.transposed();
#if __SSE__
/* Left */
planes[0].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128);
planes[0].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[0].mSimd;
/* Right */
planes[1].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[0].mVec128);
planes[1].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[0].mSimd;
/* Bottom */
planes[2].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128);
planes[2].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[1].mSimd;
/* Top */
planes[3].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[1].mVec128);
planes[3].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[1].mSimd;
/* Near */
planes[4].mVec128 = _mm_add_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128);
planes[4].mSimd = mvp_rm.m[3].mSimd + mvp_rm.m[2].mSimd;
/* Far */
planes[5].mVec128 = _mm_sub_ps(mvp_rm.vec[3].mVec128, mvp_rm.vec[2].mVec128);
#else
/* Left */
planes[0].a = mvp.m[0][0] + mvp.m[3][0];
planes[0].b = mvp.m[0][1] + mvp.m[3][1];
planes[0].c = mvp.m[0][2] + mvp.m[3][2];
planes[0].d = mvp.m[0][3] + mvp.m[3][3];
/* Right */
planes[1].a = -mvp.m[0][0] + mvp.m[3][0];
planes[1].b = -mvp.m[0][1] + mvp.m[3][1];
planes[1].c = -mvp.m[0][2] + mvp.m[3][2];
planes[1].d = -mvp.m[0][3] + mvp.m[3][3];
/* Bottom */
planes[2].a = mvp.m[1][0] + mvp.m[3][0];
planes[2].b = mvp.m[1][1] + mvp.m[3][1];
planes[2].c = mvp.m[1][2] + mvp.m[3][2];
planes[2].d = mvp.m[1][3] + mvp.m[3][3];
/* Top */
planes[3].a = -mvp.m[1][0] + mvp.m[3][0];
planes[3].b = -mvp.m[1][1] + mvp.m[3][1];
planes[3].c = -mvp.m[1][2] + mvp.m[3][2];
planes[3].d = -mvp.m[1][3] + mvp.m[3][3];
/* Near */
planes[4].a = mvp.m[2][0] + mvp.m[3][0];
planes[4].b = mvp.m[2][1] + mvp.m[3][1];
planes[4].c = mvp.m[2][2] + mvp.m[3][2];
planes[4].d = mvp.m[2][3] + mvp.m[3][3];
/* Far */
planes[5].a = -mvp.m[2][0] + mvp.m[3][0];
planes[5].b = -mvp.m[2][1] + mvp.m[3][1];
planes[5].c = -mvp.m[2][2] + mvp.m[3][2];
planes[5].d = -mvp.m[2][3] + mvp.m[3][3];
#endif
planes[5].mSimd = mvp_rm.m[3].mSimd - mvp_rm.m[2].mSimd;
planes[0].normalize();
planes[1].normalize();
@ -77,8 +34,7 @@ void CFrustum::updatePlanes(const CMatrix4f& viewMtx, const CMatrix4f& projectio
valid = true;
}
void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection)
{
void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& projection) {
zeus::CMatrix3f tmp(viewPointMtx.basis[0], viewPointMtx.basis[2], -viewPointMtx.basis[1]);
zeus::CTransform viewBasis = zeus::CTransform(tmp.transposed());
zeus::CTransform viewMtx = viewBasis * zeus::CTransform::Translate(-viewPointMtx.origin);
@ -86,50 +42,44 @@ void CFrustum::updatePlanes(const CTransform& viewPointMtx, const CProjection& p
updatePlanes(viewMtx.toMatrix4f(), projection.getCachedMatrix());
}
bool CFrustum::aabbFrustumTest(const CAABox& aabb) const
{
bool CFrustum::aabbFrustumTest(const CAABox& aabb) const {
if (!valid)
return true;
CVector3f center = aabb.center();
CVector3f extents = aabb.extents();
for (uint32_t i = 0; i < 6; ++i)
{
for (uint32_t i = 0; i < 6; ++i) {
const CPlane& plane = planes[i];
float m = plane.vec.dot(center) + plane.d;
float n = extents.dot({std::fabs(plane.a), std::fabs(plane.b), std::fabs(plane.c)});
float m = plane.normal().dot(center) + plane.d();
float n = extents.dot({std::fabs(plane.x()), std::fabs(plane.y()), std::fabs(plane.z())});
if (m + n < 0)
if (m + n < 0.f)
return false;
}
return true;
}
bool CFrustum::sphereFrustumTest(const CSphere& sphere) const
{
bool CFrustum::sphereFrustumTest(const CSphere& sphere) const {
if (!valid)
return true;
for (uint32_t i = 0 ; i<6 ; ++i)
{
float dadot = planes[i].vec.dot(sphere.position);
if ((dadot + planes[i].d + sphere.radius) < 0)
for (uint32_t i = 0; i < 6; ++i) {
float dadot = planes[i].normal().dot(sphere.position);
if ((dadot + planes[i].d() + sphere.radius) < 0.f)
return false;
}
return true;
}
bool CFrustum::pointFrustumTest(const CVector3f& point) const
{
bool CFrustum::pointFrustumTest(const CVector3f& point) const {
if (!valid)
return true;
for (uint32_t i = 0 ; i<6 ; ++i)
{
float dadot = planes[i].vec.dot(point);
if ((dadot + planes[i].d) < 0)
for (uint32_t i = 0; i < 6; ++i) {
float dadot = planes[i].normal().dot(point);
if ((dadot + planes[i].d()) < 0.f)
return false;
}
return true;

View File

@ -2,45 +2,48 @@
#include "zeus/CQuaternion.hpp"
#include "zeus/Global.hpp"
namespace zeus
{
namespace zeus {
const CMatrix3f CMatrix3f::skIdentityMatrix3f = CMatrix3f();
CMatrix3f::CMatrix3f(const CQuaternion& quat)
{
CMatrix3f::CMatrix3f(const CQuaternion& quat) {
CQuaternion nq = quat.normalized();
float x2 = nq.x * nq.x;
float y2 = nq.y * nq.y;
float z2 = nq.z * nq.z;
float x2 = nq.x() * nq.x();
float y2 = nq.y() * nq.y();
float z2 = nq.z() * nq.z();
m[0][0] = 1.0 - 2.0 * y2 - 2.0 * z2;
m[1][0] = 2.0 * nq.x * nq.y - 2.0 * nq.z * nq.w;
m[2][0] = 2.0 * nq.x * nq.z + 2.0 * nq.y * nq.w;
m[1][0] = 2.0 * nq.x() * nq.y() - 2.0 * nq.z() * nq.w();
m[2][0] = 2.0 * nq.x() * nq.z() + 2.0 * nq.y() * nq.w();
m[0][1] = 2.0 * nq.x * nq.y + 2.0 * nq.z * nq.w;
m[0][1] = 2.0 * nq.x() * nq.y() + 2.0 * nq.z() * nq.w();
m[1][1] = 1.0 - 2.0 * x2 - 2.0 * z2;
m[2][1] = 2.0 * nq.y * nq.z - 2.0 * nq.x * nq.w;
m[2][1] = 2.0 * nq.y() * nq.z() - 2.0 * nq.x() * nq.w();
m[0][2] = 2.0 * nq.x * nq.z - 2.0 * nq.y * nq.w;
m[1][2] = 2.0 * nq.y * nq.z + 2.0 * nq.x * nq.w;
m[0][2] = 2.0 * nq.x() * nq.z() - 2.0 * nq.y() * nq.w();
m[1][2] = 2.0 * nq.y() * nq.z() + 2.0 * nq.x() * nq.w();
m[2][2] = 1.0 - 2.0 * x2 - 2.0 * y2;
m[0][3] = 0.0f;
m[1][3] = 0.0f;
m[2][3] = 0.0f;
}
void CMatrix3f::transpose()
{
void CMatrix3f::transpose() {
#if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
vec[0].mVec128 = _mm_movelh_ps(T0, T2);
vec[1].mVec128 = _mm_movehl_ps(T2, T0);
vec[2].mVec128 = _mm_movelh_ps(T1, T3);
__m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
m[0].mSimd = _mm_movelh_ps(T0, T2);
m[1].mSimd = _mm_movehl_ps(T2, T0);
m[2].mSimd = _mm_movelh_ps(T1, T3);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
m[0].mSimd = T0.val[0];
m[1].mSimd = T0.val[1];
m[2].mSimd = T1.val[0];
#else
float tmp;
@ -58,15 +61,22 @@ void CMatrix3f::transpose()
#endif
}
CMatrix3f CMatrix3f::transposed() const
{
CMatrix3f CMatrix3f::transposed() const {
#if __SSE__
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
__m128 zero = _mm_xor_ps(m[0].mSimd.native(), m[0].mSimd.native());
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), zero);
__m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), zero);
return CMatrix3f(_mm_movelh_ps(T0, T2), _mm_movehl_ps(T2, T0), _mm_movelh_ps(T1, T3));
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
return CMatrix3f(T0.val[0], T0.val[1], T1.val[0]);
#else
CMatrix3f ret(*this);
float tmp;
@ -87,8 +97,7 @@ CMatrix3f CMatrix3f::transposed() const
#endif
}
CMatrix3f CMatrix3f::inverted() const
{
CMatrix3f CMatrix3f::inverted() const {
float det = m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] -
m[0][2] * m[1][1] * m[2][0] - m[1][2] * m[2][1] * m[0][0] - m[2][2] * m[0][1] * m[1][0];

View File

@ -9,14 +9,25 @@ CMatrix4f CMatrix4f::transposed() const
{
CMatrix4f ret;
#if __SSE__
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);
ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2);
ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0);
ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3);
ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1);
__m128 T0 = _mm_unpacklo_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T2 = _mm_unpacklo_ps(m[2].mSimd.native(), m[3].mSimd.native());
__m128 T1 = _mm_unpackhi_ps(m[0].mSimd.native(), m[1].mSimd.native());
__m128 T3 = _mm_unpackhi_ps(m[2].mSimd.native(), m[3].mSimd.native());
ret.m[0].mSimd = _mm_movelh_ps(T0, T2);
ret.m[1].mSimd = _mm_movehl_ps(T2, T0);
ret.m[2].mSimd = _mm_movelh_ps(T1, T3);
ret.m[3].mSimd = _mm_movehl_ps(T3, T1);
#elif __ARM_NEON
float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
ret.m[0].mSimd = T0.val[0];
ret.m[1].mSimd = T0.val[1];
ret.m[2].mSimd = T1.val[0];
ret.m[3].mSimd = T1.val[1];
#else
ret.m[0][0] = m[0][0];
ret.m[1][0] = m[0][1];

View File

@ -1,15 +1,19 @@
#include "zeus/COBBox.hpp"
namespace zeus
{
namespace zeus {
CAABox COBBox::calculateAABox(const CTransform& worldXf) const
{
CAABox COBBox::calculateAABox(const CTransform& worldXf) const {
CAABox ret = CAABox::skInvertedBox;
CTransform trans = worldXf * transform;
static const CVector3f basis[8] = {{1.f, 1.f, 1.f}, {1.f, 1.f, -1.f}, {1.f, -1.f, 1.f}, {1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f}, {-1.f, -1.f, 1.f}, {-1.f, 1.f, -1.f}, {-1.f, 1.f, 1.f}};
static const CVector3f basis[8] = {{1.f, 1.f, 1.f},
{1.f, 1.f, -1.f},
{1.f, -1.f, 1.f},
{1.f, -1.f, -1.f},
{-1.f, -1.f, -1.f},
{-1.f, -1.f, 1.f},
{-1.f, 1.f, -1.f},
{-1.f, 1.f, 1.f}};
CVector3f p = extents * basis[0];
ret.accumulateBounds(trans * p);
p = extents * basis[1];
@ -30,8 +34,7 @@ CAABox COBBox::calculateAABox(const CTransform& worldXf) const
return ret;
}
bool COBBox::OBBIntersectsBox(const COBBox& other) const
{
bool COBBox::OBBIntersectsBox(const COBBox& other) const {
CVector3f v = other.transform.origin - transform.origin;
CVector3f T = CVector3f(v.dot(transform.basis[0]),
v.dot(transform.basis[1]),
@ -45,8 +48,7 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const
for (int k = 0; k < 3; ++k)
R[i][k] = transform.basis[i].dot(other.transform.basis[k]);
for (int i = 0; i < 3; ++i)
{
for (int i = 0; i < 3; ++i) {
ra = extents[i];
rb = (other.extents[0] * std::fabs(R[i][0])) +
(other.extents[1] * std::fabs(R[i][1])) +
@ -57,8 +59,7 @@ bool COBBox::OBBIntersectsBox(const COBBox& other) const
return false;
}
for (int k = 0; k < 3; ++k)
{
for (int k = 0; k < 3; ++k) {
ra = (extents[0] * std::fabs(R[0][k])) +
(extents[1] * std::fabs(R[1][k])) +
(extents[2] * std::fabs(R[2][k]));

View File

@ -1,14 +1,12 @@
#include "zeus/CPlane.hpp"
namespace zeus
{
namespace zeus {
bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const
{
bool CPlane::rayPlaneIntersection(const CVector3f& from, const CVector3f& to, CVector3f& point) const {
zeus::CVector3f delta = to - from;
if (std::fabs(delta.normalized().dot(vec)) < 0.01f)
if (std::fabs(delta.normalized().dot(normal())) < 0.01f)
return false;
float tmp = -pointToPlaneDist(from) / delta.dot(vec);
float tmp = -pointToPlaneDist(from) / delta.dot(normal());
if (tmp < -0.f || tmp > 1.0001f)
return false;
point = delta * tmp + from;

View File

@ -2,13 +2,10 @@
#include "zeus/Math.hpp"
#include <cassert>
namespace zeus
{
void CProjection::_updateCachedMatrix()
{
namespace zeus {
void CProjection::_updateCachedMatrix() {
assert(m_projType == EProjType::Orthographic || m_projType == EProjType::Perspective);
if (m_projType == EProjType::Orthographic)
{
if (m_projType == EProjType::Orthographic) {
float tmp;
tmp = 1.0f / (m_ortho.right - m_ortho.left);
@ -33,9 +30,7 @@ void CProjection::_updateCachedMatrix()
m_mtx.m[1][3] = 0.0f;
m_mtx.m[2][3] = 0.0f;
m_mtx.m[3][3] = 1.0f;
}
else if (m_projType == EProjType::Perspective)
{
} else if (m_projType == EProjType::Perspective) {
float tfov = std::tan(m_persp.fov * 0.5f);
float top = m_persp.znear * tfov;
float bottom = -top;

View File

@ -1,262 +1,212 @@
#include "zeus/CQuaternion.hpp"
#include "zeus/Math.hpp"
namespace zeus
{
namespace zeus {
const CQuaternion CQuaternion::skNoRotation;
CQuaternion::CQuaternion(const CMatrix3f& mat)
{
CQuaternion::CQuaternion(const CMatrix3f& mat) {
float trace = mat[0][0] + mat[1][1] + mat[2][2];
if (trace >= 0.f)
{
if (trace >= 0.f) {
float st = std::sqrt(trace + 1.0f);
float s = 0.5f / st;
w = 0.5f * st;
x = (mat[1][2] - mat[2][1]) * s;
y = (mat[2][0] - mat[0][2]) * s;
z = (mat[0][1] - mat[1][0]) * s;
}
else
{
w() = 0.5f * st;
x() = (mat[1][2] - mat[2][1]) * s;
y() = (mat[2][0] - mat[0][2]) * s;
z() = (mat[0][1] - mat[1][0]) * s;
} else {
int idx = 0;
if (mat[1][1] > mat[0][0])
{
if (mat[1][1] > mat[0][0]) {
idx = 1;
if (mat[2][2] > mat[1][1])
idx = 2;
}
else if (mat[2][2] > mat[0][0])
{
} else if (mat[2][2] > mat[0][0]) {
idx = 2;
}
switch (idx)
{
case 0:
{
switch (idx) {
case 0: {
float st = std::sqrt(mat[0][0] - (mat[1][1] + mat[2][2]) + 1.f);
float s = 0.5f / st;
w = (mat[1][2] - mat[2][1]) * s;
x = 0.5f * st;
y = (mat[1][0] + mat[0][1]) * s;
z = (mat[2][0] + mat[0][2]) * s;
w() = (mat[1][2] - mat[2][1]) * s;
x() = 0.5f * st;
y() = (mat[1][0] + mat[0][1]) * s;
z() = (mat[2][0] + mat[0][2]) * s;
break;
}
case 1:
{
case 1: {
float st = std::sqrt(mat[1][1] - (mat[2][2] + mat[0][0]) + 1.f);
float s = 0.5f / st;
w = (mat[2][0] - mat[0][2]) * s;
x = (mat[1][0] + mat[0][1]) * s;
y = 0.5f * st;
z = (mat[2][1] + mat[1][2]) * s;
w() = (mat[2][0] - mat[0][2]) * s;
x() = (mat[1][0] + mat[0][1]) * s;
y() = 0.5f * st;
z() = (mat[2][1] + mat[1][2]) * s;
break;
}
case 2:
{
case 2: {
float st = std::sqrt(mat[2][2] - (mat[0][0] + mat[1][1]) + 1.f);
float s = 0.5f / st;
w = (mat[0][1] - mat[1][0]) * s;
x = (mat[2][0] + mat[0][2]) * s;
y = (mat[2][1] + mat[1][2]) * s;
z = 0.5f * st;
w() = (mat[0][1] - mat[1][0]) * s;
x() = (mat[2][0] + mat[0][2]) * s;
y() = (mat[2][1] + mat[1][2]) * s;
z() = 0.5f * st;
break;
}
default:
w = 0.f;
x = 0.f;
y = 0.f;
z = 0.f;
w() = 0.f;
x() = 0.f;
y() = 0.f;
z() = 0.f;
break;
}
}
}
void CQuaternion::fromVector3f(const CVector3f& vec)
{
float cosX = std::cos(0.5f * vec.x);
float cosY = std::cos(0.5f * vec.y);
float cosZ = std::cos(0.5f * vec.z);
void CQuaternion::fromVector3f(const CVector3f& vec) {
float cosX = std::cos(0.5f * vec.x());
float cosY = std::cos(0.5f * vec.y());
float cosZ = std::cos(0.5f * vec.z());
float sinX = std::sin(0.5f * vec.x);
float sinY = std::sin(0.5f * vec.y);
float sinZ = std::sin(0.5f * vec.z);
float sinX = std::sin(0.5f * vec.x());
float sinY = std::sin(0.5f * vec.y());
float sinZ = std::sin(0.5f * vec.z());
w = cosZ * cosY * cosX + sinZ * sinY * sinX;
x = cosZ * cosY * sinX - sinZ * sinY * cosX;
y = cosZ * sinY * cosX + sinZ * cosY * sinX;
z = sinZ * cosY * cosX - cosZ * sinY * sinX;
simd_floats f;
f[0] = cosZ * cosY * cosX + sinZ * sinY * sinX;
f[1] = cosZ * cosY * sinX - sinZ * sinY * cosX;
f[2] = cosZ * sinY * cosX + sinZ * cosY * sinX;
f[3] = sinZ * cosY * cosX - cosZ * sinY * sinX;
mSimd.copy_from(f);
}
CQuaternion& CQuaternion::operator=(const CQuaternion& q)
{
#if __SSE__
mVec128 = q.mVec128;
#else
w = q.w;
x = q.x;
y = q.y;
z = q.z;
#endif
CQuaternion& CQuaternion::operator=(const CQuaternion& q) {
mSimd = q.mSimd;
return *this;
}
CQuaternion CQuaternion::operator+(const CQuaternion& q) const { return CQuaternion(w + q.w, x + q.x, y + q.y, z + q.z); }
CQuaternion CQuaternion::operator-(const CQuaternion& q) const { return CQuaternion(w - q.w, x - q.x, y - q.y, z - q.z); }
CQuaternion CQuaternion::operator*(const CQuaternion& q) const
{
return CQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
CQuaternion CQuaternion::operator+(const CQuaternion& q) const {
return mSimd + q.mSimd;
}
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
{
return CNUQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
y * q.z - z * q.y + w * q.x + x * q.w,
z * q.x - x * q.z + w * q.y + y * q.w,
x * q.y - y * q.x + w * q.z + z * q.w);
CQuaternion CQuaternion::operator-(const CQuaternion& q) const {
return mSimd - q.mSimd;
}
CQuaternion CQuaternion::operator/(const CQuaternion& q) const
{
CQuaternion CQuaternion::operator*(const CQuaternion& q) const {
return CQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w(),
z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w(),
x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w());
}
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const {
return CNUQuaternion(w() * q.w() - CVector3f(x(), y(), z()).dot({q.x(), q.y(), q.z()}),
y() * q.z() - z() * q.y() + w() * q.x() + x() * q.w(),
z() * q.x() - x() * q.z() + w() * q.y() + y() * q.w(),
x() * q.y() - y() * q.x() + w() * q.z() + z() * q.w());
}
CQuaternion CQuaternion::operator/(const CQuaternion& q) const {
CQuaternion p(q);
p.invert();
return *this * p;
}
CQuaternion CQuaternion::operator*(float scale) const { return CQuaternion(w * scale, x * scale, y * scale, z * scale); }
CQuaternion CQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
}
CNUQuaternion CNUQuaternion::operator*(float scale) const { return CNUQuaternion(w * scale, x * scale, y * scale, z * scale); }
CNUQuaternion CNUQuaternion::operator*(float scale) const {
return mSimd * simd<float>(scale);
}
CQuaternion CQuaternion::operator/(float scale) const { return CQuaternion(w / scale, x / scale, y / scale, z / scale); }
CQuaternion CQuaternion::operator/(float scale) const {
return mSimd / simd<float>(scale);
}
CQuaternion CQuaternion::operator-() const { return CQuaternion(-w, -x, -y, -z); }
CQuaternion CQuaternion::operator-() const { return -mSimd; }
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q)
{
w += q.w;
x += q.x;
y += q.y;
z += q.z;
const CQuaternion& CQuaternion::operator+=(const CQuaternion& q) {
mSimd += q.mSimd;
return *this;
}
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q)
{
w += q.w;
x += q.x;
y += q.y;
z += q.z;
const CNUQuaternion& CNUQuaternion::operator+=(const CNUQuaternion& q) {
mSimd += q.mSimd;
return *this;
}
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q)
{
w -= q.w;
x -= q.x;
y -= q.y;
z -= q.z;
const CQuaternion& CQuaternion::operator-=(const CQuaternion& q) {
mSimd -= q.mSimd;
return *this;
}
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q)
{
const CQuaternion& CQuaternion::operator*=(const CQuaternion& q) {
CQuaternion orig = *this;
w = orig.w * q.w - CVector3f(orig.x, orig.y, orig.z).dot({q.x, q.y, q.z});
x = orig.y * q.z - orig.z * q.y + orig.w * q.x + orig.x * q.w;
y = orig.z * q.x - orig.x * q.z + orig.w * q.y + orig.y * q.w;
z = orig.x * q.y - orig.y * q.x + orig.w * q.z + orig.z * q.w;
w() = orig.w() * q.w() - CVector3f(orig.x(), orig.y(), orig.z()).dot({q.x(), q.y(), q.z()});
x() = orig.y() * q.z() - orig.z() * q.y() + orig.w() * q.x() + orig.x() * q.w();
y() = orig.z() * q.x() - orig.x() * q.z() + orig.w() * q.y() + orig.y() * q.w();
z() = orig.x() * q.y() - orig.y() * q.x() + orig.w() * q.z() + orig.z() * q.w();
return *this;
}
const CQuaternion& CQuaternion::operator*=(float scale)
{
w *= scale;
x *= scale;
y *= scale;
z *= scale;
const CQuaternion& CQuaternion::operator*=(float scale) {
mSimd *= simd<float>(scale);
return *this;
}
const CQuaternion& CQuaternion::operator/=(float scale)
{
w /= scale;
x /= scale;
y /= scale;
z /= scale;
const CQuaternion& CQuaternion::operator/=(float scale) {
mSimd /= simd<float>(scale);
return *this;
}
void CQuaternion::invert()
{
x = -x;
y = -y;
z = -z;
static const simd<float> InvertQuat(1.f, -1.f, -1.f, -1.f);
void CQuaternion::invert() {
mSimd *= InvertQuat;
}
CQuaternion CQuaternion::inverse() const { return CQuaternion(w, -x, -y, -z); }
CQuaternion CQuaternion::inverse() const { return mSimd * InvertQuat; }
CQuaternion CQuaternion::log() const
{
float a = std::acos(w);
CQuaternion CQuaternion::log() const {
float a = std::acos(w());
float sina = std::sin(a);
CQuaternion ret;
ret.w = 0.f;
if (sina > 0.f)
{
ret.x = a * x / sina;
ret.y = a * y / sina;
ret.z = a * z / sina;
}
ret = a * *this / sina;
else
{
ret.x = 0.f;
ret.y = 0.f;
ret.z = 0.f;
}
ret = simd<float>(0.f);
ret.w() = 0.f;
return ret;
}
CQuaternion CQuaternion::exp() const
{
float a = (CVector3f(x, y, z).magnitude());
CQuaternion CQuaternion::exp() const {
float a = (CVector3f(mSimd.shuffle<1, 2, 3, 3>()).magnitude());
float sina = std::sin(a);
float cosa = std::cos(a);
CQuaternion ret;
ret.w = cosa;
if (a > 0.f)
{
ret.x = sina * x / a;
ret.y = sina * y / a;
ret.z = sina * z / a;
}
ret = sina * *this / a;
else
{
ret.x = 0.f;
ret.y = 0.f;
ret.z = 0.f;
}
ret = simd<float>(0.f);
ret.w() = cosa;
return ret;
}
CQuaternion CQuaternion::lerp(const CQuaternion& a, const CQuaternion& b, double t) { return (a + t * (b - a)); }
CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) { return lerp(a, b, t).normalized(); }
CQuaternion CQuaternion::nlerp(const CQuaternion& a, const CQuaternion& b, double t) {
return lerp(a, b, t).normalized();
}
CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t)
{
CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, double t) {
if (t <= 0.0f)
return a;
if (t >= 1.0f)
@ -268,8 +218,7 @@ CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, doubl
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);
@ -277,18 +226,14 @@ CQuaternion CQuaternion::slerp(const CQuaternion& a, const CQuaternion& b, doubl
const double d = 1.0 / std::sin(theta);
const double s0 = std::sin((1.0 - t) * theta);
ret.x = float((a.x * s0 + b.x * s1) * d);
ret.y = float((a.y * s0 + b.y * s1) * d);
ret.z = float((a.z * s0 + b.z * s1) * d);
ret.w = float((a.w * s0 + b.w * s1) * d);
ret = (a * s0 + b * s1) * d;
return ret;
}
return a;
}
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1)
{
CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const zeus::CVector3f& v1) {
CVector3f v0N = v0;
CVector3f v1N = v1;
@ -299,92 +244,78 @@ CQuaternion CQuaternion::shortestRotationArc(const zeus::CVector3f& v0, const ze
CVector3f cross = v0N.cross(v1N);
if (cross.magSquared() < 0.001f)
{
if (cross.magSquared() < 0.001f) {
if (v0N.dot(v1N) > 0.f)
return CQuaternion::skNoRotation;
if (cross.canBeNormalized())
return CQuaternion(0.0f, cross.normalized());
return CQuaternion::skNoRotation;
}
else
{
} else {
float w = std::sqrt((1.f + zeus::clamp(-1.f, v0N.dot(v1N), 1.f)) * 2.f);
return CQuaternion(0.5f * w, cross * (1.f / w));
}
}
CQuaternion CQuaternion::clampedRotateTo(const zeus::CUnitVector3f& v0, const zeus::CUnitVector3f& v1,
const zeus::CRelAngle& angle)
{
const zeus::CRelAngle& angle) {
CQuaternion arc = shortestRotationArc(v0, v1);
if (angle >= 2.f * std::acos(arc.w))
if (angle >= 2.f * std::acos(arc.w()))
return arc;
return fromAxisAngle(arc.getImaginary(), angle);
}
CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t)
{
CQuaternion CQuaternion::slerpShort(const CQuaternion& a, const CQuaternion& b, double t) {
return zeus::CQuaternion::slerp((b.dot(a) >= 0.f) ? a : a.buildEquivalent(), b, t);
}
CQuaternion operator+(float lhs, const CQuaternion& rhs)
{
return CQuaternion(lhs + rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CQuaternion operator+(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) + rhs.mSimd;
}
CQuaternion operator-(float lhs, const CQuaternion& rhs)
{
return CQuaternion(lhs - rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CQuaternion operator-(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) - rhs.mSimd;
}
CQuaternion operator*(float lhs, const CQuaternion& rhs)
{
return CQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CQuaternion operator*(float lhs, const CQuaternion& rhs) {
return simd<float>(lhs) * rhs.mSimd;
}
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs)
{
return CNUQuaternion(lhs * rhs.w, lhs * rhs.x, lhs * rhs.y, lhs * rhs.z);
CNUQuaternion operator*(float lhs, const CNUQuaternion& rhs) {
return simd<float>(lhs) * rhs.mSimd;
}
CQuaternion CQuaternion::buildEquivalent() const
{
float tmp = std::acos(clamp(-1.f, w, 1.f)) * 2.0;
CQuaternion CQuaternion::buildEquivalent() const {
float tmp = std::acos(clamp(-1.f, w(), 1.f)) * 2.f;
if (std::fabs(tmp) < 1.0e-7)
return {-1.f, 0.f, 0.f, 0.f};
else
return CQuaternion::fromAxisAngle(CUnitVector3f(x, y, z), tmp + 2.0 * M_PI);
return CQuaternion::fromAxisAngle(CUnitVector3f(mSimd.shuffle<1, 2, 3, 3>()), tmp + 2.0 * M_PI);
}
CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other)
{
CRelAngle CQuaternion::angleFrom(const zeus::CQuaternion& other) {
return std::acos(zeus::clamp(-1.f, dot(other), 1.f));
}
CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng)
{
CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f& dest, const CRelAngle& maxAng) {
CQuaternion q = skNoRotation;
zeus::CVector3f destNoZ = dest;
zeus::CVector3f sourceNoZ = source;
destNoZ.z = 0.f;
sourceNoZ.z = 0.f;
destNoZ.z() = 0.f;
sourceNoZ.z() = 0.f;
zeus::CVector3f tmp;
if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f)
{
if (sourceNoZ.magSquared() > 0.0001f && destNoZ.magSquared() > 0.0001f) {
sourceNoZ.normalize();
destNoZ.normalize();
float angleBetween =
normalize_angle(std::atan2(destNoZ.x, destNoZ.y) - std::atan2(sourceNoZ.x, sourceNoZ.y));
normalize_angle(std::atan2(destNoZ.x(), destNoZ.y()) - std::atan2(sourceNoZ.x(), sourceNoZ.y()));
float realAngle = zeus::clamp(-maxAng.asRadians(), angleBetween, maxAng.asRadians());
CQuaternion tmpQ;
tmpQ.rotateZ(-realAngle);
q = tmpQ;
tmp = q.transform(sourceNoZ);
}
else if (sourceNoZ.magSquared() > 0.0001f)
} else if (sourceNoZ.magSquared() > 0.0001f)
tmp = sourceNoZ.normalized();
else if (destNoZ.magSquared() > 0.0001f)
tmp = destNoZ.normalized();
@ -392,7 +323,7 @@ CQuaternion CQuaternion::lookAt(const CUnitVector3f& source, const CUnitVector3f
return skNoRotation;
float realAngle =
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z) - std::acos(source.z)), maxAng.asRadians());
zeus::clamp(-maxAng.asRadians(), normalize_angle(std::acos(dest.z()) - std::acos(source.z())), maxAng.asRadians());
return CQuaternion::fromAxisAngle(tmp.cross(CVector3f::skUp), -realAngle) * q;
}

View File

@ -1,9 +1,7 @@
#include "zeus/CTransform.hpp"
namespace zeus
{
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec)
{
namespace zeus {
CTransform CTransformFromEditorEuler(const CVector3f& eulerVec) {
CTransform result;
double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
@ -36,8 +34,7 @@ CTransform CTransformFromEditorEuler(const CVector3f& eulerVec)
return result;
}
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle)
{
CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle) {
CTransform result;
CVector3f axisN = axis.normalized();
@ -45,23 +42,22 @@ CTransform CTransformFromAxisAngle(const CVector3f& axis, float angle)
float s = std::sin(angle);
float t = 1.f - c;
result.basis.m[0][0] = t * axisN.v[0] * axisN.v[0] + c;
result.basis.m[1][0] = t * axisN.v[0] * axisN.v[1] - axisN.v[2] * s;
result.basis.m[2][0] = t * axisN.v[0] * axisN.v[2] + axisN.v[1] * s;
result.basis.m[0][0] = t * axisN[0] * axisN[0] + c;
result.basis.m[1][0] = t * axisN[0] * axisN[1] - axisN[2] * s;
result.basis.m[2][0] = t * axisN[0] * axisN[2] + axisN[1] * s;
result.basis.m[0][1] = t * axisN.v[0] * axisN.v[1] + axisN.v[2] * s;
result.basis.m[1][1] = t * axisN.v[1] * axisN.v[1] + c;
result.basis.m[2][1] = t * axisN.v[1] * axisN.v[2] - axisN.v[0] * s;
result.basis.m[0][1] = t * axisN[0] * axisN[1] + axisN[2] * s;
result.basis.m[1][1] = t * axisN[1] * axisN[1] + c;
result.basis.m[2][1] = t * axisN[1] * axisN[2] - axisN[0] * s;
result.basis.m[0][2] = t * axisN.v[0] * axisN.v[2] - axisN.v[1] * s;
result.basis.m[1][2] = t * axisN.v[1] * axisN.v[2] + axisN.v[0] * s;
result.basis.m[2][2] = t * axisN.v[2] * axisN.v[2] + c;
result.basis.m[0][2] = t * axisN[0] * axisN[2] - axisN[1] * s;
result.basis.m[1][2] = t * axisN[1] * axisN[2] + axisN[0] * s;
result.basis.m[2][2] = t * axisN[2] * axisN[2] + c;
return result;
}
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin)
{
CTransform CTransformFromEditorEulers(const CVector3f& eulerVec, const CVector3f& origin) {
CTransform ret = CTransformFromEditorEuler(eulerVec);
ret.origin = origin;
return ret;

View File

@ -4,14 +4,12 @@
#include <cassert>
#include "zeus/Math.hpp"
namespace zeus
{
namespace zeus {
const CVector2f CVector2f::skOne = CVector2f(1.0);
const CVector2f CVector2f::skNegOne = CVector2f(-1.0);
const CVector2f CVector2f::skZero(0.f, 0.f);
float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b)
{
float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b) {
float mag1 = a.magnitude();
float mag2 = b.magnitude();
@ -23,8 +21,7 @@ float CVector2f::getAngleDiff(const CVector2f& a, const CVector2f& b)
return theta;
}
CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t)
{
CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t) {
if (t <= 0.0f)
return a;
if (t >= 1.0f)
@ -36,8 +33,7 @@ CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t)
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = std::acos(sign * prod);

View File

@ -5,8 +5,7 @@
#include <cassert>
#include "zeus/Math.hpp"
namespace zeus
{
namespace zeus {
const CVector3f CVector3f::skOne(1.f);
const CVector3f CVector3f::skNegOne(-1.f);
const CVector3f CVector3f::skZero;
@ -20,20 +19,9 @@ const CVector3f CVector3f::skRadToDegVec(180.0f / M_PIF);
const CVector3f CVector3f::skDegToRadVec(M_PIF / 180.0f);
const CVector3d CVector3d::skZero(0.0, 0.0, 0.0);
CVector3f::CVector3f(const CVector3d& vec)
{
#if __SSE__
mVec128 = _mm_cvtpd_ps(vec.mVec128[0]);
v[2] = vec.v[2];
#else
v[0] = vec.v[0];
v[1] = vec.v[1];
v[2] = vec.v[2];
#endif
}
CVector3f::CVector3f(const CVector3d& vec) : mSimd(vec.mSimd) {}
float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b)
{
float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b) {
float mag1 = a.magnitude();
float mag2 = b.magnitude();
@ -45,8 +33,7 @@ float CVector3f::getAngleDiff(const CVector3f& a, const CVector3f& b)
return theta;
}
CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
{
CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t) {
if (t <= 0.0f)
return a;
if (t >= 1.0f)
@ -58,8 +45,7 @@ CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
float prod = a.dot(b) / mag;
if (std::fabs(prod) < 1.0f)
{
if (std::fabs(prod) < 1.0f) {
const double sign = (prod < 0.0f) ? -1.0f : 1.0f;
const double theta = acos(sign * prod);
@ -67,9 +53,7 @@ CVector3f CVector3f::slerp(const CVector3f& a, const CVector3f& b, float t)
const double d = 1.0 / sin(theta);
const double s0 = sin((1.0 - t) * theta);
ret.x = (float)(a.x * s0 + b.x * s1) * d;
ret.y = (float)(a.y * s0 + b.y * s1) * d;
ret.z = (float)(a.z * s0 + b.z * s1) * d;
ret = (a * s0 + b * s1) * d;
return ret;
}

View File

@ -1,19 +1,13 @@
#include "zeus/CVector4f.hpp"
#include "zeus/CColor.hpp"
namespace zeus
{
namespace zeus {
const CVector4f CVector4f::skZero(0.f, 0.f, 0.f, 0.f);
CVector4f::CVector4f(const zeus::CColor& other) : x(other.r), y(other.g), z(other.b), w(other.a) {}
CVector4f& CVector4f::operator=(const CColor& other)
{
x = other.r;
y = other.g;
z = other.b;
w = other.a;
CVector4f::CVector4f(const zeus::CColor& other) : mSimd(other.mSimd) {}
CVector4f& CVector4f::operator=(const CColor& other) {
mSimd = other.mSimd;
return *this;
}
}

View File

@ -2,21 +2,22 @@
#include "zeus/CTransform.hpp"
#include "zeus/CVector3f.hpp"
#include "zeus/CVector2f.hpp"
#if _WIN32
#include <intrin.h>
#else
#include <cpuid.h>
#endif
namespace zeus
{
namespace zeus {
static bool isCPUInit = false;
static CPUInfo g_cpuFeatures = {};
static CPUInfo g_missingFeatures = {};
void getCpuInfo(int eax, int regs[4])
{
void getCpuInfo(int eax, int regs[4]) {
#if !GEKKO
#if _WIN32
__cpuid(regs, eax);
@ -26,8 +27,7 @@ void getCpuInfo(int eax, int regs[4])
#endif
}
void getCpuInfoEx(int eax, int ecx, int regs[4])
{
void getCpuInfoEx(int eax, int ecx, int regs[4]) {
#if !GEKKO
#if _WIN32
__cpuidex(regs, eax, ecx);
@ -37,8 +37,7 @@ void getCpuInfoEx(int eax, int ecx, int regs[4])
#endif
}
void detectCPU()
{
void detectCPU() {
#if !GEKKO
if (isCPUInit)
return;
@ -46,65 +45,61 @@ void detectCPU()
int regs[4];
getCpuInfo(0, regs);
int highestFeature = regs[0];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*)g_cpuFeatures.cpuVendor + 8) = regs[2];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor) = regs[1];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 4) = regs[3];
*reinterpret_cast<int*>((char*) g_cpuFeatures.cpuVendor + 8) = regs[2];
getCpuInfo(0x80000000, regs);
if (regs[0] >= 0x80000004)
{
for (unsigned int i = 0x80000002; i <= 0x80000004; i++)
{
if (regs[0] >= 0x80000004) {
for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
getCpuInfo(i, regs);
// Interpret CPU brand string and cache information.
if (i == 0x80000002)
memcpy((char*)g_cpuFeatures.cpuBrand, regs, sizeof(regs));
memcpy((char*) g_cpuFeatures.cpuBrand, regs, sizeof(regs));
else if (i == 0x80000003)
memcpy((char*)g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
memcpy((char*) g_cpuFeatures.cpuBrand + 16, regs, sizeof(regs));
else if (i == 0x80000004)
memcpy((char*)g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
memcpy((char*) g_cpuFeatures.cpuBrand + 32, regs, sizeof(regs));
}
}
if (highestFeature >= 1)
{
if (highestFeature >= 1) {
getCpuInfo(1, regs);
memset((bool*)&g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*)&g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*)&g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*)&g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
memset((bool*) &g_cpuFeatures.AESNI, ((regs[2] & 0x02000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE1, ((regs[3] & 0x02000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE2, ((regs[3] & 0x04000000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE3, ((regs[2] & 0x00000001) != 0), 1);
memset((bool*) &g_cpuFeatures.SSSE3, ((regs[2] & 0x00000200) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE41, ((regs[2] & 0x00080000) != 0), 1);
memset((bool*) &g_cpuFeatures.SSE42, ((regs[2] & 0x00100000) != 0), 1);
memset((bool*) &g_cpuFeatures.AVX, ((regs[2] & 0x10000000) != 0), 1);
}
if (highestFeature >= 7)
{
if (highestFeature >= 7) {
getCpuInfoEx(7, 0, regs);
memset((bool*)&g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
memset((bool*) &g_cpuFeatures.AVX2, ((regs[1] & 0x00000020) != 0), 1);
}
isCPUInit = true;
#endif
}
const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; }
const CPUInfo& cpuFeatures() {
detectCPU();
return g_cpuFeatures;
}
std::pair<bool, const CPUInfo&> validateCPU()
{
std::pair<bool, const CPUInfo&> validateCPU() {
detectCPU();
bool ret = true;
#if __AVX2__
if (!g_cpuFeatures.AVX2)
{
if (!g_cpuFeatures.AVX2) {
*(bool*) &g_missingFeatures.AVX2 = true;
ret = false;
}
#endif
#if __AVX__
if (!g_cpuFeatures.AVX)
{
if (!g_cpuFeatures.AVX) {
*(bool*) &g_missingFeatures.AVX = true;
ret = false;
}
@ -117,43 +112,37 @@ std::pair<bool, const CPUInfo&> validateCPU()
}
#endif
#if __SSE4_2__
if (!g_cpuFeatures.SSE42)
{
if (!g_cpuFeatures.SSE42) {
*(bool*) &g_missingFeatures.SSE42 = true;
ret = false;
}
#endif
#if __SSE4_1__
if (!g_cpuFeatures.SSE41)
{
if (!g_cpuFeatures.SSE41) {
*(bool*) &g_missingFeatures.SSE41 = true;
ret = false;
}
#endif
#if __SSSE3__
if (!g_cpuFeatures.SSSE3)
{
if (!g_cpuFeatures.SSSE3) {
*(bool*) &g_missingFeatures.SSSE3 = true;
ret = false;
}
#endif
#if __SSE3__
if (!g_cpuFeatures.SSE3)
{
if (!g_cpuFeatures.SSE3) {
*(bool*) &g_missingFeatures.SSE3 = true;
ret = false;
}
#endif
#if __SSE2__
if (!g_cpuFeatures.SSE2)
{
if (!g_cpuFeatures.SSE2) {
*(bool*) &g_missingFeatures.SSE2 = true;
ret = false;
}
#endif
#if __SSE__
if (!g_cpuFeatures.SSE1)
{
if (!g_cpuFeatures.SSE1) {
*(bool*) &g_missingFeatures.SSE1 = true;
ret = false;
}
@ -162,8 +151,7 @@ std::pair<bool, const CPUInfo&> validateCPU()
return {ret, g_missingFeatures};
}
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up)
{
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up) {
CVector3f vLook, vRight, vUp;
vLook = lookPos - pos;
@ -173,11 +161,10 @@ CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3
vLook.normalize();
vUp = up - vLook * clamp(-1.f, up.dot(vLook), 1.f);
if (vUp.magnitude() <= FLT_EPSILON) {
vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z();
if (vUp.magnitude() <= FLT_EPSILON)
{
vUp = CVector3f(0.f, 0.f, 1.f) - vLook * vLook.z;
if (vUp.magnitude() <= FLT_EPSILON)
vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y;
vUp = CVector3f(0.f, 1.f, 0.f) - vLook * vLook.y();
}
vUp.normalize();
vRight = vLook.cross(vUp);
@ -187,15 +174,13 @@ CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3
}
CVector3f getBezierPoint(const CVector3f& a, const CVector3f& b,
const CVector3f& c, const CVector3f& d, float t)
{
const CVector3f& c, const CVector3f& d, float t) {
const float omt = 1.f - t;
return ((a * omt + b * t) * omt + (b * omt + c * t) * t) * omt +
((b * omt + c * t) * omt + (c * omt + d * t) * t) * t;
}
int floorPowerOfTwo(int x)
{
int floorPowerOfTwo(int x) {
if (x == 0)
return 0;
/*
@ -211,8 +196,7 @@ int floorPowerOfTwo(int x)
return x - (x >> 1);
}
int ceilingPowerOfTwo(int x)
{
int ceilingPowerOfTwo(int x) {
if (x == 0)
return 0;
@ -227,8 +211,7 @@ int ceilingPowerOfTwo(int x)
return x;
}
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
{
float getCatmullRomSplinePoint(float a, float b, float c, float d, float t) {
if (t <= 0.0f)
return b;
if (t >= 1.0f)
@ -237,12 +220,13 @@ float getCatmullRomSplinePoint(float a, float b, float c, float d, float t)
const float t2 = t * t;
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) + c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
}
CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
{
CVector3f
getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
if (t <= 0.0f)
return b;
if (t >= 1.0f)
@ -251,12 +235,13 @@ CVector3f getCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const
const float t2 = t * t;
const float t3 = t2 * t;
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) + c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
return (a * (-0.5f * t3 + t2 - 0.5f * t) + b * (1.5f * t3 + -2.5f * t2 + 1.0f) +
c * (-1.5f * t3 + 2.0f * t2 + 0.5f * t) +
d * (0.5f * t3 - 0.5f * t2));
}
CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t)
{
CVector3f
getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b, const CVector3f& c, const CVector3f& d, float t) {
if (t >= 0.0f)
return b;
if (t <= 1.0f)
@ -282,32 +267,27 @@ CVector3f getRoundCatmullRomSplinePoint(const CVector3f& a, const CVector3f& b,
return zeus::getCatmullRomSplinePoint(b, c, bVelocity * cbDistance, cVelocity * cbDistance, t);
}
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary)
{
return bary.x * p0 + bary.y * p1 + bary.z * p2;
CVector3f baryToWorld(const CVector3f& p0, const CVector3f& p1, const CVector3f& p2, const CVector3f& bary) {
return bary.x() * p0 + bary.y() * p1 + bary.z() * p2;
}
bool close_enough(const CVector3f& a, const CVector3f &b, float epsilon)
{
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon && std::fabs(a.z - b.z) < epsilon)
return true;
return false;
bool close_enough(const CVector3f& a, const CVector3f& b, float epsilon) {
return std::fabs(a.x() - b.x()) < epsilon &&
std::fabs(a.y() - b.y()) < epsilon &&
std::fabs(a.z() - b.z()) < epsilon;
}
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon)
{
if (std::fabs(a.x - b.x) < epsilon && std::fabs(a.y - b.y) < epsilon)
return true;
return false;
bool close_enough(const CVector2f& a, const CVector2f& b, float epsilon) {
return std::fabs(a.x() - b.x()) < epsilon && std::fabs(a.y() - b.y()) < epsilon;
}
template <> CVector3f min(const CVector3f& a, const CVector3f& b)
{
return {min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)};
template<>
CVector3f min(const CVector3f& a, const CVector3f& b) {
return {min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z())};
}
template <> CVector3f max(const CVector3f& a, const CVector3f& b)
{
return {max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)};
template<>
CVector3f max(const CVector3f& a, const CVector3f& b) {
return {max(a.x(), b.x()), max(a.y(), b.y()), max(a.z(), b.z())};
}
}

View File

@ -30,6 +30,9 @@ int main()
CAABox test2{{-100, -100, -100}, {100, 100, 100}};
CAABox test3{{-50, -50, -50}, {50, 50, 50}};
CAABox test4{{-50, -50, -105}, {50, 50, 105}};
CVector2f point2(-90, 67);
CVector2f point3(-90, 67);
CVector3f point4 = point2 + point3;
CVector3f point(-90, 67, -105);
test.closestPointAlongVector(point);
CVector3d(100, -100, -200);
@ -72,7 +75,7 @@ int main()
ctest1.fromHSV(0, 255 / 255.f, .5);
float h, s, v;
ctest1.toHSV(h, s, v);
std::cout << (int)ctest1.r << " " << (int)ctest1.g << " " << (int)ctest1.b << " " << (int)ctest1.a << std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a / 255.f) << std::endl;
std::cout << (int)ctest1.r() << " " << (int)ctest1.g() << " " << (int)ctest1.b() << " " << (int)ctest1.a() << std::endl;
std::cout << h << " " << s << " " << v << " " << (float)(ctest1.a() / 255.f) << std::endl;
return 0;
}