Rewrite attribute buffer, matrix & stream handling

Array attributes (GXSetArray) are now fetched correctly
based on the vertex format (component count, component
type and fractional bits). Buffers are still assumed to
be byte-swapped to little-endian.
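
As an illustration of the per-format fetch (a hedged sketch, not
code from this commit; ArrayData and decode_s16 are hypothetical
names), a component of an indexed attribute is read from the
GXSetArray buffer at index * stride and scaled by the format's
fractional bits:

    #include <cstdint>
    #include <cstring>

    // Hypothetical mirror of the state this path reads: GXSetArray
    // supplies the buffer pointer and stride, GXSetVtxAttrFmt the
    // component type and fractional bits (frac).
    struct ArrayData {
      const uint8_t* data;
      uint8_t stride; // bytes between consecutive elements
    };

    // Decode one signed 16-bit component of element `index`, assuming
    // the buffer was already byte-swapped to little-endian.
    inline float decode_s16(const ArrayData& arr, uint32_t index,
                            uint32_t comp, uint8_t frac) {
      int16_t raw;
      std::memcpy(&raw, arr.data + index * arr.stride + comp * sizeof(int16_t),
                  sizeof(raw));
      return static_cast<float>(raw) / static_cast<float>(1 << frac);
    }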

Stream (immediate-mode) handling has been completely
redone and many issues resolved: each vertex is now
written with a fixed per-vertex layout, and every
attribute call is validated against the current vertex
descriptor.
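
For context, a minimal immediate-mode submission exercising this
path might look like the following (illustrative usage of the GX
API, not code from this commit); the attribute calls must match the
GXSetVtxDesc/GXSetVtxAttrFmt state, and the rewritten stream code
now asserts on any mismatch:

    GXClearVtxDesc();
    GXSetVtxDesc(GX_VA_POS, GX_DIRECT);
    GXSetVtxDesc(GX_VA_CLR0, GX_DIRECT);
    GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_POS, GX_POS_XYZ, GX_F32, 0);
    GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8, 0);
    GXBegin(GX_TRIANGLES, GX_VTXFMT0, 3);
    GXPosition3f32(0.f, 0.f, 0.f);
    GXColor4u8(255, 0, 0, 255);
    GXPosition3f32(1.f, 0.f, 0.f);
    GXColor4u8(0, 255, 0, 255);
    GXPosition3f32(0.f, 1.f, 0.f);
    GXColor4u8(0, 0, 255, 255);
    GXEnd();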

Eliminates matrix transposes: GX's row-major matrices are
now consumed directly, so AURORA_NATIVE_MATRIX is no
longer necessary and has been removed.
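
This works because a GX 3x4 matrix stores rows (translation in each
row's w): transforming a point is three dot products against those
rows, so shaders can index the rows directly and no host-side
transpose is needed. A sketch using the Vec/Mat3x4 types from this
repo's math header (transform_point is illustrative only):

    // Row-major 3x4 transform: each output component is the dot
    // product of one matrix row with the homogeneous input point.
    inline aurora::Vec3<float> transform_point(const aurora::Mat3x4<float>& m,
                                               const aurora::Vec3<float>& p) {
      const auto dot = [&](const aurora::Vec4<float>& row) {
        return row[0] * p.x + row[1] * p.y + row[2] * p.z + row[3]; // w == 1
      };
      return {dot(m.m0), dot(m.m1), dot(m.m2)};
    }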
Author: Luke Street
Date: 2025-04-14 17:16:13 -06:00
Parent: 3316ad9a7f
Commit: a600b0b84c
21 changed files with 1215 additions and 901 deletions


@@ -3,8 +3,6 @@ project(aurora LANGUAGES C CXX)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 20)
-option(AURORA_NATIVE_MATRIX "Assume OpenGL-layout matrices, disables transposing" OFF)
 add_subdirectory(extern)
 include(cmake/aurora_core.cmake)


@@ -4,7 +4,6 @@ add_library(aurora_gx STATIC
   lib/gfx/gx.cpp
   lib/gfx/gx_shader.cpp
   lib/gfx/texture_convert.cpp
-  lib/gfx/stream/shader.cpp
   lib/gfx/model/shader.cpp
   lib/dolphin/gx/GXBump.cpp
   lib/dolphin/gx/GXCull.cpp
@@ -28,9 +27,6 @@ add_library(aurora::gx ALIAS aurora_gx)
 target_link_libraries(aurora_gx PUBLIC aurora::core xxhash)
 target_link_libraries(aurora_gx PRIVATE absl::btree absl::flat_hash_map)
-if (AURORA_NATIVE_MATRIX)
-  target_compile_definitions(aurora_gx PRIVATE AURORA_NATIVE_MATRIX)
-endif ()
 if (EMSCRIPTEN)
   target_link_options(aurora_gx PUBLIC -sUSE_WEBGPU=1 -sASYNCIFY -sEXIT_RUNTIME)
   target_compile_definitions(aurora_gx PRIVATE ENABLE_BACKEND_WEBGPU)


@@ -35,9 +35,6 @@ struct Vec2 {
   constexpr Vec2() = default;
   constexpr Vec2(T x, T y) : x(x), y(y) {}
   AURORA_VEC2_EXTRA
-#ifdef METAFORCE
-  constexpr Vec2(const zeus::CVector2f& vec) : x(vec.x()), y(vec.y()) {}
-#endif
 
   bool operator==(const Vec2& rhs) const { return x == rhs.x && y == rhs.y; }
   bool operator!=(const Vec2& rhs) const { return !(*this == rhs); }
@@ -51,10 +48,6 @@ struct Vec3 {
   constexpr Vec3() = default;
   constexpr Vec3(T x, T y, T z) : x(x), y(y), z(z) {}
   AURORA_VEC3_EXTRA
-#ifdef METAFORCE
-  constexpr Vec3(const zeus::CVector3f& vec) : x(vec.x()), y(vec.y()), z(vec.z()) {}
-  operator zeus::CVector3f() const { return {x, y, z}; }
-#endif
 
   bool operator==(const Vec3& rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
   bool operator!=(const Vec3& rhs) const { return !(*this == rhs); }
@@ -77,10 +70,6 @@ struct Vec4 {
   // For Vec3 -> Vec4
   constexpr Vec4(Vec3<T> v, T w) : m{v.x, v.y, v.z, w} {}
   AURORA_VEC4_EXTRA
-#ifdef METAFORCE
-  constexpr Vec4(const zeus::CVector4f& vec) : x(vec.x()), y(vec.y()), z(vec.z()), w(vec.w()) {}
-  constexpr Vec4(const zeus::CColor& color) : x(color.r()), y(color.g()), z(color.b()), w(color.a()) {}
-#endif
 
   inline Vec4& operator=(const Vec4& other) {
     memcpy(&m, &other.m, sizeof(Vt));
@@ -119,7 +108,7 @@ struct Vec4 {
   bool operator!=(const Vec4& rhs) const { return !(*this == rhs); }
 };
 template <typename T>
-[[nodiscard]] inline Vec4<T> operator+(const Vec4<T>& a, const Vec4<T>& b) {
+[[nodiscard]] Vec4<T> operator+(const Vec4<T>& a, const Vec4<T>& b) {
 #ifdef USE_GCC_VECTOR_EXTENSIONS
   return a.m + b.m;
 #else
@@ -127,7 +116,7 @@ template <typename T>
 #endif
 }
 template <typename T>
-[[nodiscard]] inline Vec4<T> operator*(const Vec4<T>& a, const Vec4<T>& b) {
+[[nodiscard]] Vec4<T> operator*(const Vec4<T>& a, const Vec4<T>& b) {
 #ifdef USE_GCC_VECTOR_EXTENSIONS
   return a.m * b.m;
 #else
@@ -170,6 +159,18 @@ struct Mat4x2 {
   bool operator!=(const Mat4x2& rhs) const { return !(*this == rhs); }
 };
 template <typename T>
+struct Mat2x4 {
+  Vec4<T> m0{};
+  Vec4<T> m1{};
+  constexpr Mat2x4() = default;
+  constexpr Mat2x4(const Vec4<T>& m0, const Vec4<T>& m1) : m0(m0), m1(m1) {}
+  bool operator==(const Mat2x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1; }
+  bool operator!=(const Mat2x4& rhs) const { return !(*this == rhs); }
+};
+static_assert(sizeof(Mat2x4<float>) == 32);
+
+template <typename T>
 struct Mat4x4;
 template <typename T>
 struct Mat3x4 {
@@ -180,10 +181,13 @@ struct Mat3x4 {
   constexpr Mat3x4() = default;
   constexpr Mat3x4(const Vec4<T>& m0, const Vec4<T>& m1, const Vec4<T>& m2) : m0(m0), m1(m1), m2(m2) {}
-  inline Mat4x4<T> to4x4() const;
-  inline Mat4x4<T> toTransposed4x4() const;
+  [[nodiscard]] Mat4x4<T> to4x4() const;
+  [[nodiscard]] Mat4x4<T> toTransposed4x4() const;
+
+  bool operator==(const Mat3x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2; }
+  bool operator!=(const Mat3x4& rhs) const { return !(*this == rhs); }
 };
-static_assert(sizeof(Mat3x4<float>) == sizeof(float[3][4]));
+static_assert(sizeof(Mat3x4<float>) == 48);
 template <typename T>
 struct Mat4x4 {
   Vec4<T> m0{};
@@ -195,10 +199,6 @@ struct Mat4x4 {
   constexpr Mat4x4(const Vec4<T>& m0, const Vec4<T>& m1, const Vec4<T>& m2, const Vec4<T>& m3)
   : m0(m0), m1(m1), m2(m2), m3(m3) {}
   AURORA_MAT4X4_EXTRA
-#ifdef METAFORCE
-  constexpr Mat4x4(const zeus::CMatrix4f& m) : m0(m[0]), m1(m[1]), m2(m[2]), m3(m[3]) {}
-  constexpr Mat4x4(const zeus::CTransform& m) : Mat4x4(m.toMatrix4f()) {}
-#endif
 
   [[nodiscard]] Mat4x4 transpose() const {
     return {
@@ -208,23 +208,17 @@ struct Mat4x4 {
         {m0[3], m1[3], m2[3], m3[3]},
     };
   }
-  inline Mat4x4& operator=(const Mat4x4& other) {
-    m0 = other.m0;
-    m1 = other.m1;
-    m2 = other.m2;
-    m3 = other.m3;
-    return *this;
-  }
-  inline Vec4<T>& operator[](size_t i) { return *(&m0 + i); }
-  inline const Vec4<T>& operator[](size_t i) const { return *(&m0 + i); }
+  Mat4x4& operator=(const Mat4x4& other) = default;
+  Vec4<T>& operator[](size_t i) { return *(&m0 + i); }
+  const Vec4<T>& operator[](size_t i) const { return *(&m0 + i); }
   bool operator==(const Mat4x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2 && m3 == rhs.m3; }
   bool operator!=(const Mat4x4& rhs) const { return !(*this == rhs); }
 };
-static_assert(sizeof(Mat4x4<float>) == sizeof(float[4][4]));
+static_assert(sizeof(Mat4x4<float>) == 64);
 template <typename T>
-[[nodiscard]] inline Mat4x4<T> operator*(const Mat4x4<T>& a, const Mat4x4<T>& b) {
+[[nodiscard]] Mat4x4<T> operator*(const Mat4x4<T>& a, const Mat4x4<T>& b) {
   Mat4x4<T> out;
   for (size_t i = 0; i < 4; ++i) {
     *(&out.m0 + i) = a.m0 * b[i].template shuffle<0, 0, 0, 0>() + a.m1 * b[i].template shuffle<1, 1, 1, 1>() +
@@ -233,28 +227,27 @@ template <typename T>
   return out;
 }
 template <typename T>
-[[nodiscard]] inline Mat4x4<T> Mat3x4<T>::to4x4() const {
+[[nodiscard]] Mat4x4<T> Mat3x4<T>::to4x4() const {
   return {
-      {m0.m[0], m0.m[1], m0.m[2], 0.f},
-      {m1.m[0], m1.m[1], m1.m[2], 0.f},
-      {m2.m[0], m2.m[1], m2.m[2], 0.f},
-      {m0.m[3], m1.m[3], m2.m[3], 1.f},
+      {m0[0], m0[1], m0[2], 0.f},
+      {m1[0], m1[1], m1[2], 0.f},
+      {m2[0], m2[1], m2[2], 0.f},
+      {m0[3], m1[3], m2[3], 1.f},
   };
 }
 template <typename T>
-[[nodiscard]] inline Mat4x4<T> Mat3x4<T>::toTransposed4x4() const {
+[[nodiscard]] Mat4x4<T> Mat3x4<T>::toTransposed4x4() const {
   return Mat4x4<T>{
-      m0,
-      m1,
-      m2,
-      {0.f, 0.f, 0.f, 1.f},
-  }
-      .transpose();
+      {m0[0], m1[0], m2[0], 0.f},
+      {m0[1], m1[1], m2[1], 0.f},
+      {m0[2], m1[2], m2[2], 0.f},
+      {m0[3], m1[3], m2[3], 1.f},
+  };
 }
-constexpr Mat4x4<float> Mat4x4_Identity{
-    Vec4<float>{1.f, 0.f, 0.f, 0.f},
-    Vec4<float>{0.f, 1.f, 0.f, 0.f},
-    Vec4<float>{0.f, 0.f, 1.f, 0.f},
-    Vec4<float>{0.f, 0.f, 0.f, 1.f},
+constexpr Mat4x4 Mat4x4_Identity{
+    Vec4{1.f, 0.f, 0.f, 0.f},
+    Vec4{0.f, 1.f, 0.f, 0.f},
+    Vec4{0.f, 0.f, 1.f, 0.f},
+    Vec4{0.f, 0.f, 0.f, 1.f},
 };
 } // namespace aurora


@@ -68,11 +68,11 @@ void GXTexCoord2s16(s16 s, s16 t);
 void GXTexCoord2u8(u8 s, u8 t);
 void GXTexCoord2s8(s8 s, s8 t);
-void GXTexCoord1f32(f32 s, f32 t);
-void GXTexCoord1u16(u16 s, u16 t);
-void GXTexCoord1s16(s16 s, s16 t);
-void GXTexCoord1u8(u8 s, u8 t);
-void GXTexCoord1s8(s8 s, s8 t);
+void GXTexCoord1f32(f32 s);
+void GXTexCoord1u16(u16 s);
+void GXTexCoord1s16(s16 s);
+void GXTexCoord1u8(u8 s);
+void GXTexCoord1s8(s8 s);
 void GXTexCoord1x16(u16 index);
 void GXTexCoord1x8(u8 index);


@@ -7,7 +7,6 @@ extern "C" {
 void GXSetVtxDesc(GXAttr attr, GXAttrType type) { update_gx_state(g_gxState.vtxDesc[attr], type); }
 
 void GXSetVtxDescv(GXVtxDescList* list) {
-  g_gxState.vtxDesc.fill({});
   while (list->attr != GX_VA_NULL) {
     update_gx_state(g_gxState.vtxDesc[list->attr], list->type);
     ++list;
@@ -17,8 +16,8 @@ void GXSetVtxDescv(GXVtxDescList* list) {
 void GXClearVtxDesc() { g_gxState.vtxDesc.fill({}); }
 
 void GXSetVtxAttrFmt(GXVtxFmt vtxfmt, GXAttr attr, GXCompCnt cnt, GXCompType type, u8 frac) {
-  CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", static_cast<int>(vtxfmt));
-  CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", static_cast<int>(attr));
+  CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", underlying(vtxfmt));
+  CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", underlying(attr));
   auto& fmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
   update_gx_state(fmt.cnt, cnt);
   update_gx_state(fmt.type, type);
@@ -38,7 +37,7 @@ void GXSetArray(GXAttr attr, const void* data, u32 size, u8 stride) {
 // TODO move GXBegin, GXEnd here
 
 void GXSetTexCoordGen2(GXTexCoordID dst, GXTexGenType type, GXTexGenSrc src, u32 mtx, GXBool normalize, u32 postMtx) {
-  CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", static_cast<int>(dst));
+  CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", underlying(dst));
   update_gx_state(g_gxState.tcgs[dst],
                   {type, src, static_cast<GXTexMtx>(mtx), static_cast<GXPTTexMtx>(postMtx), normalize});
 }


@@ -20,7 +20,7 @@ void GXGetVtxAttrFmt(GXVtxFmt idx, GXAttr attr, GXCompCnt* compCnt, GXCompType*
 // TODO GXGetViewportv
 
 void GXGetProjectionv(f32* p) {
-  const auto& mtx = g_gxState.origProj;
+  const auto& mtx = g_gxState.proj;
   p[0] = static_cast<float>(g_gxState.projType);
   p[1] = mtx.m0[0];
   p[3] = mtx.m1[1];


@@ -4,15 +4,8 @@ extern "C" {
 void GXSetProjection(const void* mtx_, GXProjectionType type) {
   const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-  g_gxState.origProj = mtx;
   g_gxState.projType = type;
-  update_gx_state(g_gxState.proj,
-#ifdef AURORA_NATIVE_MATRIX
-                  mtx
-#else
-                  mtx.transpose()
-#endif
-  );
+  update_gx_state(g_gxState.proj, mtx);
 }
 
 // TODO GXSetProjectionv
@@ -20,13 +13,8 @@ void GXSetProjection(const void* mtx_, GXProjectionType type) {
 void GXLoadPosMtxImm(const void* mtx_, u32 id) {
   CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
   auto& state = g_gxState.pnMtx[id / 3];
-#ifdef AURORA_NATIVE_MATRIX
-  const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
+  const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
   update_gx_state(state.pos, mtx);
-#else
-  const auto* mtx = reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-  update_gx_state(state.pos, mtx->toTransposed4x4());
-#endif
 }
 
 // TODO GXLoadPosMtxIndx
@@ -34,56 +22,37 @@ void GXLoadPosMtxImm(const void* mtx_, u32 id) {
 void GXLoadNrmMtxImm(const void* mtx_, u32 id) {
   CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
   auto& state = g_gxState.pnMtx[id / 3];
-#ifdef AURORA_NATIVE_MATRIX
-  const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
+  const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
   update_gx_state(state.nrm, mtx);
-#else
-  const auto* mtx = reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-  update_gx_state(state.nrm, mtx->toTransposed4x4());
-#endif
 }
 
 // TODO GXLoadNrmMtxImm3x3
 // TODO GXLoadNrmMtxIndx3x3
 
 void GXSetCurrentMtx(u32 id) {
-  CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
+  CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", id);
   update_gx_state(g_gxState.currentPnMtx, id / 3);
 }
 
 void GXLoadTexMtxImm(const void* mtx_, u32 id, GXTexMtxType type) {
   CHECK((id >= GX_TEXMTX0 && id <= GX_IDENTITY) || (id >= GX_PTTEXMTX0 && id <= GX_PTIDENTITY), "invalid tex mtx {}",
-        static_cast<int>(id));
+        id);
   if (id >= GX_PTTEXMTX0) {
-    CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", static_cast<int>(type));
+    CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", underlying(type));
     const auto idx = (id - GX_PTTEXMTX0) / 3;
-#ifdef AURORA_NATIVE_MATRIX
-    const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-    update_gx_state<aurora::Mat4x4<float>>(g_gxState.ptTexMtxs[idx], mtx);
-#else
     const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-    update_gx_state<aurora::Mat4x4<float>>(g_gxState.ptTexMtxs[idx], mtx.toTransposed4x4());
-#endif
+    update_gx_state(g_gxState.ptTexMtxs[idx], mtx);
   } else {
     const auto idx = (id - GX_TEXMTX0) / 3;
     switch (type) {
     case GX_MTX3x4: {
-#ifdef AURORA_NATIVE_MATRIX
-      const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
-#else
       const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx.toTransposed4x4());
-#endif
+      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
       break;
     }
     case GX_MTX2x4: {
-      const auto& mtx = *reinterpret_cast<const aurora::Mat4x2<float>*>(mtx_);
-#ifdef AURORA_NATIVE_MATRIX
+      const auto& mtx = *reinterpret_cast<const aurora::Mat2x4<float>*>(mtx_);
       update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
-#else
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx.transpose());
-#endif
       break;
     }
     }


@@ -1,47 +1,113 @@
 #include "gx.hpp"
-#include "../../gfx/stream/shader.hpp"
-#include <algorithm>
+#include "aurora/math.hpp"
+#include "../../gfx/model/shader.hpp"
+#include "../../gfx/gx_fmt.hpp"
+#include <cstring>
 #include <optional>
 
-#ifndef NDEBUG
-static inline GXAttr next_attr(size_t begin) {
-  auto iter = std::find_if(g_gxState.vtxDesc.begin() + begin, g_gxState.vtxDesc.end(),
-                           [](const auto type) { return type != GX_NONE; });
-  if (begin > 0 && iter == g_gxState.vtxDesc.end()) {
-    // wrap around
-    iter = std::find_if(g_gxState.vtxDesc.begin(), g_gxState.vtxDesc.end(),
-                        [](const auto type) { return type != GX_NONE; });
-  }
-  return GXAttr(iter - g_gxState.vtxDesc.begin());
-}
-#endif
+struct Attribute {
+  uint32_t offset;
+  GXAttr attr;
+  GXAttrType type;
+  aurora::gfx::gx::VtxAttrFmt fmt;
+};
 
 struct SStreamState {
   GXPrimitive primitive;
   GXVtxFmt vtxFmt;
+  std::vector<Attribute> attrs;
+  u16 curAttr = 0;
   u16 vertexCount = 0;
-  u16 vertexStart = 0;
+  u16 vertexStart;
+  u16 vertexSize;
   aurora::ByteBuffer vertexBuffer;
+  uint8_t* vertexData = nullptr;
   std::vector<u16> indices;
-#ifndef NDEBUG
-  GXAttr nextAttr;
-#endif
 
-  explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept
-  : primitive(primitive), vtxFmt(vtxFmt), vertexStart(vertexStart) {
-    vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize);
+  explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, std::vector<Attribute> attrs, u16 numVerts,
+                        u16 vertexSize, u16 vertexStart) noexcept
+  : primitive(primitive), vtxFmt(vtxFmt), attrs(std::move(attrs)), vertexStart(vertexStart), vertexSize(vertexSize) {
+    vertexBuffer.reserve_extra(static_cast<size_t>(numVerts) * vertexSize);
     if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) {
-      indices.reserve((u32(numVerts) - 3) * 3 + 3);
+      indices.reserve(((static_cast<u32>(numVerts) - 3) * 3) + 3);
     } else if (numVerts > 4 && primitive == GX_QUADS) {
-      indices.reserve(u32(numVerts) / 4 * 6);
+      indices.reserve(static_cast<u32>(numVerts) / 4 * 6);
     } else {
       indices.reserve(numVerts);
     }
-#ifndef NDEBUG
-    nextAttr = next_attr(0);
-#endif
   }
+
+  [[maybe_unused]] u8 check_direct(GXAttr attr, GXCompCnt cnt, GXCompType type) noexcept {
+    const auto& curAttr = attrs[this->curAttr];
+    ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr);
+    ASSERT(curAttr.type == GX_DIRECT, "bad attribute type: {}, expected GX_DIRECT", curAttr.type);
+    ASSERT(curAttr.fmt.cnt == cnt, "bad attribute count: {}, expected {}", cnt, curAttr.fmt.cnt);
+    ASSERT(curAttr.fmt.type == type, "bad attribute type: {}, expected {}", type, curAttr.fmt.type);
+    return curAttr.fmt.frac;
+  }
+
+  void check_indexed(GXAttr attr, GXAttrType type) noexcept {
+    const auto& curAttr = attrs[this->curAttr];
+    ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr);
+    ASSERT(curAttr.type == type, "bad attribute type: {}, expected {}", type, curAttr.type);
+  }
+
+  template <typename T>
+  void append(const T& value) noexcept {
+    append_data(&value, sizeof(value), attrs[curAttr].offset);
+    next_attribute();
+  }
+
+private:
+  void append_data(const void* ptr, size_t size, uint32_t offset) {
+    if (vertexData == nullptr) {
+      const auto vertexStart = vertexBuffer.size();
+      vertexBuffer.append_zeroes(vertexSize);
+      vertexData = vertexBuffer.data() + vertexStart;
+      inc_vertex_count();
+    }
+    ASSERT(offset + size <= vertexSize, "bad attribute end: {}, expected {}", offset + size, vertexSize);
+    memcpy(vertexData + offset, ptr, size);
+  }
+
+  void next_attribute() noexcept {
+    curAttr = curAttr + 1;
+    if (curAttr >= attrs.size()) {
+      curAttr = 0;
+      vertexData = nullptr;
+    }
+  }
+
+  void inc_vertex_count() noexcept {
+    auto curVertex = vertexStart + vertexCount;
+    if (primitive == GX_LINES || primitive == GX_LINESTRIP || primitive == GX_POINTS) {
+      // Currently unsupported, skip
+      return;
+    }
+    if (primitive == GX_TRIANGLES || primitive == GX_TRIANGLESTRIP || vertexCount < 3) {
+      // pass
+    } else if (primitive == GX_TRIANGLEFAN) {
+      indices.push_back(vertexStart);
+      indices.push_back(curVertex - 1);
+    } /*else if (primitive == GX_TRIANGLESTRIP) {
+      if ((vertexCount & 1) == 0) {
+        indices.push_back(curVertex - 2);
+        indices.push_back(curVertex - 1);
+      } else {
+        indices.push_back(curVertex - 1);
+        indices.push_back(curVertex - 2);
+      }
+    }*/
+    else if (primitive == GX_QUADS) {
+      if ((vertexCount & 3) == 3) {
+        indices.push_back(curVertex - 3);
+        indices.push_back(curVertex - 1);
+      }
+    }
+    indices.push_back(curVertex);
+    ++vertexCount;
+  }
 };
@@ -51,228 +117,319 @@ static u16 lastVertexStart = 0;
 extern "C" {
 void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
   CHECK(!sStreamState, "Stream began twice!");
 
   uint16_t vertexSize = 0;
+  uint16_t numDirectAttrs = 0;
+  uint16_t numIndexedAttrs = 0;
   for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) {
     if (type == GX_DIRECT) {
+      ++numDirectAttrs;
       if (attr == GX_VA_POS || attr == GX_VA_NRM) {
         vertexSize += 12;
       } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) {
         vertexSize += 16;
       } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
         vertexSize += 8;
-      } else UNLIKELY {
-        FATAL("dont know how to handle attr {}", static_cast<int>(attr));
-      }
+      } else
+        UNLIKELY { FATAL("dont know how to handle attr {}", attr); }
     } else if (type == GX_INDEX8 || type == GX_INDEX16) {
-      vertexSize += 2;
+      ++numIndexedAttrs;
     }
-    attr = GXAttr(attr + 1);
+    attr = static_cast<GXAttr>(attr + 1);
   }
+  auto [num4xAttr, rem] = std::div(numIndexedAttrs, 4);
+  u32 num2xAttr = 0;
+  if (rem > 2) {
+    ++num4xAttr;
+  } else if (rem > 0) {
+    ++num2xAttr;
+  }
+  u32 directStart = num4xAttr * 8 + num2xAttr * 4;
+  vertexSize += directStart;
+  u32 indexOffset = 0;
+  u32 directOffset = directStart;
+  std::vector<Attribute> attrs;
+  attrs.reserve(numDirectAttrs + numIndexedAttrs);
+  const auto& curVtxFmt = g_gxState.vtxFmts[vtxFmt];
+  for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) {
+    if (type == GX_DIRECT) {
+      u32 attrSize;
+      if (attr == GX_VA_POS || attr == GX_VA_NRM) {
+        attrSize = 12;
+      } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) {
+        attrSize = 16;
+      } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
+        attrSize = 8;
+      } else
+        UNLIKELY { FATAL("dont know how to handle attr {}", attr); }
+      const auto& attrFmt = curVtxFmt.attrs[attr];
+      attrs.emplace_back(directOffset, attr, type, attrFmt);
+      directOffset += attrSize;
+    } else if (type == GX_INDEX8 || type == GX_INDEX16) {
+      attrs.emplace_back(indexOffset, attr, type);
+      indexOffset += 2;
+    }
+    attr = static_cast<GXAttr>(attr + 1);
+  }
   CHECK(vertexSize > 0, "no vtx attributes enabled?");
-  sStreamState.emplace(primitive, vtxFmt, nVerts, vertexSize, g_gxState.stateDirty ? 0 : lastVertexStart);
+  sStreamState.emplace(primitive, vtxFmt, std::move(attrs), nVerts, vertexSize,
+                       /*g_gxState.stateDirty ? 0 : lastVertexStart*/ 0);
 }
 
-static inline void check_attr_order(GXAttr attr) noexcept {
-#ifndef NDEBUG
-  CHECK(sStreamState, "Stream not started!");
-  CHECK(sStreamState->nextAttr == attr, "bad attribute order: {}, expected {}", static_cast<int>(attr),
-        static_cast<int>(sStreamState->nextAttr));
-  sStreamState->nextAttr = next_attr(attr + 1);
-#endif
-}
-
-void GXPosition3f32(float x, float y, float z) {
-  check_attr_order(GX_VA_POS);
-  auto& state = *sStreamState;
-  state.vertexBuffer.append(&x, sizeof(float));
-  state.vertexBuffer.append(&y, sizeof(float));
-  state.vertexBuffer.append(&z, sizeof(float));
-  auto curVertex = state.vertexStart + state.vertexCount;
-  if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) {
-    // pass
-  } else if (state.primitive == GX_TRIANGLEFAN) {
-    state.indices.push_back(state.vertexStart);
-    state.indices.push_back(curVertex - 1);
-  } else if (state.primitive == GX_TRIANGLESTRIP) {
-    if ((state.vertexCount & 1) == 0) {
-      state.indices.push_back(curVertex - 2);
-      state.indices.push_back(curVertex - 1);
-    } else {
-      state.indices.push_back(curVertex - 1);
-      state.indices.push_back(curVertex - 2);
-    }
-  } else if (state.primitive == GX_QUADS) {
-    if ((state.vertexCount & 3) == 3) {
-      state.indices.push_back(curVertex - 3);
-      state.indices.push_back(curVertex - 1);
-    }
-  }
-  state.indices.push_back(curVertex);
-  ++state.vertexCount;
+void GXPosition3f32(f32 x, f32 y, f32 z) {
+  sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_F32);
+  sStreamState->append(aurora::Vec3{x, y, z});
 }
 
 void GXPosition3u16(u16 x, u16 y, u16 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXPosition3s16(s16 x, s16 y, s16 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXPosition3u8(u8 x, u8 y, u8 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXPosition3s8(s8 x, s8 y, s8 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
-void GXPosition2f32(float x, float y) {
-  GXPosition3f32(x, y, 0.f);
+void GXPosition2f32(f32 x, f32 y) {
+  sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_F32);
+  sStreamState->append(aurora::Vec3{x, y, 0.f});
 }
 
 void GXPosition2u16(u16 x, u16 y) {
-  GXPosition3u16(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition2s16(s16 x, s16 y) {
-  GXPosition3s16(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition2u8(u8 x, u8 y) {
-  GXPosition3u8(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition2s8(s8 x, s8 y) {
-  GXPosition3s8(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition1x16(u16 idx) {
-  check_attr_order(GX_VA_POS);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+  sStreamState->check_indexed(GX_VA_POS, GX_INDEX16);
+  sStreamState->append<u16>(idx);
 }
 
 void GXPosition1x8(u8 idx) {
-  GXPosition1x16(idx);
+  sStreamState->check_indexed(GX_VA_POS, GX_INDEX8);
+  sStreamState->append<u16>(idx);
 }
 
-void GXNormal3f32(float x, float y, float z) {
-  check_attr_order(GX_VA_NRM);
-  sStreamState->vertexBuffer.append(&x, 4);
-  sStreamState->vertexBuffer.append(&y, 4);
-  sStreamState->vertexBuffer.append(&z, 4);
+void GXNormal3f32(f32 x, f32 y, f32 z) {
+  sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_F32);
+  sStreamState->append(aurora::Vec3{x, y, z});
 }
 
 void GXNormal3s16(s16 x, s16 y, s16 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM];
-  GXNormal3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXNormal3s8(s8 x, s8 y, s8 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM];
-  GXNormal3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
-void GXNormal1x16(u16 idx) {
-  check_attr_order(GX_VA_NRM);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+void GXNormal1x16(u16 index) {
+  sStreamState->check_indexed(GX_VA_NRM, GX_INDEX16);
+  sStreamState->append<u16>(index);
 }
 
-void GXNormal1x8(u8 idx) {
-  GXNormal1x16(idx);
+void GXNormal1x8(u8 index) {
+  sStreamState->check_indexed(GX_VA_NRM, GX_INDEX8);
+  sStreamState->append<u16>(index);
 }
 
-void GXColor4f32(float r, float g, float b, float a) {
-  check_attr_order(GX_VA_CLR0);
-  sStreamState->vertexBuffer.append(&r, 4);
-  sStreamState->vertexBuffer.append(&g, 4);
-  sStreamState->vertexBuffer.append(&b, 4);
-  sStreamState->vertexBuffer.append(&a, 4);
+void GXColor4f32(f32 r, f32 g, f32 b, f32 a) {
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
+  sStreamState->append(aurora::Vec4{r, g, b, a});
 }
 
 void GXColor4u8(u8 r, u8 g, u8 b, u8 a) {
-  GXColor4f32(static_cast<float>(r) / 255.f, static_cast<float>(g) / 255.f, static_cast<float>(b) / 255.f,
-              static_cast<float>(a) / 255.f);
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>(r) / 255.f,
+      static_cast<f32>(g) / 255.f,
+      static_cast<f32>(b) / 255.f,
+      static_cast<f32>(a) / 255.f,
+  });
 }
 
 void GXColor3u8(u8 r, u8 g, u8 b) {
-  GXColor4u8(r, g, b, 255);
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB8);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>(r) / 255.f,
+      static_cast<f32>(g) / 255.f,
+      static_cast<f32>(b) / 255.f,
+      1.f,
+  });
 }
 
-void GXColor1x16(u16 idx) {
-  check_attr_order(GX_VA_CLR0);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+void GXColor1u32(u32 clr) {
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>((clr >> 24) & 0xff) / 255.f,
+      static_cast<f32>((clr >> 16) & 0xff) / 255.f,
+      static_cast<f32>((clr >> 8) & 0xff) / 255.f,
+      static_cast<f32>(clr & 0xff) / 255.f,
+  });
 }
 
-void GXColor1x8(u8 idx) {
-  GXColor1x16(idx);
+void GXColor1u16(u16 clr) {
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB565);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>((clr >> 11) & 0x1f) / 31.f,
+      static_cast<f32>((clr >> 5) & 0x3f) / 63.f,
+      static_cast<f32>(clr & 0x1f) / 31.f,
+      1.f,
+  });
 }
 
-void GXTexCoord2f32(float u, float v) {
-  check_attr_order(GX_VA_TEX0);
-  sStreamState->vertexBuffer.append(&u, 4);
-  sStreamState->vertexBuffer.append(&v, 4);
+void GXTexCoord2f32(f32 s, f32 t) {
+  sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_F32);
+  sStreamState->append(aurora::Vec2{s, t});
+}
+
+void GXTexCoord2u16(u16 s, u16 t) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXTexCoord2s16(s16 s, s16 t) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_TEX0];
-  GXTexCoord2f32(
-      static_cast<float>(s) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(t) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
+}
+
+void GXTexCoord2u8(u8 s, u8 t) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
+}
+
+void GXTexCoord2s8(s8 s, s8 t) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
+}
+
+void GXTexCoord1f32(f32 s) {
+  sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_F32);
+  sStreamState->append(aurora::Vec2{s, 0.f});
+}
+
+void GXTexCoord1u16(u16 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1s16(s16 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1u8(u8 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1s8(s8 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
-void GXTexCoord1x16(u16 idx) {
-  check_attr_order(GX_VA_TEX0);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+void GXTexCoord1x16(u16 index) {
+  sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX16);
+  sStreamState->append(index);
 }
 
-void GXTexCoord1x8(u8 idx) {
-  GXTexCoord1x16(idx);
+void GXTexCoord1x8(u8 index) {
+  sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX8);
+  sStreamState->append(static_cast<u16>(index));
 }
@@ -282,27 +439,55 @@ void GXEnd() {
   }
   const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
   const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices});
-  if (g_gxState.stateDirty) {
-    aurora::gfx::stream::PipelineConfig config{};
-    populate_pipeline_config(config, GX_TRIANGLES);
-    const auto info = build_shader_info(config.shaderConfig);
-    const auto pipeline = aurora::gfx::pipeline_ref(config);
-    aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{
-        .pipeline = pipeline,
-        .vertRange = vertRange,
-        .uniformRange = build_uniform(info),
-        .indexRange = indexRange,
-        .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
-        .bindGroups = build_bind_groups(info, config.shaderConfig, {}),
-        .dstAlpha = g_gxState.dstAlpha,
-    });
-  } else {
-    aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{
-        .vertRange = vertRange,
-        .indexRange = indexRange,
-        .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
-    });
-  }
+
+  aurora::gfx::gx::BindGroupRanges ranges{};
+  for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
+    if (g_gxState.vtxDesc[i] != GX_INDEX8 && g_gxState.vtxDesc[i] != GX_INDEX16) {
+      continue;
+    }
+    auto& array = g_gxState.arrays[i];
+    if (array.cachedRange.size > 0) {
+      // Use the currently cached range
+      ranges.vaRanges[i] = array.cachedRange;
+    } else {
+      // Push array data to storage and cache range
+      const auto range = aurora::gfx::push_storage(static_cast<const uint8_t*>(array.data), array.size);
+      ranges.vaRanges[i] = range;
+      array.cachedRange = range;
+    }
+  }
+
+  // if (g_gxState.stateDirty) {
+  aurora::gfx::model::PipelineConfig config{};
+  GXPrimitive primitive = GX_TRIANGLES;
+  switch (sStreamState->primitive) {
+  case GX_TRIANGLESTRIP:
+    primitive = GX_TRIANGLESTRIP;
+    break;
+  default:
+    break;
+  }
+  populate_pipeline_config(config, primitive, sStreamState->vtxFmt);
+  const auto info = build_shader_info(config.shaderConfig);
+  const auto bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, ranges);
+  const auto pipeline = aurora::gfx::pipeline_ref(config);
+  aurora::gfx::push_draw_command(aurora::gfx::model::DrawData{
+      .pipeline = pipeline,
+      .vertRange = vertRange,
+      .idxRange = indexRange,
+      .dataRanges = ranges,
+      .uniformRange = build_uniform(info),
+      .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
+      .bindGroups = bindGroups,
+      .dstAlpha = g_gxState.dstAlpha,
+  });
+  // } else {
+  //   aurora::gfx::merge_draw_command(aurora::gfx::model::DrawData{
+  //       .vertRange = vertRange,
+  //       .idxRange = indexRange,
+  //       .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
+  //   });
+  // }
   lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount;
   sStreamState.reset();
 }


@@ -3,7 +3,6 @@
 #include "../internal.hpp"
 #include "../webgpu/gpu.hpp"
 #include "model/shader.hpp"
-#include "stream/shader.hpp"
 #include "texture.hpp"
 
 #include <condition_variable>
@@ -11,7 +10,6 @@
 #include <fstream>
 #include <mutex>
 #include <thread>
-#include <variant>
 
 #include <absl/container/flat_hash_map.h>
 #include <magic_enum.hpp>
@@ -37,13 +35,11 @@ constexpr uint64_t StagingBufferSize =
     UniformBufferSize + VertexBufferSize + IndexBufferSize + StorageBufferSize + TextureUploadSize;
 
 struct ShaderState {
-  stream::State stream;
   model::State model;
 };
 
 struct ShaderDrawCommand {
   ShaderType type;
   union {
-    stream::DrawData stream;
     model::DrawData model;
   };
 };
@@ -168,10 +164,9 @@ static u32 g_serializedPipelineCount = 0;
 template <typename PipelineConfig>
 static void serialize_pipeline_config(ShaderType type, const PipelineConfig& config) {
   static_assert(std::has_unique_object_representations_v<PipelineConfig>);
-  g_serializedPipelines.append(&type, sizeof(type));
-  const u32 configSize = sizeof(config);
-  g_serializedPipelines.append(&configSize, sizeof(configSize));
-  g_serializedPipelines.append(&config, configSize);
+  g_serializedPipelines.append(type);
+  g_serializedPipelines.append<u32>(sizeof(config));
+  g_serializedPipelines.append(config);
   ++g_serializedPipelineCount;
 }
@@ -278,33 +273,19 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4<float>
   ++g_currentRenderPass;
 }
 
-template <>
-const stream::State& get_state() {
-  return g_state.stream;
-}
-
-template <>
-void push_draw_command(stream::DrawData data) {
-  push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data});
-}
-
-template <>
-void merge_draw_command(stream::DrawData data) {
-  auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
-  CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}",
-        last.vertRange.offset + last.vertRange.size, data.vertRange.offset);
-  CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} -> {}",
-        last.indexRange.offset + last.indexRange.size, data.indexRange.offset);
-  last.vertRange.size += data.vertRange.size;
-  last.indexRange.size += data.indexRange.size;
-  last.indexCount += data.indexCount;
-  ++g_mergedDrawCallCount;
-}
-
-template <>
-PipelineRef pipeline_ref(stream::PipelineConfig config) {
-  return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); });
-}
+// template <>
+// void merge_draw_command(stream::DrawData data) {
+//   auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
+//   CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}",
+//         last.vertRange.offset + last.vertRange.size, data.vertRange.offset);
+//   CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} -> {}",
+//         last.indexRange.offset + last.indexRange.size, data.indexRange.offset);
+//   last.vertRange.size += data.vertRange.size;
+//   last.indexRange.size += data.indexRange.size;
+//   last.indexCount += data.indexCount;
+//   ++g_mergedDrawCallCount;
+// }
 
 template <>
 void push_draw_command(model::DrawData data) {
@@ -378,16 +359,6 @@ void load_pipeline_cache() {
     u32 size = *reinterpret_cast<const u32*>(pipelineCache.data() + offset);
     offset += sizeof(u32);
     switch (type) {
-    case ShaderType::Stream: {
-      if (size != sizeof(stream::PipelineConfig)) {
-        break;
-      }
-      const auto config = *reinterpret_cast<const stream::PipelineConfig*>(pipelineCache.data() + offset);
-      if (config.version != gx::GXPipelineConfigVersion) {
-        break;
-      }
-      find_pipeline(type, config, [=]() { return stream::create_pipeline(g_state.stream, config); }, true);
-    } break;
     case ShaderType::Model: {
       if (size != sizeof(model::PipelineConfig)) {
         break;
@@ -397,9 +368,10 @@ void load_pipeline_cache() {
         break;
       }
       find_pipeline(type, config, [=]() { return model::create_pipeline(g_state.model, config); }, true);
-    } break;
+      break;
+    }
     default:
-      Log.warn("Unknown pipeline type {}", static_cast<int>(type));
+      Log.warn("Unknown pipeline type {}", underlying(type));
       break;
     }
     offset += size;
@@ -459,7 +431,6 @@ void initialize() {
   }
   map_staging_buffer();
 
-  g_state.stream = stream::construct_state();
   g_state.model = model::construct_state();
 
   load_pipeline_cache();
@@ -581,6 +552,9 @@ void end_frame(const wgpu::CommandEncoder& cmd) {
   currentStagingBuffer = (currentStagingBuffer + 1) % g_stagingBuffers.size();
   map_staging_buffer();
   g_currentRenderPass = UINT32_MAX;
+  for (auto& array : gx::g_gxState.arrays) {
+    array.cachedRange = {};
+  }
 
   if (!g_hasPipelineThread) {
     pipeline_worker();
@@ -612,7 +586,7 @@ void render(wgpu::CommandEncoder& cmd) {
         .view = webgpu::g_depthBuffer.view,
         .depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load,
         .depthStoreOp = wgpu::StoreOp::Store,
-        .depthClearValue = 1.f,
+        .depthClearValue = gx::UseReversedZ ? 0.f : 1.f,
     };
     const auto label = fmt::format("Render pass {}", i);
     const wgpu::RenderPassDescriptor renderPassDescriptor{
@@ -680,7 +654,9 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) {
     switch (cmd.type) {
     case CommandType::SetViewport: {
      const auto& vp = cmd.data.setViewport;
-      pass.SetViewport(vp.left, vp.top, vp.width, vp.height, vp.znear, vp.zfar);
+      const float minDepth = gx::UseReversedZ ? 1.f - vp.zfar : vp.znear;
+      const float maxDepth = gx::UseReversedZ ? 1.f - vp.znear : vp.zfar;
+      pass.SetViewport(vp.left, vp.top, vp.width, vp.height, minDepth, maxDepth);
     } break;
     case CommandType::SetScissor: {
       const auto& sc = cmd.data.setScissor;
@@ -694,9 +670,6 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) {
     case CommandType::Draw: {
       const auto& draw = cmd.data.draw;
       switch (draw.type) {
-      case ShaderType::Stream:
-        stream::render(g_state.stream, draw.stream, pass);
-        break;
      case ShaderType::Model:
        model::render(g_state.model, draw.model, pass);
        break;


@@ -56,8 +56,7 @@ public:
   ByteBuffer() noexcept = default;
   explicit ByteBuffer(size_t size) noexcept
   : m_data(static_cast<uint8_t*>(calloc(1, size))), m_length(size), m_capacity(size) {}
-  explicit ByteBuffer(uint8_t* data, size_t size) noexcept
-  : m_data(data), m_capacity(size), m_owned(false) {}
+  explicit ByteBuffer(uint8_t* data, size_t size) noexcept : m_data(data), m_capacity(size), m_owned(false) {}
   ~ByteBuffer() noexcept {
     if (m_data != nullptr && m_owned) {
       free(m_data);
@@ -98,6 +97,11 @@ public:
     m_length += size;
   }
 
+  template <typename T>
+  void append(const T& obj) {
+    append(&obj, sizeof(T));
+  }
+
   void append_zeroes(size_t size) {
     resize(m_length + size, true);
     m_length += size;
@@ -179,8 +183,7 @@ struct TextureRef;
 using TextureHandle = std::shared_ptr<TextureRef>;
 
 enum class ShaderType : uint8_t {
-  Stream,
-  Model,
+  Model = 1,
 };
 
 void initialize();


@ -7,7 +7,6 @@
#include <absl/container/flat_hash_map.h> #include <absl/container/flat_hash_map.h>
#include <cfloat> #include <cfloat>
#include <cmath>
using aurora::gfx::gx::g_gxState; using aurora::gfx::gx::g_gxState;
static aurora::Module Log("aurora::gx"); static aurora::Module Log("aurora::gx");
@ -25,7 +24,7 @@ const TextureBind& get_texture(GXTexMapID id) noexcept { return g_gxState.textur
static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) { static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) {
switch (fac) { switch (fac) {
DEFAULT_FATAL("invalid blend factor {}", static_cast<int>(fac)); DEFAULT_FATAL("invalid blend factor {}", underlying(fac));
case GX_BL_ZERO: case GX_BL_ZERO:
return wgpu::BlendFactor::Zero; return wgpu::BlendFactor::Zero;
case GX_BL_ONE: case GX_BL_ONE:
@ -55,21 +54,21 @@ static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) {
static inline wgpu::CompareFunction to_compare_function(GXCompare func) { static inline wgpu::CompareFunction to_compare_function(GXCompare func) {
switch (func) { switch (func) {
DEFAULT_FATAL("invalid depth fn {}", static_cast<int>(func)); DEFAULT_FATAL("invalid depth fn {}", underlying(func));
case GX_NEVER: case GX_NEVER:
return wgpu::CompareFunction::Never; return wgpu::CompareFunction::Never;
case GX_LESS: case GX_LESS:
return wgpu::CompareFunction::Less; return UseReversedZ ? wgpu::CompareFunction::Greater : wgpu::CompareFunction::Less;
case GX_EQUAL: case GX_EQUAL:
return wgpu::CompareFunction::Equal; return wgpu::CompareFunction::Equal;
case GX_LEQUAL: case GX_LEQUAL:
return wgpu::CompareFunction::LessEqual; return UseReversedZ ? wgpu::CompareFunction::GreaterEqual : wgpu::CompareFunction::LessEqual;
case GX_GREATER: case GX_GREATER:
return wgpu::CompareFunction::Greater; return UseReversedZ ? wgpu::CompareFunction::Less : wgpu::CompareFunction::Greater;
case GX_NEQUAL: case GX_NEQUAL:
return wgpu::CompareFunction::NotEqual; return wgpu::CompareFunction::NotEqual;
case GX_GEQUAL: case GX_GEQUAL:
return wgpu::CompareFunction::GreaterEqual; return UseReversedZ ? wgpu::CompareFunction::LessEqual : wgpu::CompareFunction::GreaterEqual;
case GX_ALWAYS: case GX_ALWAYS:
return wgpu::CompareFunction::Always; return wgpu::CompareFunction::Always;
} }
@ -79,7 +78,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr
GXLogicOp op, u32 dstAlpha) { GXLogicOp op, u32 dstAlpha) {
wgpu::BlendComponent colorBlendComponent; wgpu::BlendComponent colorBlendComponent;
switch (mode) { switch (mode) {
DEFAULT_FATAL("unsupported blend mode {}", static_cast<int>(mode)); DEFAULT_FATAL("unsupported blend mode {}", underlying(mode));
case GX_BM_NONE: case GX_BM_NONE:
colorBlendComponent = { colorBlendComponent = {
.operation = wgpu::BlendOperation::Add, .operation = wgpu::BlendOperation::Add,
@ -103,7 +102,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr
break; break;
case GX_BM_LOGIC: case GX_BM_LOGIC:
switch (op) { switch (op) {
DEFAULT_FATAL("unsupported logic op {}", static_cast<int>(op)); DEFAULT_FATAL("unsupported logic op {}", underlying(op));
case GX_LO_CLEAR: case GX_LO_CLEAR:
colorBlendComponent = { colorBlendComponent = {
.operation = wgpu::BlendOperation::Add, .operation = wgpu::BlendOperation::Add,
@ -160,7 +159,7 @@ static inline wgpu::ColorWriteMask to_write_mask(bool colorUpdate, bool alphaUpd
static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCullMode gx_cullMode) { static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCullMode gx_cullMode) {
wgpu::PrimitiveTopology primitive = wgpu::PrimitiveTopology::TriangleList; wgpu::PrimitiveTopology primitive = wgpu::PrimitiveTopology::TriangleList;
switch (gx_prim) { switch (gx_prim) {
DEFAULT_FATAL("unsupported primitive type {}", static_cast<int>(gx_prim)); DEFAULT_FATAL("unsupported primitive type {}", underlying(gx_prim));
case GX_TRIANGLES: case GX_TRIANGLES:
break; break;
case GX_TRIANGLESTRIP: case GX_TRIANGLESTRIP:
@ -169,7 +168,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCul
} }
wgpu::CullMode cullMode = wgpu::CullMode::None; wgpu::CullMode cullMode = wgpu::CullMode::None;
switch (gx_cullMode) { switch (gx_cullMode) {
DEFAULT_FATAL("unsupported cull mode {}", static_cast<int>(gx_cullMode)); DEFAULT_FATAL("unsupported cull mode {}", underlying(gx_cullMode));
case GX_CULL_FRONT: case GX_CULL_FRONT:
cullMode = wgpu::CullMode::Front; cullMode = wgpu::CullMode::Front;
break; break;
@ -193,14 +192,6 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
.format = g_graphicsConfig.depthFormat, .format = g_graphicsConfig.depthFormat,
.depthWriteEnabled = config.depthUpdate, .depthWriteEnabled = config.depthUpdate,
.depthCompare = to_compare_function(config.depthFunc), .depthCompare = to_compare_function(config.depthFunc),
.stencilFront =
wgpu::StencilFaceState{
.compare = wgpu::CompareFunction::Always,
},
.stencilBack =
wgpu::StencilFaceState{
.compare = wgpu::CompareFunction::Always,
},
}; };
const auto blendState = const auto blendState =
to_blend_state(config.blendMode, config.blendFacSrc, config.blendFacDst, config.blendOp, config.dstAlpha); to_blend_state(config.blendMode, config.blendFacSrc, config.blendFacDst, config.blendOp, config.dstAlpha);
@ -249,25 +240,23 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
return g_device.CreateRenderPipeline(&descriptor); return g_device.CreateRenderPipeline(&descriptor);
} }
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept { void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept {
const auto& vtxFmt = g_gxState.vtxFmts[fmt];
config.shaderConfig.fogType = g_gxState.fog.type; config.shaderConfig.fogType = g_gxState.fog.type;
config.shaderConfig.vtxAttrs = g_gxState.vtxDesc; config.shaderConfig.vtxAttrs = g_gxState.vtxDesc;
int lastIndexedAttr = -1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
const auto type = g_gxState.vtxDesc[i]; const auto type = g_gxState.vtxDesc[i];
if (type != GX_INDEX8 && type != GX_INDEX16) { if (type != GX_INDEX8 && type != GX_INDEX16) {
config.shaderConfig.attrMapping[i] = GX_VA_NULL; config.shaderConfig.attrMapping[i] = {};
continue; continue;
} }
const auto& array = g_gxState.arrays[i]; // Map attribute to its own storage
if (lastIndexedAttr >= 0 && array == g_gxState.arrays[lastIndexedAttr]) { config.shaderConfig.attrMapping[i] = StorageConfig {
// Map attribute to previous attribute .attr = static_cast<GXAttr>(i),
config.shaderConfig.attrMapping[i] = config.shaderConfig.attrMapping[lastIndexedAttr]; .cnt = vtxFmt.attrs[i].cnt,
} else { .compType = vtxFmt.attrs[i].type,
// Map attribute to its own storage .frac = vtxFmt.attrs[i].frac,
config.shaderConfig.attrMapping[i] = static_cast<GXAttr>(i); };
}
lastIndexedAttr = i;
} }
config.shaderConfig.tevSwapTable = g_gxState.tevSwapTable; config.shaderConfig.tevSwapTable = g_gxState.tevSwapTable;
for (u8 i = 0; i < g_gxState.numTevStages; ++i) { for (u8 i = 0; i < g_gxState.numTevStages; ++i) {
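
Each indexed attribute now records its full vertex format (component count, component type, and fixed-point scale) in the shader config instead of aliasing a previously seen attribute's storage buffer. With that information baked in, the generated shader can presumably fetch raw array elements and normalize them itself, matching the commit's "fetched based on the vertex format" goal. A sketch of the fixed-point decode the frac field implies (it mirrors the GX_S16 path in prepare_vtx_buffer further down):

    // Hypothetical helper, for illustration only.
    inline f32 decode_s16(s16 raw, u8 frac) {
      return static_cast<f32>(raw) / static_cast<f32>(1 << frac);
    }
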
@ -328,14 +317,14 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noe
Range build_uniform(const ShaderInfo& info) noexcept { Range build_uniform(const ShaderInfo& info) noexcept {
auto [buf, range] = map_uniform(info.uniformSize); auto [buf, range] = map_uniform(info.uniformSize);
{ {
buf.append(&g_gxState.pnMtx[g_gxState.currentPnMtx], 128); buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
buf.append(&g_gxState.proj, 64); buf.append(g_gxState.proj);
} }
for (int i = 0; i < info.loadsTevReg.size(); ++i) { for (int i = 0; i < info.loadsTevReg.size(); ++i) {
if (!info.loadsTevReg.test(i)) { if (!info.loadsTevReg.test(i)) {
continue; continue;
} }
buf.append(&g_gxState.colorRegs[i], 16); buf.append(g_gxState.colorRegs[i]);
} }
bool lightingEnabled = false; bool lightingEnabled = false;
for (int i = 0; i < info.sampledColorChannels.size(); ++i) { for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
@ -352,11 +341,10 @@ Range build_uniform(const ShaderInfo& info) noexcept {
if (lightingEnabled) { if (lightingEnabled) {
// Lights // Lights
static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights); static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
buf.append(&g_gxState.lights, 80 * GX::MaxLights); buf.append(g_gxState.lights);
// Light state for all channels // Light state for all channels
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
u32 lightState = g_gxState.colorChannelState[i].lightMask.to_ulong(); buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
buf.append(&lightState, 4);
} }
} }
for (int i = 0; i < info.sampledColorChannels.size(); ++i) { for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
@ -366,25 +354,25 @@ Range build_uniform(const ShaderInfo& info) noexcept {
const auto& ccc = g_gxState.colorChannelConfig[i * 2]; const auto& ccc = g_gxState.colorChannelConfig[i * 2];
const auto& ccs = g_gxState.colorChannelState[i * 2]; const auto& ccs = g_gxState.colorChannelState[i * 2];
if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) { if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
buf.append(&ccs.ambColor, 16); buf.append(ccs.ambColor);
} }
if (ccc.matSrc == GX_SRC_REG) { if (ccc.matSrc == GX_SRC_REG) {
buf.append(&ccs.matColor, 16); buf.append(ccs.matColor);
} }
const auto& ccca = g_gxState.colorChannelConfig[i * 2 + 1]; const auto& ccca = g_gxState.colorChannelConfig[i * 2 + 1];
const auto& ccsa = g_gxState.colorChannelState[i * 2 + 1]; const auto& ccsa = g_gxState.colorChannelState[i * 2 + 1];
if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) { if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
buf.append(&ccsa.ambColor, 16); buf.append(ccsa.ambColor);
} }
if (ccca.matSrc == GX_SRC_REG) { if (ccca.matSrc == GX_SRC_REG) {
buf.append(&ccsa.matColor, 16); buf.append(ccsa.matColor);
} }
} }
for (int i = 0; i < info.sampledKColors.size(); ++i) { for (int i = 0; i < info.sampledKColors.size(); ++i) {
if (!info.sampledKColors.test(i)) { if (!info.sampledKColors.test(i)) {
continue; continue;
} }
buf.append(&g_gxState.kcolors[i], 16); buf.append(g_gxState.kcolors[i]);
} }
for (int i = 0; i < info.usesTexMtx.size(); ++i) { for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (!info.usesTexMtx.test(i)) { if (!info.usesTexMtx.test(i)) {
@ -392,26 +380,16 @@ Range build_uniform(const ShaderInfo& info) noexcept {
} }
const auto& state = g_gxState; const auto& state = g_gxState;
switch (info.texMtxTypes[i]) { switch (info.texMtxTypes[i]) {
DEFAULT_FATAL("unhandled tex mtx type {}", static_cast<int>(info.texMtxTypes[i])); DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
case GX_TG_MTX2x4: case GX_TG_MTX2x4:
if (std::holds_alternative<Mat4x2<float>>(state.texMtxs[i])) { if (std::holds_alternative<Mat2x4<float>>(state.texMtxs[i])) {
buf.append(&std::get<Mat4x2<float>>(state.texMtxs[i]), 32); buf.append(std::get<Mat2x4<float>>(state.texMtxs[i]));
} else if (std::holds_alternative<Mat4x4<float>>(g_gxState.texMtxs[i])) {
// TODO: SMB hits this?
Mat4x2<float> mtx{
{1.f, 0.f},
{0.f, 1.f},
{0.f, 0.f},
{0.f, 0.f},
};
buf.append(&mtx, 32);
} else } else
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i); UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
break; break;
case GX_TG_MTX3x4: case GX_TG_MTX3x4:
if (std::holds_alternative<Mat4x4<float>>(g_gxState.texMtxs[i])) { if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
const auto& mat = std::get<Mat4x4<float>>(g_gxState.texMtxs[i]); buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
buf.append(&mat, 64);
} else } else
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i); UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
break; break;
@ -421,18 +399,11 @@ Range build_uniform(const ShaderInfo& info) noexcept {
if (!info.usesPTTexMtx.test(i)) { if (!info.usesPTTexMtx.test(i)) {
continue; continue;
} }
buf.append(&g_gxState.ptTexMtxs[i], 64); buf.append(g_gxState.ptTexMtxs[i]);
} }
if (info.usesFog) { if (info.usesFog) {
const auto& state = g_gxState.fog; const auto& state = g_gxState.fog;
struct Fog { Fog fog{.color = state.color};
Vec4<float> color = state.color;
float a = 0.f;
float b = 0.5f;
float c = 0.f;
float pad = FLT_MAX;
} fog{};
static_assert(sizeof(Fog) == 32);
if (state.nearZ != state.farZ && state.startZ != state.endZ) { if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ; const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ; const float fogRange = state.endZ - state.startZ;
@ -440,7 +411,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
fog.b = state.farZ / depthRange; fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange; fog.c = state.startZ / fogRange;
} }
buf.append(&fog, 32); buf.append(fog);
} }
for (int i = 0; i < info.sampledTextures.size(); ++i) { for (int i = 0; i < info.sampledTextures.size(); ++i) {
if (!info.sampledTextures.test(i)) { if (!info.sampledTextures.test(i)) {
@ -448,7 +419,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
} }
const auto& tex = get_texture(static_cast<GXTexMapID>(i)); const auto& tex = get_texture(static_cast<GXTexMapID>(i));
CHECK(tex, "unbound texture {}", i); CHECK(tex, "unbound texture {}", i);
buf.append(&tex.texObj.lodBias, 4); buf.append(tex.texObj.lodBias);
} }
g_gxState.stateDirty = false; g_gxState.stateDirty = false;
return range; return range;
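
All of the pointer-plus-size calls in build_uniform are replaced by a typed ByteBuffer::append, eliminating the hand-maintained byte counts (128, 64, 16, ...) that had to be kept in sync with sizeof. The new overload presumably reduces to something like:

    // Sketch; the actual ByteBuffer definition is elsewhere in this commit.
    template <typename T>
    void append(const T& obj) {
      append(&obj, sizeof(T));  // size derived from the type, not hardcoded
    }
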
@ -564,7 +535,7 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
}; };
u32 bindIdx = 1; u32 bindIdx = 1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (config.attrMapping[i] == static_cast<GXAttr>(i)) { if (config.attrMapping[i].attr == static_cast<GXAttr>(i)) {
uniformLayoutEntries[bindIdx] = wgpu::BindGroupLayoutEntry{ uniformLayoutEntries[bindIdx] = wgpu::BindGroupLayoutEntry{
.binding = bindIdx, .binding = bindIdx,
.visibility = wgpu::ShaderStage::Vertex, .visibility = wgpu::ShaderStage::Vertex,
@ -688,7 +659,7 @@ void shutdown() noexcept {
static wgpu::AddressMode wgpu_address_mode(GXTexWrapMode mode) { static wgpu::AddressMode wgpu_address_mode(GXTexWrapMode mode) {
switch (mode) { switch (mode) {
DEFAULT_FATAL("invalid wrap mode {}", static_cast<int>(mode)); DEFAULT_FATAL("invalid wrap mode {}", underlying(mode));
case GX_CLAMP: case GX_CLAMP:
return wgpu::AddressMode::ClampToEdge; return wgpu::AddressMode::ClampToEdge;
case GX_REPEAT: case GX_REPEAT:
@ -735,8 +706,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept {
.magFilter = wgpu::FilterMode::Nearest, .magFilter = wgpu::FilterMode::Nearest,
.minFilter = wgpu::FilterMode::Nearest, .minFilter = wgpu::FilterMode::Nearest,
.mipmapFilter = wgpu::MipmapFilterMode::Nearest, .mipmapFilter = wgpu::MipmapFilterMode::Nearest,
.lodMinClamp = 0.f,
.lodMaxClamp = 1000.f,
.maxAnisotropy = 1, .maxAnisotropy = 1,
}; };
} }
@ -750,8 +719,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept {
.magFilter = magFilter, .magFilter = magFilter,
.minFilter = minFilter, .minFilter = minFilter,
.mipmapFilter = mipFilter, .mipmapFilter = mipFilter,
.lodMinClamp = 0.f,
.lodMaxClamp = 1000.f,
.maxAnisotropy = wgpu_aniso(texObj.maxAniso), .maxAnisotropy = wgpu_aniso(texObj.maxAniso),
}; };
} }

View File

@ -46,6 +46,11 @@ constexpr float GX_LARGE_NUMBER = -1048576.0f;
#endif #endif
namespace aurora::gfx::gx { namespace aurora::gfx::gx {
constexpr bool EnableNormalVisualization = false;
constexpr bool EnableDebugPrints = false;
constexpr bool UsePerPixelLighting = true;
constexpr bool UseReversedZ = true;
constexpr u32 MaxTextures = GX_MAX_TEXMAP; constexpr u32 MaxTextures = GX_MAX_TEXMAP;
constexpr u32 MaxTluts = 20; constexpr u32 MaxTluts = 20;
constexpr u32 MaxTevStages = GX_MAX_TEVSTAGE; constexpr u32 MaxTevStages = GX_MAX_TEVSTAGE;
@ -144,8 +149,7 @@ struct ColorChannelState {
Vec4<float> ambColor; Vec4<float> ambColor;
GX::LightMask lightMask; GX::LightMask lightMask;
}; };
// Mat4x4 used instead of Mat4x3 for padding purposes using TexMtxVariant = std::variant<std::monostate, Mat2x4<float>, Mat3x4<float>>;
using TexMtxVariant = std::variant<std::monostate, Mat4x2<float>, Mat4x4<float>>;
struct TcgConfig { struct TcgConfig {
GXTexGenType type = GX_TG_MTX2x4; GXTexGenType type = GX_TG_MTX2x4;
GXTexGenSrc src = GX_MAX_TEXGENSRC; GXTexGenSrc src = GX_MAX_TEXGENSRC;
@ -213,10 +217,10 @@ struct VtxFmt {
std::array<VtxAttrFmt, MaxVtxAttr> attrs; std::array<VtxAttrFmt, MaxVtxAttr> attrs;
}; };
struct PnMtx { struct PnMtx {
Mat4x4<float> pos; Mat3x4<float> pos;
Mat4x4<float> nrm; Mat3x4<float> nrm;
}; };
static_assert(sizeof(PnMtx) == sizeof(Mat4x4<float>) * 2); static_assert(sizeof(PnMtx) == sizeof(Mat3x4<float>) * 2);
struct Light { struct Light {
Vec4<float> pos{0.f, 0.f, 0.f}; Vec4<float> pos{0.f, 0.f, 0.f};
Vec4<float> dir{0.f, 0.f, 0.f}; Vec4<float> dir{0.f, 0.f, 0.f};
@ -230,6 +234,14 @@ struct Light {
bool operator!=(const Light& rhs) const { return !(*this == rhs); } bool operator!=(const Light& rhs) const { return !(*this == rhs); }
}; };
static_assert(sizeof(Light) == 80); static_assert(sizeof(Light) == 80);
struct Fog {
Vec4<float> color;
float a = 0.f;
float b = 0.5f;
float c = 0.f;
float pad = FLT_MAX;
};
static_assert(sizeof(Fog) == 32);
struct AttrArray { struct AttrArray {
const void* data; const void* data;
u32 size; u32 size;
@ -245,7 +257,6 @@ struct GXState {
std::array<PnMtx, MaxPnMtx> pnMtx; std::array<PnMtx, MaxPnMtx> pnMtx;
u32 currentPnMtx; u32 currentPnMtx;
Mat4x4<float> proj; Mat4x4<float> proj;
Mat4x4<float> origProj; // for GXGetProjectionv
GXProjectionType projType; // for GXGetProjectionv GXProjectionType projType; // for GXGetProjectionv
FogState fog; FogState fog;
GXCullMode cullMode = GX_CULL_BACK; GXCullMode cullMode = GX_CULL_BACK;
@ -266,7 +277,7 @@ struct GXState {
std::array<TextureBind, MaxTextures> textures; std::array<TextureBind, MaxTextures> textures;
std::array<GXTlutObj_, MaxTluts> tluts; std::array<GXTlutObj_, MaxTluts> tluts;
std::array<TexMtxVariant, MaxTexMtx> texMtxs; std::array<TexMtxVariant, MaxTexMtx> texMtxs;
std::array<Mat4x4<float>, MaxPTTexMtx> ptTexMtxs; std::array<Mat3x4<float>, MaxPTTexMtx> ptTexMtxs;
std::array<TcgConfig, MaxTexCoord> tcgs; std::array<TcgConfig, MaxTexCoord> tcgs;
std::array<GXAttrType, MaxVtxAttr> vtxDesc; std::array<GXAttrType, MaxVtxAttr> vtxDesc;
std::array<VtxFmt, MaxVtxFmt> vtxFmts; std::array<VtxFmt, MaxVtxFmt> vtxFmts;
@ -345,11 +356,18 @@ struct TextureConfig {
bool operator==(const TextureConfig& rhs) const { return memcmp(this, &rhs, sizeof(*this)) == 0; } bool operator==(const TextureConfig& rhs) const { return memcmp(this, &rhs, sizeof(*this)) == 0; }
}; };
static_assert(std::has_unique_object_representations_v<TextureConfig>); static_assert(std::has_unique_object_representations_v<TextureConfig>);
struct StorageConfig {
GXAttr attr = GX_VA_NULL;
GXCompCnt cnt = static_cast<GXCompCnt>(0xFF);
GXCompType compType = static_cast<GXCompType>(0xFF);
u8 frac = 0;
std::array<u8, 3> pad{};
};
struct ShaderConfig { struct ShaderConfig {
GXFogType fogType; GXFogType fogType;
std::array<GXAttrType, MaxVtxAttr> vtxAttrs; std::array<GXAttrType, MaxVtxAttr> vtxAttrs;
// Mapping for indexed attributes -> storage buffer // Mapping for indexed attributes -> storage buffer
std::array<GXAttr, MaxVtxAttr> attrMapping; std::array<StorageConfig, MaxVtxAttr> attrMapping;
std::array<TevSwap, MaxTevSwap> tevSwapTable; std::array<TevSwap, MaxTevSwap> tevSwapTable;
std::array<TevStage, MaxTevStages> tevStages; std::array<TevStage, MaxTevStages> tevStages;
u32 tevStageCount = 0; u32 tevStageCount = 0;
@ -363,7 +381,7 @@ struct ShaderConfig {
}; };
static_assert(std::has_unique_object_representations_v<ShaderConfig>); static_assert(std::has_unique_object_representations_v<ShaderConfig>);
constexpr u32 GXPipelineConfigVersion = 4; constexpr u32 GXPipelineConfigVersion = 5;
struct PipelineConfig { struct PipelineConfig {
u32 version = GXPipelineConfigVersion; u32 version = GXPipelineConfigVersion;
ShaderConfig shaderConfig; ShaderConfig shaderConfig;
@ -405,7 +423,7 @@ struct ShaderInfo {
struct BindGroupRanges { struct BindGroupRanges {
std::array<Range, GX_VA_MAX_ATTR> vaRanges{}; std::array<Range, GX_VA_MAX_ATTR> vaRanges{};
}; };
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept; void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept;
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info, wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader, ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
const char* label) noexcept; const char* label) noexcept;
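
Because attrMapping's element type grew from a bare GXAttr to StorageConfig, the serialized layout of ShaderConfig changed, and GXPipelineConfigVersion is bumped to 5 so stale pipeline caches are rejected rather than misread. The explicit 3-byte pad keeps has_unique_object_representations_v satisfied; assuming the GX enums have int-sized underlying types, the layout works out to:

    static_assert(sizeof(gx::StorageConfig) == 16);  // 3 x 4-byte enum + u8 frac + 3-byte pad
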

View File

@ -1,3 +1,7 @@
#pragma once
#include "../internal.hpp"
#include <dolphin/gx/GXEnum.h> #include <dolphin/gx/GXEnum.h>
#include <fmt/format.h> #include <fmt/format.h>
#include <string> #include <string>
@ -25,7 +29,7 @@ inline std::string format_as(const GXTevOp& op) {
case GX_TEV_COMP_RGB8_EQ: case GX_TEV_COMP_RGB8_EQ:
return "GX_TEV_COMP_RGB8_EQ"; return "GX_TEV_COMP_RGB8_EQ";
default: default:
return fmt::format("GXTevOp({})", static_cast<int>(op)); return fmt::format("GXTevOp({})", underlying(op));
} }
} }
@ -64,7 +68,7 @@ inline std::string format_as(const GXTevColorArg& arg) {
case GX_CC_ZERO: case GX_CC_ZERO:
return "GX_CC_ZERO"; return "GX_CC_ZERO";
default: default:
return fmt::format("GXTevColorArg({})", static_cast<int>(arg)); return fmt::format("GXTevColorArg({})", underlying(arg));
} }
} }
@ -87,7 +91,7 @@ inline std::string format_as(const GXTevAlphaArg& arg) {
case GX_CA_ZERO: case GX_CA_ZERO:
return "GX_CA_ZERO"; return "GX_CA_ZERO";
default: default:
return fmt::format("GXTevAlphaArg({})", static_cast<int>(arg)); return fmt::format("GXTevAlphaArg({})", underlying(arg));
} }
} }
@ -118,7 +122,7 @@ inline std::string format_as(const GXTexGenSrc& src) {
case GX_TG_TEX7: case GX_TG_TEX7:
return "GX_TG_TEX7"; return "GX_TG_TEX7";
default: default:
return fmt::format("GXTexGenSrc({})", static_cast<int>(src)); return fmt::format("GXTexGenSrc({})", underlying(src));
} }
} }
@ -133,7 +137,7 @@ inline std::string format_as(const GXTexGenType& type) {
case GX_TG_BUMP1: case GX_TG_BUMP1:
return "GX_TG_BUMP1"; return "GX_TG_BUMP1";
default: default:
return fmt::format("GXTexGenType({})", static_cast<int>(type)); return fmt::format("GXTexGenType({})", underlying(type));
} }
} }
@ -146,7 +150,7 @@ inline std::string format_as(const GXTevBias& bias) {
case GX_TB_SUBHALF: case GX_TB_SUBHALF:
return "GX_TB_SUBHALF"; return "GX_TB_SUBHALF";
default: default:
return fmt::format("GXTevBias({})", static_cast<int>(bias)); return fmt::format("GXTevBias({})", underlying(bias));
} }
} }
@ -161,7 +165,7 @@ inline std::string format_as(const GXTevScale& scale) {
case GX_CS_DIVIDE_2: case GX_CS_DIVIDE_2:
return "GX_CS_DIVIDE_2"; return "GX_CS_DIVIDE_2";
default: default:
return fmt::format("GXTevScale({})", static_cast<int>(scale)); return fmt::format("GXTevScale({})", underlying(scale));
} }
} }
@ -176,7 +180,7 @@ inline std::string format_as(const GXTevRegID& reg) {
case GX_TEVREG2: case GX_TEVREG2:
return "GX_TEVREG2"; return "GX_TEVREG2";
default: default:
return fmt::format("GXTevRegID({})", static_cast<int>(reg)); return fmt::format("GXTevRegID({})", underlying(reg));
} }
} }
@ -231,7 +235,7 @@ inline std::string format_as(const GXTevKColorSel& sel) {
case GX_TEV_KCSEL_K3_A: case GX_TEV_KCSEL_K3_A:
return "GX_TEV_KCSEL_K3_A"; return "GX_TEV_KCSEL_K3_A";
default: default:
return fmt::format("GXTevKColorSel({})", static_cast<int>(sel)); return fmt::format("GXTevKColorSel({})", underlying(sel));
} }
} }
@ -286,7 +290,7 @@ inline std::string format_as(const GXTevKAlphaSel& sel) {
case GX_TEV_KASEL_K3_A: case GX_TEV_KASEL_K3_A:
return "GX_TEV_KASEL_K3_A"; return "GX_TEV_KASEL_K3_A";
default: default:
return fmt::format("GXTevKAlphaSel({})", static_cast<int>(sel)); return fmt::format("GXTevKAlphaSel({})", underlying(sel));
} }
} }
@ -313,7 +317,7 @@ inline std::string format_as(const GXTexMapID& id) {
case GX_TEX_DISABLE: case GX_TEX_DISABLE:
return "GX_TEX_DISABLE"; return "GX_TEX_DISABLE";
default: default:
return fmt::format("GXTexMapID({})", static_cast<int>(id)); return fmt::format("GXTexMapID({})", underlying(id));
} }
} }
@ -340,7 +344,7 @@ inline std::string format_as(const GXChannelID& id) {
case GX_COLOR_NULL: case GX_COLOR_NULL:
return "GX_COLOR_NULL"; return "GX_COLOR_NULL";
default: default:
return fmt::format("GXChannelID({})", static_cast<int>(id)); return fmt::format("GXChannelID({})", underlying(id));
} }
} }
@ -351,7 +355,7 @@ inline std::string format_as(const GXColorSrc& src) {
case GX_SRC_VTX: case GX_SRC_VTX:
return "GX_SRC_VTX"; return "GX_SRC_VTX";
default: default:
return fmt::format("GXColorSrc({})", static_cast<int>(src)); return fmt::format("GXColorSrc({})", underlying(src));
} }
} }
@ -380,7 +384,7 @@ inline std::string format_as(const GXTexMtx& mtx) {
case GX_IDENTITY: case GX_IDENTITY:
return "GX_IDENTITY"; return "GX_IDENTITY";
default: default:
return fmt::format("GXTexMtx({})", static_cast<int>(mtx)); return fmt::format("GXTexMtx({})", underlying(mtx));
} }
} }
@ -429,7 +433,7 @@ inline std::string format_as(const GXPTTexMtx& mtx) {
case GX_PTIDENTITY: case GX_PTIDENTITY:
return "GX_PTIDENTITY"; return "GX_PTIDENTITY";
default: default:
return fmt::format("GXPTTexMtx({})", static_cast<int>(mtx)); return fmt::format("GXPTTexMtx({})", underlying(mtx));
} }
} }
@ -452,7 +456,7 @@ inline std::string format_as(const GXCompare& comp) {
case GX_ALWAYS: case GX_ALWAYS:
return "GX_ALWAYS"; return "GX_ALWAYS";
default: default:
return fmt::format("GXCompare({})", static_cast<int>(comp)); return fmt::format("GXCompare({})", underlying(comp));
} }
} }
@ -467,7 +471,7 @@ inline std::string format_as(const GXAlphaOp& op) {
case GX_AOP_XNOR: case GX_AOP_XNOR:
return "GX_AOP_XNOR"; return "GX_AOP_XNOR";
default: default:
return fmt::format("GXAlphaOp({})", static_cast<int>(op)); return fmt::format("GXAlphaOp({})", underlying(op));
} }
} }
@ -496,7 +500,7 @@ inline std::string format_as(const GXFogType& type) {
case GX_FOG_ORTHO_REVEXP2: case GX_FOG_ORTHO_REVEXP2:
return "GX_FOG_ORTHO_REVEXP2"; return "GX_FOG_ORTHO_REVEXP2";
default: default:
return fmt::format("GXFogType({})", static_cast<int>(type)); return fmt::format("GXFogType({})", underlying(type));
} }
} }
@ -521,6 +525,158 @@ inline std::string format_as(const GXTexCoordID& id) {
case GX_TEXCOORD_NULL: case GX_TEXCOORD_NULL:
return "GX_TEXCOORD_NULL"; return "GX_TEXCOORD_NULL";
default: default:
return fmt::format("GXTexCoordID({})", static_cast<int>(id)); return fmt::format("GXTexCoordID({})", underlying(id));
}
}
inline std::string format_as(const GXPrimitive& prim) {
switch (prim) {
case GX_QUADS:
return "GX_QUADS";
case GX_TRIANGLES:
return "GX_TRIANGLES";
case GX_TRIANGLESTRIP:
return "GX_TRIANGLESTRIP";
case GX_TRIANGLEFAN:
return "GX_TRIANGLEFAN";
case GX_LINES:
return "GX_LINES";
case GX_LINESTRIP:
return "GX_LINESTRIP";
case GX_POINTS:
return "GX_POINTS";
default:
return fmt::format("GXPrimitive({})", underlying(prim));
}
}
inline std::string format_as(const GXAttr& attr) {
switch (attr) {
case GX_VA_PNMTXIDX:
return "GX_VA_PNMTXIDX";
case GX_VA_TEX0MTXIDX:
return "GX_VA_TEX0MTXIDX";
case GX_VA_TEX1MTXIDX:
return "GX_VA_TEX1MTXIDX";
case GX_VA_TEX2MTXIDX:
return "GX_VA_TEX2MTXIDX";
case GX_VA_TEX3MTXIDX:
return "GX_VA_TEX3MTXIDX";
case GX_VA_TEX4MTXIDX:
return "GX_VA_TEX4MTXIDX";
case GX_VA_TEX5MTXIDX:
return "GX_VA_TEX5MTXIDX";
case GX_VA_TEX6MTXIDX:
return "GX_VA_TEX6MTXIDX";
case GX_VA_TEX7MTXIDX:
return "GX_VA_TEX7MTXIDX";
case GX_VA_POS:
return "GX_VA_POS";
case GX_VA_NRM:
return "GX_VA_NRM";
case GX_VA_CLR0:
return "GX_VA_CLR0";
case GX_VA_CLR1:
return "GX_VA_CLR1";
case GX_VA_TEX0:
return "GX_VA_TEX0";
case GX_VA_TEX1:
return "GX_VA_TEX1";
case GX_VA_TEX2:
return "GX_VA_TEX2";
case GX_VA_TEX3:
return "GX_VA_TEX3";
case GX_VA_TEX4:
return "GX_VA_TEX4";
case GX_VA_TEX5:
return "GX_VA_TEX5";
case GX_VA_TEX6:
return "GX_VA_TEX6";
case GX_VA_TEX7:
return "GX_VA_TEX7";
case GX_POS_MTX_ARRAY:
return "GX_POS_MTX_ARRAY";
case GX_NRM_MTX_ARRAY:
return "GX_NRM_MTX_ARRAY";
case GX_TEX_MTX_ARRAY:
return "GX_TEX_MTX_ARRAY";
case GX_LIGHT_ARRAY:
return "GX_LIGHT_ARRAY";
case GX_VA_NBT:
return "GX_VA_NBT";
case GX_VA_NULL:
return "GX_VA_NULL";
default:
return fmt::format("GXAttr({})", underlying(attr));
}
}
inline std::string format_as(const GXCompCnt& cnt) {
switch (cnt) {
case GX_POS_XY:
return "GX_POS_XY|GX_NRM_XYZ|GX_CLR_RGB|GX_TEX_S";
case GX_POS_XYZ:
return "GX_POS_XYZ|GX_NRM_NBT|GX_CLR_RGBA|GX_TEX_ST";
case GX_NRM_NBT3:
return "GX_NRM_NBT3";
default:
return fmt::format("GXCompCnt({})", underlying(cnt));
}
}
inline std::string format_as(const GXCompType& type) {
switch (type) {
case GX_U8:
return "GX_U8|GX_RGB565";
case GX_S8:
return "GX_S8|GX_RGB8";
case GX_U16:
return "GX_U16|GX_RGBX8";
case GX_S16:
return "GX_S16|GX_RGBA4";
case GX_F32:
return "GX_F32|GX_RGBA6";
case GX_RGBA8:
return "GX_RGBA8";
default:
return fmt::format("GXCompType({})", underlying(type));
}
}
inline std::string format_as(const GXAttrType& type) {
switch (type) {
case GX_NONE:
return "GX_NONE";
case GX_DIRECT:
return "GX_DIRECT";
case GX_INDEX8:
return "GX_INDEX8";
case GX_INDEX16:
return "GX_INDEX16";
default:
return fmt::format("GXAttrType({})", underlying(type));
}
}
inline std::string format_as(const GXVtxFmt& fmt) {
switch (fmt) {
case GX_VTXFMT0:
return "GX_VTXFMT0";
case GX_VTXFMT1:
return "GX_VTXFMT1";
case GX_VTXFMT2:
return "GX_VTXFMT2";
case GX_VTXFMT3:
return "GX_VTXFMT3";
case GX_VTXFMT4:
return "GX_VTXFMT4";
case GX_VTXFMT5:
return "GX_VTXFMT5";
case GX_VTXFMT6:
return "GX_VTXFMT6";
case GX_VTXFMT7:
return "GX_VTXFMT7";
default:
return fmt::format("GXVtxFmt({})", underlying(fmt));
} }
} }
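
These format_as overloads plug into {fmt}'s ADL-based extension point, which is what lets the call sites above pass GX enums straight to FATAL/format without static_casts. Usage sketch:

    // format_as is found by argument-dependent lookup; no fmt::formatter needed.
    fmt::print("{} {}\n", GX_VA_POS, GX_INDEX16);  // prints "GX_VA_POS GX_INDEX16"
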

File diff suppressed because it is too large

View File

@ -1,60 +1,29 @@
#include "shader.hpp" #include "shader.hpp"
#include "../../webgpu/gpu.hpp" #include "../../webgpu/gpu.hpp"
#include "../gx_fmt.hpp"
#include <absl/container/flat_hash_map.h> #include <absl/container/flat_hash_map.h>
namespace aurora::gfx::model { namespace aurora::gfx::model {
static Module Log("aurora::gfx::model"); static Module Log("aurora::gfx::model");
template <typename T>
constexpr T bswap16(T val) noexcept {
static_assert(sizeof(T) == sizeof(u16));
union {
u16 u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap16(v.u);
#elif _WIN32
v.u = _byteswap_ushort(v.u);
#else
v.u = (v.u << 8) | ((v.u >> 8) & 0xFF);
#endif
return v.t;
}
template <typename T>
constexpr T bswap32(T val) noexcept {
static_assert(sizeof(T) == sizeof(u32));
union {
u32 u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap32(v.u);
#elif _WIN32
v.u = _byteswap_ulong(v.u);
#else
v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8);
#endif
return v.t;
}
using IndexedAttrs = std::array<bool, GX_VA_MAX_ATTR>; using IndexedAttrs = std::array<bool, GX_VA_MAX_ATTR>;
struct DisplayListCache { struct DisplayListCache {
ByteBuffer vtxBuf; ByteBuffer vtxBuf;
ByteBuffer idxBuf; ByteBuffer idxBuf;
IndexedAttrs indexedAttrs; IndexedAttrs indexedAttrs;
GXVtxFmt fmt;
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs) DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs) {} : vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {}
}; };
static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists; static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;
static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount, static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount,
IndexedAttrs& indexedAttrs) { IndexedAttrs& indexedAttrs) {
using aurora::gfx::gx::g_gxState; using gx::g_gxState;
struct { struct {
u8 count; u8 count;
GXCompType type; GXCompType type;
@ -66,14 +35,13 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) { switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", static_cast<int>(g_gxState.vtxDesc[attr])); DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE: case GX_NONE:
break; break;
case GX_DIRECT: case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3)) #define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) { switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", static_cast<int>(attr), static_cast<int>(attrFmt.cnt), DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
static_cast<int>(attrFmt.type));
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32): case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32): case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3; attrArrays[attr].count = 3;
@ -150,12 +118,10 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
for (u32 v = 0; v < vtxCount; ++v) { for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) { if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
u16 index = *ptr; buf.append(static_cast<u16>(*ptr));
buf.append(&index, 2);
++ptr; ++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) { } else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
u16 index = bswap16(*reinterpret_cast<const u16*>(ptr)); buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
buf.append(&index, 2);
ptr += 2; ptr += 2;
} }
if (g_gxState.vtxDesc[attr] != GX_DIRECT) { if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
@ -182,7 +148,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break; break;
case GX_U16: case GX_U16:
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
const auto value = bswap16(reinterpret_cast<const u16*>(ptr)[i]); const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac); out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
} }
buf.append(out.data(), sizeof(f32) * count); buf.append(out.data(), sizeof(f32) * count);
@ -190,7 +156,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break; break;
case GX_S16: case GX_S16:
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
const auto value = bswap16(reinterpret_cast<const s16*>(ptr)[i]); const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac); out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
} }
buf.append(out.data(), sizeof(f32) * count); buf.append(out.data(), sizeof(f32) * count);
@ -198,7 +164,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break; break;
case GX_F32: case GX_F32:
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
out[i] = bswap32(reinterpret_cast<const f32*>(ptr)[i]); out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
} }
buf.append(out.data(), sizeof(f32) * count); buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32); ptr += count * sizeof(f32);
@ -227,7 +193,7 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u
buf.reserve_extra(vtxCount * sizeof(u16)); buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) { for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v; const u16 idx = vtxStart + v;
buf.append(&idx, sizeof(u16)); buf.append(idx);
++numIndices; ++numIndices;
} }
} else if (prim == GX_TRIANGLEFAN) { } else if (prim == GX_TRIANGLEFAN) {
@ -235,29 +201,26 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u
for (u16 v = 0; v < vtxCount; ++v) { for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v; const u16 idx = vtxStart + v;
if (v < 3) { if (v < 3) {
buf.append(&idx, sizeof(u16)); buf.append(idx);
++numIndices; ++numIndices;
continue; continue;
} }
const std::array<u16, 3> idxs{vtxStart, u16(idx - 1), idx}; buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
buf.append(idxs.data(), sizeof(u16) * 3);
numIndices += 3; numIndices += 3;
} }
} else if (prim == GX_TRIANGLESTRIP) { } else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16)); buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) { for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v; const u16 idx = vtxStart + v;
if (v < 3) { if (v < 3) {
buf.append(&idx, sizeof(u16)); buf.append(idx);
++numIndices; ++numIndices;
continue; continue;
} }
if ((v & 1) == 0) { if ((v & 1) == 0) {
const std::array<u16, 3> idxs{u16(idx - 2), u16(idx - 1), idx}; buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
buf.append(idxs.data(), sizeof(u16) * 3);
} else { } else {
const std::array<u16, 3> idxs{u16(idx - 1), u16(idx - 2), idx}; buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
buf.append(idxs.data(), sizeof(u16) * 3);
} }
numIndices += 3; numIndices += 3;
} }
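
prepare_idx_buffer flattens fans and strips into plain triangle lists (WebGPU offers no fan topology, and this keeps every surface on a single TriangleList pipeline): the first three vertices emit one triangle, and each vertex after that emits three more indices, with strip winding alternated by the (v & 1) test. A vtxCount-vertex strip or fan therefore yields 3 * (vtxCount - 2) indices, which is exactly the reserve_extra math above. Worked example:

    constexpr u16 vtxCount = 5;                    // 5-vertex triangle strip
    constexpr u32 numIndices = 3 * (vtxCount - 2); // == 9: {0,1,2}, {2,1,3}, {2,3,4}
    static_assert(numIndices == 9);
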
@ -271,6 +234,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
Range vertRange, idxRange; Range vertRange, idxRange;
u32 numIndices = 0; u32 numIndices = 0;
IndexedAttrs indexedAttrs{}; IndexedAttrs indexedAttrs{};
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash); auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) { if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second; const auto& cache = it->second;
@ -278,6 +242,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size()); vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size()); idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
indexedAttrs = cache.indexedAttrs; indexedAttrs = cache.indexedAttrs;
fmt = cache.fmt;
} else { } else {
const u8* data = dlStart; const u8* data = dlStart;
u32 pos = 0; u32 pos = 0;
@ -302,8 +267,12 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
case GX_DRAW_TRIANGLE_STRIP: case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: { case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode); const auto prim = static_cast<GXPrimitive>(opcode);
const auto fmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK); const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
u16 vtxCount = bswap16(*reinterpret_cast<const u16*>(data + pos)); if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2; pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs); pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount); numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
@ -319,22 +288,16 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
} }
vertRange = push_verts(vtxBuf.data(), vtxBuf.size()); vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size()); idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs); sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt);
} }
gx::BindGroupRanges ranges{}; gx::BindGroupRanges ranges{};
int lastIndexedAttr = -1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (!indexedAttrs[i]) { if (!indexedAttrs[i]) {
continue; continue;
} }
auto& array = gx::g_gxState.arrays[i]; auto& array = gx::g_gxState.arrays[i];
if (lastIndexedAttr >= 0 && array == gx::g_gxState.arrays[lastIndexedAttr]) { if (array.cachedRange.size > 0) {
// Reuse range from last attribute in shader
// Don't set the output range, so it remains unbound
const auto range = gx::g_gxState.arrays[lastIndexedAttr].cachedRange;
array.cachedRange = range;
} else if (array.cachedRange.size > 0) {
// Use the currently cached range // Use the currently cached range
ranges.vaRanges[i] = array.cachedRange; ranges.vaRanges[i] = array.cachedRange;
} else { } else {
@ -343,11 +306,10 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
ranges.vaRanges[i] = range; ranges.vaRanges[i] = range;
array.cachedRange = range; array.cachedRange = range;
} }
lastIndexedAttr = i;
} }
model::PipelineConfig config{}; model::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES); populate_pipeline_config(config, GX_TRIANGLES, fmt);
const auto info = gx::build_shader_info(config.shaderConfig); const auto info = gx::build_shader_info(config.shaderConfig);
const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges); const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = pipeline_ref(config); const auto pipeline = pipeline_ref(config);
@ -366,7 +328,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
State construct_state() { return {}; } State construct_state() { return {}; }
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) { wgpu::RenderPipeline create_pipeline(const State& state, const PipelineConfig& config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info); const auto shader = build_shader(config.shaderConfig, info);
@ -385,7 +347,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const
// Indexed attributes // Indexed attributes
for (u32 i = 0; i < num4xAttr; ++i) { for (u32 i = 0; i < num4xAttr; ++i) {
vtxAttrs[shaderLocation] = { vtxAttrs[shaderLocation] = {
.format = wgpu::VertexFormat::Sint16x4, .format = wgpu::VertexFormat::Uint16x4,
.offset = offset, .offset = offset,
.shaderLocation = shaderLocation, .shaderLocation = shaderLocation,
}; };
@ -394,7 +356,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const
} }
for (u32 i = 0; i < num2xAttr; ++i) { for (u32 i = 0; i < num2xAttr; ++i) {
vtxAttrs[shaderLocation] = { vtxAttrs[shaderLocation] = {
.format = wgpu::VertexFormat::Sint16x2, .format = wgpu::VertexFormat::Uint16x2,
.offset = offset, .offset = offset,
.shaderLocation = shaderLocation, .shaderLocation = shaderLocation,
}; };
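
The indexed-attribute columns are written into the vertex buffer as unsigned 16-bit values (note the static_cast<u16> appends in prepare_vtx_buffer), so advertising them to WebGPU as Sint16xN would reinterpret any index at or above 0x8000 as negative before it reached the shader. Uint16xN matches the data actually written:

    const u16 idx = 0x8123;                     // a valid GX_INDEX16 value
    const s16 misread = static_cast<s16>(idx);  // -32477 under Sint16xN: bogus fetch
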

View File

@ -1,82 +0,0 @@
#include "shader.hpp"
#include "../../webgpu/gpu.hpp"
namespace aurora::gfx::stream {
static Module Log("aurora::gfx::stream");
using webgpu::g_device;
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info);
std::array<wgpu::VertexAttribute, 4> attributes{};
attributes[0] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x3,
.offset = 0,
.shaderLocation = 0,
};
uint64_t offset = 12;
uint32_t shaderLocation = 1;
if (config.shaderConfig.vtxAttrs[GX_VA_NRM] == GX_DIRECT) {
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x3,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 12;
shaderLocation++;
}
if (config.shaderConfig.vtxAttrs[GX_VA_CLR0] == GX_DIRECT) {
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x4,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 16;
shaderLocation++;
}
for (int i = GX_VA_TEX0; i < GX_VA_TEX7; ++i) {
if (config.shaderConfig.vtxAttrs[i] != GX_DIRECT) {
continue;
}
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x2,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 8;
shaderLocation++;
}
const std::array vertexBuffers{wgpu::VertexBufferLayout{
.arrayStride = offset,
.attributeCount = shaderLocation,
.attributes = attributes.data(),
}};
return build_pipeline(config, info, vertexBuffers, shader, "Stream Pipeline");
}
State construct_state() { return {}; }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
if (!bind_pipeline(data.pipeline, pass)) {
return;
}
const std::array offsets{data.uniformRange.offset};
pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data());
if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) {
pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup));
pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup));
}
pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size);
pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint16, data.indexRange.offset, data.indexRange.size);
if (data.dstAlpha != UINT32_MAX) {
const wgpu::Color color{0.f, 0.f, 0.f, data.dstAlpha / 255.f};
pass.SetBlendConstant(&color);
}
pass.DrawIndexed(data.indexCount);
}
} // namespace aurora::gfx::stream

View File

@ -1,24 +0,0 @@
#pragma once
#include "../common.hpp"
#include "../gx.hpp"
namespace aurora::gfx::stream {
struct DrawData {
PipelineRef pipeline;
Range vertRange;
Range uniformRange;
Range indexRange;
uint32_t indexCount;
gx::GXBindGroups bindGroups;
u32 dstAlpha;
};
struct PipelineConfig : public gx::PipelineConfig {};
struct State {};
State construct_state();
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config);
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass);
} // namespace aurora::gfx::stream

View File

@ -66,17 +66,6 @@ static size_t ComputeMippedBlockCountDXT1(uint32_t w, uint32_t h, uint32_t mips)
return ret; return ret;
} }
template <typename T>
constexpr T bswap16(T val) noexcept {
#if __GNUC__
return __builtin_bswap16(val);
#elif _WIN32
return _byteswap_ushort(val);
#else
return (val = (val << 8) | ((val >> 8) & 0xFF));
#endif
}
template <typename T> template <typename T>
concept TextureDecoder = requires(T) { concept TextureDecoder = requires(T) {
typename T::Source; typename T::Source;
@ -178,15 +167,15 @@ struct TextureDecoderIA4 {
}; };
struct TextureDecoderIA8 { struct TextureDecoderIA8 {
using Source = uint8_t; using Source = uint16_t;
using Target = RGBA8; using Target = RGBA8;
static constexpr uint32_t Frac = 1; static constexpr uint32_t Frac = 1;
static constexpr uint32_t BlockWidth = 8; static constexpr uint32_t BlockWidth = 4;
static constexpr uint32_t BlockHeight = 4; static constexpr uint32_t BlockHeight = 4;
static void decode_texel(Target* target, const Source* in, const uint32_t x) { static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]); const auto texel = bswap(in[x]);
const uint8_t intensity = texel >> 8; const uint8_t intensity = texel >> 8;
target[x].r = intensity; target[x].r = intensity;
target[x].g = intensity; target[x].g = intensity;
@ -228,7 +217,7 @@ struct TextureDecoderRGB565 {
static constexpr uint32_t BlockHeight = 4; static constexpr uint32_t BlockHeight = 4;
static void decode_texel(Target* target, const Source* in, const uint32_t x) { static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]); const auto texel = bswap(in[x]);
target[x].r = ExpandTo8<5>(texel >> 11 & 0x1f); target[x].r = ExpandTo8<5>(texel >> 11 & 0x1f);
target[x].g = ExpandTo8<6>(texel >> 5 & 0x3f); target[x].g = ExpandTo8<6>(texel >> 5 & 0x3f);
target[x].b = ExpandTo8<5>(texel & 0x1f); target[x].b = ExpandTo8<5>(texel & 0x1f);
@ -245,7 +234,7 @@ struct TextureDecoderRGB5A3 {
static constexpr uint32_t BlockHeight = 4; static constexpr uint32_t BlockHeight = 4;
static void decode_texel(Target* target, const Source* in, const uint32_t x) { static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]); const auto texel = bswap(in[x]);
if ((texel & 0x8000) != 0) { if ((texel & 0x8000) != 0) {
target[x].r = ExpandTo8<5>(texel >> 10 & 0x1f); target[x].r = ExpandTo8<5>(texel >> 10 & 0x1f);
target[x].g = ExpandTo8<5>(texel >> 5 & 0x1f); target[x].g = ExpandTo8<5>(texel >> 5 & 0x1f);
@ -322,8 +311,8 @@ static ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mip
for (uint32_t y = 0; y < 2; ++y) { for (uint32_t y = 0; y < 2; ++y) {
DXT1Block* target = targetMip + (baseY + y) * w + baseX; DXT1Block* target = targetMip + (baseY + y) * w + baseX;
for (size_t x = 0; x < 2; ++x) { for (size_t x = 0; x < 2; ++x) {
target[x].color1 = bswap16(in[x].color1); target[x].color1 = bswap(in[x].color1);
target[x].color2 = bswap16(in[x].color2); target[x].color2 = bswap(in[x].color2);
for (size_t i = 0; i < 4; ++i) { for (size_t i = 0; i < 4; ++i) {
std::array<uint8_t, 4> ind; std::array<uint8_t, 4> ind;
const uint8_t packed = in[x].lines[i]; const uint8_t packed = in[x].lines[i];
@ -365,8 +354,8 @@ static ByteBuffer BuildRGBA8FromCMPR(uint32_t width, uint32_t height, uint32_t m
for (uint32_t yb = 0; yb < 8; yb += 4) { for (uint32_t yb = 0; yb < 8; yb += 4) {
for (uint32_t xb = 0; xb < 8; xb += 4) { for (uint32_t xb = 0; xb < 8; xb += 4) {
// CMPR difference: Big-endian color1/2 // CMPR difference: Big-endian color1/2
const uint16_t color1 = bswap16(*reinterpret_cast<const uint16_t*>(src)); const uint16_t color1 = bswap(*reinterpret_cast<const uint16_t*>(src));
const uint16_t color2 = bswap16(*reinterpret_cast<const uint16_t*>(src + 2)); const uint16_t color2 = bswap(*reinterpret_cast<const uint16_t*>(src + 2));
src += 4; src += 4;
// Fill in first two colors in color table. // Fill in first two colors in color table.
@ -480,4 +469,4 @@ ByteBuffer convert_tlut(u32 format, uint32_t width, ArrayRef<uint8_t> data) {
return DecodeLinear<TextureDecoderRGB5A3>(width, data); return DecodeLinear<TextureDecoderRGB5A3>(width, data);
} }
} }
} // namespace aurora::gfx } // namespace aurora::gfx
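
The IA8 fix is twofold: each texel is 16 bits (8-bit intensity plus 8-bit alpha), so Source must be uint16_t for the byte swap to act on whole texels, and a 32-byte GX tile of 16-bit texels holds 4x4 of them, not 8x4. The tile arithmetic:

    constexpr u32 kTileBytes = 32;      // one GX cache-line block for 16-bit formats
    constexpr u32 kIA8TexelBytes = 2;   // intensity + alpha
    static_assert(kTileBytes / kIA8TexelBytes == 16);  // 16 texels -> a 4x4 block
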

View File

@ -6,6 +6,8 @@
#include <array> #include <array>
#include <cassert> #include <cassert>
#include <cstdint>
#include <type_traits>
#include <vector> #include <vector>
using namespace std::string_view_literals; using namespace std::string_view_literals;
@ -21,6 +23,46 @@ using namespace std::string_view_literals;
#endif #endif
#endif #endif
template <typename T>
requires(sizeof(T) == sizeof(uint16_t) && std::is_arithmetic_v<T>)
constexpr T bswap(T val) noexcept {
union {
uint16_t u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap16(v.u);
#elif _WIN32
v.u = _byteswap_ushort(v.u);
#else
v.u = (v.u << 8) | ((v.u >> 8) & 0xFF);
#endif
return v.t;
}
template <typename T>
requires(sizeof(T) == sizeof(uint32_t) && std::is_arithmetic_v<T>)
constexpr T bswap(T val) noexcept {
union {
uint32_t u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap32(v.u);
#elif _WIN32
v.u = _byteswap_ulong(v.u);
#else
v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8);
#endif
return v.t;
}
template <typename T>
requires(std::is_enum_v<T>)
auto underlying(T value) -> std::underlying_type_t<T> {
return static_cast<std::underlying_type_t<T>>(value);
}
#ifndef ALIGN #ifndef ALIGN
#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1)) #define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
#endif #endif
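
The bswap16/bswap32 copies formerly duplicated in model/shader.cpp and texture_convert.cpp are consolidated here into one bswap overload set, constrained on value size so s16, u16, f32, and u32 all go through the same name, and underlying() replaces the static_cast<int> noise at every enum logging site. Typical usage:

    const u16 be = 0x1234;
    const u16 le = bswap(be);                   // 0x3412; same spelling works for f32
    const auto raw = underlying(GX_CULL_BACK);  // value of the enum's underlying type
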
@ -33,11 +75,7 @@ using namespace std::string_view_literals;
#else #else
#define UNLIKELY #define UNLIKELY
#endif #endif
#define FATAL(msg, ...) \ #define FATAL(msg, ...) Log.fatal(msg, ##__VA_ARGS__);
{ \
Log.fatal(msg, ##__VA_ARGS__); \
unreachable(); \
}
#define ASSERT(cond, msg, ...) \ #define ASSERT(cond, msg, ...) \
if (!(cond)) \ if (!(cond)) \
UNLIKELY FATAL(msg, ##__VA_ARGS__) UNLIKELY FATAL(msg, ##__VA_ARGS__)

View File

@ -4,15 +4,9 @@
#include <fmt/base.h> #include <fmt/base.h>
#include <fmt/format.h> #include <fmt/format.h>
#include <string_view>
#ifdef __GNUC__ #include <cstdlib>
[[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); } #include <string_view>
#elif defined(_MSC_VER)
[[noreturn]] __forceinline void unreachable() { __assume(false); }
#else
#error Unknown compiler
#endif
namespace aurora { namespace aurora {
void log_internal(AuroraLogLevel level, const char* module, const char* message, unsigned int len) noexcept; void log_internal(AuroraLogLevel level, const char* module, const char* message, unsigned int len) noexcept;
@ -50,7 +44,7 @@ struct Module {
template <typename... T> template <typename... T>
[[noreturn]] void fatal(fmt::format_string<T...> fmt, T&&... args) noexcept { [[noreturn]] void fatal(fmt::format_string<T...> fmt, T&&... args) noexcept {
report(LOG_FATAL, fmt, std::forward<T>(args)...); report(LOG_FATAL, fmt, std::forward<T>(args)...);
unreachable(); std::abort();
} }
}; };
} // namespace aurora } // namespace aurora
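
With the compiler-specific unreachable() shim removed, fatal() now terminates via std::abort() (hence the new <cstdlib> include). abort() is itself [[noreturn]], so fatal()'s attribute stays honest and FATAL can shrink to a single statement. A sketch of why call sites still compile without warnings:

    int to_value(GXCompare comp) {
      switch (comp) {
      case GX_ALWAYS:
        return 1;
      default:
        Log.fatal("unsupported compare {}", comp);  // [[noreturn]]: no missing-return warning
      }
    }
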

View File

@ -385,15 +385,12 @@ bool initialize(AuroraBackend auroraBackend) {
g_adapter.GetLimits(&supportedLimits); g_adapter.GetLimits(&supportedLimits);
const wgpu::Limits requiredLimits{ const wgpu::Limits requiredLimits{
// Use "best" supported alignments // Use "best" supported alignments
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 .maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED
? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.maxTextureDimension1D,
: supportedLimits.maxTextureDimension1D, .maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 : supportedLimits.maxTextureDimension2D,
? WGPU_LIMIT_U32_UNDEFINED .maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension2D, : supportedLimits.maxTextureDimension3D,
.maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension3D,
.minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0 .minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0
? WGPU_LIMIT_U32_UNDEFINED ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minUniformBufferOffsetAlignment, : supportedLimits.minUniformBufferOffsetAlignment,
@ -401,6 +398,12 @@ bool initialize(AuroraBackend auroraBackend) {
? WGPU_LIMIT_U32_UNDEFINED ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minStorageBufferOffsetAlignment, : supportedLimits.minStorageBufferOffsetAlignment,
}; };
Log.info(
"Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n "
"minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}",
requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D,
requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment,
requiredLimits.minStorageBufferOffsetAlignment);
std::vector<wgpu::FeatureName> requiredFeatures; std::vector<wgpu::FeatureName> requiredFeatures;
wgpu::SupportedFeatures supportedFeatures; wgpu::SupportedFeatures supportedFeatures;
g_adapter.GetFeatures(&supportedFeatures); g_adapter.GetFeatures(&supportedFeatures);
@ -442,22 +445,20 @@ bool initialize(AuroraBackend auroraBackend) {
}); });
deviceDescriptor.SetUncapturedErrorCallback( deviceDescriptor.SetUncapturedErrorCallback(
[](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) { [](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) {
FATAL("WebGPU error {}: {}", static_cast<int>(type), message); FATAL("WebGPU error {}: {}", underlying(type), message);
});
deviceDescriptor.SetDeviceLostCallback(
wgpu::CallbackMode::AllowSpontaneous,
[](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) {
Log.warn("Device lost: {}", message);
});
const auto future = g_adapter.RequestDevice(
&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly,
[](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
if (status == wgpu::RequestDeviceStatus::Success) {
g_device = std::move(device);
} else {
Log.warn("Device request failed: {}", message);
}
}); });
deviceDescriptor.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
[](const wgpu::Device& device, wgpu::DeviceLostReason reason,
wgpu::StringView message) { Log.warn("Device lost: {}", message); });
const auto future =
g_adapter.RequestDevice(&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly,
[](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
if (status == wgpu::RequestDeviceStatus::Success) {
g_device = std::move(device);
} else {
Log.warn("Device request failed: {}", message);
}
});
const auto status = g_instance.WaitAny(future, 5000000000); const auto status = g_instance.WaitAny(future, 5000000000);
if (status != wgpu::WaitStatus::Success) { if (status != wgpu::WaitStatus::Success) {
Log.error("Failed to create device: {}", magic_enum::enum_name(status)); Log.error("Failed to create device: {}", magic_enum::enum_name(status));
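
One non-obvious detail in this hunk: WaitAny's timeout parameter is in nanoseconds, so the 5000000000 literal is a five-second budget for device creation. Spelled out:

    constexpr uint64_t kDeviceRequestTimeoutNs = 5'000'000'000;  // 5 s; WaitAny takes ns
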