diff --git a/CMakeLists.txt b/CMakeLists.txt index 76b0fef..1e12a2a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,8 +3,6 @@ project(aurora LANGUAGES C CXX) set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 20) -option(AURORA_NATIVE_MATRIX "Assume OpenGL-layout matrices, disables transposing" OFF) - add_subdirectory(extern) include(cmake/aurora_core.cmake) diff --git a/cmake/aurora_gx.cmake b/cmake/aurora_gx.cmake index 1bd8bad..2de474c 100644 --- a/cmake/aurora_gx.cmake +++ b/cmake/aurora_gx.cmake @@ -4,7 +4,6 @@ add_library(aurora_gx STATIC lib/gfx/gx.cpp lib/gfx/gx_shader.cpp lib/gfx/texture_convert.cpp - lib/gfx/stream/shader.cpp lib/gfx/model/shader.cpp lib/dolphin/gx/GXBump.cpp lib/dolphin/gx/GXCull.cpp @@ -28,9 +27,6 @@ add_library(aurora::gx ALIAS aurora_gx) target_link_libraries(aurora_gx PUBLIC aurora::core xxhash) target_link_libraries(aurora_gx PRIVATE absl::btree absl::flat_hash_map) -if (AURORA_NATIVE_MATRIX) - target_compile_definitions(aurora_gx PRIVATE AURORA_NATIVE_MATRIX) -endif () if (EMSCRIPTEN) target_link_options(aurora_gx PUBLIC -sUSE_WEBGPU=1 -sASYNCIFY -sEXIT_RUNTIME) target_compile_definitions(aurora_gx PRIVATE ENABLE_BACKEND_WEBGPU) diff --git a/include/aurora/math.hpp b/include/aurora/math.hpp index 7d51bad..e6c0dd4 100644 --- a/include/aurora/math.hpp +++ b/include/aurora/math.hpp @@ -35,9 +35,6 @@ struct Vec2 { constexpr Vec2() = default; constexpr Vec2(T x, T y) : x(x), y(y) {} AURORA_VEC2_EXTRA -#ifdef METAFORCE - constexpr Vec2(const zeus::CVector2f& vec) : x(vec.x()), y(vec.y()) {} -#endif bool operator==(const Vec2& rhs) const { return x == rhs.x && y == rhs.y; } bool operator!=(const Vec2& rhs) const { return !(*this == rhs); } @@ -51,10 +48,6 @@ struct Vec3 { constexpr Vec3() = default; constexpr Vec3(T x, T y, T z) : x(x), y(y), z(z) {} AURORA_VEC3_EXTRA -#ifdef METAFORCE - constexpr Vec3(const zeus::CVector3f& vec) : x(vec.x()), y(vec.y()), z(vec.z()) {} - operator zeus::CVector3f() const { return {x, y, z}; } -#endif 
bool operator==(const Vec3& rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; } bool operator!=(const Vec3& rhs) const { return !(*this == rhs); } @@ -77,10 +70,6 @@ struct Vec4 { // For Vec3 -> Vec4 constexpr Vec4(Vec3 v, T w) : m{v.x, v.y, v.z, w} {} AURORA_VEC4_EXTRA -#ifdef METAFORCE - constexpr Vec4(const zeus::CVector4f& vec) : x(vec.x()), y(vec.y()), z(vec.z()), w(vec.w()) {} - constexpr Vec4(const zeus::CColor& color) : x(color.r()), y(color.g()), z(color.b()), w(color.a()) {} -#endif inline Vec4& operator=(const Vec4& other) { memcpy(&m, &other.m, sizeof(Vt)); @@ -119,7 +108,7 @@ struct Vec4 { bool operator!=(const Vec4& rhs) const { return !(*this == rhs); } }; template -[[nodiscard]] inline Vec4 operator+(const Vec4& a, const Vec4& b) { +[[nodiscard]] Vec4 operator+(const Vec4& a, const Vec4& b) { #ifdef USE_GCC_VECTOR_EXTENSIONS return a.m + b.m; #else @@ -127,7 +116,7 @@ template #endif } template -[[nodiscard]] inline Vec4 operator*(const Vec4& a, const Vec4& b) { +[[nodiscard]] Vec4 operator*(const Vec4& a, const Vec4& b) { #ifdef USE_GCC_VECTOR_EXTENSIONS return a.m * b.m; #else @@ -170,6 +159,18 @@ struct Mat4x2 { bool operator!=(const Mat4x2& rhs) const { return !(*this == rhs); } }; template +struct Mat2x4 { + Vec4 m0{}; + Vec4 m1{}; + + constexpr Mat2x4() = default; + constexpr Mat2x4(const Vec4& m0, const Vec4& m1, const Vec4& m2) : m0(m0), m1(m1) {} + + bool operator==(const Mat2x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1; } + bool operator!=(const Mat2x4& rhs) const { return !(*this == rhs); } +}; +static_assert(sizeof(Mat2x4) == 32); +template struct Mat4x4; template struct Mat3x4 { @@ -180,10 +181,13 @@ struct Mat3x4 { constexpr Mat3x4() = default; constexpr Mat3x4(const Vec4& m0, const Vec4& m1, const Vec4& m2) : m0(m0), m1(m1), m2(m2) {} - inline Mat4x4 to4x4() const; - inline Mat4x4 toTransposed4x4() const; + [[nodiscard]] Mat4x4 to4x4() const; + [[nodiscard]] Mat4x4 toTransposed4x4() const; + + bool operator==(const 
Mat3x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2; } + bool operator!=(const Mat3x4& rhs) const { return !(*this == rhs); } }; -static_assert(sizeof(Mat3x4) == sizeof(float[3][4])); +static_assert(sizeof(Mat3x4) == 48); template struct Mat4x4 { Vec4 m0{}; @@ -195,10 +199,6 @@ struct Mat4x4 { constexpr Mat4x4(const Vec4& m0, const Vec4& m1, const Vec4& m2, const Vec4& m3) : m0(m0), m1(m1), m2(m2), m3(m3) {} AURORA_MAT4X4_EXTRA -#ifdef METAFORCE - constexpr Mat4x4(const zeus::CMatrix4f& m) : m0(m[0]), m1(m[1]), m2(m[2]), m3(m[3]) {} - constexpr Mat4x4(const zeus::CTransform& m) : Mat4x4(m.toMatrix4f()) {} -#endif [[nodiscard]] Mat4x4 transpose() const { return { @@ -208,23 +208,17 @@ struct Mat4x4 { {m0[3], m1[3], m2[3], m3[3]}, }; } - inline Mat4x4& operator=(const Mat4x4& other) { - m0 = other.m0; - m1 = other.m1; - m2 = other.m2; - m3 = other.m3; - return *this; - } + Mat4x4& operator=(const Mat4x4& other) = default; - inline Vec4& operator[](size_t i) { return *(&m0 + i); } - inline const Vec4& operator[](size_t i) const { return *(&m0 + i); } + Vec4& operator[](size_t i) { return *(&m0 + i); } + const Vec4& operator[](size_t i) const { return *(&m0 + i); } bool operator==(const Mat4x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2 && m3 == rhs.m3; } bool operator!=(const Mat4x4& rhs) const { return !(*this == rhs); } }; -static_assert(sizeof(Mat4x4) == sizeof(float[4][4])); +static_assert(sizeof(Mat4x4) == 64); template -[[nodiscard]] inline Mat4x4 operator*(const Mat4x4& a, const Mat4x4& b) { +[[nodiscard]] Mat4x4 operator*(const Mat4x4& a, const Mat4x4& b) { Mat4x4 out; for (size_t i = 0; i < 4; ++i) { *(&out.m0 + i) = a.m0 * b[i].template shuffle<0, 0, 0, 0>() + a.m1 * b[i].template shuffle<1, 1, 1, 1>() + @@ -233,28 +227,27 @@ template return out; } template -[[nodiscard]] inline Mat4x4 Mat3x4::to4x4() const { +[[nodiscard]] Mat4x4 Mat3x4::to4x4() const { return { - {m0.m[0], m0.m[1], m0.m[2], 0.f}, - {m1.m[0], 
m1.m[1], m1.m[2], 0.f}, - {m2.m[0], m2.m[1], m2.m[2], 0.f}, - {m0.m[3], m1.m[3], m2.m[3], 1.f}, + {m0[0], m0[1], m0[2], 0.f}, + {m1[0], m1[1], m1[2], 0.f}, + {m2[0], m2[1], m2[2], 0.f}, + {m0[3], m1[3], m2[3], 1.f}, }; } template -[[nodiscard]] inline Mat4x4 Mat3x4::toTransposed4x4() const { +[[nodiscard]] Mat4x4 Mat3x4::toTransposed4x4() const { return Mat4x4{ - m0, - m1, - m2, - {0.f, 0.f, 0.f, 1.f}, - } - .transpose(); + {m0[0], m1[0], m2[0], 0.f}, + {m0[1], m1[1], m2[1], 0.f}, + {m0[2], m1[2], m2[2], 0.f}, + {m0[3], m1[3], m2[3], 1.f}, + }; } -constexpr Mat4x4 Mat4x4_Identity{ - Vec4{1.f, 0.f, 0.f, 0.f}, - Vec4{0.f, 1.f, 0.f, 0.f}, - Vec4{0.f, 0.f, 1.f, 0.f}, - Vec4{0.f, 0.f, 0.f, 1.f}, +constexpr Mat4x4 Mat4x4_Identity{ + Vec4{1.f, 0.f, 0.f, 0.f}, + Vec4{0.f, 1.f, 0.f, 0.f}, + Vec4{0.f, 0.f, 1.f, 0.f}, + Vec4{0.f, 0.f, 0.f, 1.f}, }; } // namespace aurora diff --git a/include/dolphin/gx/GXVert.h b/include/dolphin/gx/GXVert.h index 8af0cde..93f9914 100644 --- a/include/dolphin/gx/GXVert.h +++ b/include/dolphin/gx/GXVert.h @@ -68,11 +68,11 @@ void GXTexCoord2s16(s16 s, s16 t); void GXTexCoord2u8(u8 s, u8 t); void GXTexCoord2s8(s8 s, s8 t); -void GXTexCoord1f32(f32 s, f32 t); -void GXTexCoord1u16(u16 s, u16 t); -void GXTexCoord1s16(s16 s, s16 t); -void GXTexCoord1u8(u8 s, u8 t); -void GXTexCoord1s8(s8 s, s8 t); +void GXTexCoord1f32(f32 s); +void GXTexCoord1u16(u16 s); +void GXTexCoord1s16(s16 s); +void GXTexCoord1u8(u8 s); +void GXTexCoord1s8(s8 s); void GXTexCoord1x16(u16 index); void GXTexCoord1x8(u8 index); diff --git a/lib/dolphin/gx/GXGeometry.cpp b/lib/dolphin/gx/GXGeometry.cpp index 8bcfeda..110f1c6 100644 --- a/lib/dolphin/gx/GXGeometry.cpp +++ b/lib/dolphin/gx/GXGeometry.cpp @@ -7,7 +7,6 @@ extern "C" { void GXSetVtxDesc(GXAttr attr, GXAttrType type) { update_gx_state(g_gxState.vtxDesc[attr], type); } void GXSetVtxDescv(GXVtxDescList* list) { - g_gxState.vtxDesc.fill({}); while (list->attr != GX_VA_NULL) { update_gx_state(g_gxState.vtxDesc[list->attr], 
list->type); ++list; @@ -17,8 +16,8 @@ void GXSetVtxDescv(GXVtxDescList* list) { void GXClearVtxDesc() { g_gxState.vtxDesc.fill({}); } void GXSetVtxAttrFmt(GXVtxFmt vtxfmt, GXAttr attr, GXCompCnt cnt, GXCompType type, u8 frac) { - CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", static_cast(vtxfmt)); - CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", static_cast(attr)); + CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", underlying(vtxfmt)); + CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", underlying(attr)); auto& fmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; update_gx_state(fmt.cnt, cnt); update_gx_state(fmt.type, type); @@ -38,7 +37,7 @@ void GXSetArray(GXAttr attr, const void* data, u32 size, u8 stride) { // TODO move GXBegin, GXEnd here void GXSetTexCoordGen2(GXTexCoordID dst, GXTexGenType type, GXTexGenSrc src, u32 mtx, GXBool normalize, u32 postMtx) { - CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", static_cast(dst)); + CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", underlying(dst)); update_gx_state(g_gxState.tcgs[dst], {type, src, static_cast(mtx), static_cast(postMtx), normalize}); } diff --git a/lib/dolphin/gx/GXGet.cpp b/lib/dolphin/gx/GXGet.cpp index 4c2cb16..3c5c39a 100644 --- a/lib/dolphin/gx/GXGet.cpp +++ b/lib/dolphin/gx/GXGet.cpp @@ -20,7 +20,7 @@ void GXGetVtxAttrFmt(GXVtxFmt idx, GXAttr attr, GXCompCnt* compCnt, GXCompType* // TODO GXGetViewportv void GXGetProjectionv(f32* p) { - const auto& mtx = g_gxState.origProj; + const auto& mtx = g_gxState.proj; p[0] = static_cast(g_gxState.projType); p[1] = mtx.m0[0]; p[3] = mtx.m1[1]; diff --git a/lib/dolphin/gx/GXTransform.cpp b/lib/dolphin/gx/GXTransform.cpp index 5c69504..de668ea 100644 --- a/lib/dolphin/gx/GXTransform.cpp +++ b/lib/dolphin/gx/GXTransform.cpp @@ -4,15 +4,8 @@ extern "C" { void GXSetProjection(const void* mtx_, GXProjectionType type) 
{ const auto& mtx = *reinterpret_cast*>(mtx_); - g_gxState.origProj = mtx; g_gxState.projType = type; - update_gx_state(g_gxState.proj, -#ifdef AURORA_NATIVE_MATRIX - mtx -#else - mtx.transpose() -#endif - ); + update_gx_state(g_gxState.proj, mtx); } // TODO GXSetProjectionv @@ -20,13 +13,8 @@ void GXSetProjection(const void* mtx_, GXProjectionType type) { void GXLoadPosMtxImm(const void* mtx_, u32 id) { CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast(id)); auto& state = g_gxState.pnMtx[id / 3]; -#ifdef AURORA_NATIVE_MATRIX - const auto& mtx = *reinterpret_cast*>(mtx_); + const auto& mtx = *reinterpret_cast*>(mtx_); update_gx_state(state.pos, mtx); -#else - const auto* mtx = reinterpret_cast*>(mtx_); - update_gx_state(state.pos, mtx->toTransposed4x4()); -#endif } // TODO GXLoadPosMtxIndx @@ -34,56 +22,37 @@ void GXLoadPosMtxImm(const void* mtx_, u32 id) { void GXLoadNrmMtxImm(const void* mtx_, u32 id) { CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast(id)); auto& state = g_gxState.pnMtx[id / 3]; -#ifdef AURORA_NATIVE_MATRIX - const auto& mtx = *reinterpret_cast*>(mtx_); + const auto& mtx = *reinterpret_cast*>(mtx_); update_gx_state(state.nrm, mtx); -#else - const auto* mtx = reinterpret_cast*>(mtx_); - update_gx_state(state.nrm, mtx->toTransposed4x4()); -#endif } // TODO GXLoadNrmMtxImm3x3 // TODO GXLoadNrmMtxIndx3x3 void GXSetCurrentMtx(u32 id) { - CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast(id)); + CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", id); update_gx_state(g_gxState.currentPnMtx, id / 3); } void GXLoadTexMtxImm(const void* mtx_, u32 id, GXTexMtxType type) { CHECK((id >= GX_TEXMTX0 && id <= GX_IDENTITY) || (id >= GX_PTTEXMTX0 && id <= GX_PTIDENTITY), "invalid tex mtx {}", - static_cast(id)); + id); if (id >= GX_PTTEXMTX0) { - CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", static_cast(type)); + CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", 
underlying(type)); const auto idx = (id - GX_PTTEXMTX0) / 3; -#ifdef AURORA_NATIVE_MATRIX - const auto& mtx = *reinterpret_cast*>(mtx_); - update_gx_state>(g_gxState.ptTexMtxs[idx], mtx); -#else const auto& mtx = *reinterpret_cast*>(mtx_); - update_gx_state>(g_gxState.ptTexMtxs[idx], mtx.toTransposed4x4()); -#endif + update_gx_state(g_gxState.ptTexMtxs[idx], mtx); } else { const auto idx = (id - GX_TEXMTX0) / 3; switch (type) { case GX_MTX3x4: { -#ifdef AURORA_NATIVE_MATRIX - const auto& mtx = *reinterpret_cast*>(mtx_); - update_gx_state(g_gxState.texMtxs[idx], mtx); -#else const auto& mtx = *reinterpret_cast*>(mtx_); - update_gx_state(g_gxState.texMtxs[idx], mtx.toTransposed4x4()); -#endif + update_gx_state(g_gxState.texMtxs[idx], mtx); break; } case GX_MTX2x4: { - const auto& mtx = *reinterpret_cast*>(mtx_); -#ifdef AURORA_NATIVE_MATRIX + const auto& mtx = *reinterpret_cast*>(mtx_); update_gx_state(g_gxState.texMtxs[idx], mtx); -#else - update_gx_state(g_gxState.texMtxs[idx], mtx.transpose()); -#endif break; } } diff --git a/lib/dolphin/gx/GXVert.cpp b/lib/dolphin/gx/GXVert.cpp index 1791322..023874c 100644 --- a/lib/dolphin/gx/GXVert.cpp +++ b/lib/dolphin/gx/GXVert.cpp @@ -1,47 +1,113 @@ #include "gx.hpp" -#include "../../gfx/stream/shader.hpp" +#include "aurora/math.hpp" +#include "../../gfx/model/shader.hpp" +#include "../../gfx/gx_fmt.hpp" -#include +#include #include -#ifndef NDEBUG -static inline GXAttr next_attr(size_t begin) { - auto iter = std::find_if(g_gxState.vtxDesc.begin() + begin, g_gxState.vtxDesc.end(), - [](const auto type) { return type != GX_NONE; }); - if (begin > 0 && iter == g_gxState.vtxDesc.end()) { - // wrap around - iter = std::find_if(g_gxState.vtxDesc.begin(), g_gxState.vtxDesc.end(), - [](const auto type) { return type != GX_NONE; }); - } - return GXAttr(iter - g_gxState.vtxDesc.begin()); -} -#endif +struct Attribute { + uint32_t offset; + GXAttr attr; + GXAttrType type; + aurora::gfx::gx::VtxAttrFmt fmt; +}; struct SStreamState { 
GXPrimitive primitive; GXVtxFmt vtxFmt; + std::vector attrs; + u16 curAttr = 0; u16 vertexCount = 0; - u16 vertexStart = 0; + u16 vertexStart; + u16 vertexSize; aurora::ByteBuffer vertexBuffer; + uint8_t* vertexData = nullptr; std::vector indices; -#ifndef NDEBUG - GXAttr nextAttr; -#endif - explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept - : primitive(primitive), vtxFmt(vtxFmt), vertexStart(vertexStart) { - vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize); + explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, std::vector attrs, u16 numVerts, + u16 vertexSize, u16 vertexStart) noexcept + : primitive(primitive), vtxFmt(vtxFmt), attrs(std::move(attrs)), vertexStart(vertexStart), vertexSize(vertexSize) { + vertexBuffer.reserve_extra(static_cast(numVerts) * vertexSize); if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) { - indices.reserve((u32(numVerts) - 3) * 3 + 3); + indices.reserve(((static_cast(numVerts) - 3) * 3) + 3); } else if (numVerts > 4 && primitive == GX_QUADS) { - indices.reserve(u32(numVerts) / 4 * 6); + indices.reserve(static_cast(numVerts) / 4 * 6); } else { indices.reserve(numVerts); } -#ifndef NDEBUG - nextAttr = next_attr(0); -#endif + } + + [[maybe_unused]] u8 check_direct(GXAttr attr, GXCompCnt cnt, GXCompType type) noexcept { + const auto& curAttr = attrs[this->curAttr]; + ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr); + ASSERT(curAttr.type == GX_DIRECT, "bad attribute type: GX_DIRECT, expected {}", curAttr.type); + ASSERT(curAttr.fmt.cnt == cnt, "bad attribute count: {}, expected {}", cnt, curAttr.fmt.cnt); + ASSERT(curAttr.fmt.type == type, "bad attribute type: {}, expected {}", type, curAttr.fmt.type); + return curAttr.fmt.frac; + } + + void check_indexed(GXAttr attr, GXAttrType type) noexcept { + const auto& curAttr = attrs[this->curAttr]; + ASSERT(curAttr.attr == attr, "bad 
attribute order: {}, expected {}", attr, curAttr.attr); + ASSERT(curAttr.type == type, "bad attribute type: {}, expected {}", type, curAttr.type); + } + + template + void append(const T& value) noexcept { + append_data(&value, sizeof(value), attrs[curAttr].offset); + next_attribute(); + } + +private: + void append_data(const void* ptr, size_t size, uint32_t offset) { + if (vertexData == nullptr) { + const auto vertexStart = vertexBuffer.size(); + vertexBuffer.append_zeroes(vertexSize); + vertexData = vertexBuffer.data() + vertexStart; + inc_vertex_count(); + } + ASSERT(offset + size <= vertexSize, "bad attribute end: {}, expected {}", offset + size, vertexSize); + memcpy(vertexData + offset, ptr, size); + } + + void next_attribute() noexcept { + curAttr = curAttr + 1; + if (curAttr >= attrs.size()) { + curAttr = 0; + vertexData = nullptr; + } + } + + void inc_vertex_count() noexcept { + auto curVertex = vertexStart + vertexCount; + if (primitive == GX_LINES || primitive == GX_LINESTRIP || primitive == GX_POINTS) { + // Currently unsupported, skip + return; + } + if (primitive == GX_TRIANGLES || primitive == GX_TRIANGLESTRIP || vertexCount < 3) { + // pass + } else if (primitive == GX_TRIANGLEFAN) { + indices.push_back(vertexStart); + indices.push_back(curVertex - 1); + } /*else if (primitive == GX_TRIANGLESTRIP) { + if ((vertexCount & 1) == 0) { + indices.push_back(curVertex - 2); + indices.push_back(curVertex - 1); + } else { + indices.push_back(curVertex - 1); + indices.push_back(curVertex - 2); + } + }*/ + else if (primitive == GX_QUADS) { + if ((vertexCount & 3) == 3) { + indices.push_back(curVertex - 3); + indices.push_back(curVertex - 1); + } + } + indices.push_back(curVertex); + ++vertexCount; } }; @@ -51,228 +117,319 @@ static u16 lastVertexStart = 0; extern "C" { void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) { CHECK(!sStreamState, "Stream began twice!"); + uint16_t vertexSize = 0; + uint16_t numDirectAttrs = 0; + uint16_t numIndexedAttrs 
= 0; for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) { if (type == GX_DIRECT) { + ++numDirectAttrs; if (attr == GX_VA_POS || attr == GX_VA_NRM) { vertexSize += 12; } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) { vertexSize += 16; } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) { vertexSize += 8; - } else UNLIKELY { - FATAL("dont know how to handle attr {}", static_cast(attr)); - } + } else + UNLIKELY { FATAL("dont know how to handle attr {}", attr); } } else if (type == GX_INDEX8 || type == GX_INDEX16) { - vertexSize += 2; + ++numIndexedAttrs; } - attr = GXAttr(attr + 1); + attr = static_cast(attr + 1); } + auto [num4xAttr, rem] = std::div(numIndexedAttrs, 4); + u32 num2xAttr = 0; + if (rem > 2) { + ++num4xAttr; + } else if (rem > 0) { + ++num2xAttr; + } + u32 directStart = num4xAttr * 8 + num2xAttr * 4; + vertexSize += directStart; + + u32 indexOffset = 0; + u32 directOffset = directStart; + std::vector attrs; + attrs.reserve(numDirectAttrs + numIndexedAttrs); + const auto& curVtxFmt = g_gxState.vtxFmts[vtxFmt]; + for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) { + if (type == GX_DIRECT) { + u32 attrSize; + if (attr == GX_VA_POS || attr == GX_VA_NRM) { + attrSize = 12; + } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) { + attrSize = 16; + } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) { + attrSize = 8; + } else + UNLIKELY { FATAL("dont know how to handle attr {}", attr); } + const auto& attrFmt = curVtxFmt.attrs[attr]; + attrs.emplace_back(directOffset, attr, type, attrFmt); + directOffset += attrSize; + } else if (type == GX_INDEX8 || type == GX_INDEX16) { + attrs.emplace_back(indexOffset, attr, type); + indexOffset += 2; + } + attr = static_cast(attr + 1); + } + CHECK(vertexSize > 0, "no vtx attributes enabled?"); - sStreamState.emplace(primitive, vtxFmt, nVerts, vertexSize, g_gxState.stateDirty ? 
0 : lastVertexStart); + sStreamState.emplace(primitive, vtxFmt, std::move(attrs), nVerts, vertexSize, + /*g_gxState.stateDirty ? 0 : lastVertexStart*/ 0); } -static inline void check_attr_order(GXAttr attr) noexcept { -#ifndef NDEBUG - CHECK(sStreamState, "Stream not started!"); - CHECK(sStreamState->nextAttr == attr, "bad attribute order: {}, expected {}", static_cast(attr), - static_cast(sStreamState->nextAttr)); - sStreamState->nextAttr = next_attr(attr + 1); -#endif -} - -void GXPosition3f32(float x, float y, float z) { - check_attr_order(GX_VA_POS); - auto& state = *sStreamState; - state.vertexBuffer.append(&x, sizeof(float)); - state.vertexBuffer.append(&y, sizeof(float)); - state.vertexBuffer.append(&z, sizeof(float)); - auto curVertex = state.vertexStart + state.vertexCount; - if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) { - // pass - } else if (state.primitive == GX_TRIANGLEFAN) { - state.indices.push_back(state.vertexStart); - state.indices.push_back(curVertex - 1); - } else if (state.primitive == GX_TRIANGLESTRIP) { - if ((state.vertexCount & 1) == 0) { - state.indices.push_back(curVertex - 2); - state.indices.push_back(curVertex - 1); - } else { - state.indices.push_back(curVertex - 1); - state.indices.push_back(curVertex - 2); - } - } else if (state.primitive == GX_QUADS) { - if ((state.vertexCount & 3) == 3) { - state.indices.push_back(curVertex - 3); - state.indices.push_back(curVertex - 1); - } - } - state.indices.push_back(curVertex); - ++state.vertexCount; +void GXPosition3f32(f32 x, f32 y, f32 z) { + sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_F32); + sStreamState->append(aurora::Vec3{x, y, z}); } void GXPosition3u16(u16 x, u16 y, u16 z) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS]; - GXPosition3f32( - static_cast(x) / static_cast(1 << attrFmt.frac), - static_cast(y) / static_cast(1 << attrFmt.frac), - static_cast(z) / static_cast(1 << attrFmt.frac) - ); + const auto frac = 
sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U16); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + static_cast(z) / static_cast(1 << frac), + }); } void GXPosition3s16(s16 x, s16 y, s16 z) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS]; - GXPosition3f32( - static_cast(x) / static_cast(1 << attrFmt.frac), - static_cast(y) / static_cast(1 << attrFmt.frac), - static_cast(z) / static_cast(1 << attrFmt.frac) - ); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S16); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + static_cast(z) / static_cast(1 << frac), + }); } void GXPosition3u8(u8 x, u8 y, u8 z) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS]; - GXPosition3f32( - static_cast(x) / static_cast(1 << attrFmt.frac), - static_cast(y) / static_cast(1 << attrFmt.frac), - static_cast(z) / static_cast(1 << attrFmt.frac) - ); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U8); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + static_cast(z) / static_cast(1 << frac), + }); } void GXPosition3s8(s8 x, s8 y, s8 z) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS]; - GXPosition3f32( - static_cast(x) / static_cast(1 << attrFmt.frac), - static_cast(y) / static_cast(1 << attrFmt.frac), - static_cast(z) / static_cast(1 << attrFmt.frac) - ); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S8); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + static_cast(z) / static_cast(1 << frac), + }); } -void GXPosition2f32(float x, float y) { - GXPosition3f32(x, y, 0.f); +void GXPosition2f32(f32 x, f32 y) { + 
sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_F32); + sStreamState->append(aurora::Vec3{x, y, 0.f}); } void GXPosition2u16(u16 x, u16 y) { - GXPosition3u16(x, y, 0); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U16); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + 0.f, + }); } void GXPosition2s16(s16 x, s16 y) { - GXPosition3s16(x, y, 0); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S16); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + 0.f, + }); } void GXPosition2u8(u8 x, u8 y) { - GXPosition3u8(x, y, 0); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U8); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + 0.f, + }); } void GXPosition2s8(s8 x, s8 y) { - GXPosition3s8(x, y, 0); + const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S8); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + 0.f, + }); } void GXPosition1x16(u16 idx) { - check_attr_order(GX_VA_POS); - // keep aligned - if (sStreamState->vertexBuffer.size() % 4 != 0) { - sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4)); - } - sStreamState->vertexBuffer.append(&idx, 2); + sStreamState->check_indexed(GX_VA_POS, GX_INDEX16); + sStreamState->append(idx); } void GXPosition1x8(u8 idx) { - GXPosition1x16(idx); + sStreamState->check_indexed(GX_VA_POS, GX_INDEX8); + sStreamState->append(idx); } -void GXNormal3f32(float x, float y, float z) { - check_attr_order(GX_VA_NRM); - sStreamState->vertexBuffer.append(&x, 4); - sStreamState->vertexBuffer.append(&y, 4); - sStreamState->vertexBuffer.append(&z, 4); +void GXNormal3f32(f32 x, f32 y, f32 z) { + sStreamState->check_direct(GX_VA_NRM, 
GX_NRM_XYZ, GX_F32); + sStreamState->append(aurora::Vec3{x, y, z}); } void GXNormal3s16(s16 x, s16 y, s16 z) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM]; - GXNormal3f32( - static_cast(x) / static_cast(1 << attrFmt.frac), - static_cast(y) / static_cast(1 << attrFmt.frac), - static_cast(z) / static_cast(1 << attrFmt.frac) - ); + const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S16); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + static_cast(z) / static_cast(1 << frac), + }); } void GXNormal3s8(s8 x, s8 y, s8 z) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM]; - GXNormal3f32( - static_cast(x) / static_cast(1 << attrFmt.frac), - static_cast(y) / static_cast(1 << attrFmt.frac), - static_cast(z) / static_cast(1 << attrFmt.frac) - ); + const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S8); + sStreamState->append(aurora::Vec3{ + static_cast(x) / static_cast(1 << frac), + static_cast(y) / static_cast(1 << frac), + static_cast(z) / static_cast(1 << frac), + }); } -void GXNormal1x16(u16 idx) { - check_attr_order(GX_VA_NRM); - // keep aligned - if (sStreamState->vertexBuffer.size() % 4 != 0) { - sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4)); - } - sStreamState->vertexBuffer.append(&idx, 2); +void GXNormal1x16(u16 index) { + sStreamState->check_indexed(GX_VA_NRM, GX_INDEX16); + sStreamState->append(index); } -void GXNormal1x8(u8 idx) { - GXNormal1x16(idx); +void GXNormal1x8(u8 index) { + sStreamState->check_indexed(GX_VA_NRM, GX_INDEX8); + sStreamState->append(index); } -void GXColor4f32(float r, float g, float b, float a) { - check_attr_order(GX_VA_CLR0); - sStreamState->vertexBuffer.append(&r, 4); - sStreamState->vertexBuffer.append(&g, 4); - sStreamState->vertexBuffer.append(&b, 4); - sStreamState->vertexBuffer.append(&a, 4); +void 
GXColor4f32(f32 r, f32 g, f32 b, f32 a) { + sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8); + sStreamState->append(aurora::Vec4{r, g, b, a}); } void GXColor4u8(u8 r, u8 g, u8 b, u8 a) { - GXColor4f32(static_cast(r) / 255.f, static_cast(g) / 255.f, static_cast(b) / 255.f, - static_cast(a) / 255.f); + sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8); + sStreamState->append(aurora::Vec4{ + static_cast(r) / 255.f, + static_cast(g) / 255.f, + static_cast(b) / 255.f, + static_cast(a) / 255.f, + }); } void GXColor3u8(u8 r, u8 g, u8 b) { - GXColor4u8(r, g, b, 255); + sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB8); + sStreamState->append(aurora::Vec4{ + static_cast(r) / 255.f, + static_cast(g) / 255.f, + static_cast(b) / 255.f, + 1.f, + }); } -void GXColor1x16(u16 idx) { - check_attr_order(GX_VA_CLR0); - // keep aligned - if (sStreamState->vertexBuffer.size() % 4 != 0) { - sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4)); - } - sStreamState->vertexBuffer.append(&idx, 2); +void GXColor1u32(u32 clr) { + sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8); + sStreamState->append(aurora::Vec4{ + static_cast((clr >> 24) & 0xff) / 255.f, + static_cast((clr >> 16) & 0xff) / 255.f, + static_cast((clr >> 8) & 0xff) / 255.f, + static_cast(clr & 0xff) / 255.f, + }); } -void GXColor1x8(u8 idx) { - GXColor1x16(idx); +void GXColor1u16(u16 clr) { + sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB565); + sStreamState->append(aurora::Vec4{ + static_cast((clr >> 11) & 0x1f) / 31.f, + static_cast((clr >> 5) & 0x3f) / 63.f, + static_cast(clr & 0x1f) / 31.f, + 1.f, + }); } -void GXTexCoord2f32(float u, float v) { - check_attr_order(GX_VA_TEX0); - sStreamState->vertexBuffer.append(&u, 4); - sStreamState->vertexBuffer.append(&v, 4); +void GXTexCoord2f32(f32 s, f32 t) { + sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_F32); + sStreamState->append(aurora::Vec2{s, t}); +} + +void GXTexCoord2u16(u16 
s, u16 t) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U16); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + static_cast(t) / static_cast(1 << frac), + }); } void GXTexCoord2s16(s16 s, s16 t) { - const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_TEX0]; - GXTexCoord2f32( - static_cast(s) / static_cast(1 << attrFmt.frac), - static_cast(t) / static_cast(1 << attrFmt.frac) - ); + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S16); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + static_cast(t) / static_cast(1 << frac), + }); } -void GXTexCoord1x16(u16 idx) { - check_attr_order(GX_VA_TEX0); - // keep aligned - if (sStreamState->vertexBuffer.size() % 4 != 0) { - sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4)); - } - sStreamState->vertexBuffer.append(&idx, 2); +void GXTexCoord2u8(u8 s, u8 t) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U8); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + static_cast(t) / static_cast(1 << frac), + }); } -void GXTexCoord1x8(u8 idx) { - GXTexCoord1x16(idx); +void GXTexCoord2s8(s8 s, s8 t) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S8); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + static_cast(t) / static_cast(1 << frac), + }); +} + +void GXTexCoord1f32(f32 s) { + sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_F32); + sStreamState->append(aurora::Vec2{s, 0.f}); +} + +void GXTexCoord1u16(u16 s) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U16); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + 0.f, + }); +} + +void GXTexCoord1s16(s16 s) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S16); + sStreamState->append(aurora::Vec2{ + static_cast(s) 
/ static_cast(1 << frac), + 0.f, + }); +} + +void GXTexCoord1u8(u8 s) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U8); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + 0.f, + }); +} + +void GXTexCoord1s8(s8 s) { + const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S8); + sStreamState->append(aurora::Vec2{ + static_cast(s) / static_cast(1 << frac), + 0.f, + }); +} + +void GXTexCoord1x16(u16 index) { + sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX16); + sStreamState->append(index); +} + +void GXTexCoord1x8(u8 index) { + sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX8); + sStreamState->append(static_cast(index)); } void GXEnd() { @@ -282,27 +439,55 @@ void GXEnd() { } const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size()); const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices}); - if (g_gxState.stateDirty) { - aurora::gfx::stream::PipelineConfig config{}; - populate_pipeline_config(config, GX_TRIANGLES); - const auto info = build_shader_info(config.shaderConfig); - const auto pipeline = aurora::gfx::pipeline_ref(config); - aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{ - .pipeline = pipeline, - .vertRange = vertRange, - .uniformRange = build_uniform(info), - .indexRange = indexRange, - .indexCount = static_cast(sStreamState->indices.size()), - .bindGroups = build_bind_groups(info, config.shaderConfig, {}), - .dstAlpha = g_gxState.dstAlpha, - }); - } else { - aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{ - .vertRange = vertRange, - .indexRange = indexRange, - .indexCount = static_cast(sStreamState->indices.size()), - }); + + aurora::gfx::gx::BindGroupRanges ranges{}; + for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { + if (g_gxState.vtxDesc[i] != GX_INDEX8 && g_gxState.vtxDesc[i] != GX_INDEX16) { + continue; + } + auto& array = g_gxState.arrays[i]; + if 
(array.cachedRange.size > 0) { + // Use the currently cached range + ranges.vaRanges[i] = array.cachedRange; + } else { + // Push array data to storage and cache range + const auto range = aurora::gfx::push_storage(static_cast(array.data), array.size); + ranges.vaRanges[i] = range; + array.cachedRange = range; + } } + + // if (g_gxState.stateDirty) { + aurora::gfx::model::PipelineConfig config{}; + GXPrimitive primitive = GX_TRIANGLES; + switch (sStreamState->primitive) { + case GX_TRIANGLESTRIP: + primitive = GX_TRIANGLESTRIP; + break; + default: + break; + } + populate_pipeline_config(config, primitive, sStreamState->vtxFmt); + const auto info = build_shader_info(config.shaderConfig); + const auto bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, ranges); + const auto pipeline = aurora::gfx::pipeline_ref(config); + aurora::gfx::push_draw_command(aurora::gfx::model::DrawData{ + .pipeline = pipeline, + .vertRange = vertRange, + .idxRange = indexRange, + .dataRanges = ranges, + .uniformRange = build_uniform(info), + .indexCount = static_cast(sStreamState->indices.size()), + .bindGroups = bindGroups, + .dstAlpha = g_gxState.dstAlpha, + }); + // } else { + // aurora::gfx::merge_draw_command(aurora::gfx::model::DrawData{ + // .vertRange = vertRange, + // .idxRange = indexRange, + // .indexCount = static_cast(sStreamState->indices.size()), + // }); + // } lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount; sStreamState.reset(); } diff --git a/lib/gfx/common.cpp b/lib/gfx/common.cpp index 968b90a..0e12d82 100644 --- a/lib/gfx/common.cpp +++ b/lib/gfx/common.cpp @@ -3,7 +3,6 @@ #include "../internal.hpp" #include "../webgpu/gpu.hpp" #include "model/shader.hpp" -#include "stream/shader.hpp" #include "texture.hpp" #include @@ -11,7 +10,6 @@ #include #include #include -#include #include #include @@ -37,13 +35,11 @@ constexpr uint64_t StagingBufferSize = UniformBufferSize + VertexBufferSize + IndexBufferSize + StorageBufferSize + 
TextureUploadSize; struct ShaderState { - stream::State stream; model::State model; }; struct ShaderDrawCommand { ShaderType type; union { - stream::DrawData stream; model::DrawData model; }; }; @@ -168,10 +164,9 @@ static u32 g_serializedPipelineCount = 0; template static void serialize_pipeline_config(ShaderType type, const PipelineConfig& config) { static_assert(std::has_unique_object_representations_v); - g_serializedPipelines.append(&type, sizeof(type)); - const u32 configSize = sizeof(config); - g_serializedPipelines.append(&configSize, sizeof(configSize)); - g_serializedPipelines.append(&config, configSize); + g_serializedPipelines.append(type); + g_serializedPipelines.append(sizeof(config)); + g_serializedPipelines.append(config); ++g_serializedPipelineCount; } @@ -278,33 +273,19 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4 ++g_currentRenderPass; } -template <> -const stream::State& get_state() { - return g_state.stream; -} - -template <> -void push_draw_command(stream::DrawData data) { - push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data}); -} - -template <> -void merge_draw_command(stream::DrawData data) { - auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream; - CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}", - last.vertRange.offset + last.vertRange.size, data.vertRange.offset); - CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} -> {}", - last.indexRange.offset + last.indexRange.size, data.indexRange.offset); - last.vertRange.size += data.vertRange.size; - last.indexRange.size += data.indexRange.size; - last.indexCount += data.indexCount; - ++g_mergedDrawCallCount; -} - -template <> -PipelineRef pipeline_ref(stream::PipelineConfig config) { - return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); }); 
-} +// template <> +// void merge_draw_command(stream::DrawData data) { +// auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream; +// CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}", +// last.vertRange.offset + last.vertRange.size, data.vertRange.offset); +// CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} -> +// {}", +// last.indexRange.offset + last.indexRange.size, data.indexRange.offset); +// last.vertRange.size += data.vertRange.size; +// last.indexRange.size += data.indexRange.size; +// last.indexCount += data.indexCount; +// ++g_mergedDrawCallCount; +// } template <> void push_draw_command(model::DrawData data) { @@ -378,16 +359,6 @@ void load_pipeline_cache() { u32 size = *reinterpret_cast(pipelineCache.data() + offset); offset += sizeof(u32); switch (type) { - case ShaderType::Stream: { - if (size != sizeof(stream::PipelineConfig)) { - break; - } - const auto config = *reinterpret_cast(pipelineCache.data() + offset); - if (config.version != gx::GXPipelineConfigVersion) { - break; - } - find_pipeline(type, config, [=]() { return stream::create_pipeline(g_state.stream, config); }, true); - } break; case ShaderType::Model: { if (size != sizeof(model::PipelineConfig)) { break; @@ -397,9 +368,10 @@ void load_pipeline_cache() { break; } find_pipeline(type, config, [=]() { return model::create_pipeline(g_state.model, config); }, true); - } break; + break; + } default: - Log.warn("Unknown pipeline type {}", static_cast(type)); + Log.warn("Unknown pipeline type {}", underlying(type)); break; } offset += size; @@ -459,7 +431,6 @@ void initialize() { } map_staging_buffer(); - g_state.stream = stream::construct_state(); g_state.model = model::construct_state(); load_pipeline_cache(); @@ -581,6 +552,9 @@ void end_frame(const wgpu::CommandEncoder& cmd) { currentStagingBuffer = (currentStagingBuffer + 1) % 
g_stagingBuffers.size(); map_staging_buffer(); g_currentRenderPass = UINT32_MAX; + for (auto& array : gx::g_gxState.arrays) { + array.cachedRange = {}; + } if (!g_hasPipelineThread) { pipeline_worker(); @@ -612,7 +586,7 @@ void render(wgpu::CommandEncoder& cmd) { .view = webgpu::g_depthBuffer.view, .depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load, .depthStoreOp = wgpu::StoreOp::Store, - .depthClearValue = 1.f, + .depthClearValue = gx::UseReversedZ ? 0.f : 1.f, }; const auto label = fmt::format("Render pass {}", i); const wgpu::RenderPassDescriptor renderPassDescriptor{ @@ -680,7 +654,9 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) { switch (cmd.type) { case CommandType::SetViewport: { const auto& vp = cmd.data.setViewport; - pass.SetViewport(vp.left, vp.top, vp.width, vp.height, vp.znear, vp.zfar); + const float minDepth = gx::UseReversedZ ? 1.f - vp.zfar : vp.znear; + const float maxDepth = gx::UseReversedZ ? 1.f - vp.znear : vp.zfar; + pass.SetViewport(vp.left, vp.top, vp.width, vp.height, minDepth, maxDepth); } break; case CommandType::SetScissor: { const auto& sc = cmd.data.setScissor; @@ -694,9 +670,6 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) { case CommandType::Draw: { const auto& draw = cmd.data.draw; switch (draw.type) { - case ShaderType::Stream: - stream::render(g_state.stream, draw.stream, pass); - break; case ShaderType::Model: model::render(g_state.model, draw.model, pass); break; diff --git a/lib/gfx/common.hpp b/lib/gfx/common.hpp index 6bde6e9..89f798d 100644 --- a/lib/gfx/common.hpp +++ b/lib/gfx/common.hpp @@ -56,8 +56,7 @@ public: ByteBuffer() noexcept = default; explicit ByteBuffer(size_t size) noexcept : m_data(static_cast(calloc(1, size))), m_length(size), m_capacity(size) {} - explicit ByteBuffer(uint8_t* data, size_t size) noexcept - : m_data(data), m_capacity(size), m_owned(false) {} + explicit ByteBuffer(uint8_t* data, size_t size) noexcept : m_data(data), 
m_capacity(size), m_owned(false) {} ~ByteBuffer() noexcept { if (m_data != nullptr && m_owned) { free(m_data); @@ -98,6 +97,11 @@ public: m_length += size; } + template + void append(const T& obj) { + append(&obj, sizeof(T)); + } + void append_zeroes(size_t size) { resize(m_length + size, true); m_length += size; @@ -179,8 +183,7 @@ struct TextureRef; using TextureHandle = std::shared_ptr; enum class ShaderType : uint8_t { - Stream, - Model, + Model = 1, }; void initialize(); diff --git a/lib/gfx/gx.cpp b/lib/gfx/gx.cpp index edbd69f..ad52148 100644 --- a/lib/gfx/gx.cpp +++ b/lib/gfx/gx.cpp @@ -7,7 +7,6 @@ #include #include -#include using aurora::gfx::gx::g_gxState; static aurora::Module Log("aurora::gx"); @@ -25,7 +24,7 @@ const TextureBind& get_texture(GXTexMapID id) noexcept { return g_gxState.textur static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) { switch (fac) { - DEFAULT_FATAL("invalid blend factor {}", static_cast(fac)); + DEFAULT_FATAL("invalid blend factor {}", underlying(fac)); case GX_BL_ZERO: return wgpu::BlendFactor::Zero; case GX_BL_ONE: @@ -55,21 +54,21 @@ static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) { static inline wgpu::CompareFunction to_compare_function(GXCompare func) { switch (func) { - DEFAULT_FATAL("invalid depth fn {}", static_cast(func)); + DEFAULT_FATAL("invalid depth fn {}", underlying(func)); case GX_NEVER: return wgpu::CompareFunction::Never; case GX_LESS: - return wgpu::CompareFunction::Less; + return UseReversedZ ? wgpu::CompareFunction::Greater : wgpu::CompareFunction::Less; case GX_EQUAL: return wgpu::CompareFunction::Equal; case GX_LEQUAL: - return wgpu::CompareFunction::LessEqual; + return UseReversedZ ? wgpu::CompareFunction::GreaterEqual : wgpu::CompareFunction::LessEqual; case GX_GREATER: - return wgpu::CompareFunction::Greater; + return UseReversedZ ? 
wgpu::CompareFunction::Less : wgpu::CompareFunction::Greater; case GX_NEQUAL: return wgpu::CompareFunction::NotEqual; case GX_GEQUAL: - return wgpu::CompareFunction::GreaterEqual; + return UseReversedZ ? wgpu::CompareFunction::LessEqual : wgpu::CompareFunction::GreaterEqual; case GX_ALWAYS: return wgpu::CompareFunction::Always; } @@ -79,7 +78,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr GXLogicOp op, u32 dstAlpha) { wgpu::BlendComponent colorBlendComponent; switch (mode) { - DEFAULT_FATAL("unsupported blend mode {}", static_cast(mode)); + DEFAULT_FATAL("unsupported blend mode {}", underlying(mode)); case GX_BM_NONE: colorBlendComponent = { .operation = wgpu::BlendOperation::Add, @@ -103,7 +102,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr break; case GX_BM_LOGIC: switch (op) { - DEFAULT_FATAL("unsupported logic op {}", static_cast(op)); + DEFAULT_FATAL("unsupported logic op {}", underlying(op)); case GX_LO_CLEAR: colorBlendComponent = { .operation = wgpu::BlendOperation::Add, @@ -160,7 +159,7 @@ static inline wgpu::ColorWriteMask to_write_mask(bool colorUpdate, bool alphaUpd static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCullMode gx_cullMode) { wgpu::PrimitiveTopology primitive = wgpu::PrimitiveTopology::TriangleList; switch (gx_prim) { - DEFAULT_FATAL("unsupported primitive type {}", static_cast(gx_prim)); + DEFAULT_FATAL("unsupported primitive type {}", underlying(gx_prim)); case GX_TRIANGLES: break; case GX_TRIANGLESTRIP: @@ -169,7 +168,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCul } wgpu::CullMode cullMode = wgpu::CullMode::None; switch (gx_cullMode) { - DEFAULT_FATAL("unsupported cull mode {}", static_cast(gx_cullMode)); + DEFAULT_FATAL("unsupported cull mode {}", underlying(gx_cullMode)); case GX_CULL_FRONT: cullMode = wgpu::CullMode::Front; break; @@ -193,14 +192,6 @@ wgpu::RenderPipeline build_pipeline(const 
PipelineConfig& config, const ShaderIn .format = g_graphicsConfig.depthFormat, .depthWriteEnabled = config.depthUpdate, .depthCompare = to_compare_function(config.depthFunc), - .stencilFront = - wgpu::StencilFaceState{ - .compare = wgpu::CompareFunction::Always, - }, - .stencilBack = - wgpu::StencilFaceState{ - .compare = wgpu::CompareFunction::Always, - }, }; const auto blendState = to_blend_state(config.blendMode, config.blendFacSrc, config.blendFacDst, config.blendOp, config.dstAlpha); @@ -249,25 +240,23 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn return g_device.CreateRenderPipeline(&descriptor); } -void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept { +void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept { + const auto& vtxFmt = g_gxState.vtxFmts[fmt]; config.shaderConfig.fogType = g_gxState.fog.type; config.shaderConfig.vtxAttrs = g_gxState.vtxDesc; - int lastIndexedAttr = -1; for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { const auto type = g_gxState.vtxDesc[i]; if (type != GX_INDEX8 && type != GX_INDEX16) { - config.shaderConfig.attrMapping[i] = GX_VA_NULL; + config.shaderConfig.attrMapping[i] = {}; continue; } - const auto& array = g_gxState.arrays[i]; - if (lastIndexedAttr >= 0 && array == g_gxState.arrays[lastIndexedAttr]) { - // Map attribute to previous attribute - config.shaderConfig.attrMapping[i] = config.shaderConfig.attrMapping[lastIndexedAttr]; - } else { - // Map attribute to its own storage - config.shaderConfig.attrMapping[i] = static_cast(i); - } - lastIndexedAttr = i; + // Map attribute to its own storage + config.shaderConfig.attrMapping[i] = StorageConfig { + .attr = static_cast(i), + .cnt = vtxFmt.attrs[i].cnt, + .compType = vtxFmt.attrs[i].type, + .frac = vtxFmt.attrs[i].frac, + }; } config.shaderConfig.tevSwapTable = g_gxState.tevSwapTable; for (u8 i = 0; i < g_gxState.numTevStages; ++i) { @@ -328,14 +317,14 @@ void 
populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noe Range build_uniform(const ShaderInfo& info) noexcept { auto [buf, range] = map_uniform(info.uniformSize); { - buf.append(&g_gxState.pnMtx[g_gxState.currentPnMtx], 128); - buf.append(&g_gxState.proj, 64); + buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]); + buf.append(g_gxState.proj); } for (int i = 0; i < info.loadsTevReg.size(); ++i) { if (!info.loadsTevReg.test(i)) { continue; } - buf.append(&g_gxState.colorRegs[i], 16); + buf.append(g_gxState.colorRegs[i]); } bool lightingEnabled = false; for (int i = 0; i < info.sampledColorChannels.size(); ++i) { @@ -352,11 +341,10 @@ Range build_uniform(const ShaderInfo& info) noexcept { if (lightingEnabled) { // Lights static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights); - buf.append(&g_gxState.lights, 80 * GX::MaxLights); + buf.append(g_gxState.lights); // Light state for all channels for (int i = 0; i < 4; ++i) { - u32 lightState = g_gxState.colorChannelState[i].lightMask.to_ulong(); - buf.append(&lightState, 4); + buf.append(g_gxState.colorChannelState[i].lightMask.to_ulong()); } } for (int i = 0; i < info.sampledColorChannels.size(); ++i) { @@ -366,25 +354,25 @@ Range build_uniform(const ShaderInfo& info) noexcept { const auto& ccc = g_gxState.colorChannelConfig[i * 2]; const auto& ccs = g_gxState.colorChannelState[i * 2]; if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) { - buf.append(&ccs.ambColor, 16); + buf.append(ccs.ambColor); } if (ccc.matSrc == GX_SRC_REG) { - buf.append(&ccs.matColor, 16); + buf.append(ccs.matColor); } const auto& ccca = g_gxState.colorChannelConfig[i * 2 + 1]; const auto& ccsa = g_gxState.colorChannelState[i * 2 + 1]; if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) { - buf.append(&ccsa.ambColor, 16); + buf.append(ccsa.ambColor); } if (ccca.matSrc == GX_SRC_REG) { - buf.append(&ccsa.matColor, 16); + buf.append(ccsa.matColor); } } for (int i = 0; i < info.sampledKColors.size(); ++i) { if 
(!info.sampledKColors.test(i)) { continue; } - buf.append(&g_gxState.kcolors[i], 16); + buf.append(g_gxState.kcolors[i]); } for (int i = 0; i < info.usesTexMtx.size(); ++i) { if (!info.usesTexMtx.test(i)) { @@ -392,26 +380,16 @@ Range build_uniform(const ShaderInfo& info) noexcept { } const auto& state = g_gxState; switch (info.texMtxTypes[i]) { - DEFAULT_FATAL("unhandled tex mtx type {}", static_cast(info.texMtxTypes[i])); + DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i])); case GX_TG_MTX2x4: - if (std::holds_alternative>(state.texMtxs[i])) { - buf.append(&std::get>(state.texMtxs[i]), 32); - } else if (std::holds_alternative>(g_gxState.texMtxs[i])) { - // TODO: SMB hits this? - Mat4x2 mtx{ - {1.f, 0.f}, - {0.f, 1.f}, - {0.f, 0.f}, - {0.f, 0.f}, - }; - buf.append(&mtx, 32); + if (std::holds_alternative>(state.texMtxs[i])) { + buf.append(std::get>(state.texMtxs[i])); } else UNLIKELY FATAL("expected 2x4 mtx in idx {}", i); break; case GX_TG_MTX3x4: - if (std::holds_alternative>(g_gxState.texMtxs[i])) { - const auto& mat = std::get>(g_gxState.texMtxs[i]); - buf.append(&mat, 64); + if (std::holds_alternative>(g_gxState.texMtxs[i])) { + buf.append(std::get>(g_gxState.texMtxs[i])); } else UNLIKELY FATAL("expected 3x4 mtx in idx {}", i); break; @@ -421,18 +399,11 @@ Range build_uniform(const ShaderInfo& info) noexcept { if (!info.usesPTTexMtx.test(i)) { continue; } - buf.append(&g_gxState.ptTexMtxs[i], 64); + buf.append(g_gxState.ptTexMtxs[i]); } if (info.usesFog) { const auto& state = g_gxState.fog; - struct Fog { - Vec4 color = state.color; - float a = 0.f; - float b = 0.5f; - float c = 0.f; - float pad = FLT_MAX; - } fog{}; - static_assert(sizeof(Fog) == 32); + Fog fog{.color = state.color}; if (state.nearZ != state.farZ && state.startZ != state.endZ) { const float depthRange = state.farZ - state.nearZ; const float fogRange = state.endZ - state.startZ; @@ -440,7 +411,7 @@ Range build_uniform(const ShaderInfo& info) noexcept { fog.b = 
state.farZ / depthRange; fog.c = state.startZ / fogRange; } - buf.append(&fog, 32); + buf.append(fog); } for (int i = 0; i < info.sampledTextures.size(); ++i) { if (!info.sampledTextures.test(i)) { @@ -448,7 +419,7 @@ Range build_uniform(const ShaderInfo& info) noexcept { } const auto& tex = get_texture(static_cast(i)); CHECK(tex, "unbound texture {}", i); - buf.append(&tex.texObj.lodBias, 4); + buf.append(tex.texObj.lodBias); } g_gxState.stateDirty = false; return range; @@ -564,7 +535,7 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader }; u32 bindIdx = 1; for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { - if (config.attrMapping[i] == static_cast(i)) { + if (config.attrMapping[i].attr == static_cast(i)) { uniformLayoutEntries[bindIdx] = wgpu::BindGroupLayoutEntry{ .binding = bindIdx, .visibility = wgpu::ShaderStage::Vertex, @@ -688,7 +659,7 @@ void shutdown() noexcept { static wgpu::AddressMode wgpu_address_mode(GXTexWrapMode mode) { switch (mode) { - DEFAULT_FATAL("invalid wrap mode {}", static_cast(mode)); + DEFAULT_FATAL("invalid wrap mode {}", underlying(mode)); case GX_CLAMP: return wgpu::AddressMode::ClampToEdge; case GX_REPEAT: @@ -735,8 +706,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept { .magFilter = wgpu::FilterMode::Nearest, .minFilter = wgpu::FilterMode::Nearest, .mipmapFilter = wgpu::MipmapFilterMode::Nearest, - .lodMinClamp = 0.f, - .lodMaxClamp = 1000.f, .maxAnisotropy = 1, }; } @@ -750,8 +719,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept { .magFilter = magFilter, .minFilter = minFilter, .mipmapFilter = mipFilter, - .lodMinClamp = 0.f, - .lodMaxClamp = 1000.f, .maxAnisotropy = wgpu_aniso(texObj.maxAniso), }; } diff --git a/lib/gfx/gx.hpp b/lib/gfx/gx.hpp index 40094fd..ba4a170 100644 --- a/lib/gfx/gx.hpp +++ b/lib/gfx/gx.hpp @@ -46,6 +46,11 @@ constexpr float GX_LARGE_NUMBER = -1048576.0f; #endif namespace aurora::gfx::gx { +constexpr bool EnableNormalVisualization = 
false; +constexpr bool EnableDebugPrints = false; +constexpr bool UsePerPixelLighting = true; +constexpr bool UseReversedZ = true; + constexpr u32 MaxTextures = GX_MAX_TEXMAP; constexpr u32 MaxTluts = 20; constexpr u32 MaxTevStages = GX_MAX_TEVSTAGE; @@ -144,8 +149,7 @@ struct ColorChannelState { Vec4 ambColor; GX::LightMask lightMask; }; -// Mat4x4 used instead of Mat4x3 for padding purposes -using TexMtxVariant = std::variant, Mat4x4>; +using TexMtxVariant = std::variant, Mat3x4>; struct TcgConfig { GXTexGenType type = GX_TG_MTX2x4; GXTexGenSrc src = GX_MAX_TEXGENSRC; @@ -213,10 +217,10 @@ struct VtxFmt { std::array attrs; }; struct PnMtx { - Mat4x4 pos; - Mat4x4 nrm; + Mat3x4 pos; + Mat3x4 nrm; }; -static_assert(sizeof(PnMtx) == sizeof(Mat4x4) * 2); +static_assert(sizeof(PnMtx) == sizeof(Mat3x4) * 2); struct Light { Vec4 pos{0.f, 0.f, 0.f}; Vec4 dir{0.f, 0.f, 0.f}; @@ -230,6 +234,14 @@ struct Light { bool operator!=(const Light& rhs) const { return !(*this == rhs); } }; static_assert(sizeof(Light) == 80); +struct Fog { + Vec4 color; + float a = 0.f; + float b = 0.5f; + float c = 0.f; + float pad = FLT_MAX; +}; +static_assert(sizeof(Fog) == 32); struct AttrArray { const void* data; u32 size; @@ -245,7 +257,6 @@ struct GXState { std::array pnMtx; u32 currentPnMtx; Mat4x4 proj; - Mat4x4 origProj; // for GXGetProjectionv GXProjectionType projType; // for GXGetProjectionv FogState fog; GXCullMode cullMode = GX_CULL_BACK; @@ -266,7 +277,7 @@ struct GXState { std::array textures; std::array tluts; std::array texMtxs; - std::array, MaxPTTexMtx> ptTexMtxs; + std::array, MaxPTTexMtx> ptTexMtxs; std::array tcgs; std::array vtxDesc; std::array vtxFmts; @@ -345,11 +356,18 @@ struct TextureConfig { bool operator==(const TextureConfig& rhs) const { return memcmp(this, &rhs, sizeof(*this)) == 0; } }; static_assert(std::has_unique_object_representations_v); +struct StorageConfig { + GXAttr attr = GX_VA_NULL; + GXCompCnt cnt = static_cast(0xFF); + GXCompType compType = 
static_cast(0xFF); + u8 frac = 0; + std::array pad{}; +}; struct ShaderConfig { GXFogType fogType; std::array vtxAttrs; // Mapping for indexed attributes -> storage buffer - std::array attrMapping; + std::array attrMapping; std::array tevSwapTable; std::array tevStages; u32 tevStageCount = 0; @@ -363,7 +381,7 @@ struct ShaderConfig { }; static_assert(std::has_unique_object_representations_v); -constexpr u32 GXPipelineConfigVersion = 4; +constexpr u32 GXPipelineConfigVersion = 5; struct PipelineConfig { u32 version = GXPipelineConfigVersion; ShaderConfig shaderConfig; @@ -405,7 +423,7 @@ struct ShaderInfo { struct BindGroupRanges { std::array vaRanges{}; }; -void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept; +void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept; wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info, ArrayRef vtxBuffers, wgpu::ShaderModule shader, const char* label) noexcept; diff --git a/lib/gfx/gx_fmt.hpp b/lib/gfx/gx_fmt.hpp index e1e6ae7..a6ff7dc 100644 --- a/lib/gfx/gx_fmt.hpp +++ b/lib/gfx/gx_fmt.hpp @@ -1,3 +1,7 @@ +#pragma once + +#include "../internal.hpp" + #include #include #include @@ -25,7 +29,7 @@ inline std::string format_as(const GXTevOp& op) { case GX_TEV_COMP_RGB8_EQ: return "GX_TEV_COMP_RGB8_EQ"; default: - return fmt::format("GXTevOp({})", static_cast(op)); + return fmt::format("GXTevOp({})", underlying(op)); } } @@ -64,7 +68,7 @@ inline std::string format_as(const GXTevColorArg& arg) { case GX_CC_ZERO: return "GX_CC_ZERO"; default: - return fmt::format("GXTevColorArg({})", static_cast(arg)); + return fmt::format("GXTevColorArg({})", underlying(arg)); } } @@ -87,7 +91,7 @@ inline std::string format_as(const GXTevAlphaArg& arg) { case GX_CA_ZERO: return "GX_CA_ZERO"; default: - return fmt::format("GXTevAlphaArg({})", static_cast(arg)); + return fmt::format("GXTevAlphaArg({})", underlying(arg)); } } @@ -118,7 +122,7 
@@ inline std::string format_as(const GXTexGenSrc& src) { case GX_TG_TEX7: return "GX_TG_TEX7"; default: - return fmt::format("GXTexGenSrc({})", static_cast(src)); + return fmt::format("GXTexGenSrc({})", underlying(src)); } } @@ -133,7 +137,7 @@ inline std::string format_as(const GXTexGenType& type) { case GX_TG_BUMP1: return "GX_TG_BUMP1"; default: - return fmt::format("GXTexGenType({})", static_cast(type)); + return fmt::format("GXTexGenType({})", underlying(type)); } } @@ -146,7 +150,7 @@ inline std::string format_as(const GXTevBias& bias) { case GX_TB_SUBHALF: return "GX_TB_SUBHALF"; default: - return fmt::format("GXTevBias({})", static_cast(bias)); + return fmt::format("GXTevBias({})", underlying(bias)); } } @@ -161,7 +165,7 @@ inline std::string format_as(const GXTevScale& scale) { case GX_CS_DIVIDE_2: return "GX_CS_DIVIDE_2"; default: - return fmt::format("GXTevScale({})", static_cast(scale)); + return fmt::format("GXTevScale({})", underlying(scale)); } } @@ -176,7 +180,7 @@ inline std::string format_as(const GXTevRegID& reg) { case GX_TEVREG2: return "GX_TEVREG2"; default: - return fmt::format("GXTevRegID({})", static_cast(reg)); + return fmt::format("GXTevRegID({})", underlying(reg)); } } @@ -231,7 +235,7 @@ inline std::string format_as(const GXTevKColorSel& sel) { case GX_TEV_KCSEL_K3_A: return "GX_TEV_KCSEL_K3_A"; default: - return fmt::format("GXTevKColorSel({})", static_cast(sel)); + return fmt::format("GXTevKColorSel({})", underlying(sel)); } } @@ -286,7 +290,7 @@ inline std::string format_as(const GXTevKAlphaSel& sel) { case GX_TEV_KASEL_K3_A: return "GX_TEV_KASEL_K3_A"; default: - return fmt::format("GXTevKAlphaSel({})", static_cast(sel)); + return fmt::format("GXTevKAlphaSel({})", underlying(sel)); } } @@ -313,7 +317,7 @@ inline std::string format_as(const GXTexMapID& id) { case GX_TEX_DISABLE: return "GX_TEX_DISABLE"; default: - return fmt::format("GXTexMapID({})", static_cast(id)); + return fmt::format("GXTexMapID({})", underlying(id)); } } @@ 
-340,7 +344,7 @@ inline std::string format_as(const GXChannelID& id) { case GX_COLOR_NULL: return "GX_COLOR_NULL"; default: - return fmt::format("GXChannelID({})", static_cast(id)); + return fmt::format("GXChannelID({})", underlying(id)); } } @@ -351,7 +355,7 @@ inline std::string format_as(const GXColorSrc& src) { case GX_SRC_VTX: return "GX_SRC_VTX"; default: - return fmt::format("GXColorSrc({})", static_cast(src)); + return fmt::format("GXColorSrc({})", underlying(src)); } } @@ -380,7 +384,7 @@ inline std::string format_as(const GXTexMtx& mtx) { case GX_IDENTITY: return "GX_IDENTITY"; default: - return fmt::format("GXTexMtx({})", static_cast(mtx)); + return fmt::format("GXTexMtx({})", underlying(mtx)); } } @@ -429,7 +433,7 @@ inline std::string format_as(const GXPTTexMtx& mtx) { case GX_PTIDENTITY: return "GX_PTIDENTITY"; default: - return fmt::format("GXPTTexMtx({})", static_cast(mtx)); + return fmt::format("GXPTTexMtx({})", underlying(mtx)); } } @@ -452,7 +456,7 @@ inline std::string format_as(const GXCompare& comp) { case GX_ALWAYS: return "GX_ALWAYS"; default: - return fmt::format("GXCompare({})", static_cast(comp)); + return fmt::format("GXCompare({})", underlying(comp)); } } @@ -467,7 +471,7 @@ inline std::string format_as(const GXAlphaOp& op) { case GX_AOP_XNOR: return "GX_AOP_XNOR"; default: - return fmt::format("GXAlphaOp({})", static_cast(op)); + return fmt::format("GXAlphaOp({})", underlying(op)); } } @@ -496,7 +500,7 @@ inline std::string format_as(const GXFogType& type) { case GX_FOG_ORTHO_REVEXP2: return "GX_FOG_ORTHO_REVEXP2"; default: - return fmt::format("GXFogType({})", static_cast(type)); + return fmt::format("GXFogType({})", underlying(type)); } } @@ -521,6 +525,158 @@ inline std::string format_as(const GXTexCoordID& id) { case GX_TEXCOORD_NULL: return "GX_TEXCOORD_NULL"; default: - return fmt::format("GXTexCoordID({})", static_cast(id)); + return fmt::format("GXTexCoordID({})", underlying(id)); + } +} + +inline std::string format_as(const 
GXPrimitive& prim) { + switch (prim) { + case GX_QUADS: + return "GX_QUADS"; + case GX_TRIANGLES: + return "GX_TRIANGLES"; + case GX_TRIANGLESTRIP: + return "GX_TRIANGLESTRIP"; + case GX_TRIANGLEFAN: + return "GX_TRIANGLEFAN"; + case GX_LINES: + return "GX_LINES"; + case GX_LINESTRIP: + return "GX_LINESTRIP"; + case GX_POINTS: + return "GX_POINTS"; + default: + return fmt::format("GXPrimitive({})", underlying(prim)); + } +} + +inline std::string format_as(const GXAttr& attr) { + switch (attr) { + case GX_VA_PNMTXIDX: + return "GX_VA_PNMTXIDX"; + case GX_VA_TEX0MTXIDX: + return "GX_VA_TEX0MTXIDX"; + case GX_VA_TEX1MTXIDX: + return "GX_VA_TEX1MTXIDX"; + case GX_VA_TEX2MTXIDX: + return "GX_VA_TEX2MTXIDX"; + case GX_VA_TEX3MTXIDX: + return "GX_VA_TEX3MTXIDX"; + case GX_VA_TEX4MTXIDX: + return "GX_VA_TEX4MTXIDX"; + case GX_VA_TEX5MTXIDX: + return "GX_VA_TEX5MTXIDX"; + case GX_VA_TEX6MTXIDX: + return "GX_VA_TEX6MTXIDX"; + case GX_VA_TEX7MTXIDX: + return "GX_VA_TEX7MTXIDX"; + case GX_VA_POS: + return "GX_VA_POS"; + case GX_VA_NRM: + return "GX_VA_NRM"; + case GX_VA_CLR0: + return "GX_VA_CLR0"; + case GX_VA_CLR1: + return "GX_VA_CLR1"; + case GX_VA_TEX0: + return "GX_VA_TEX0"; + case GX_VA_TEX1: + return "GX_VA_TEX1"; + case GX_VA_TEX2: + return "GX_VA_TEX2"; + case GX_VA_TEX3: + return "GX_VA_TEX3"; + case GX_VA_TEX4: + return "GX_VA_TEX4"; + case GX_VA_TEX5: + return "GX_VA_TEX5"; + case GX_VA_TEX6: + return "GX_VA_TEX6"; + case GX_VA_TEX7: + return "GX_VA_TEX7"; + case GX_POS_MTX_ARRAY: + return "GX_POS_MTX_ARRAY"; + case GX_NRM_MTX_ARRAY: + return "GX_NRM_MTX_ARRAY"; + case GX_TEX_MTX_ARRAY: + return "GX_TEX_MTX_ARRAY"; + case GX_LIGHT_ARRAY: + return "GX_LIGHT_ARRAY"; + case GX_VA_NBT: + return "GX_VA_NBT"; + case GX_VA_NULL: + return "GX_VA_NULL"; + default: + return fmt::format("GXAttr({})", underlying(attr)); + } +} + +inline std::string format_as(const GXCompCnt& cnt) { + switch (cnt) { + case GX_POS_XY: + return "GX_POS_XY|GX_NRM_XYZ|GX_CLR_RGB|GX_TEX_S"; + case 
GX_POS_XYZ: + return "GX_POS_XYZ|GX_NRM_NBT|GX_CLR_RGBA|GX_TEX_ST"; + case GX_NRM_NBT3: + return "GX_NRM_NBT3"; + default: + return fmt::format("GXCompCnt({})", underlying(cnt)); + } +} + +inline std::string format_as(const GXCompType& type) { + switch (type) { + case GX_U8: + return "GX_U8|GX_RGB565"; + case GX_S8: + return "GX_S8|GX_RGB8"; + case GX_U16: + return "GX_U16|GX_RGBX8"; + case GX_S16: + return "GX_S16|GX_RGBA4"; + case GX_F32: + return "GX_F32|GX_RGBA6"; + case GX_RGBA8: + return "GX_RGBA8"; + default: + return fmt::format("GXCompType({})", underlying(type)); + } +} + +inline std::string format_as(const GXAttrType& type) { + switch (type) { + case GX_NONE: + return "GX_NONE"; + case GX_DIRECT: + return "GX_DIRECT"; + case GX_INDEX8: + return "GX_INDEX8"; + case GX_INDEX16: + return "GX_INDEX16"; + default: + return fmt::format("GXAttrType({})", underlying(type)); + } +} + +inline std::string format_as(const GXVtxFmt& fmt) { + switch (fmt) { + case GX_VTXFMT0: + return "GX_VTXFMT0"; + case GX_VTXFMT1: + return "GX_VTXFMT1"; + case GX_VTXFMT2: + return "GX_VTXFMT2"; + case GX_VTXFMT3: + return "GX_VTXFMT3"; + case GX_VTXFMT4: + return "GX_VTXFMT4"; + case GX_VTXFMT5: + return "GX_VTXFMT5"; + case GX_VTXFMT6: + return "GX_VTXFMT6"; + case GX_VTXFMT7: + return "GX_VTXFMT7"; + default: + return fmt::format("GXVtxFmt({})", underlying(fmt)); } } diff --git a/lib/gfx/gx_shader.cpp b/lib/gfx/gx_shader.cpp index e5e5302..ad87a00 100644 --- a/lib/gfx/gx_shader.cpp +++ b/lib/gfx/gx_shader.cpp @@ -1,5 +1,6 @@ #include "common.hpp" +#include "../internal.hpp" #include "../webgpu/gpu.hpp" #include "gx.hpp" #include "gx_fmt.hpp" @@ -10,10 +11,6 @@ #include #include -constexpr bool EnableNormalVisualization = false; -constexpr bool EnableDebugPrints = false; -constexpr bool UsePerPixelLighting = true; - namespace aurora::gfx::gx { using namespace fmt::literals; using namespace std::string_literals; @@ -140,44 +137,44 @@ static bool formatHasAlpha(u32 format) { static 
std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const ShaderConfig& config, const TevStage& stage) { switch (arg) { - DEFAULT_FATAL("invalid color arg {}", static_cast(arg)); + DEFAULT_FATAL("invalid color arg {}", underlying(arg)); case GX_CC_CPREV: return "prev.rgb"; case GX_CC_APREV: - return "vec3(prev.a)"; + return "vec3f(prev.a)"; case GX_CC_C0: return "tevreg0.rgb"; case GX_CC_A0: - return "vec3(tevreg0.a)"; + return "vec3f(tevreg0.a)"; case GX_CC_C1: return "tevreg1.rgb"; case GX_CC_A1: - return "vec3(tevreg1.a)"; + return "vec3f(tevreg1.a)"; case GX_CC_C2: return "tevreg2.rgb"; case GX_CC_A2: - return "vec3(tevreg2.a)"; + return "vec3f(tevreg2.a)"; case GX_CC_TEXC: { CHECK(stage.texMapId != GX_TEXMAP_NULL, "unmapped texture for stage {}", stageIdx); CHECK(stage.texMapId >= GX_TEXMAP0 && stage.texMapId <= GX_TEXMAP7, "invalid texture {} for stage {}", - static_cast(stage.texMapId), stageIdx); + underlying(stage.texMapId), stageIdx); const auto& swap = config.tevSwapTable[stage.tevSwapTex]; return fmt::format("sampled{}.{}{}{}", stageIdx, chan_comp(swap.red), chan_comp(swap.green), chan_comp(swap.blue)); } case GX_CC_TEXA: { CHECK(stage.texMapId != GX_TEXMAP_NULL, "unmapped texture for stage {}", stageIdx); CHECK(stage.texMapId >= GX_TEXMAP0 && stage.texMapId <= GX_TEXMAP7, "invalid texture {} for stage {}", - static_cast(stage.texMapId), stageIdx); + underlying(stage.texMapId), stageIdx); const auto& swap = config.tevSwapTable[stage.tevSwapTex]; - return fmt::format("vec3(sampled{}.{})", stageIdx, chan_comp(swap.alpha)); + return fmt::format("vec3f(sampled{}.{})", stageIdx, chan_comp(swap.alpha)); } case GX_CC_RASC: { CHECK(stage.channelId != GX_COLOR_NULL, "unmapped color channel for stage {}", stageIdx); if (stage.channelId == GX_COLOR_ZERO) { - return "vec3(0.0)"; + return "vec3f(0.0)"; } CHECK(stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1, "invalid color channel {} for stage {}", - static_cast(stage.channelId), 
stageIdx); + underlying(stage.channelId), stageIdx); u32 idx = stage.channelId - GX_COLOR0A0; const auto& swap = config.tevSwapTable[stage.tevSwapRas]; return fmt::format("rast{}.{}{}{}", idx, chan_comp(swap.red), chan_comp(swap.green), chan_comp(swap.blue)); @@ -185,37 +182,37 @@ static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const Shade case GX_CC_RASA: { CHECK(stage.channelId != GX_COLOR_NULL, "unmapped color channel for stage {}", stageIdx); if (stage.channelId == GX_COLOR_ZERO) { - return "vec3(0.0)"; + return "vec3f(0.0)"; } CHECK(stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1, "invalid color channel {} for stage {}", - static_cast(stage.channelId), stageIdx); + underlying(stage.channelId), stageIdx); u32 idx = stage.channelId - GX_COLOR0A0; const auto& swap = config.tevSwapTable[stage.tevSwapRas]; - return fmt::format("vec3(rast{}.{})", idx, chan_comp(swap.alpha)); + return fmt::format("vec3f(rast{}.{})", idx, chan_comp(swap.alpha)); } case GX_CC_ONE: - return "vec3(1.0)"; + return "vec3f(1.0)"; case GX_CC_HALF: - return "vec3(0.5)"; + return "vec3f(0.5)"; case GX_CC_KONST: { switch (stage.kcSel) { - DEFAULT_FATAL("invalid kcSel {}", static_cast(stage.kcSel)); + DEFAULT_FATAL("invalid kcSel {}", underlying(stage.kcSel)); case GX_TEV_KCSEL_8_8: - return "vec3(1.0)"; + return "vec3f(1.0)"; case GX_TEV_KCSEL_7_8: - return "vec3(7.0/8.0)"; + return "vec3f(7.0/8.0)"; case GX_TEV_KCSEL_6_8: - return "vec3(6.0/8.0)"; + return "vec3f(6.0/8.0)"; case GX_TEV_KCSEL_5_8: - return "vec3(5.0/8.0)"; + return "vec3f(5.0/8.0)"; case GX_TEV_KCSEL_4_8: - return "vec3(4.0/8.0)"; + return "vec3f(4.0/8.0)"; case GX_TEV_KCSEL_3_8: - return "vec3(3.0/8.0)"; + return "vec3f(3.0/8.0)"; case GX_TEV_KCSEL_2_8: - return "vec3(2.0/8.0)"; + return "vec3f(2.0/8.0)"; case GX_TEV_KCSEL_1_8: - return "vec3(1.0/8.0)"; + return "vec3f(1.0/8.0)"; case GX_TEV_KCSEL_K0: return "ubuf.kcolor0.rgb"; case GX_TEV_KCSEL_K1: @@ -225,41 +222,41 @@ static std::string 
color_arg_reg(GXTevColorArg arg, size_t stageIdx, const Shade case GX_TEV_KCSEL_K3: return "ubuf.kcolor3.rgb"; case GX_TEV_KCSEL_K0_R: - return "vec3(ubuf.kcolor0.r)"; + return "vec3f(ubuf.kcolor0.r)"; case GX_TEV_KCSEL_K1_R: - return "vec3(ubuf.kcolor1.r)"; + return "vec3f(ubuf.kcolor1.r)"; case GX_TEV_KCSEL_K2_R: - return "vec3(ubuf.kcolor2.r)"; + return "vec3f(ubuf.kcolor2.r)"; case GX_TEV_KCSEL_K3_R: - return "vec3(ubuf.kcolor3.r)"; + return "vec3f(ubuf.kcolor3.r)"; case GX_TEV_KCSEL_K0_G: - return "vec3(ubuf.kcolor0.g)"; + return "vec3f(ubuf.kcolor0.g)"; case GX_TEV_KCSEL_K1_G: - return "vec3(ubuf.kcolor1.g)"; + return "vec3f(ubuf.kcolor1.g)"; case GX_TEV_KCSEL_K2_G: - return "vec3(ubuf.kcolor2.g)"; + return "vec3f(ubuf.kcolor2.g)"; case GX_TEV_KCSEL_K3_G: - return "vec3(ubuf.kcolor3.g)"; + return "vec3f(ubuf.kcolor3.g)"; case GX_TEV_KCSEL_K0_B: - return "vec3(ubuf.kcolor0.b)"; + return "vec3f(ubuf.kcolor0.b)"; case GX_TEV_KCSEL_K1_B: - return "vec3(ubuf.kcolor1.b)"; + return "vec3f(ubuf.kcolor1.b)"; case GX_TEV_KCSEL_K2_B: - return "vec3(ubuf.kcolor2.b)"; + return "vec3f(ubuf.kcolor2.b)"; case GX_TEV_KCSEL_K3_B: - return "vec3(ubuf.kcolor3.b)"; + return "vec3f(ubuf.kcolor3.b)"; case GX_TEV_KCSEL_K0_A: - return "vec3(ubuf.kcolor0.a)"; + return "vec3f(ubuf.kcolor0.a)"; case GX_TEV_KCSEL_K1_A: - return "vec3(ubuf.kcolor1.a)"; + return "vec3f(ubuf.kcolor1.a)"; case GX_TEV_KCSEL_K2_A: - return "vec3(ubuf.kcolor2.a)"; + return "vec3f(ubuf.kcolor2.a)"; case GX_TEV_KCSEL_K3_A: - return "vec3(ubuf.kcolor3.a)"; + return "vec3f(ubuf.kcolor3.a)"; } } case GX_CC_ZERO: - return "vec3(0.0)"; + return "vec3f(0.0)"; } } @@ -334,7 +331,7 @@ static void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderI static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const ShaderConfig& config, const TevStage& stage) { switch (arg) { - DEFAULT_FATAL("invalid alpha arg {}", static_cast(arg)); + DEFAULT_FATAL("invalid alpha arg {}", underlying(arg)); case 
GX_CA_APREV: return "prev.a"; case GX_CA_A0: @@ -346,7 +343,7 @@ static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const Shade case GX_CA_TEXA: { CHECK(stage.texMapId != GX_TEXMAP_NULL, "unmapped texture for stage {}", stageIdx); CHECK(stage.texMapId >= GX_TEXMAP0 && stage.texMapId <= GX_TEXMAP7, "invalid texture {} for stage {}", - static_cast(stage.texMapId), stageIdx); + underlying(stage.texMapId), stageIdx); const auto& swap = config.tevSwapTable[stage.tevSwapTex]; return fmt::format("sampled{}.{}", stageIdx, chan_comp(swap.alpha)); } @@ -356,14 +353,14 @@ static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const Shade return "0.0"; } CHECK(stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1, "invalid color channel {} for stage {}", - static_cast(stage.channelId), stageIdx); + underlying(stage.channelId), stageIdx); u32 idx = stage.channelId - GX_COLOR0A0; const auto& swap = config.tevSwapTable[stage.tevSwapRas]; return fmt::format("rast{}.{}", idx, chan_comp(swap.alpha)); } case GX_CA_KONST: { switch (stage.kaSel) { - DEFAULT_FATAL("invalid kaSel {}", static_cast(stage.kaSel)); + DEFAULT_FATAL("invalid kaSel {}", underlying(stage.kaSel)); case GX_TEV_KASEL_8_8: return "1.0"; case GX_TEV_KASEL_7_8: @@ -421,7 +418,7 @@ static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const Shade static std::string_view tev_op(GXTevOp op) { switch (op) { - DEFAULT_FATAL("unimplemented tev op {}", static_cast(op)); + DEFAULT_FATAL("unimplemented tev op {}", underlying(op)); case GX_TEV_ADD: return ""sv; case GX_TEV_SUB: @@ -431,7 +428,7 @@ static std::string_view tev_op(GXTevOp op) { static std::string_view tev_bias(GXTevBias bias) { switch (bias) { - DEFAULT_FATAL("invalid tev bias {}", static_cast(bias)); + DEFAULT_FATAL("invalid tev bias {}", underlying(bias)); case GX_TB_ZERO: return ""sv; case GX_TB_ADDHALF: @@ -444,7 +441,7 @@ static std::string_view tev_bias(GXTevBias bias) { static std::string 
alpha_compare(GXCompare comp, u8 ref, bool& valid) { const float fref = ref / 255.f; switch (comp) { - DEFAULT_FATAL("invalid alpha comp {}", static_cast(comp)); + DEFAULT_FATAL("invalid alpha comp {}", underlying(comp)); case GX_NEVER: return "false"s; case GX_LESS: @@ -467,7 +464,7 @@ static std::string alpha_compare(GXCompare comp, u8 ref, bool& valid) { static std::string_view tev_scale(GXTevScale scale) { switch (scale) { - DEFAULT_FATAL("invalid tev scale {}", static_cast(scale)); + DEFAULT_FATAL("invalid tev scale {}", underlying(scale)); case GX_CS_SCALE_1: return ""sv; case GX_CS_SCALE_2: @@ -484,9 +481,9 @@ static inline std::string vtx_attr(const ShaderConfig& config, GXAttr attr) { if (type == GX_NONE) { if (attr == GX_VA_NRM) { // Default normal - return "vec3(1.0, 0.0, 0.0)"s; + return "vec3f(1.0, 0.0, 0.0)"s; } - UNLIKELY FATAL("unmapped vtx attr {}", static_cast(attr)); + UNLIKELY FATAL("unmapped vtx attr {}", underlying(attr)); } if (attr == GX_VA_POS) { return "in_pos"s; @@ -502,7 +499,7 @@ static inline std::string vtx_attr(const ShaderConfig& config, GXAttr attr) { const auto idx = attr - GX_VA_TEX0; return fmt::format("in_tex{}_uv", idx); } - UNLIKELY FATAL("unhandled vtx attr {}", static_cast(attr)); + UNLIKELY FATAL("unhandled vtx attr {}", underlying(attr)); } static inline std::string texture_conversion(const TextureConfig& tex, u32 stageIdx, u32 texMapId) { @@ -520,7 +517,7 @@ static inline std::string texture_conversion(const TextureConfig& tex, u32 stage // FIXME HACK if (!is_palette_format(tex.loadFmt)) { // Perform intensity conversion - out += fmt::format("\n sampled{0} = vec4(intensityF32(sampled{0}.rgb), 0.f, 0.f, 1.f);", stageIdx); + out += fmt::format("\n sampled{0} = vec4f(intensityF32(sampled{0}.rgb), 0.f, 0.f, 1.f);", stageIdx); } break; } @@ -531,7 +528,7 @@ static inline std::string texture_conversion(const TextureConfig& tex, u32 stage case GX_TF_I8: case GX_TF_R8_PC: // Splat R to RGBA - out += fmt::format("\n sampled{0} = 
vec4(sampled{0}.r);", stageIdx); + out += fmt::format("\n sampled{0} = vec4f(sampled{0}.r);", stageIdx); break; } return out; @@ -560,7 +557,7 @@ ShaderInfo build_shader_info(const ShaderConfig& config) noexcept { // } ShaderInfo info{ - .uniformSize = 64 * 3, // mv, mvInv, proj + .uniformSize = sizeof(PnMtx) + sizeof(Mat4x4), // pos_mtx, nrm_mtx, proj }; for (int i = 0; i < config.tevStageCount; ++i) { const auto& stage = config.tevStages[i]; @@ -583,7 +580,7 @@ ShaderInfo build_shader_info(const ShaderConfig& config) noexcept { info.writesTevReg.set(stage.alphaOp.outReg); } } - info.uniformSize += info.loadsTevReg.count() * 16; + info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4); bool lightingEnabled = false; for (int i = 0; i < info.sampledColorChannels.size(); ++i) { if (info.sampledColorChannels.test(i)) { @@ -596,27 +593,27 @@ ShaderInfo build_shader_info(const ShaderConfig& config) noexcept { } if (lightingEnabled) { // Lights + light state for all channels - info.uniformSize += 16 + (80 * GX::MaxLights); + info.uniformSize += sizeof(Vec4) + sizeof(Light) * GX::MaxLights; } for (int i = 0; i < info.sampledColorChannels.size(); ++i) { if (info.sampledColorChannels.test(i)) { const auto& cc = config.colorChannels[i * 2]; if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) { - info.uniformSize += 16; + info.uniformSize += sizeof(Vec4); } if (cc.matSrc == GX_SRC_REG) { - info.uniformSize += 16; + info.uniformSize += sizeof(Vec4); } const auto& cca = config.colorChannels[i * 2 + 1]; if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) { - info.uniformSize += 16; + info.uniformSize += sizeof(Vec4); } if (cca.matSrc == GX_SRC_REG) { - info.uniformSize += 16; + info.uniformSize += sizeof(Vec4); } } } - info.uniformSize += info.sampledKColors.count() * 16; + info.uniformSize += info.sampledKColors.count() * sizeof(Vec4); for (int i = 0; i < info.sampledTexCoords.size(); ++i) { if (!info.sampledTexCoords.test(i)) { continue; @@ -636,26 +633,192 @@ ShaderInfo 
build_shader_info(const ShaderConfig& config) noexcept { if (info.usesTexMtx.test(i)) { switch (info.texMtxTypes[i]) { case GX_TG_MTX2x4: - info.uniformSize += 32; + info.uniformSize += sizeof(Mat2x4); break; case GX_TG_MTX3x4: - info.uniformSize += 64; + info.uniformSize += sizeof(Mat3x4); break; default: break; } } } - info.uniformSize += info.usesPTTexMtx.count() * 64; + info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4); if (config.fogType != GX_FOG_NONE) { info.usesFog = true; - info.uniformSize += 32; + info.uniformSize += sizeof(Fog); } - info.uniformSize += info.sampledTextures.count() * 4; + info.uniformSize += info.sampledTextures.count() * sizeof(u32); info.uniformSize = align_uniform(info.uniformSize); return info; } +struct StorageLoadResult { + std::string attrLoad; + std::string_view arrType; +}; + +auto storage_load(const StorageConfig& mapping, u32 attrIdx) -> StorageLoadResult { + const std::string_view attrName = VtxAttributeNames[mapping.attr]; + + uint8_t compCnt = 0; + GXCompType compType = GX_U8; + switch (mapping.attr) { + case GX_VA_POS: + switch (mapping.cnt) { + case GX_POS_XY: + compCnt = 2; + break; + case GX_POS_XYZ: + compCnt = 3; + break; + default: + Log.fatal("storage_load: Unsupported {} component count {}", mapping.attr, mapping.cnt); + } + switch (mapping.compType) { + case GX_U8: + case GX_S8: + case GX_U16: + case GX_S16: + case GX_F32: + compType = mapping.compType; + break; + default: + Log.fatal("storage_load: Unsupported {} component type {}", mapping.attr, mapping.compType); + } + break; + case GX_VA_NRM: + switch (mapping.cnt) { + case GX_NRM_XYZ: + compCnt = 3; + break; + default: + Log.fatal("storage_load: Unsupported {} component count {}", mapping.attr, mapping.cnt); + } + switch (mapping.compType) { + case GX_S8: + case GX_S16: + case GX_F32: + compType = mapping.compType; + break; + default: + Log.fatal("storage_load: Unsupported {} component type {}", mapping.attr, mapping.compType); + } + break; + 
case GX_VA_CLR0: + case GX_VA_CLR1: + switch (mapping.cnt) { + case GX_CLR_RGB: + compCnt = 3; + break; + case GX_CLR_RGBA: + compCnt = 4; + break; + default: + Log.fatal("storage_load: Unsupported {} component count {}", mapping.attr, mapping.cnt); + } + switch (mapping.compType) { + case GX_RGB8: + case GX_RGBA8: + compType = mapping.compType; + break; + default: + Log.fatal("storage_load: Unsupported {} component type {}", mapping.attr, mapping.compType); + } + break; + case GX_VA_TEX0: + case GX_VA_TEX1: + case GX_VA_TEX2: + case GX_VA_TEX3: + case GX_VA_TEX4: + case GX_VA_TEX5: + case GX_VA_TEX6: + case GX_VA_TEX7: + switch (mapping.cnt) { + case GX_TEX_S: + compCnt = 1; + break; + case GX_TEX_ST: + compCnt = 2; + break; + default: + Log.fatal("storage_load: Unsupported {} component count {}", mapping.attr, mapping.cnt); + } + switch (mapping.compType) { + case GX_U8: + case GX_S8: + case GX_U16: + case GX_S16: + case GX_F32: + compType = mapping.compType; + break; + default: + Log.fatal("storage_load: Unsupported {} component type {}", mapping.attr, mapping.compType); + } + break; + default: + Log.fatal("storage_load: Unsupported attribute {}", mapping.attr); + } + + const auto [div, rem] = std::div(attrIdx, 4); + std::string idxFetch = fmt::format("in_dl{}[{}]", div, rem); + + std::string_view arrType; + std::string attrLoad; + + switch (compType) { + case GX_U16: + switch (compCnt) { + case 2: + arrType = "u32"; + attrLoad = fmt::format("fetch_u16_2(&v_arr_{}, {}, {})", attrName, idxFetch, mapping.frac); + break; + default: + Log.fatal("storage_load: Unsupported {} count {}", compType, compCnt); + } + break; + case GX_S16: + switch (compCnt) { + case 3: + arrType = "i32"; + attrLoad = fmt::format("fetch_i16_3(&v_arr_{}, {}, {})", attrName, idxFetch, mapping.frac); + break; + default: + Log.fatal("storage_load: Unsupported {} count {}", compType, compCnt); + } + break; + case GX_F32: + switch (compCnt) { + case 1: + arrType = "f32"; + attrLoad = 
fmt::format("v_arr_{}[{}]", attrName, idxFetch); + break; + case 2: + arrType = "vec2f"; + attrLoad = fmt::format("v_arr_{}[{}]", attrName, idxFetch); + break; + case 3: + arrType = "f32"; + attrLoad = fmt::format("fetch_f32_3(&v_arr_{}, {})", attrName, idxFetch); + break; + case 4: + arrType = "vec4f"; + attrLoad = fmt::format("v_arr_{}[{}]", attrName, idxFetch); + break; + default: + Log.fatal("storage_load: Unsupported {} count {}", compType, compCnt); + } + break; + default: + Log.fatal("storage_load: Unimplemented {}", compType); + } + + return { + .attrLoad = attrLoad, + .arrType = arrType, + }; +} + wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& info) noexcept { const auto hash = xxh3_hash(config); const auto it = g_gxCachedShaders.find(hash); @@ -727,33 +890,19 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in if (config.indexedAttributeCount > 0) { // Display list attributes int currAttrIdx = 0; - for (GXAttr attr{}; attr < MaxVtxAttr; attr = GXAttr(attr + 1)) { + for (GXAttr attr{}; attr < MaxVtxAttr; attr = static_cast(attr + 1)) { // Indexed attributes if (config.vtxAttrs[attr] != GX_INDEX8 && config.vtxAttrs[attr] != GX_INDEX16) { continue; } - const auto [div, rem] = std::div(currAttrIdx, 4); - std::string_view attrName; - bool addUniformBinding = true; - if (config.attrMapping[attr] != attr) { - attrName = VtxAttributeNames[config.attrMapping[attr]]; - addUniformBinding = false; - } else { - attrName = VtxAttributeNames[attr]; - } - vtxXfrAttrsPre += - fmt::format("\n var {} = v_arr_{}[in_dl{}[{}]];", vtx_attr(config, attr), attrName, div, rem); - if (addUniformBinding) { - std::string_view arrType; - if (attr == GX_VA_POS || attr == GX_VA_NRM) { - arrType = "vec3"; - } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) { - arrType = "vec2"; - } - uniformBindings += fmt::format(FMT_STRING("\n@group(0) @binding({})" - "\nvar v_arr_{}: array<{}>;"), - uniBindingIdx++, attrName, arrType); 
- } + const auto& mapping = config.attrMapping[attr]; + std::string_view attrName = VtxAttributeNames[mapping.attr]; + const auto result = storage_load(mapping, currAttrIdx); + vtxXfrAttrsPre += fmt::format("\n var {} = {};", vtx_attr(config, attr), result.attrLoad); + uniformBindings += fmt::format( + "\n@group(0) @binding({})" + "\nvar v_arr_{}: array<{}>;", + uniBindingIdx++, attrName, result.arrType); ++currAttrIdx; } auto [num4xAttrArrays, rem] = std::div(currAttrIdx, 4); @@ -769,7 +918,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } else { vtxInAttrs += "\n "; } - vtxInAttrs += fmt::format("@location({}) in_dl{}: vec4", locIdx++, i); + vtxInAttrs += fmt::format("@location({}) in_dl{}: vec4u", locIdx++, i); } for (u32 i = 0; i < num2xAttrArrays; ++i) { if (locIdx > 0) { @@ -777,7 +926,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } else { vtxInAttrs += "\n "; } - vtxInAttrs += fmt::format("@location({}) in_dl{}: vec2", locIdx++, num4xAttrArrays + i); + vtxInAttrs += fmt::format("@location({}) in_dl{}: vec2u", locIdx++, num4xAttrArrays + i); } } for (GXAttr attr{}; attr < MaxVtxAttr; attr = GXAttr(attr + 1)) { @@ -791,23 +940,27 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in vtxInAttrs += "\n "; } if (attr == GX_VA_POS) { - vtxInAttrs += fmt::format("@location({}) in_pos: vec3", locIdx++); + vtxInAttrs += fmt::format("@location({}) in_pos: vec3f", locIdx++); } else if (attr == GX_VA_NRM) { - vtxInAttrs += fmt::format("@location({}) in_nrm: vec3", locIdx++); + vtxInAttrs += fmt::format("@location({}) in_nrm: vec3f", locIdx++); } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) { - vtxInAttrs += fmt::format("@location({}) in_clr{}: vec4", locIdx++, attr - GX_VA_CLR0); + vtxInAttrs += fmt::format("@location({}) in_clr{}: vec4f", locIdx++, attr - GX_VA_CLR0); } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) { - vtxInAttrs += 
fmt::format("@location({}) in_tex{}_uv: vec2", locIdx++, attr - GX_VA_TEX0); + vtxInAttrs += fmt::format("@location({}) in_tex{}_uv: vec2f", locIdx++, attr - GX_VA_TEX0); } } vtxXfrAttrsPre += fmt::format( - "\n var mv_pos = mul4x3(ubuf.pos_mtx, vec4({}, 1.0));" - "\n var mv_nrm = normalize(mul4x3(ubuf.nrm_mtx, vec4({}, 0.0)));" - "\n out.pos = mul4x4(ubuf.proj, vec4(mv_pos, 1.0));" - "\n out.pos.z += out.pos.w;", + "\n var mv_pos = vec4({}, 1.0) * ubuf.pos_mtx;" + "\n var mv_nrm = normalize(vec4({}, 0.0) * ubuf.nrm_mtx);" + "\n out.pos = vec4f(mv_pos, 1.0) * ubuf.proj;", vtx_attr(config, GX_VA_POS), vtx_attr(config, GX_VA_NRM)); + if constexpr (UseReversedZ) { + vtxXfrAttrsPre += "\n out.pos.z = -out.pos.z;"; + } else { + vtxXfrAttrsPre += "\n out.pos.z += out.pos.w;"; + } if constexpr (EnableNormalVisualization) { - vtxOutAttrs += fmt::format("\n @location({}) nrm: vec3,", vtxOutIdx++); + vtxOutAttrs += fmt::format("\n @location({}) nrm: vec3f,", vtxOutIdx++); vtxXfrAttrsPre += "\n out.nrm = mv_nrm;"; } @@ -818,7 +971,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in { std::string outReg; switch (stage.colorOp.outReg) { - DEFAULT_FATAL("invalid colorOp outReg {}", static_cast(stage.colorOp.outReg)); + DEFAULT_FATAL("invalid colorOp outReg {}", underlying(stage.colorOp.outReg)); case GX_TEVPREV: outReg = "prev"; break; @@ -838,14 +991,14 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in color_arg_reg(stage.colorPass.d, idx, config, stage), tev_op(stage.colorOp.op), tev_bias(stage.colorOp.bias), tev_scale(stage.colorOp.scale)); if (stage.colorOp.clamp) { - op = fmt::format("clamp({}, vec3(0.0), vec3(1.0))", op); + op = fmt::format("clamp({}, vec3f(0.0), vec3f(1.0))", op); } - fragmentFn += fmt::format("\n // TEV stage {2}\n {0} = vec4({1}, {0}.a);", outReg, op, idx); + fragmentFn += fmt::format("\n // TEV stage {2}\n {0} = vec4f({1}, {0}.a);", outReg, op, idx); } { std::string outReg; switch 
(stage.alphaOp.outReg) { - DEFAULT_FATAL("invalid alphaOp outReg {}", static_cast(stage.alphaOp.outReg)); + DEFAULT_FATAL("invalid alphaOp outReg {}", underlying(stage.alphaOp.outReg)); case GX_TEVPREV: outReg = "prev.a"; break; @@ -871,17 +1024,17 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } } if (info.loadsTevReg.test(0)) { - uniBufAttrs += "\n tevprev: vec4,"; + uniBufAttrs += "\n tevprev: vec4f,"; fragmentFnPre += "\n var prev = ubuf.tevprev;"; } else { - fragmentFnPre += "\n var prev: vec4;"; + fragmentFnPre += "\n var prev: vec4f;"; } for (int i = 1 /* Skip TEVPREV */; i < info.loadsTevReg.size(); ++i) { if (info.loadsTevReg.test(i)) { - uniBufAttrs += fmt::format("\n tevreg{}: vec4,", i - 1); + uniBufAttrs += fmt::format("\n tevreg{}: vec4f,", i - 1); fragmentFnPre += fmt::format("\n var tevreg{0} = ubuf.tevreg{0};", i - 1); } else if (info.writesTevReg.test(i)) { - fragmentFnPre += fmt::format("\n var tevreg{0}: vec4;", i - 1); + fragmentFnPre += fmt::format("\n var tevreg{0}: vec4f;", i - 1); } } bool addedLightStruct = false; @@ -903,15 +1056,15 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in uniformPre += "\n" "struct Light {\n" - " pos: vec3,\n" - " dir: vec3,\n" - " color: vec4,\n" - " cos_att: vec3,\n" - " dist_att: vec3,\n" + " pos: vec3f,\n" + " dir: vec3f,\n" + " color: vec4f,\n" + " cos_att: vec3f,\n" + " dist_att: vec3f,\n" "};"; if (UsePerPixelLighting) { - vtxOutAttrs += fmt::format("\n @location({}) mv_pos: vec3,", vtxOutIdx++); - vtxOutAttrs += fmt::format("\n @location({}) mv_nrm: vec3,", vtxOutIdx++); + vtxOutAttrs += fmt::format("\n @location({}) mv_pos: vec3f,", vtxOutIdx++); + vtxOutAttrs += fmt::format("\n @location({}) mv_nrm: vec3f,", vtxOutIdx++); vtxXfrAttrs += fmt::format(FMT_STRING(R"""( out.mv_pos = mv_pos; out.mv_nrm = mv_nrm;)""")); @@ -920,16 +1073,16 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } if 
(cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) { - uniBufAttrs += fmt::format("\n cc{0}_amb: vec4,", i); + uniBufAttrs += fmt::format("\n cc{0}_amb: vec4f,", i); } if (cc.matSrc == GX_SRC_REG) { - uniBufAttrs += fmt::format("\n cc{0}_mat: vec4,", i); + uniBufAttrs += fmt::format("\n cc{0}_mat: vec4f,", i); } if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) { - uniBufAttrs += fmt::format("\n cc{0}a_amb: vec4,", i); + uniBufAttrs += fmt::format("\n cc{0}a_amb: vec4f,", i); } if (cca.matSrc == GX_SRC_REG) { - uniBufAttrs += fmt::format("\n cc{0}a_mat: vec4,", i); + uniBufAttrs += fmt::format("\n cc{0}a_mat: vec4f,", i); } // Output vertex color if necessary @@ -937,7 +1090,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in if (((cc.lightingEnabled && cc.ambSrc == GX_SRC_VTX) || cc.matSrc == GX_SRC_VTX || (cca.lightingEnabled && cca.matSrc == GX_SRC_VTX) || cca.matSrc == GX_SRC_VTX)) { if (UsePerPixelLighting) { - vtxOutAttrs += fmt::format("\n @location({}) clr{}: vec4,", vtxOutIdx++, vtxColorIdx); + vtxOutAttrs += fmt::format("\n @location({}) clr{}: vec4f,", vtxOutIdx++, vtxColorIdx); vtxXfrAttrs += fmt::format("\n out.clr{} = {};", vtxColorIdx, vtx_attr(config, static_cast(GX_VA_CLR0 + vtxColorIdx))); } @@ -969,11 +1122,11 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in if (cc.attnFn == GX_AF_NONE) { lightAttnFn = "attn = 1.0;"; } else if (cc.attnFn == GX_AF_SPOT) { - lightAttnFn = fmt::format(FMT_STRING(R"""( + lightAttnFn = fmt::format(R"""( var cosine = max(0.0, dot(ldir, light.dir)); - var cos_attn = dot(light.cos_att, vec3(1.0, cosine, cosine * cosine)); - var dist_attn = dot(light.dist_att, vec3(1.0, dist, dist2)); - attn = max(0.0, cos_attn / dist_attn);)""")); + var cos_attn = dot(light.cos_att, vec3f(1.0, cosine, cosine * cosine)); + var dist_attn = dot(light.dist_att, vec3f(1.0, dist, dist2)); + attn = max(0.0, cos_attn / dist_attn);)"""); } else if (cc.attnFn == GX_AF_SPEC) { 
diffFn = GX_DF_NONE; FATAL("AF_SPEC unimplemented"); @@ -1001,7 +1154,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in outVar = fmt::format("out.cc{}", i); posVar = "mv_pos"; } - auto lightFunc = fmt::format(FMT_STRING(R"""( + auto lightFunc = fmt::format(R"""( {{ var lighting = {5}; for (var i = 0u; i < {1}u; i++) {{ @@ -1016,14 +1169,14 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in lighting = lighting + (attn * diff * light.color); }} // TODO alpha lighting - {6} = vec4(({4} * clamp(lighting, vec4(0.0), vec4(1.0))).xyz, {4}.a); - }})"""), + {6} = vec4f(({4} * clamp(lighting, vec4f(0.0), vec4f(1.0))).xyz, {4}.a); + }})""", i, GX::MaxLights, lightAttnFn, lightDiffFn, matSrc, ambSrc, outVar, posVar); if (UsePerPixelLighting) { - fragmentFnPre += fmt::format("\n var rast{}: vec4;", i); + fragmentFnPre += fmt::format("\n var rast{}: vec4f;", i); fragmentFnPre += lightFunc; } else { - vtxOutAttrs += fmt::format("\n @location({}) cc{}: vec4,", vtxOutIdx++, i); + vtxOutAttrs += fmt::format("\n @location({}) cc{}: vec4f,", vtxOutIdx++, i); vtxXfrAttrs += lightFunc; fragmentFnPre += fmt::format("\n var rast{0} = in.cc{0};", i); } @@ -1032,7 +1185,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in // Color will already be written to clr{} fragmentFnPre += fmt::format("\n var rast{0} = in.clr{0};", vtxColorIdx); } else { - vtxOutAttrs += fmt::format("\n @location({}) cc{}: vec4,", vtxOutIdx++, i); + vtxOutAttrs += fmt::format("\n @location({}) cc{}: vec4f,", vtxOutIdx++, i); vtxXfrAttrs += fmt::format("\n out.cc{} = {};", i, vtx_attr(config, GXAttr(GX_VA_CLR0 + vtxColorIdx))); fragmentFnPre += fmt::format("\n var rast{0} = in.cc{0};", i); } @@ -1046,7 +1199,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } for (int i = 0; i < info.sampledKColors.size(); ++i) { if (info.sampledKColors.test(i)) { - uniBufAttrs += fmt::format("\n 
kcolor{}: vec4,", i); + uniBufAttrs += fmt::format("\n kcolor{}: vec4f,", i); } } for (int i = 0; i < info.sampledTexCoords.size(); ++i) { @@ -1054,22 +1207,21 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in continue; } const auto& tcg = config.tcgs[i]; - vtxOutAttrs += fmt::format("\n @location({}) tex{}_uv: vec2,", vtxOutIdx++, i); + vtxOutAttrs += fmt::format("\n @location({}) tex{}_uv: vec2f,", vtxOutIdx++, i); if (tcg.src >= GX_TG_TEX0 && tcg.src <= GX_TG_TEX7) { - vtxXfrAttrs += fmt::format("\n var tc{} = vec4({}, 0.0, 1.0);", i, + vtxXfrAttrs += fmt::format("\n var tc{} = vec4f({}, 0.0, 1.0);", i, vtx_attr(config, GXAttr(GX_VA_TEX0 + (tcg.src - GX_TG_TEX0)))); } else if (tcg.src == GX_TG_POS) { - vtxXfrAttrs += fmt::format("\n var tc{} = vec4(in_pos, 1.0);", i); + vtxXfrAttrs += fmt::format("\n var tc{} = vec4f(in_pos, 1.0);", i); } else if (tcg.src == GX_TG_NRM) { - vtxXfrAttrs += fmt::format("\n var tc{} = vec4(in_nrm, 1.0);", i); + vtxXfrAttrs += fmt::format("\n var tc{} = vec4f(in_nrm, 1.0);", i); } else - UNLIKELY FATAL("unhandled tcg src {}", static_cast(tcg.src)); + UNLIKELY FATAL("unhandled tcg src {}", underlying(tcg.src)); if (tcg.mtx == GX_IDENTITY) { vtxXfrAttrs += fmt::format("\n var tc{0}_tmp = tc{0}.xyz;", i); } else { u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3; - vtxXfrAttrs += fmt::format("\n var tc{0}_tmp = mul{2}(ubuf.texmtx{1}, tc{0});", i, texMtxIdx, - info.texMtxTypes[texMtxIdx] == GX_TG_MTX3x4 ? 
"4x3" : "4x2"); + vtxXfrAttrs += fmt::format("\n var tc{0}_tmp = tc{0} * ubuf.texmtx{1};", i, texMtxIdx); } if (tcg.normalize) { vtxXfrAttrs += fmt::format("\n tc{0}_tmp = normalize(tc{0}_tmp);", i); @@ -1078,8 +1230,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in vtxXfrAttrs += fmt::format("\n var tc{0}_proj = tc{0}_tmp;", i); } else { u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3; - vtxXfrAttrs += - fmt::format("\n var tc{0}_proj = mul4x3(ubuf.postmtx{1}, vec4(tc{0}_tmp.xyz, 1.0));", i, postMtxIdx); + vtxXfrAttrs += fmt::format("\n var tc{0}_proj = vec4f(tc{0}_tmp.xyz, 1.0) * ubuf.postmtx{1};", i, postMtxIdx); } vtxXfrAttrs += fmt::format("\n out.tex{0}_uv = tc{0}_proj.xy;", i); } @@ -1091,13 +1242,13 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in || !info.sampledTextures.test(stage.texMapId)) { continue; } - std::string uvIn = fmt::format("in.tex{0}_uv", static_cast(stage.texCoordId)); + std::string uvIn = fmt::format("in.tex{0}_uv", underlying(stage.texCoordId)); const auto& texConfig = config.textureConfig[stage.texMapId]; if (is_palette_format(texConfig.loadFmt)) { std::string_view suffix; if (!is_palette_format(texConfig.copyFmt)) { switch (texConfig.loadFmt) { - DEFAULT_FATAL("unimplemented palette format {}", static_cast(texConfig.loadFmt)); + DEFAULT_FATAL("unimplemented palette format {}", texConfig.loadFmt); case GX_TF_C4: suffix = "I4"sv; break; @@ -1110,37 +1261,37 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } } fragmentFnPre += fmt::format("\n var sampled{0} = textureSamplePalette{3}(tex{1}, tex{1}_samp, {2}, tlut{1});", - i, static_cast(stage.texMapId), uvIn, suffix); + i, underlying(stage.texMapId), uvIn, suffix); } else { fragmentFnPre += fmt::format("\n var sampled{0} = textureSampleBias(tex{1}, tex{1}_samp, {2}, ubuf.tex{1}_lod);", i, - static_cast(stage.texMapId), uvIn); + underlying(stage.texMapId), uvIn); } fragmentFnPre += 
texture_conversion(texConfig, i, stage.texMapId); } for (int i = 0; i < info.usesTexMtx.size(); ++i) { if (info.usesTexMtx.test(i)) { switch (info.texMtxTypes[i]) { - DEFAULT_FATAL("unhandled tex mtx type {}", static_cast(info.texMtxTypes[i])); + DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i])); case GX_TG_MTX2x4: - uniBufAttrs += fmt::format("\n texmtx{}: mtx4x2,", i); + uniBufAttrs += fmt::format("\n texmtx{}: mat2x4f,", i); break; case GX_TG_MTX3x4: - uniBufAttrs += fmt::format("\n texmtx{}: mtx4x3,", i); + uniBufAttrs += fmt::format("\n texmtx{}: mat3x4f,", i); break; } } } for (int i = 0; i < info.usesPTTexMtx.size(); ++i) { if (info.usesPTTexMtx.test(i)) { - uniBufAttrs += fmt::format("\n postmtx{}: mtx4x3,", i); + uniBufAttrs += fmt::format("\n postmtx{}: mat3x4f,", i); } } if (info.usesFog) { uniformPre += "\n" "struct Fog {\n" - " color: vec4,\n" + " color: vec4f,\n" " a: f32,\n" " b: f32,\n" " c: f32,\n" @@ -1148,9 +1299,11 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in "}"; uniBufAttrs += "\n fog: Fog,"; - fragmentFn += "\n // Fog\n var fogF = clamp((ubuf.fog.a / (ubuf.fog.b - in.pos.z)) - ubuf.fog.c, 0.0, 1.0);"; + fragmentFn += + fmt::format("\n // Fog\n var fogF = clamp((ubuf.fog.a / (ubuf.fog.b - {})) - ubuf.fog.c, 0.0, 1.0);", + UseReversedZ ? 
"(1.0 - in.pos.z)" : "in.pos.z"); switch (config.fogType) { - DEFAULT_FATAL("invalid fog type {}", static_cast(config.fogType)); + DEFAULT_FATAL("invalid fog type {}", underlying(config.fogType)); case GX_FOG_PERSP_LIN: case GX_FOG_ORTHO_LIN: fragmentFn += "\n var fogZ = fogF;"; @@ -1174,7 +1327,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in "\n var fogZ = exp2(-8.0 * fogF * fogF);"; break; } - fragmentFn += "\n prev = vec4(mix(prev.rgb, ubuf.fog.color.rgb, clamp(fogZ, 0.0, 1.0)), prev.a);"; + fragmentFn += "\n prev = vec4f(mix(prev.rgb, ubuf.fog.color.rgb, clamp(fogZ, 0.0, 1.0)), prev.a);"; } size_t texBindIdx = 0; for (int i = 0; i < info.sampledTextures.size(); ++i) { @@ -1183,23 +1336,27 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } uniBufAttrs += fmt::format("\n tex{}_lod: f32,", i); - sampBindings += fmt::format(FMT_STRING("\n@group(1) @binding({})\n" - "var tex{}_samp: sampler;"), - texBindIdx, i); + sampBindings += fmt::format( + "\n@group(1) @binding({})\n" + "var tex{}_samp: sampler;", + texBindIdx, i); const auto& texConfig = config.textureConfig[i]; if (is_palette_format(texConfig.loadFmt)) { - texBindings += fmt::format(FMT_STRING("\n@group(2) @binding({})\n" - "var tex{}: texture_2d<{}>;"), - texBindIdx, i, is_palette_format(texConfig.copyFmt) ? "i32"sv : "f32"sv); + texBindings += fmt::format( + "\n@group(2) @binding({})\n" + "var tex{}: texture_2d<{}>;", + texBindIdx, i, is_palette_format(texConfig.copyFmt) ? 
"i32"sv : "f32"sv); ++texBindIdx; - texBindings += fmt::format(FMT_STRING("\n@group(2) @binding({})\n" - "var tlut{}: texture_2d;"), - texBindIdx, i); + texBindings += fmt::format( + "\n@group(2) @binding({})\n" + "var tlut{}: texture_2d;", + texBindIdx, i); } else { - texBindings += fmt::format(FMT_STRING("\n@group(2) @binding({})\n" - "var tex{}: texture_2d;"), - texBindIdx, i); + texBindings += fmt::format( + "\n@group(2) @binding({})\n" + "var tex{}: texture_2d;", + texBindIdx, i); } ++texBindIdx; } @@ -1212,7 +1369,7 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in if (comp0Valid || comp1Valid) { fragmentFn += "\n // Alpha compare"; switch (config.alphaCompare.op) { - DEFAULT_FATAL("invalid alpha compare op {}", static_cast(config.alphaCompare.op)); + DEFAULT_FATAL("invalid alpha compare op {}", underlying(config.alphaCompare.op)); case GX_AOP_AND: fragmentFn += fmt::format("\n if (!({} && {})) {{ discard; }}", comp0, comp1); break; @@ -1229,82 +1386,104 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in } } if constexpr (EnableNormalVisualization) { - fragmentFn += "\n prev = vec4(in.nrm, prev.a);"; + fragmentFn += "\n prev = vec4f(in.nrm, prev.a);"; } - const auto shaderSource = fmt::format(FMT_STRING(R"""( -struct mtx4x4 {{ mx: vec4, my: vec4, mz: vec4, mw: vec4 }}; -struct mtx4x3 {{ mx: vec4, my: vec4, mz: vec4, mw: vec4 }}; -struct mtx4x2 {{ mx: vec4, my: vec4, }}; -// TODO convert these to row major -fn mul4x4(m: mtx4x4, v: vec4) -> vec4 {{ - var mx = vec4(m.mx.x, m.my.x, m.mz.x, m.mw.x); - var my = vec4(m.mx.y, m.my.y, m.mz.y, m.mw.y); - var mz = vec4(m.mx.z, m.my.z, m.mz.z, m.mw.z); - var mw = vec4(m.mx.w, m.my.w, m.mz.w, m.mw.w); - return vec4(dot(mx, v), dot(my, v), dot(mz, v), dot(mw, v)); + const auto shaderSource = fmt::format(R"""( +fn fetch_f32_3(p: ptr>, idx: u32) -> vec3 {{ + var start = idx * 3; + return vec3( + p[start], + p[start + 1], + p[start + 2], + ); }} -fn mul4x3(m: 
mtx4x3, v: vec4) -> vec3 {{ - var mx = vec4(m.mx.x, m.my.x, m.mz.x, m.mw.x); - var my = vec4(m.mx.y, m.my.y, m.mz.y, m.mw.y); - var mz = vec4(m.mx.z, m.my.z, m.mz.z, m.mw.z); - return vec3(dot(mx, v), dot(my, v), dot(mz, v)); +fn fetch_u8_2(p: ptr>, idx: u32, frac: u32) -> vec2 {{ + var v0 = p[idx / 2]; + var r = (idx % 2) != 0; + var o0 = select(extractBits(v0, 0, 8), extractBits(v0, 16, 8), r); + var o1 = select(extractBits(v0, 8, 8), extractBits(v0, 24, 8), r); + return vec2( + f32(o0) / f32(1 << frac), + f32(o1) / f32(1 << frac), + ); }} -fn mul4x2(m: mtx4x2, v: vec4) -> vec2 {{ - return vec2(dot(m.mx, v), dot(m.my, v)); +fn fetch_u16_2(p: ptr>, idx: u32, frac: u32) -> vec2 {{ + var v0 = p[idx]; + var o0 = extractBits(v0, 0, 16); + var o1 = extractBits(v0, 16, 16); + return vec2( + f32(o0) / f32(1 << frac), + f32(o1) / f32(1 << frac), + ); +}} +fn fetch_i16_3(p: ptr>, idx: u32, frac: u32) -> vec3 {{ + var n = idx * 3; + var d = n / 2; + var r = (n % 2) != 0; + var v0 = p[d]; + var v1 = p[d + 1]; + var o0 = select(extractBits(v0, 0, 16), extractBits(v0, 16, 16), r); + var o1 = select(extractBits(v0, 16, 16), extractBits(v1, 0, 16), r); + var o2 = select(extractBits(v1, 0, 16), extractBits(v1, 16, 16), r); + return vec3( + f32(o0) / f32(1 << frac), + f32(o1) / f32(1 << frac), + f32(o2) / f32(1 << frac), + ); }} {10} struct Uniform {{ - pos_mtx: mtx4x3, - nrm_mtx: mtx4x3, - proj: mtx4x4,{0} + pos_mtx: mat3x4f, + nrm_mtx: mat3x4f, + proj: mat4x4f,{0} }}; @group(0) @binding(0) var ubuf: Uniform;{3}{1}{2} struct VertexOutput {{ - @builtin(position) pos: vec4,{4} + @builtin(position) pos: vec4f,{4} }}; -fn intensityF32(rgb: vec3) -> f32 {{ +fn intensityF32(rgb: vec3f) -> f32 {{ // RGB to intensity conversion // https://github.com/dolphin-emu/dolphin/blob/4cd48e609c507e65b95bca5afb416b59eaf7f683/Source/Core/VideoCommon/TextureConverterShaderGen.cpp#L237-L241 return dot(rgb, vec3(0.257, 0.504, 0.098)) + 16.0 / 255.0; }} -fn intensityI4(rgb: vec3) -> i32 {{ +fn 
intensityI4(rgb: vec3f) -> i32 {{ return i32(intensityF32(rgb) * 16.f); }} -fn textureSamplePalette(tex: texture_2d, samp: sampler, uv: vec2, tlut: texture_2d) -> vec4 {{ +fn textureSamplePalette(tex: texture_2d, samp: sampler, uv: vec2f, tlut: texture_2d) -> vec4f {{ // Gather index values var i = textureGather(0, tex, samp, uv); // Load palette colors - var c0 = textureLoad(tlut, vec2(i[0], 0), 0); - var c1 = textureLoad(tlut, vec2(i[1], 0), 0); - var c2 = textureLoad(tlut, vec2(i[2], 0), 0); - var c3 = textureLoad(tlut, vec2(i[3], 0), 0); + var c0 = textureLoad(tlut, vec2i(i[0], 0), 0); + var c1 = textureLoad(tlut, vec2i(i[1], 0), 0); + var c2 = textureLoad(tlut, vec2i(i[2], 0), 0); + var c3 = textureLoad(tlut, vec2i(i[3], 0), 0); // Perform bilinear filtering - var f = fract(uv * vec2(textureDimensions(tex)) + 0.5); + var f = fract(uv * vec2f(textureDimensions(tex)) + 0.5); var t0 = mix(c3, c2, f.x); var t1 = mix(c0, c1, f.x); return mix(t0, t1, f.y); }} -fn textureSamplePaletteI4(tex: texture_2d, samp: sampler, uv: vec2, tlut: texture_2d) -> vec4 {{ +fn textureSamplePaletteI4(tex: texture_2d, samp: sampler, uv: vec2f, tlut: texture_2d) -> vec4f {{ // Gather RGB channels var iR = textureGather(0, tex, samp, uv); var iG = textureGather(1, tex, samp, uv); var iB = textureGather(2, tex, samp, uv); // Perform intensity conversion - var i0 = intensityI4(vec3(iR[0], iG[0], iB[0])); - var i1 = intensityI4(vec3(iR[1], iG[1], iB[1])); - var i2 = intensityI4(vec3(iR[2], iG[2], iB[2])); - var i3 = intensityI4(vec3(iR[3], iG[3], iB[3])); + var i0 = intensityI4(vec3f(iR[0], iG[0], iB[0])); + var i1 = intensityI4(vec3f(iR[1], iG[1], iB[1])); + var i2 = intensityI4(vec3f(iR[2], iG[2], iB[2])); + var i3 = intensityI4(vec3f(iR[3], iG[3], iB[3])); // Load palette colors - var c0 = textureLoad(tlut, vec2(i0, 0), 0); - var c1 = textureLoad(tlut, vec2(i1, 0), 0); - var c2 = textureLoad(tlut, vec2(i2, 0), 0); - var c3 = textureLoad(tlut, vec2(i3, 0), 0); + var c0 = textureLoad(tlut, 
vec2i(i0, 0), 0); + var c1 = textureLoad(tlut, vec2i(i1, 0), 0); + var c2 = textureLoad(tlut, vec2i(i2, 0), 0); + var c3 = textureLoad(tlut, vec2i(i3, 0), 0); // Perform bilinear filtering - var f = fract(uv * vec2(textureDimensions(tex)) + 0.5); + var f = fract(uv * vec2f(textureDimensions(tex)) + 0.5); var t0 = mix(c3, c2, f.x); var t1 = mix(c0, c1, f.x); return mix(t0, t1, f.y); @@ -1318,10 +1497,10 @@ fn vs_main({5} }} @fragment -fn fs_main(in: VertexOutput) -> @location(0) vec4 {{{8}{7} +fn fs_main(in: VertexOutput) -> @location(0) vec4f {{{8}{7} return prev; }} -)"""), +)""", uniBufAttrs, sampBindings, texBindings, uniformBindings, vtxOutAttrs, vtxInAttrs, vtxXfrAttrs, fragmentFn, fragmentFnPre, vtxXfrAttrsPre, uniformPre); if (EnableDebugPrints) { diff --git a/lib/gfx/model/shader.cpp b/lib/gfx/model/shader.cpp index acd4aba..80cd9b5 100644 --- a/lib/gfx/model/shader.cpp +++ b/lib/gfx/model/shader.cpp @@ -1,60 +1,29 @@ #include "shader.hpp" #include "../../webgpu/gpu.hpp" +#include "../gx_fmt.hpp" #include namespace aurora::gfx::model { static Module Log("aurora::gfx::model"); -template -constexpr T bswap16(T val) noexcept { - static_assert(sizeof(T) == sizeof(u16)); - union { - u16 u; - T t; - } v{.t = val}; -#if __GNUC__ - v.u = __builtin_bswap16(v.u); -#elif _WIN32 - v.u = _byteswap_ushort(v.u); -#else - v.u = (v.u << 8) | ((v.u >> 8) & 0xFF); -#endif - return v.t; -} -template -constexpr T bswap32(T val) noexcept { - static_assert(sizeof(T) == sizeof(u32)); - union { - u32 u; - T t; - } v{.t = val}; -#if __GNUC__ - v.u = __builtin_bswap32(v.u); -#elif _WIN32 - v.u = _byteswap_ulong(v.u); -#else - v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8); -#endif - return v.t; -} - using IndexedAttrs = std::array; struct DisplayListCache { ByteBuffer vtxBuf; ByteBuffer idxBuf; IndexedAttrs indexedAttrs; + GXVtxFmt fmt; - DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs 
indexedAttrs) - : vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs) {} + DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt) + : vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {} }; static absl::flat_hash_map sCachedDisplayLists; static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount, IndexedAttrs& indexedAttrs) { - using aurora::gfx::gx::g_gxState; + using gx::g_gxState; struct { u8 count; GXCompType type; @@ -66,14 +35,13 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; switch (g_gxState.vtxDesc[attr]) { - DEFAULT_FATAL("unhandled attribute type {}", static_cast(g_gxState.vtxDesc[attr])); + DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]); case GX_NONE: break; case GX_DIRECT: #define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3)) switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) { - DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", static_cast(attr), static_cast(attrFmt.cnt), - static_cast(attrFmt.type)); + DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type); case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32): case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32): attrArrays[attr].count = 3; @@ -150,12 +118,10 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u for (u32 v = 0; v < vtxCount; ++v) { for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { if (g_gxState.vtxDesc[attr] == GX_INDEX8) { - u16 index = *ptr; - buf.append(&index, 2); + buf.append(static_cast(*ptr)); ++ptr; } else if (g_gxState.vtxDesc[attr] == GX_INDEX16) { - u16 index = bswap16(*reinterpret_cast(ptr)); - buf.append(&index, 2); + buf.append(bswap(*reinterpret_cast(ptr))); ptr += 2; } if 
(g_gxState.vtxDesc[attr] != GX_DIRECT) { @@ -182,7 +148,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u break; case GX_U16: for (int i = 0; i < count; ++i) { - const auto value = bswap16(reinterpret_cast(ptr)[i]); + const auto value = bswap(reinterpret_cast(ptr)[i]); out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); } buf.append(out.data(), sizeof(f32) * count); @@ -190,7 +156,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u break; case GX_S16: for (int i = 0; i < count; ++i) { - const auto value = bswap16(reinterpret_cast(ptr)[i]); + const auto value = bswap(reinterpret_cast(ptr)[i]); out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); } buf.append(out.data(), sizeof(f32) * count); @@ -198,7 +164,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u break; case GX_F32: for (int i = 0; i < count; ++i) { - out[i] = bswap32(reinterpret_cast(ptr)[i]); + out[i] = bswap(reinterpret_cast(ptr)[i]); } buf.append(out.data(), sizeof(f32) * count); ptr += count * sizeof(f32); @@ -227,7 +193,7 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u buf.reserve_extra(vtxCount * sizeof(u16)); for (u16 v = 0; v < vtxCount; ++v) { const u16 idx = vtxStart + v; - buf.append(&idx, sizeof(u16)); + buf.append(idx); ++numIndices; } } else if (prim == GX_TRIANGLEFAN) { @@ -235,29 +201,26 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u for (u16 v = 0; v < vtxCount; ++v) { const u16 idx = vtxStart + v; if (v < 3) { - buf.append(&idx, sizeof(u16)); + buf.append(idx); ++numIndices; continue; } - const std::array idxs{vtxStart, u16(idx - 1), idx}; - buf.append(idxs.data(), sizeof(u16) * 3); + buf.append(std::array{vtxStart, static_cast(idx - 1), idx}); numIndices += 3; } } else if (prim == GX_TRIANGLESTRIP) { - buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16)); + 
buf.reserve_extra(((static_cast(vtxCount) - 3) * 3 + 3) * sizeof(u16)); for (u16 v = 0; v < vtxCount; ++v) { const u16 idx = vtxStart + v; if (v < 3) { - buf.append(&idx, sizeof(u16)); + buf.append(idx); ++numIndices; continue; } if ((v & 1) == 0) { - const std::array idxs{u16(idx - 2), u16(idx - 1), idx}; - buf.append(idxs.data(), sizeof(u16) * 3); + buf.append(std::array{static_cast(idx - 2), static_cast(idx - 1), idx}); } else { - const std::array idxs{u16(idx - 1), u16(idx - 2), idx}; - buf.append(idxs.data(), sizeof(u16) * 3); + buf.append(std::array{static_cast(idx - 1), static_cast(idx - 2), idx}); } numIndices += 3; } @@ -271,6 +234,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { Range vertRange, idxRange; u32 numIndices = 0; IndexedAttrs indexedAttrs{}; + GXVtxFmt fmt = GX_MAX_VTXFMT; auto it = sCachedDisplayLists.find(hash); if (it != sCachedDisplayLists.end()) { const auto& cache = it->second; @@ -278,6 +242,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size()); idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size()); indexedAttrs = cache.indexedAttrs; + fmt = cache.fmt; } else { const u8* data = dlStart; u32 pos = 0; @@ -302,8 +267,12 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { case GX_DRAW_TRIANGLE_STRIP: case GX_DRAW_TRIANGLE_FAN: { const auto prim = static_cast(opcode); - const auto fmt = static_cast(cmd & GX_VAT_MASK); - u16 vtxCount = bswap16(*reinterpret_cast(data + pos)); + const auto newFmt = static_cast(cmd & GX_VAT_MASK); + if (fmt != GX_MAX_VTXFMT && fmt != newFmt) { + FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt); + } + fmt = newFmt; + u16 vtxCount = bswap(*reinterpret_cast(data + pos)); pos += 2; pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs); numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount); @@ -319,22 +288,16 @@ void 
queue_surface(const u8* dlStart, u32 dlSize) noexcept { } vertRange = push_verts(vtxBuf.data(), vtxBuf.size()); idxRange = push_indices(idxBuf.data(), idxBuf.size()); - sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs); + sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt); } gx::BindGroupRanges ranges{}; - int lastIndexedAttr = -1; for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { if (!indexedAttrs[i]) { continue; } auto& array = gx::g_gxState.arrays[i]; - if (lastIndexedAttr >= 0 && array == gx::g_gxState.arrays[lastIndexedAttr]) { - // Reuse range from last attribute in shader - // Don't set the output range, so it remains unbound - const auto range = gx::g_gxState.arrays[lastIndexedAttr].cachedRange; - array.cachedRange = range; - } else if (array.cachedRange.size > 0) { + if (array.cachedRange.size > 0) { // Use the currently cached range ranges.vaRanges[i] = array.cachedRange; } else { @@ -343,11 +306,10 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { ranges.vaRanges[i] = range; array.cachedRange = range; } - lastIndexedAttr = i; } model::PipelineConfig config{}; - populate_pipeline_config(config, GX_TRIANGLES); + populate_pipeline_config(config, GX_TRIANGLES, fmt); const auto info = gx::build_shader_info(config.shaderConfig); const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges); const auto pipeline = pipeline_ref(config); @@ -366,7 +328,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { State construct_state() { return {}; } -wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) { +wgpu::RenderPipeline create_pipeline(const State& state, const PipelineConfig& config) { const auto info = build_shader_info(config.shaderConfig); // TODO remove const auto shader = build_shader(config.shaderConfig, info); @@ -385,7 +347,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, 
[[maybe_unused]] const // Indexed attributes for (u32 i = 0; i < num4xAttr; ++i) { vtxAttrs[shaderLocation] = { - .format = wgpu::VertexFormat::Sint16x4, + .format = wgpu::VertexFormat::Uint16x4, .offset = offset, .shaderLocation = shaderLocation, }; @@ -394,7 +356,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const } for (u32 i = 0; i < num2xAttr; ++i) { vtxAttrs[shaderLocation] = { - .format = wgpu::VertexFormat::Sint16x2, + .format = wgpu::VertexFormat::Uint16x2, .offset = offset, .shaderLocation = shaderLocation, }; diff --git a/lib/gfx/stream/shader.cpp b/lib/gfx/stream/shader.cpp deleted file mode 100644 index 842a03f..0000000 --- a/lib/gfx/stream/shader.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "shader.hpp" - -#include "../../webgpu/gpu.hpp" - -namespace aurora::gfx::stream { -static Module Log("aurora::gfx::stream"); - -using webgpu::g_device; - -wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) { - const auto info = build_shader_info(config.shaderConfig); // TODO remove - const auto shader = build_shader(config.shaderConfig, info); - - std::array attributes{}; - attributes[0] = wgpu::VertexAttribute{ - .format = wgpu::VertexFormat::Float32x3, - .offset = 0, - .shaderLocation = 0, - }; - uint64_t offset = 12; - uint32_t shaderLocation = 1; - if (config.shaderConfig.vtxAttrs[GX_VA_NRM] == GX_DIRECT) { - attributes[shaderLocation] = wgpu::VertexAttribute{ - .format = wgpu::VertexFormat::Float32x3, - .offset = offset, - .shaderLocation = shaderLocation, - }; - offset += 12; - shaderLocation++; - } - if (config.shaderConfig.vtxAttrs[GX_VA_CLR0] == GX_DIRECT) { - attributes[shaderLocation] = wgpu::VertexAttribute{ - .format = wgpu::VertexFormat::Float32x4, - .offset = offset, - .shaderLocation = shaderLocation, - }; - offset += 16; - shaderLocation++; - } - for (int i = GX_VA_TEX0; i < GX_VA_TEX7; ++i) { - if (config.shaderConfig.vtxAttrs[i] != GX_DIRECT) { - continue; - } 
- attributes[shaderLocation] = wgpu::VertexAttribute{ - .format = wgpu::VertexFormat::Float32x2, - .offset = offset, - .shaderLocation = shaderLocation, - }; - offset += 8; - shaderLocation++; - } - const std::array vertexBuffers{wgpu::VertexBufferLayout{ - .arrayStride = offset, - .attributeCount = shaderLocation, - .attributes = attributes.data(), - }}; - - return build_pipeline(config, info, vertexBuffers, shader, "Stream Pipeline"); -} - -State construct_state() { return {}; } - -void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { - if (!bind_pipeline(data.pipeline, pass)) { - return; - } - - const std::array offsets{data.uniformRange.offset}; - pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data()); - if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) { - pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup)); - pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup)); - } - pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size); - pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint16, data.indexRange.offset, data.indexRange.size); - if (data.dstAlpha != UINT32_MAX) { - const wgpu::Color color{0.f, 0.f, 0.f, data.dstAlpha / 255.f}; - pass.SetBlendConstant(&color); - } - pass.DrawIndexed(data.indexCount); -} -} // namespace aurora::gfx::stream diff --git a/lib/gfx/stream/shader.hpp b/lib/gfx/stream/shader.hpp deleted file mode 100644 index ffbde72..0000000 --- a/lib/gfx/stream/shader.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "../common.hpp" -#include "../gx.hpp" - -namespace aurora::gfx::stream { -struct DrawData { - PipelineRef pipeline; - Range vertRange; - Range uniformRange; - Range indexRange; - uint32_t indexCount; - gx::GXBindGroups bindGroups; - u32 dstAlpha; -}; - -struct PipelineConfig : public gx::PipelineConfig {}; - -struct State {}; - -State 
construct_state(); -wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config); -void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass); -} // namespace aurora::gfx::stream diff --git a/lib/gfx/texture_convert.cpp b/lib/gfx/texture_convert.cpp index 6148ea9..d0bff21 100644 --- a/lib/gfx/texture_convert.cpp +++ b/lib/gfx/texture_convert.cpp @@ -66,17 +66,6 @@ static size_t ComputeMippedBlockCountDXT1(uint32_t w, uint32_t h, uint32_t mips) return ret; } -template -constexpr T bswap16(T val) noexcept { -#if __GNUC__ - return __builtin_bswap16(val); -#elif _WIN32 - return _byteswap_ushort(val); -#else - return (val = (val << 8) | ((val >> 8) & 0xFF)); -#endif -} - template concept TextureDecoder = requires(T) { typename T::Source; @@ -178,15 +167,15 @@ struct TextureDecoderIA4 { }; struct TextureDecoderIA8 { - using Source = uint8_t; + using Source = uint16_t; using Target = RGBA8; static constexpr uint32_t Frac = 1; - static constexpr uint32_t BlockWidth = 8; + static constexpr uint32_t BlockWidth = 4; static constexpr uint32_t BlockHeight = 4; static void decode_texel(Target* target, const Source* in, const uint32_t x) { - const auto texel = bswap16(in[x]); + const auto texel = bswap(in[x]); const uint8_t intensity = texel >> 8; target[x].r = intensity; target[x].g = intensity; @@ -228,7 +217,7 @@ struct TextureDecoderRGB565 { static constexpr uint32_t BlockHeight = 4; static void decode_texel(Target* target, const Source* in, const uint32_t x) { - const auto texel = bswap16(in[x]); + const auto texel = bswap(in[x]); target[x].r = ExpandTo8<5>(texel >> 11 & 0x1f); target[x].g = ExpandTo8<6>(texel >> 5 & 0x3f); target[x].b = ExpandTo8<5>(texel & 0x1f); @@ -245,7 +234,7 @@ struct TextureDecoderRGB5A3 { static constexpr uint32_t BlockHeight = 4; static void decode_texel(Target* target, const Source* in, const uint32_t x) { - const auto texel = bswap16(in[x]); + const auto texel = 
bswap(in[x]); if ((texel & 0x8000) != 0) { target[x].r = ExpandTo8<5>(texel >> 10 & 0x1f); target[x].g = ExpandTo8<5>(texel >> 5 & 0x1f); @@ -322,8 +311,8 @@ static ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mip for (uint32_t y = 0; y < 2; ++y) { DXT1Block* target = targetMip + (baseY + y) * w + baseX; for (size_t x = 0; x < 2; ++x) { - target[x].color1 = bswap16(in[x].color1); - target[x].color2 = bswap16(in[x].color2); + target[x].color1 = bswap(in[x].color1); + target[x].color2 = bswap(in[x].color2); for (size_t i = 0; i < 4; ++i) { std::array ind; const uint8_t packed = in[x].lines[i]; @@ -365,8 +354,8 @@ static ByteBuffer BuildRGBA8FromCMPR(uint32_t width, uint32_t height, uint32_t m for (uint32_t yb = 0; yb < 8; yb += 4) { for (uint32_t xb = 0; xb < 8; xb += 4) { // CMPR difference: Big-endian color1/2 - const uint16_t color1 = bswap16(*reinterpret_cast(src)); - const uint16_t color2 = bswap16(*reinterpret_cast(src + 2)); + const uint16_t color1 = bswap(*reinterpret_cast(src)); + const uint16_t color2 = bswap(*reinterpret_cast(src + 2)); src += 4; // Fill in first two colors in color table. 
@@ -480,4 +469,4 @@ ByteBuffer convert_tlut(u32 format, uint32_t width, ArrayRef data) { return DecodeLinear(width, data); } } -} // namespace aurora::gfx +} // namespace aurora::gfx \ No newline at end of file diff --git a/lib/internal.hpp b/lib/internal.hpp index 5423030..162c16b 100644 --- a/lib/internal.hpp +++ b/lib/internal.hpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include using namespace std::string_view_literals; @@ -21,6 +23,46 @@ using namespace std::string_view_literals; #endif #endif +template + requires(sizeof(T) == sizeof(uint16_t) && std::is_arithmetic_v) +constexpr T bswap(T val) noexcept { + union { + uint16_t u; + T t; + } v{.t = val}; +#if __GNUC__ + v.u = __builtin_bswap16(v.u); +#elif _WIN32 + v.u = _byteswap_ushort(v.u); +#else + v.u = (v.u << 8) | ((v.u >> 8) & 0xFF); +#endif + return v.t; +} + +template + requires(sizeof(T) == sizeof(uint32_t) && std::is_arithmetic_v) +constexpr T bswap(T val) noexcept { + union { + uint32_t u; + T t; + } v{.t = val}; +#if __GNUC__ + v.u = __builtin_bswap32(v.u); +#elif _WIN32 + v.u = _byteswap_ulong(v.u); +#else + v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8); +#endif + return v.t; +} + +template + requires(std::is_enum_v) +auto underlying(T value) -> std::underlying_type_t { + return static_cast>(value); +} + #ifndef ALIGN #define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1)) #endif @@ -33,11 +75,7 @@ using namespace std::string_view_literals; #else #define UNLIKELY #endif -#define FATAL(msg, ...) \ - { \ - Log.fatal(msg, ##__VA_ARGS__); \ - unreachable(); \ - } +#define FATAL(msg, ...) Log.fatal(msg, ##__VA_ARGS__); #define ASSERT(cond, msg, ...) 
\ if (!(cond)) \ UNLIKELY FATAL(msg, ##__VA_ARGS__) diff --git a/lib/logging.hpp b/lib/logging.hpp index bed551c..09cae88 100644 --- a/lib/logging.hpp +++ b/lib/logging.hpp @@ -4,15 +4,9 @@ #include #include -#include -#ifdef __GNUC__ -[[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); } -#elif defined(_MSC_VER) -[[noreturn]] __forceinline void unreachable() { __assume(false); } -#else -#error Unknown compiler -#endif +#include +#include namespace aurora { void log_internal(AuroraLogLevel level, const char* module, const char* message, unsigned int len) noexcept; @@ -50,7 +44,7 @@ struct Module { template [[noreturn]] void fatal(fmt::format_string fmt, T&&... args) noexcept { report(LOG_FATAL, fmt, std::forward(args)...); - unreachable(); + std::abort(); } }; } // namespace aurora diff --git a/lib/webgpu/gpu.cpp b/lib/webgpu/gpu.cpp index 3944fc0..cce583d 100644 --- a/lib/webgpu/gpu.cpp +++ b/lib/webgpu/gpu.cpp @@ -385,15 +385,12 @@ bool initialize(AuroraBackend auroraBackend) { g_adapter.GetLimits(&supportedLimits); const wgpu::Limits requiredLimits{ // Use "best" supported alignments - .maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 - ? WGPU_LIMIT_U32_UNDEFINED - : supportedLimits.maxTextureDimension1D, - .maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 - ? WGPU_LIMIT_U32_UNDEFINED - : supportedLimits.maxTextureDimension2D, - .maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0 - ? WGPU_LIMIT_U32_UNDEFINED - : supportedLimits.maxTextureDimension3D, + .maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED + : supportedLimits.maxTextureDimension1D, + .maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED + : supportedLimits.maxTextureDimension2D, + .maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0 ? 
WGPU_LIMIT_U32_UNDEFINED + : supportedLimits.maxTextureDimension3D, .minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0 ? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.minUniformBufferOffsetAlignment, @@ -401,6 +398,12 @@ bool initialize(AuroraBackend auroraBackend) { ? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.minStorageBufferOffsetAlignment, }; + Log.info( + "Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n " + "minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}", + requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D, + requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment, + requiredLimits.minStorageBufferOffsetAlignment); std::vector requiredFeatures; wgpu::SupportedFeatures supportedFeatures; g_adapter.GetFeatures(&supportedFeatures); @@ -442,22 +445,20 @@ bool initialize(AuroraBackend auroraBackend) { }); deviceDescriptor.SetUncapturedErrorCallback( [](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) { - FATAL("WebGPU error {}: {}", static_cast(type), message); - }); - deviceDescriptor.SetDeviceLostCallback( - wgpu::CallbackMode::AllowSpontaneous, - [](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) { - Log.warn("Device lost: {}", message); - }); - const auto future = g_adapter.RequestDevice( - &deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly, - [](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) { - if (status == wgpu::RequestDeviceStatus::Success) { - g_device = std::move(device); - } else { - Log.warn("Device request failed: {}", message); - } + FATAL("WebGPU error {}: {}", underlying(type), message); }); + deviceDescriptor.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous, + [](const wgpu::Device& device, wgpu::DeviceLostReason reason, + wgpu::StringView message) { Log.warn("Device lost: {}", 
message); }); + const auto future = + g_adapter.RequestDevice(&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) { + if (status == wgpu::RequestDeviceStatus::Success) { + g_device = std::move(device); + } else { + Log.warn("Device request failed: {}", message); + } + }); const auto status = g_instance.WaitAny(future, 5000000000); if (status != wgpu::WaitStatus::Success) { Log.error("Failed to create device: {}", magic_enum::enum_name(status));