diff --git a/cmake/aurora_gx.cmake b/cmake/aurora_gx.cmake index 2de474c..8a8939e 100644 --- a/cmake/aurora_gx.cmake +++ b/cmake/aurora_gx.cmake @@ -4,6 +4,8 @@ add_library(aurora_gx STATIC lib/gfx/gx.cpp lib/gfx/gx_shader.cpp lib/gfx/texture_convert.cpp + lib/gfx/display_list.cpp + lib/gfx/shader_info.cpp lib/gfx/model/shader.cpp lib/dolphin/gx/GXBump.cpp lib/dolphin/gx/GXCull.cpp diff --git a/include/dolphin/gx/GXEnum.h b/include/dolphin/gx/GXEnum.h index 902ee64..426c099 100644 --- a/include/dolphin/gx/GXEnum.h +++ b/include/dolphin/gx/GXEnum.h @@ -751,6 +751,13 @@ typedef enum { GX_MAX_TLUTFMT, } GXTlutFmt; +typedef enum _GXTexCacheSize { + GX_TEXCACHE_32K, + GX_TEXCACHE_128K, + GX_TEXCACHE_512K, + GX_TEXCACHE_NONE +} GXTexCacheSize; + #ifdef __cplusplus } #endif diff --git a/include/dolphin/gx/GXStruct.h b/include/dolphin/gx/GXStruct.h index ecb650c..9991c9a 100644 --- a/include/dolphin/gx/GXStruct.h +++ b/include/dolphin/gx/GXStruct.h @@ -93,6 +93,14 @@ typedef struct { s16 a; } GXColorS10; +typedef struct _GXTexRegion { + u32 dummy[4]; +} GXTexRegion; + +typedef struct _GXTlutRegion { + u32 dummy[4]; +} GXTlutRegion; + #ifdef __cplusplus } #endif diff --git a/include/dolphin/gx/GXTexture.h b/include/dolphin/gx/GXTexture.h index 942811b..c7cc8e5 100644 --- a/include/dolphin/gx/GXTexture.h +++ b/include/dolphin/gx/GXTexture.h @@ -8,6 +8,8 @@ extern "C" { #endif +typedef GXTexRegion* (*GXTexRegionCallback)(const GXTexObj* obj, GXTexMapID id); + void GXInitTexObj(GXTexObj* obj, const void* data, u16 width, u16 height, u32 format, GXTexWrapMode wrapS, GXTexWrapMode wrapT, GXBool mipmap); void GXInitTexObjCI(GXTexObj* obj, const void* data, u16 width, u16 height, GXCITexFmt format, GXTexWrapMode wrapS, @@ -21,6 +23,11 @@ void GXInvalidateTexAll(); void GXInitTexObjWrapMode(GXTexObj* obj, GXTexWrapMode s, GXTexWrapMode t); void GXInitTlutObj(GXTlutObj* obj, const void* data, GXTlutFmt format, u16 entries); void GXLoadTlut(const GXTlutObj* obj, GXTlut idx); +void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts); +void GXInitTexCacheRegion(GXTexRegion* region, GXBool is_32b_mipmap, u32 tmem_even, GXTexCacheSize size_even, + u32 tmem_odd, GXTexCacheSize size_odd); +GXTexRegionCallback GXSetTexRegionCallback(GXTexRegionCallback callback); +void GXInvalidateTexRegion(const GXTexRegion* region); #ifdef __cplusplus } diff --git a/lib/dolphin/gx/GXTev.cpp b/lib/dolphin/gx/GXTev.cpp index aa96cbe..37d9620 100644 --- a/lib/dolphin/gx/GXTev.cpp +++ b/lib/dolphin/gx/GXTev.cpp @@ -77,7 +77,9 @@ void GXSetTevOrder(GXTevStageID id, GXTexCoordID tcid, GXTexMapID tmid, GXChanne update_gx_state(stage.channelId, cid); } -// TODO GXSetZTexture +void GXSetZTexture(GXZTexOp op, GXTexFmt fmt, u32 bias) { + // TODO +} void GXSetNumTevStages(u8 num) { update_gx_state(g_gxState.numTevStages, num); } diff --git a/lib/dolphin/gx/GXTexture.cpp b/lib/dolphin/gx/GXTexture.cpp index 8ad1ab9..628a281 100644 --- a/lib/dolphin/gx/GXTexture.cpp +++ b/lib/dolphin/gx/GXTexture.cpp @@ -233,7 +233,9 @@ void GXInvalidateTexAll() { // TODO GXSetTexRegionCallback // TODO GXSetTlutRegionCallback // TODO GXLoadTexObjPreLoaded -// TODO GXSetTexCoordScaleManually +void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts) { + // TODO +} // TODO GXSetTexCoordCylWrap // TODO GXSetTexCoordBias } \ No newline at end of file diff --git a/lib/dolphin/gx/GXVert.cpp b/lib/dolphin/gx/GXVert.cpp index e4af007..c96b330 100644 --- a/lib/dolphin/gx/GXVert.cpp +++ b/lib/dolphin/gx/GXVert.cpp @@ -3,6 +3,7 @@ #include "aurora/math.hpp" #include "../../gfx/model/shader.hpp" #include "../../gfx/gx_fmt.hpp" +#include "../../gfx/shader_info.hpp" #include #include diff --git a/lib/gfx/display_list.cpp b/lib/gfx/display_list.cpp new file mode 100644 index 0000000..a946c34 --- /dev/null +++ b/lib/gfx/display_list.cpp @@ -0,0 +1,288 @@ +#include "display_list.hpp" + +#include "gx.hpp" +#include "gx_fmt.hpp" + +namespace aurora::gfx::gx { +static Module Log("aurora::gfx::model"); + +struct DisplayListCache { + ByteBuffer vtxBuf; + ByteBuffer idxBuf; + GXVtxFmt fmt; + + DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, GXVtxFmt fmt) + : vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), fmt(fmt) {} +}; + +static absl::flat_hash_map sCachedDisplayLists; + +static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount) { + using gx::g_gxState; + struct { + u8 count; + GXCompType type; + } attrArrays[GX_VA_MAX_ATTR] = {}; + u32 vtxSize = 0; + u32 outVtxSize = 0; + + // Calculate attribute offsets and vertex size + for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { + const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; + switch (g_gxState.vtxDesc[attr]) { + DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]); + case GX_NONE: + break; + case GX_DIRECT: +#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3)) + switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) { + DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type); + case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32): + case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32): + attrArrays[attr].count = 3; + attrArrays[attr].type = GX_F32; + vtxSize += 12; + outVtxSize += 12; + break; + case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_S16): + case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_S16): + attrArrays[attr].count = 3; + attrArrays[attr].type = GX_S16; + vtxSize += 6; + outVtxSize += 12; + break; + case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_F32): + case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_F32): + attrArrays[attr].count = 2; + attrArrays[attr].type = GX_F32; + vtxSize += 8; + outVtxSize += 8; + break; + case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_S16): + case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_S16): + attrArrays[attr].count = 2; + attrArrays[attr].type = GX_S16; + vtxSize += 4; + outVtxSize += 8; + break; + case COMBINE(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8): + case COMBINE(GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8): + attrArrays[attr].count = 4; + attrArrays[attr].type = GX_RGBA8; + vtxSize += 4; + outVtxSize += 16; + break; + } +#undef COMBINE + break; + case GX_INDEX8: + ++vtxSize; + outVtxSize += 2; + break; + case GX_INDEX16: + vtxSize += 2; + outVtxSize += 2; + break; + } + } + // Align to 4 + int rem = outVtxSize % 4; + int padding = 0; + if (rem != 0) { + padding = 4 - rem; + outVtxSize += padding; + } + + // Build vertex buffer + buf.reserve_extra(vtxCount * outVtxSize); + std::array out{}; + for (u32 v = 0; v < vtxCount; ++v) { + for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { + if (g_gxState.vtxDesc[attr] == GX_INDEX8) { + buf.append(static_cast(*ptr)); + ++ptr; + } else if (g_gxState.vtxDesc[attr] == GX_INDEX16) { + buf.append(bswap(*reinterpret_cast(ptr))); + ptr += 2; + } + if (g_gxState.vtxDesc[attr] != GX_DIRECT) { + continue; + } + const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; + u8 count = attrArrays[attr].count; + switch (attrArrays[attr].type) { + case GX_U8: + for (int i = 0; i < count; ++i) { + const auto value = reinterpret_cast(ptr)[i]; + out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); + } + buf.append(out.data(), sizeof(f32) * count); + ptr += count; + break; + case GX_S8: + for (int i = 0; i < count; ++i) { + const auto value = reinterpret_cast(ptr)[i]; + out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); + } + buf.append(out.data(), sizeof(f32) * count); + ptr += count; + break; + case GX_U16: + for (int i = 0; i < count; ++i) { + const auto value = bswap(reinterpret_cast(ptr)[i]); + out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); + } + buf.append(out.data(), sizeof(f32) * count); + ptr += count * sizeof(u16); + break; + case GX_S16: + for (int i = 0; i < count; ++i) { + const auto value = bswap(reinterpret_cast(ptr)[i]); + out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); + } + buf.append(out.data(), sizeof(f32) * count); + ptr += count * sizeof(s16); + break; + case GX_F32: + for (int i = 0; i < count; ++i) { + out[i] = bswap(reinterpret_cast(ptr)[i]); + } + buf.append(out.data(), sizeof(f32) * count); + ptr += count * sizeof(f32); + break; + case GX_RGBA8: + out[0] = static_cast(ptr[0]) / 255.f; + out[1] = static_cast(ptr[1]) / 255.f; + out[2] = static_cast(ptr[2]) / 255.f; + out[3] = static_cast(ptr[3]) / 255.f; + buf.append(out.data(), sizeof(f32) * 4); + ptr += sizeof(u32); + break; + } + } + if (padding > 0) { + buf.append_zeroes(padding); + } + } + + return vtxSize; +} + +static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u16 vtxCount) { + u16 numIndices = 0; + if (prim == GX_TRIANGLES) { + buf.reserve_extra(vtxCount * sizeof(u16)); + for (u16 v = 0; v < vtxCount; ++v) { + const u16 idx = vtxStart + v; + buf.append(idx); + ++numIndices; + } + } else if (prim == GX_TRIANGLEFAN) { + buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16)); + for (u16 v = 0; v < vtxCount; ++v) { + const u16 idx = vtxStart + v; + if (v < 3) { + buf.append(idx); + ++numIndices; + continue; + } + buf.append(std::array{vtxStart, static_cast(idx - 1), idx}); + numIndices += 3; + } + } else if (prim == GX_TRIANGLESTRIP) { + buf.reserve_extra(((static_cast(vtxCount) - 3) * 3 + 3) * sizeof(u16)); + for (u16 v = 0; v < vtxCount; ++v) { + const u16 idx = vtxStart + v; + if (v < 3) { + buf.append(idx); + ++numIndices; + continue; + } + if ((v & 1) == 0) { + buf.append(std::array{static_cast(idx - 2), static_cast(idx - 1), idx}); + } else { + buf.append(std::array{static_cast(idx - 1), static_cast(idx - 2), idx}); + } + numIndices += 3; + } + } else + UNLIKELY FATAL("unsupported primitive type {}", static_cast(prim)); + return numIndices; +} + +auto process_display_list(const u8* dlStart, u32 dlSize) -> DisplayListResult { + const auto hash = xxh3_hash_s(dlStart, dlSize, 0); + Range vertRange, idxRange; + u32 numIndices = 0; + GXVtxFmt fmt = GX_MAX_VTXFMT; + auto it = sCachedDisplayLists.find(hash); + if (it != sCachedDisplayLists.end()) { + const auto& cache = it->second; + numIndices = cache.idxBuf.size() / 2; + vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size()); + idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size()); + fmt = cache.fmt; + } else { + const u8* data = dlStart; + u32 pos = 0; + ByteBuffer vtxBuf; + ByteBuffer idxBuf; + u16 vtxStart = 0; + + while (pos < dlSize) { + u8 cmd = data[pos++]; + + u8 opcode = cmd & GX_OPCODE_MASK; + switch (opcode) { + DEFAULT_FATAL("unimplemented opcode: {}", opcode); + case GX_NOP: + continue; + case GX_DRAW_QUADS: + case GX_DRAW_TRIANGLES: + case GX_DRAW_TRIANGLE_STRIP: + case GX_DRAW_TRIANGLE_FAN: { + const auto prim = static_cast(opcode); + const auto newFmt = static_cast(cmd & GX_VAT_MASK); + if (fmt != GX_MAX_VTXFMT && fmt != newFmt) { + FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt); + } + fmt = newFmt; + u16 vtxCount = bswap(*reinterpret_cast(data + pos)); + pos += 2; + pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount); + numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount); + vtxStart += vtxCount; + break; + } + case GX_DRAW_LINES: + case GX_DRAW_LINE_STRIP: + case GX_DRAW_POINTS: + FATAL("unimplemented prim type: {}", opcode); + break; + } + } + vertRange = push_verts(vtxBuf.data(), vtxBuf.size()); + idxRange = push_indices(idxBuf.data(), idxBuf.size()); + sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), fmt); + } + + return { + .vertRange = vertRange, + .idxRange = idxRange, + .numIndices = numIndices, + .fmt = fmt, + }; +} +} // namespace aurora::gfx::gx \ No newline at end of file diff --git a/lib/gfx/display_list.hpp b/lib/gfx/display_list.hpp new file mode 100644 index 0000000..4a8774f --- /dev/null +++ b/lib/gfx/display_list.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "gx.hpp" + +namespace aurora::gfx::gx { +struct DisplayListResult { + Range vertRange; + Range idxRange; + u32 numIndices; + GXVtxFmt fmt; +}; + +auto process_display_list(const u8* dlStart, u32 dlSize) -> DisplayListResult; +}; // namespace aurora::gfx::gx diff --git a/lib/gfx/gx.cpp b/lib/gfx/gx.cpp index 577d260..99df6e4 100644 --- a/lib/gfx/gx.cpp +++ b/lib/gfx/gx.cpp @@ -316,104 +316,6 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV }; } -Range build_uniform(const ShaderInfo& info) noexcept { - auto [buf, range] = map_uniform(info.uniformSize); - { - buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]); - buf.append(g_gxState.proj); - } - for (int i = 0; i < info.loadsTevReg.size(); ++i) { - if (!info.loadsTevReg.test(i)) { - continue; - } - buf.append(g_gxState.colorRegs[i]); - } - if (info.lightingEnabled) { - // Lights - static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights); - buf.append(g_gxState.lights); - // Light state for all channels - for (int i = 0; i < 4; ++i) { - buf.append(g_gxState.colorChannelState[i].lightMask.to_ulong()); - } - } - for (int i = 0; i < info.sampledColorChannels.size(); ++i) { - if (!info.sampledColorChannels.test(i)) { - continue; - } - const auto& ccc = g_gxState.colorChannelConfig[i]; - const auto& ccs = g_gxState.colorChannelState[i]; - if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) { - buf.append(ccs.ambColor); - } - if (ccc.matSrc == GX_SRC_REG) { - buf.append(ccs.matColor); - } - const auto& ccca = g_gxState.colorChannelConfig[i + GX_ALPHA0]; - const auto& ccsa = g_gxState.colorChannelState[i + GX_ALPHA0]; - if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) { - buf.append(ccsa.ambColor); - } - if (ccca.matSrc == GX_SRC_REG) { - buf.append(ccsa.matColor); - } - } - for (int i = 0; i < info.sampledKColors.size(); ++i) { - if (!info.sampledKColors.test(i)) { - continue; - } - buf.append(g_gxState.kcolors[i]); - } - for (int i = 0; i < info.usesTexMtx.size(); ++i) { - if (!info.usesTexMtx.test(i)) { - continue; - } - switch (info.texMtxTypes[i]) { - DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i])); - case GX_TG_MTX2x4: - if (std::holds_alternative>(g_gxState.texMtxs[i])) { - buf.append(std::get>(g_gxState.texMtxs[i])); - } else - UNLIKELY FATAL("expected 2x4 mtx in idx {}", i); - break; - case GX_TG_MTX3x4: - if (std::holds_alternative>(g_gxState.texMtxs[i])) { - buf.append(std::get>(g_gxState.texMtxs[i])); - } else - UNLIKELY FATAL("expected 3x4 mtx in idx {}", i); - break; - } - } - for (int i = 0; i < info.usesPTTexMtx.size(); ++i) { - if (!info.usesPTTexMtx.test(i)) { - continue; - } - buf.append(g_gxState.ptTexMtxs[i]); - } - if (info.usesFog) { - const auto& state = g_gxState.fog; - Fog fog{.color = state.color}; - if (state.nearZ != state.farZ && state.startZ != state.endZ) { - const float depthRange = state.farZ - state.nearZ; - const float fogRange = state.endZ - state.startZ; - fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange); - fog.b = state.farZ / depthRange; - fog.c = state.startZ / fogRange; - } - buf.append(fog); - } - for (int i = 0; i < info.sampledTextures.size(); ++i) { - if (!info.sampledTextures.test(i)) { - continue; - } - const auto& tex = get_texture(static_cast(i)); - CHECK(tex, "unbound texture {}", i); - buf.append(tex.texObj.lodBias); - } - g_gxState.stateDirty = false; - return range; -} - static absl::flat_hash_map sUniformBindGroupLayouts; static absl::flat_hash_map> sTextureBindGroupLayouts; diff --git a/lib/gfx/gx.hpp b/lib/gfx/gx.hpp index 6a18100..4c5d16d 100644 --- a/lib/gfx/gx.hpp +++ b/lib/gfx/gx.hpp @@ -429,10 +429,7 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info, ArrayRef vtxBuffers, wgpu::ShaderModule shader, const char* label) noexcept; -ShaderInfo build_shader_info(const ShaderConfig& config) noexcept; wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& info) noexcept; -// Range build_vertex_buffer(const GXShaderInfo& info) noexcept; -Range build_uniform(const ShaderInfo& info) noexcept; GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept; GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config, const BindGroupRanges& ranges) noexcept; diff --git a/lib/gfx/gx_shader.cpp b/lib/gfx/gx_shader.cpp index 963be07..3aad44a 100644 --- a/lib/gfx/gx_shader.cpp +++ b/lib/gfx/gx_shader.cpp @@ -38,102 +38,6 @@ static inline std::string_view chan_comp(GXTevColorChan chan) noexcept { } } -static void color_arg_reg_info(GXTevColorArg arg, const TevStage& stage, ShaderInfo& info) { - switch (arg) { - case GX_CC_CPREV: - case GX_CC_APREV: - if (!info.writesTevReg.test(GX_TEVPREV)) { - info.loadsTevReg.set(GX_TEVPREV); - } - break; - case GX_CC_C0: - case GX_CC_A0: - if (!info.writesTevReg.test(GX_TEVREG0)) { - info.loadsTevReg.set(GX_TEVREG0); - } - break; - case GX_CC_C1: - case GX_CC_A1: - if (!info.writesTevReg.test(GX_TEVREG1)) { - info.loadsTevReg.set(GX_TEVREG1); - } - break; - case GX_CC_C2: - case GX_CC_A2: - if (!info.writesTevReg.test(GX_TEVREG2)) { - info.loadsTevReg.set(GX_TEVREG2); - } - break; - case GX_CC_TEXC: - case GX_CC_TEXA: - CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound"); - CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound"); - info.sampledTexCoords.set(stage.texCoordId); - info.sampledTextures.set(stage.texMapId); - break; - case GX_CC_RASC: - case GX_CC_RASA: - if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) { - info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0); - } - break; - case GX_CC_KONST: - switch (stage.kcSel) { - case GX_TEV_KCSEL_K0: - case GX_TEV_KCSEL_K0_R: - case GX_TEV_KCSEL_K0_G: - case GX_TEV_KCSEL_K0_B: - case GX_TEV_KCSEL_K0_A: - info.sampledKColors.set(0); - break; - case GX_TEV_KCSEL_K1: - case GX_TEV_KCSEL_K1_R: - case GX_TEV_KCSEL_K1_G: - case GX_TEV_KCSEL_K1_B: - case GX_TEV_KCSEL_K1_A: - info.sampledKColors.set(1); - break; - case GX_TEV_KCSEL_K2: - case GX_TEV_KCSEL_K2_R: - case GX_TEV_KCSEL_K2_G: - case GX_TEV_KCSEL_K2_B: - case GX_TEV_KCSEL_K2_A: - info.sampledKColors.set(2); - break; - case GX_TEV_KCSEL_K3: - case GX_TEV_KCSEL_K3_R: - case GX_TEV_KCSEL_K3_G: - case GX_TEV_KCSEL_K3_B: - case GX_TEV_KCSEL_K3_A: - info.sampledKColors.set(3); - break; - default: - break; - } - break; - default: - break; - } -} - -static bool formatHasAlpha(u32 format) { - switch (format) { - case GX_TF_IA4: - case GX_TF_IA8: - case GX_TF_RGB5A3: - case GX_TF_RGBA8: - case GX_TF_CMPR: - case GX_CTF_RA4: - case GX_CTF_RA8: - case GX_CTF_YUVA8: - case GX_CTF_A8: - case GX_TF_RGBA8_PC: - return true; - default: - return false; - } -} - static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const ShaderConfig& config, const TevStage& stage) { switch (arg) { @@ -260,74 +164,6 @@ static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const Shade } } -static void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderInfo& info) { - switch (arg) { - case GX_CA_APREV: - if (!info.writesTevReg.test(GX_TEVPREV)) { - info.loadsTevReg.set(GX_TEVPREV); - } - break; - case GX_CA_A0: - if (!info.writesTevReg.test(GX_TEVREG0)) { - info.loadsTevReg.set(GX_TEVREG0); - } - break; - case GX_CA_A1: - if (!info.writesTevReg.test(GX_TEVREG1)) { - info.loadsTevReg.set(GX_TEVREG1); - } - break; - case GX_CA_A2: - if (!info.writesTevReg.test(GX_TEVREG2)) { - info.loadsTevReg.set(GX_TEVREG2); - } - break; - case GX_CA_TEXA: - CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound"); - CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound"); - info.sampledTexCoords.set(stage.texCoordId); - info.sampledTextures.set(stage.texMapId); - break; - case GX_CA_RASA: - if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) { - info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0); - } - break; - case GX_CA_KONST: - switch (stage.kaSel) { - case GX_TEV_KASEL_K0_R: - case GX_TEV_KASEL_K0_G: - case GX_TEV_KASEL_K0_B: - case GX_TEV_KASEL_K0_A: - info.sampledKColors.set(0); - break; - case GX_TEV_KASEL_K1_R: - case GX_TEV_KASEL_K1_G: - case GX_TEV_KASEL_K1_B: - case GX_TEV_KASEL_K1_A: - info.sampledKColors.set(1); - break; - case GX_TEV_KASEL_K2_R: - case GX_TEV_KASEL_K2_G: - case GX_TEV_KASEL_K2_B: - case GX_TEV_KASEL_K2_A: - info.sampledKColors.set(2); - break; - case GX_TEV_KASEL_K3_R: - case GX_TEV_KASEL_K3_G: - case GX_TEV_KASEL_K3_B: - case GX_TEV_KASEL_K3_A: - info.sampledKColors.set(3); - break; - default: - break; - } - break; - default: - break; - } -} - static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const ShaderConfig& config, const TevStage& stage) { switch (arg) { @@ -549,109 +385,6 @@ constexpr std::array VtxAttributeNames{ "pos_mtx_array", "nrm_mtx_array", "tex_mtx_array", "light_array", "nbt", }; -ShaderInfo build_shader_info(const ShaderConfig& config) noexcept { - // const auto hash = xxh3_hash(config); - // const auto it = g_gxCachedShaders.find(hash); - // if (it != g_gxCachedShaders.end()) { - // return it->second.second; - // } - - ShaderInfo info{ - .uniformSize = sizeof(PnMtx) + sizeof(Mat4x4), // pos_mtx, nrm_mtx, proj - }; - for (int i = 0; i < config.tevStageCount; ++i) { - const auto& stage = config.tevStages[i]; - // Color pass - color_arg_reg_info(stage.colorPass.a, stage, info); - color_arg_reg_info(stage.colorPass.b, stage, info); - color_arg_reg_info(stage.colorPass.c, stage, info); - color_arg_reg_info(stage.colorPass.d, stage, info); - info.writesTevReg.set(stage.colorOp.outReg); - - // Alpha pass - alpha_arg_reg_info(stage.alphaPass.a, stage, info); - alpha_arg_reg_info(stage.alphaPass.b, stage, info); - alpha_arg_reg_info(stage.alphaPass.c, stage, info); - alpha_arg_reg_info(stage.alphaPass.d, stage, info); - if (!info.writesTevReg.test(stage.alphaOp.outReg)) { - // If we're writing alpha to a register that's not been - // written to in the shader, load from uniform buffer - info.loadsTevReg.set(stage.alphaOp.outReg); - info.writesTevReg.set(stage.alphaOp.outReg); - } - } - info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4); - for (int i = 0; i < info.sampledColorChannels.size(); ++i) { - if (info.sampledColorChannels.test(i)) { - const auto& cc = config.colorChannels[i]; - const auto& cca = config.colorChannels[i + GX_ALPHA0]; - if (cc.lightingEnabled || cca.lightingEnabled) { - info.lightingEnabled = true; - } - } - } - if (info.lightingEnabled) { - // Lights + light state for all channels - info.uniformSize += 16 + sizeof(Light) * GX::MaxLights; - } - for (int i = 0; i < info.sampledColorChannels.size(); ++i) { - if (info.sampledColorChannels.test(i)) { - const auto& cc = config.colorChannels[i]; - if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) { - info.uniformSize += sizeof(Vec4); - } - if (cc.matSrc == GX_SRC_REG) { - info.uniformSize += sizeof(Vec4); - } - const auto& cca = config.colorChannels[i + GX_ALPHA0]; - if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) { - info.uniformSize += sizeof(Vec4); - } - if (cca.matSrc == GX_SRC_REG) { - info.uniformSize += sizeof(Vec4); - } - } - } - info.uniformSize += info.sampledKColors.count() * sizeof(Vec4); - for (int i = 0; i < info.sampledTexCoords.size(); ++i) { - if (!info.sampledTexCoords.test(i)) { - continue; - } - const auto& tcg = config.tcgs[i]; - if (tcg.mtx != GX_IDENTITY) { - u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3; - info.usesTexMtx.set(texMtxIdx); - info.texMtxTypes[texMtxIdx] = tcg.type; - } - if (tcg.postMtx != GX_PTIDENTITY) { - u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3; - info.usesPTTexMtx.set(postMtxIdx); - } - } - for (int i = 0; i < info.usesTexMtx.size(); ++i) { - if (info.usesTexMtx.test(i)) { - switch (info.texMtxTypes[i]) { - case GX_TG_MTX2x4: - info.uniformSize += sizeof(Mat2x4); - break; - case GX_TG_MTX3x4: - info.uniformSize += sizeof(Mat3x4); - break; - default: - break; - } - } - } - info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4); - if (config.fogType != GX_FOG_NONE) { - info.usesFog = true; - info.uniformSize += sizeof(Fog); - } - info.uniformSize += info.sampledTextures.count() * sizeof(u32); - info.uniformSize = align_uniform(info.uniformSize); - return info; -} - struct StorageLoadResult { std::string attrLoad; std::string_view arrType; @@ -947,6 +680,8 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in vtxInAttrs += fmt::format("@location({}) in_clr{}: vec4f", locIdx++, attr - GX_VA_CLR0); } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) { vtxInAttrs += fmt::format("@location({}) in_tex{}_uv: vec2f", locIdx++, attr - GX_VA_TEX0); + } else { + FATAL("unhandled vtx attr {}", underlying(attr)); } } vtxXfrAttrsPre += fmt::format( @@ -1416,7 +1151,7 @@ fn fetch_i16_3(p: ptr>, idx: u32, frac: u32) -> vec3 {{ var o0 = select(extractBits(v0, 0, 16), extractBits(v0, 16, 16), r); var o1 = select(extractBits(v0, 16, 16), extractBits(v1, 0, 16), r); var o2 = select(extractBits(v1, 0, 16), extractBits(v1, 16, 16), r); - return vec3(f32(o0), f32(o1), f32(o2)) / f32(1 << frac); + return vec3(f32(o0), f32(o1), f32(o2)) / f32(1u << frac); }} {10} struct Uniform {{ diff --git a/lib/gfx/model/shader.cpp b/lib/gfx/model/shader.cpp index 80cd9b5..da26c9d 100644 --- a/lib/gfx/model/shader.cpp +++ b/lib/gfx/model/shader.cpp @@ -2,298 +2,20 @@ #include "../../webgpu/gpu.hpp" #include "../gx_fmt.hpp" +#include "../display_list.hpp" +#include "../shader_info.hpp" #include namespace aurora::gfx::model { static Module Log("aurora::gfx::model"); -using IndexedAttrs = std::array; -struct DisplayListCache { - ByteBuffer vtxBuf; - ByteBuffer idxBuf; - IndexedAttrs indexedAttrs; - GXVtxFmt fmt; - - DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt) - : vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {} -}; - -static absl::flat_hash_map sCachedDisplayLists; - -static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount, - IndexedAttrs& indexedAttrs) { - using gx::g_gxState; - struct { - u8 count; - GXCompType type; - } attrArrays[GX_VA_MAX_ATTR] = {}; - u32 vtxSize = 0; - u32 outVtxSize = 0; - - // Calculate attribute offsets and vertex size - for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { - const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; - switch (g_gxState.vtxDesc[attr]) { - DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]); - case GX_NONE: - break; - case GX_DIRECT: -#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3)) - switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) { - DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type); - case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32): - case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32): - attrArrays[attr].count = 3; - attrArrays[attr].type = GX_F32; - vtxSize += 12; - outVtxSize += 12; - break; - case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_S16): - case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_S16): - attrArrays[attr].count = 3; - attrArrays[attr].type = GX_S16; - vtxSize += 6; - outVtxSize += 12; - break; - case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_F32): - case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_F32): - attrArrays[attr].count = 2; - attrArrays[attr].type = GX_F32; - vtxSize += 8; - outVtxSize += 8; - break; - case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_S16): - case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_S16): - attrArrays[attr].count = 2; - attrArrays[attr].type = GX_S16; - vtxSize += 4; - outVtxSize += 8; - break; - case COMBINE(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8): - case COMBINE(GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8): - attrArrays[attr].count = 4; - attrArrays[attr].type = GX_RGBA8; - vtxSize += 4; - outVtxSize += 16; - break; - } -#undef COMBINE - break; - case GX_INDEX8: - ++vtxSize; - outVtxSize += 2; - indexedAttrs[attr] = true; - break; - case GX_INDEX16: - vtxSize += 2; - outVtxSize += 2; - indexedAttrs[attr] = true; - break; - } - } - // Align to 4 - int rem = outVtxSize % 4; - int padding = 0; - if (rem != 0) { - padding = 4 - rem; - outVtxSize += padding; - } - - // Build vertex buffer - buf.reserve_extra(vtxCount * outVtxSize); - std::array out{}; - for (u32 v = 0; v < vtxCount; ++v) { - for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) { - if (g_gxState.vtxDesc[attr] == GX_INDEX8) { - buf.append(static_cast(*ptr)); - ++ptr; - } else if (g_gxState.vtxDesc[attr] == GX_INDEX16) { - buf.append(bswap(*reinterpret_cast(ptr))); - ptr += 2; - } - if (g_gxState.vtxDesc[attr] != GX_DIRECT) { - continue; - } - const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr]; - u8 count = attrArrays[attr].count; - switch (attrArrays[attr].type) { - case GX_U8: - for (int i = 0; i < count; ++i) { - const auto value = reinterpret_cast(ptr)[i]; - out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); - } - buf.append(out.data(), sizeof(f32) * count); - ptr += count; - break; - case GX_S8: - for (int i = 0; i < count; ++i) { - const auto value = reinterpret_cast(ptr)[i]; - out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); - } - buf.append(out.data(), sizeof(f32) * count); - ptr += count; - break; - case GX_U16: - for (int i = 0; i < count; ++i) { - const auto value = bswap(reinterpret_cast(ptr)[i]); - out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); - } - buf.append(out.data(), sizeof(f32) * count); - ptr += count * sizeof(u16); - break; - case GX_S16: - for (int i = 0; i < count; ++i) { - const auto value = bswap(reinterpret_cast(ptr)[i]); - out[i] = static_cast(value) / static_cast(1 << attrFmt.frac); - } - buf.append(out.data(), sizeof(f32) * count); - ptr += count * sizeof(s16); - break; - case GX_F32: - for (int i = 0; i < count; ++i) { - out[i] = bswap(reinterpret_cast(ptr)[i]); - } - buf.append(out.data(), sizeof(f32) * count); - ptr += count * sizeof(f32); - break; - case GX_RGBA8: - out[0] = static_cast(ptr[0]) / 255.f; - out[1] = static_cast(ptr[1]) / 255.f; - out[2] = static_cast(ptr[2]) / 255.f; - out[3] = static_cast(ptr[3]) / 255.f; - buf.append(out.data(), sizeof(f32) * 4); - ptr += sizeof(u32); - break; - } - } - if (padding > 0) { - buf.append_zeroes(padding); - } - } - - return vtxSize; -} - -static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u16 vtxCount) { - u16 numIndices = 0; - if (prim == GX_TRIANGLES) { - buf.reserve_extra(vtxCount * sizeof(u16)); - for (u16 v = 0; v < vtxCount; ++v) { - const u16 idx = vtxStart + v; - buf.append(idx); - ++numIndices; - } - } else if (prim == GX_TRIANGLEFAN) { - buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16)); - for (u16 v = 0; v < vtxCount; ++v) { - const u16 idx = vtxStart + v; - if (v < 3) { - buf.append(idx); - ++numIndices; - continue; - } - buf.append(std::array{vtxStart, static_cast(idx - 1), idx}); - numIndices += 3; - } - } else if (prim == GX_TRIANGLESTRIP) { - buf.reserve_extra(((static_cast(vtxCount) - 3) * 3 + 3) * sizeof(u16)); - for (u16 v = 0; v < vtxCount; ++v) { - const u16 idx = vtxStart + v; - if (v < 3) { - buf.append(idx); - ++numIndices; - continue; - } - if ((v & 1) == 0) { - buf.append(std::array{static_cast(idx - 2), static_cast(idx - 1), idx}); - } else { - buf.append(std::array{static_cast(idx - 1), static_cast(idx - 2), idx}); - } - numIndices += 3; - } - } else - UNLIKELY FATAL("unsupported primitive type {}", static_cast(prim)); - return numIndices; -} - void queue_surface(const u8* dlStart, u32 dlSize) noexcept { - const auto hash = xxh3_hash_s(dlStart, dlSize, 0); - Range vertRange, idxRange; - u32 numIndices = 0; - IndexedAttrs indexedAttrs{}; - GXVtxFmt fmt = GX_MAX_VTXFMT; - auto it = sCachedDisplayLists.find(hash); - if (it != sCachedDisplayLists.end()) { - const auto& cache = it->second; - numIndices = cache.idxBuf.size() / 2; - vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size()); - idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size()); - indexedAttrs = cache.indexedAttrs; - fmt = cache.fmt; - } else { - const u8* data = dlStart; - u32 pos = 0; - ByteBuffer vtxBuf; - ByteBuffer idxBuf; - u16 vtxStart = 0; - - while (pos < dlSize) { - u8 cmd = data[pos++]; - - u8 opcode = cmd & GX_OPCODE_MASK; - switch (opcode) { - DEFAULT_FATAL("unimplemented opcode: {}", opcode); - case GX_NOP: - continue; - case GX_LOAD_BP_REG: - // TODO? - pos += 4; - break; - case GX_DRAW_QUADS: - case GX_DRAW_TRIANGLES: - case GX_DRAW_TRIANGLE_STRIP: - case GX_DRAW_TRIANGLE_FAN: { - const auto prim = static_cast(opcode); - const auto newFmt = static_cast(cmd & GX_VAT_MASK); - if (fmt != GX_MAX_VTXFMT && fmt != newFmt) { - FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt); - } - fmt = newFmt; - u16 vtxCount = bswap(*reinterpret_cast(data + pos)); - pos += 2; - pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs); - numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount); - vtxStart += vtxCount; - break; - } - case GX_DRAW_LINES: - case GX_DRAW_LINE_STRIP: - case GX_DRAW_POINTS: - FATAL("unimplemented prim type: {}", opcode); - break; - } - } - vertRange = push_verts(vtxBuf.data(), vtxBuf.size()); - idxRange = push_indices(idxBuf.data(), idxBuf.size()); - sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt); - } + const auto result = aurora::gfx::gx::process_display_list(dlStart, dlSize); gx::BindGroupRanges ranges{}; for (int i = 0; i < GX_VA_MAX_ATTR; ++i) { - if (!indexedAttrs[i]) { + if (gx::g_gxState.vtxDesc[i] != GX_INDEX8 && gx::g_gxState.vtxDesc[i] != GX_INDEX16) { continue; } auto& array = gx::g_gxState.arrays[i]; @@ -309,18 +31,18 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { } model::PipelineConfig config{}; - populate_pipeline_config(config, GX_TRIANGLES, fmt); + populate_pipeline_config(config, GX_TRIANGLES, result.fmt); const auto info = gx::build_shader_info(config.shaderConfig); const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges); const auto pipeline = pipeline_ref(config); push_draw_command(model::DrawData{ .pipeline = pipeline, - .vertRange = vertRange, - .idxRange = idxRange, + .vertRange = result.vertRange, + .idxRange = result.idxRange, .dataRanges = ranges, .uniformRange = build_uniform(info), - .indexCount = numIndices, + .indexCount = result.numIndices, .bindGroups = bindGroups, .dstAlpha = gx::g_gxState.dstAlpha, }); diff --git a/lib/gfx/shader_info.cpp b/lib/gfx/shader_info.cpp new file mode 100644 index 0000000..f60b21a --- /dev/null +++ b/lib/gfx/shader_info.cpp @@ -0,0 +1,345 @@ +#include "shader_info.hpp" + +namespace aurora::gfx::gx { +namespace { +Module Log("aurora::gfx::gx"); + +void color_arg_reg_info(GXTevColorArg arg, const TevStage& stage, ShaderInfo& info) { + switch (arg) { + case GX_CC_CPREV: + case GX_CC_APREV: + if (!info.writesTevReg.test(GX_TEVPREV)) { + info.loadsTevReg.set(GX_TEVPREV); + } + break; + case GX_CC_C0: + case GX_CC_A0: + if (!info.writesTevReg.test(GX_TEVREG0)) { + info.loadsTevReg.set(GX_TEVREG0); + } + break; + case GX_CC_C1: + case GX_CC_A1: + if (!info.writesTevReg.test(GX_TEVREG1)) { + info.loadsTevReg.set(GX_TEVREG1); + } + break; + case GX_CC_C2: + case GX_CC_A2: + if (!info.writesTevReg.test(GX_TEVREG2)) { + info.loadsTevReg.set(GX_TEVREG2); + } + break; + case GX_CC_TEXC: + case GX_CC_TEXA: + CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound"); + CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound"); + info.sampledTexCoords.set(stage.texCoordId); + info.sampledTextures.set(stage.texMapId); + break; + case GX_CC_RASC: + case GX_CC_RASA: + if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) { + info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0); + } + break; + case GX_CC_KONST: + switch (stage.kcSel) { + case GX_TEV_KCSEL_K0: + case GX_TEV_KCSEL_K0_R: + case GX_TEV_KCSEL_K0_G: + case GX_TEV_KCSEL_K0_B: + case GX_TEV_KCSEL_K0_A: + info.sampledKColors.set(0); + break; + case GX_TEV_KCSEL_K1: + case GX_TEV_KCSEL_K1_R: + case GX_TEV_KCSEL_K1_G: + case GX_TEV_KCSEL_K1_B: + case GX_TEV_KCSEL_K1_A: + info.sampledKColors.set(1); + break; + case GX_TEV_KCSEL_K2: + case GX_TEV_KCSEL_K2_R: + case GX_TEV_KCSEL_K2_G: + case GX_TEV_KCSEL_K2_B: + case GX_TEV_KCSEL_K2_A: + info.sampledKColors.set(2); + break; + case GX_TEV_KCSEL_K3: + case GX_TEV_KCSEL_K3_R: + case GX_TEV_KCSEL_K3_G: + case GX_TEV_KCSEL_K3_B: + case GX_TEV_KCSEL_K3_A: + info.sampledKColors.set(3); + break; + default: + break; + } + break; + default: + break; + } +} + +void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderInfo& info) { + switch (arg) { + case GX_CA_APREV: + if (!info.writesTevReg.test(GX_TEVPREV)) { + info.loadsTevReg.set(GX_TEVPREV); + } + break; + case GX_CA_A0: + if (!info.writesTevReg.test(GX_TEVREG0)) { + info.loadsTevReg.set(GX_TEVREG0); + } + break; + case GX_CA_A1: + if (!info.writesTevReg.test(GX_TEVREG1)) { + info.loadsTevReg.set(GX_TEVREG1); + } + break; + case GX_CA_A2: + if (!info.writesTevReg.test(GX_TEVREG2)) { + info.loadsTevReg.set(GX_TEVREG2); + } + break; + case GX_CA_TEXA: + CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound"); + CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound"); + info.sampledTexCoords.set(stage.texCoordId); + info.sampledTextures.set(stage.texMapId); + break; + case GX_CA_RASA: + if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) { + info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0); + } + break; + case GX_CA_KONST: + switch (stage.kaSel) { + case GX_TEV_KASEL_K0_R: + case GX_TEV_KASEL_K0_G: + case GX_TEV_KASEL_K0_B: + case GX_TEV_KASEL_K0_A: + info.sampledKColors.set(0); + break; + case GX_TEV_KASEL_K1_R: + case GX_TEV_KASEL_K1_G: + case GX_TEV_KASEL_K1_B: + case GX_TEV_KASEL_K1_A: + info.sampledKColors.set(1); + break; + case GX_TEV_KASEL_K2_R: + case GX_TEV_KASEL_K2_G: + case GX_TEV_KASEL_K2_B: + case GX_TEV_KASEL_K2_A: + info.sampledKColors.set(2); + break; + case GX_TEV_KASEL_K3_R: + case GX_TEV_KASEL_K3_G: + case GX_TEV_KASEL_K3_B: + case GX_TEV_KASEL_K3_A: + info.sampledKColors.set(3); + break; + default: + break; + } + break; + default: + break; + } +} +} // namespace + +ShaderInfo build_shader_info(const ShaderConfig& config) noexcept { + ShaderInfo info{ + .uniformSize = sizeof(PnMtx) + sizeof(Mat4x4), // pos_mtx, nrm_mtx, proj + }; + for (int i = 0; i < config.tevStageCount; ++i) { + const auto& stage = config.tevStages[i]; + // Color pass + color_arg_reg_info(stage.colorPass.a, stage, info); + color_arg_reg_info(stage.colorPass.b, stage, info); + color_arg_reg_info(stage.colorPass.c, stage, info); + color_arg_reg_info(stage.colorPass.d, stage, info); + info.writesTevReg.set(stage.colorOp.outReg); + + // Alpha pass + alpha_arg_reg_info(stage.alphaPass.a, stage, info); + alpha_arg_reg_info(stage.alphaPass.b, stage, info); + alpha_arg_reg_info(stage.alphaPass.c, stage, info); + alpha_arg_reg_info(stage.alphaPass.d, stage, info); + if (!info.writesTevReg.test(stage.alphaOp.outReg)) { + // If we're writing alpha to a register that's not been + // written to in the shader, load from uniform buffer + info.loadsTevReg.set(stage.alphaOp.outReg); + info.writesTevReg.set(stage.alphaOp.outReg); + } + } + info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4); + for (int i = 0; i < info.sampledColorChannels.size(); ++i) { + if (info.sampledColorChannels.test(i)) { + const auto& cc = config.colorChannels[i]; + const auto& cca = config.colorChannels[i + GX_ALPHA0]; + if (cc.lightingEnabled || cca.lightingEnabled) { + info.lightingEnabled = true; + } + } + } + if (info.lightingEnabled) { + // Lights + light state for all channels + info.uniformSize += 16 + sizeof(Light) * GX::MaxLights; + } + for (int i = 0; i < info.sampledColorChannels.size(); ++i) { + if (info.sampledColorChannels.test(i)) { + const auto& cc = config.colorChannels[i]; + if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) { + info.uniformSize += sizeof(Vec4); + } + if (cc.matSrc == GX_SRC_REG) { + info.uniformSize += sizeof(Vec4); + } + const auto& cca = config.colorChannels[i + GX_ALPHA0]; + if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) { + info.uniformSize += sizeof(Vec4); + } + if (cca.matSrc == GX_SRC_REG) { + info.uniformSize += sizeof(Vec4); + } + } + } + info.uniformSize += info.sampledKColors.count() * sizeof(Vec4); + for (int i = 0; i < info.sampledTexCoords.size(); ++i) { + if (!info.sampledTexCoords.test(i)) { + continue; + } + const auto& tcg = config.tcgs[i]; + if (tcg.mtx != GX_IDENTITY) { + u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3; + info.usesTexMtx.set(texMtxIdx); + info.texMtxTypes[texMtxIdx] = tcg.type; + } + if (tcg.postMtx != GX_PTIDENTITY) { + u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3; + info.usesPTTexMtx.set(postMtxIdx); + } + } + for (int i = 0; i < info.usesTexMtx.size(); ++i) { + if (info.usesTexMtx.test(i)) { + switch (info.texMtxTypes[i]) { + case GX_TG_MTX2x4: + info.uniformSize += sizeof(Mat2x4); + break; + case GX_TG_MTX3x4: + info.uniformSize += sizeof(Mat3x4); + break; + default: + break; + } + } + } + info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4); + if (config.fogType != GX_FOG_NONE) { + info.usesFog = true; + info.uniformSize += sizeof(Fog); + } + info.uniformSize += info.sampledTextures.count() * sizeof(u32); + info.uniformSize = align_uniform(info.uniformSize); + return info; +} + +Range build_uniform(const ShaderInfo& info) noexcept { + auto [buf, range] = map_uniform(info.uniformSize); + { + buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]); + buf.append(g_gxState.proj); + } + for (int i = 0; i < info.loadsTevReg.size(); ++i) { + if (info.loadsTevReg.test(i)) { + buf.append(g_gxState.colorRegs[i]); + } + } + if (info.lightingEnabled) { + // Lights + static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights); + buf.append(g_gxState.lights); + // Light state for all channels + for (int i = 0; i < 4; ++i) { + buf.append(g_gxState.colorChannelState[i].lightMask.to_ulong()); + } + } + for (int i = 0; i < info.sampledColorChannels.size(); ++i) { + if (!info.sampledColorChannels.test(i)) { + continue; + } + const auto& ccc = g_gxState.colorChannelConfig[i]; + const auto& ccs = g_gxState.colorChannelState[i]; + if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) { + buf.append(ccs.ambColor); + } + if (ccc.matSrc == GX_SRC_REG) { + buf.append(ccs.matColor); + } + const auto& ccca = g_gxState.colorChannelConfig[i + GX_ALPHA0]; + const auto& ccsa = g_gxState.colorChannelState[i + GX_ALPHA0]; + if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) { + buf.append(ccsa.ambColor); + } + if (ccca.matSrc == GX_SRC_REG) { + buf.append(ccsa.matColor); + } + } + for (int i = 0; i < info.sampledKColors.size(); ++i) { + if (info.sampledKColors.test(i)) { + buf.append(g_gxState.kcolors[i]); + } + } + for (int i = 0; i < info.usesTexMtx.size(); ++i) { + if (!info.usesTexMtx.test(i)) { + continue; + } + switch (info.texMtxTypes[i]) { + DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i])); + case GX_TG_MTX2x4: + if (std::holds_alternative>(g_gxState.texMtxs[i])) { + buf.append(std::get>(g_gxState.texMtxs[i])); + } else + UNLIKELY FATAL("expected 2x4 mtx in idx {}", i); + break; + case GX_TG_MTX3x4: + if (std::holds_alternative>(g_gxState.texMtxs[i])) { + buf.append(std::get>(g_gxState.texMtxs[i])); + } else + UNLIKELY FATAL("expected 3x4 mtx in idx {}", i); + break; + } + } + for (int i = 0; i < info.usesPTTexMtx.size(); ++i) { + if (info.usesPTTexMtx.test(i)) { + buf.append(g_gxState.ptTexMtxs[i]); + } + } + if (info.usesFog) { + const auto& state = g_gxState.fog; + Fog fog{.color = state.color}; + if (state.nearZ != state.farZ && state.startZ != state.endZ) { + const float depthRange = state.farZ - state.nearZ; + const float fogRange = state.endZ - state.startZ; + fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange); + fog.b = state.farZ / depthRange; + fog.c = state.startZ / fogRange; + } + buf.append(fog); + } + for (int i = 0; i < info.sampledTextures.size(); ++i) { + if (!info.sampledTextures.test(i)) { + continue; + } + const auto& tex = get_texture(static_cast(i)); + CHECK(tex, "unbound texture {}", i); + buf.append(tex.texObj.lodBias); + } + g_gxState.stateDirty = false; + return range; +} +} // namespace aurora::gfx::gx diff --git a/lib/gfx/shader_info.hpp b/lib/gfx/shader_info.hpp new file mode 100644 index 0000000..a68673f --- /dev/null +++ b/lib/gfx/shader_info.hpp @@ -0,0 +1,8 @@ +#pragma once + +#include "gx.hpp" + +namespace aurora::gfx::gx { +ShaderInfo build_shader_info(const ShaderConfig& config) noexcept; +Range build_uniform(const ShaderInfo& info) noexcept; +}; // namespace aurora::gfx::gx diff --git a/lib/webgpu/gpu.cpp b/lib/webgpu/gpu.cpp index 9293e68..66b118d 100644 --- a/lib/webgpu/gpu.cpp +++ b/lib/webgpu/gpu.cpp @@ -384,7 +384,7 @@ bool initialize(AuroraBackend auroraBackend) { wgpu::Limits supportedLimits{}; g_adapter.GetLimits(&supportedLimits); const wgpu::Limits requiredLimits{ - // Use "best" supported alignments + // Use "best" supported limits .maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.maxTextureDimension1D, .maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED @@ -393,18 +393,12 @@ bool initialize(AuroraBackend auroraBackend) { : supportedLimits.maxTextureDimension3D, .maxTextureArrayLayers = supportedLimits.maxTextureArrayLayers == 0 ? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.maxTextureArrayLayers, - .maxBindGroupsPlusVertexBuffers = supportedLimits.maxBindGroupsPlusVertexBuffers == 0 - ? WGPU_LIMIT_U32_UNDEFINED - : supportedLimits.maxBindGroupsPlusVertexBuffers, - .maxBindingsPerBindGroup = supportedLimits.maxBindGroupsPlusVertexBuffers == 0 - ? WGPU_LIMIT_U32_UNDEFINED - : supportedLimits.maxBindGroupsPlusVertexBuffers, - .maxDynamicUniformBuffersPerPipelineLayout = supportedLimits.maxDynamicUniformBuffersPerPipelineLayout == 0 - ? WGPU_LIMIT_U32_UNDEFINED - : supportedLimits.maxDynamicUniformBuffersPerPipelineLayout, .maxDynamicStorageBuffersPerPipelineLayout = supportedLimits.maxDynamicStorageBuffersPerPipelineLayout == 0 ? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.maxDynamicStorageBuffersPerPipelineLayout, + .maxStorageBuffersPerShaderStage = supportedLimits.maxStorageBuffersPerShaderStage == 0 + ? WGPU_LIMIT_U32_UNDEFINED + : supportedLimits.maxStorageBuffersPerShaderStage, .minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0 ? WGPU_LIMIT_U32_UNDEFINED : supportedLimits.minUniformBufferOffsetAlignment, @@ -413,11 +407,19 @@ bool initialize(AuroraBackend auroraBackend) { : supportedLimits.minStorageBufferOffsetAlignment, }; Log.info( - "Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n " - "minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}", + "Using limits:" + "\n maxTextureDimension1D: {}" + "\n maxTextureDimension2D: {}" + "\n maxTextureDimension3D: {}" + "\n maxTextureArrayLayers: {}" + "\n maxDynamicStorageBuffersPerPipelineLayout: {}" + "\n maxStorageBuffersPerShaderStage: {}" + "\n minUniformBufferOffsetAlignment: {}" + "\n minStorageBufferOffsetAlignment: {}", requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D, - requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment, - requiredLimits.minStorageBufferOffsetAlignment); + requiredLimits.maxTextureDimension3D, requiredLimits.maxTextureArrayLayers, + requiredLimits.maxDynamicStorageBuffersPerPipelineLayout, requiredLimits.maxStorageBuffersPerShaderStage, + requiredLimits.minUniformBufferOffsetAlignment, requiredLimits.minStorageBufferOffsetAlignment); std::vector requiredFeatures; wgpu::SupportedFeatures supportedFeatures; g_adapter.GetFeatures(&supportedFeatures); @@ -432,6 +434,9 @@ bool initialize(AuroraBackend auroraBackend) { /* clang-format off */ #if _WIN32 "use_dxc", +#ifdef NDEBUG + "emit_hlsl_debug_symbols", +#endif #endif #ifdef NDEBUG "skip_validation", @@ -453,9 +458,7 @@ bool initialize(AuroraBackend auroraBackend) { #endif .requiredFeatureCount = requiredFeatures.size(), .requiredFeatures = requiredFeatures.data(), -#ifdef WEBGPU_DAWN .requiredLimits = &requiredLimits, -#endif }); deviceDescriptor.SetUncapturedErrorCallback( [](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) {