diff --git a/lib/dolphin/GXLighting.cpp b/lib/dolphin/GXLighting.cpp index 1fa2219..d361d0c 100644 --- a/lib/dolphin/GXLighting.cpp +++ b/lib/dolphin/GXLighting.cpp @@ -134,9 +134,9 @@ void GXLoadLightObjImm(GXLightObj* light_, GXLightID id) { auto* light = reinterpret_cast(light_); realLight.pos = {light->px, light->py, light->pz}; realLight.dir = {light->nx, light->ny, light->nz}; + realLight.color = from_gx_color(light->color); realLight.cosAtt = {light->a0, light->a1, light->a2}; realLight.distAtt = {light->k0, light->k1, light->k2}; - realLight.color = from_gx_color(light->color); update_gx_state(g_gxState.lights[idx], realLight); } diff --git a/lib/dolphin/GXTexture.cpp b/lib/dolphin/GXTexture.cpp index 784a2a4..b00e90c 100644 --- a/lib/dolphin/GXTexture.cpp +++ b/lib/dolphin/GXTexture.cpp @@ -109,7 +109,7 @@ void GXLoadTexObj(GXTexObj* obj_, GXTexMapID id) { obj->dataInvalidated = false; } g_gxState.textures[id] = {*obj}; - // TODO stateDirty? + g_gxState.stateDirty = true; // TODO only if changed? } u32 GXGetTexBufferSize(u16 width, u16 height, u32 fmt, GXBool mips, u8 maxLod) { diff --git a/lib/dolphin/GXVert.cpp b/lib/dolphin/GXVert.cpp index 52d610d..80b0a0e 100644 --- a/lib/dolphin/GXVert.cpp +++ b/lib/dolphin/GXVert.cpp @@ -21,13 +21,15 @@ static inline GXAttr next_attr(size_t begin) { struct SStreamState { GXPrimitive primitive; u16 vertexCount = 0; + u16 vertexStart = 0; aurora::ByteBuffer vertexBuffer; std::vector indices; #ifndef NDEBUG GXAttr nextAttr; #endif - explicit SStreamState(GXPrimitive primitive, u16 numVerts, u16 vertexSize) noexcept : primitive(primitive) { + explicit SStreamState(GXPrimitive primitive, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept + : primitive(primitive), vertexStart(vertexStart) { vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize); if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) { indices.reserve((u32(numVerts) - 3) * 3 + 3); @@ -43,6 +45,7 @@ struct SStreamState { }; static std::optional sStreamState; +static u16 lastVertexStart = 0; void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) { #ifndef NDEBUG @@ -73,7 +76,7 @@ void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) { Log.report(LOG_FATAL, FMT_STRING("no vtx attributes enabled?")); unreachable(); } - sStreamState.emplace(primitive, nVerts, vertexSize); + sStreamState.emplace(primitive, nVerts, vertexSize, g_gxState.stateDirty ? 0 : lastVertexStart); } static inline void check_attr_order(GXAttr attr) noexcept { @@ -96,26 +99,27 @@ void GXPosition3f32(float x, float y, float z) { state.vertexBuffer.append(&x, sizeof(float)); state.vertexBuffer.append(&y, sizeof(float)); state.vertexBuffer.append(&z, sizeof(float)); + auto curVertex = state.vertexStart + state.vertexCount; if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) { // pass } else if (state.primitive == GX_TRIANGLEFAN) { - state.indices.push_back(0); - state.indices.push_back(state.vertexCount - 1); + state.indices.push_back(state.vertexStart); + state.indices.push_back(curVertex - 1); } else if (state.primitive == GX_TRIANGLESTRIP) { if ((state.vertexCount & 1) == 0) { - state.indices.push_back(state.vertexCount - 2); - state.indices.push_back(state.vertexCount - 1); + state.indices.push_back(curVertex - 2); + state.indices.push_back(curVertex - 1); } else { - state.indices.push_back(state.vertexCount - 1); - state.indices.push_back(state.vertexCount - 2); + state.indices.push_back(curVertex - 1); + state.indices.push_back(curVertex - 2); } } else if (state.primitive == GX_QUADS) { if ((state.vertexCount & 3) == 3) { - state.indices.push_back(state.vertexCount - 3); - state.indices.push_back(state.vertexCount - 1); + state.indices.push_back(curVertex - 3); + state.indices.push_back(curVertex - 1); } } - state.indices.push_back(state.vertexCount); + state.indices.push_back(curVertex); ++state.vertexCount; } @@ -171,18 +175,27 @@ void GXEnd() { } const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size()); const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices}); - aurora::gfx::stream::PipelineConfig config{}; - populate_pipeline_config(config, GX_TRIANGLES); - const auto info = aurora::gfx::gx::build_shader_info(config.shaderConfig); - const auto pipeline = aurora::gfx::pipeline_ref(config); - aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{ - .pipeline = pipeline, - .vertRange = vertRange, - .uniformRange = build_uniform(info), - .indexRange = indexRange, - .indexCount = static_cast(sStreamState->indices.size()), - .bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, {}), - .dstAlpha = g_gxState.dstAlpha, - }); + if (g_gxState.stateDirty) { + aurora::gfx::stream::PipelineConfig config{}; + populate_pipeline_config(config, GX_TRIANGLES); + const auto info = aurora::gfx::gx::build_shader_info(config.shaderConfig); + const auto pipeline = aurora::gfx::pipeline_ref(config); + aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{ + .pipeline = pipeline, + .vertRange = vertRange, + .uniformRange = build_uniform(info), + .indexRange = indexRange, + .indexCount = static_cast(sStreamState->indices.size()), + .bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, {}), + .dstAlpha = g_gxState.dstAlpha, + }); + } else { + aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{ + .vertRange = vertRange, + .indexRange = indexRange, + .indexCount = static_cast(sStreamState->indices.size()), + }); + } + lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount; sStreamState.reset(); } diff --git a/lib/gfx/common.cpp b/lib/gfx/common.cpp index ad9cb78..a448f46 100644 --- a/lib/gfx/common.cpp +++ b/lib/gfx/common.cpp @@ -122,19 +122,25 @@ static ByteBuffer g_verts; static ByteBuffer g_uniforms; static ByteBuffer g_indices; static ByteBuffer g_storage; -static ByteBuffer g_staticStorage; static ByteBuffer g_textureUpload; WGPUBuffer g_vertexBuffer; WGPUBuffer g_uniformBuffer; WGPUBuffer g_indexBuffer; WGPUBuffer g_storageBuffer; -size_t g_staticStorageLastSize = 0; static std::array g_stagingBuffers; static WGPUSupportedLimits g_cachedLimits; static ShaderState g_state; static PipelineRef g_currentPipeline; +// for imgui debug +size_t g_drawCallCount; +size_t g_mergedDrawCallCount; +size_t g_lastVertSize; +size_t g_lastUniformSize; +size_t g_lastIndexSize; +size_t g_lastStorageSize; + using CommandList = std::vector; struct ClipRect { int32_t x; @@ -217,8 +223,25 @@ static inline void push_command(CommandType type, const Command::Data& data) { .data = data, }); } +static inline Command& get_last_draw_command(ShaderType type) { + if (g_currentRenderPass == UINT32_MAX) { + Log.report(LOG_FATAL, FMT_STRING("No last command")); + unreachable(); + } + auto& last = g_renderPasses[g_currentRenderPass].commands.back(); + if (last.type != CommandType::Draw || last.data.draw.type != type) { + Log.report(LOG_FATAL, FMT_STRING("Last command invalid: {} {}, expected {} {}"), magic_enum::enum_name(last.type), + magic_enum::enum_name(last.data.draw.type), magic_enum::enum_name(CommandType::Draw), + magic_enum::enum_name(type)); + unreachable(); + } + return last; +} -static void push_draw_command(ShaderDrawCommand data) { push_command(CommandType::Draw, Command::Data{.draw = data}); } +static void push_draw_command(ShaderDrawCommand data) { + push_command(CommandType::Draw, Command::Data{.draw = data}); + ++g_drawCallCount; +} static Command::Data::SetViewportCommand g_cachedViewport; void set_viewport(float left, float top, float width, float height, float znear, float zfar) noexcept { @@ -272,6 +295,22 @@ void push_draw_command(stream::DrawData data) { push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data}); } template <> +void merge_draw_command(stream::DrawData data) { + auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream; + if (last.vertRange.offset + last.vertRange.size != data.vertRange.offset) { + Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), last.vertRange.offset + last.vertRange.size, + data.vertRange.offset); + } + if (last.indexRange.offset + last.indexRange.size != data.indexRange.offset) { + Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), last.indexRange.offset + last.indexRange.size, + data.indexRange.offset); + } + last.vertRange.size += data.vertRange.size; + last.indexRange.size += data.indexRange.size; + last.indexCount += data.indexCount; + ++g_mergedDrawCallCount; +} +template <> PipelineRef pipeline_ref(stream::PipelineConfig config) { return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); }); } @@ -515,6 +554,9 @@ void begin_frame() { mapBuffer(g_storage, StorageBufferSize); mapBuffer(g_textureUpload, TextureUploadSize); + g_drawCallCount = 0; + g_mergedDrawCallCount = 0; + g_renderPasses.emplace_back(); g_renderPasses[0].clearColor = gx::g_gxState.clearColor; g_currentRenderPass = 0; @@ -522,19 +564,13 @@ void begin_frame() { // push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor}); } -// for imgui debug -size_t g_lastVertSize; -size_t g_lastUniformSize; -size_t g_lastIndexSize; -size_t g_lastStorageSize; - void end_frame(WGPUCommandEncoder cmd) { uint64_t bufferOffset = 0; const auto writeBuffer = [&](ByteBuffer& buf, WGPUBuffer& out, uint64_t size, std::string_view label) { const auto writeSize = buf.size(); // Only need to copy this many bytes if (writeSize > 0) { wgpuCommandEncoderCopyBufferToBuffer(cmd, g_stagingBuffers[currentStagingBuffer], bufferOffset, out, 0, - writeSize); + ALIGN(writeSize, 4)); buf.clear(); } bufferOffset += size; @@ -736,8 +772,8 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) { target.append_zeroes(length + padding); return {static_cast(begin), static_cast(length + padding)}; } -Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 4); } -Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 4); } +Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0); } +Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0); } Range push_uniform(const uint8_t* data, size_t length) { return push(g_uniforms, data, length, g_cachedLimits.limits.minUniformBufferOffsetAlignment); } diff --git a/lib/gfx/common.hpp b/lib/gfx/common.hpp index 732db6c..1a7bbd2 100644 --- a/lib/gfx/common.hpp +++ b/lib/gfx/common.hpp @@ -127,7 +127,6 @@ extern WGPUBuffer g_vertexBuffer; extern WGPUBuffer g_uniformBuffer; extern WGPUBuffer g_indexBuffer; extern WGPUBuffer g_storageBuffer; -extern size_t g_staticStorageLastSize; using BindGroupRef = HashType; using PipelineRef = HashType; @@ -188,6 +187,8 @@ template const State& get_state(); template void push_draw_command(DrawData data); +template +void merge_draw_command(DrawData data); template PipelineRef pipeline_ref(PipelineConfig config); diff --git a/lib/gfx/gx.cpp b/lib/gfx/gx.cpp index 1a12e6a..9e4f0e4 100644 --- a/lib/gfx/gx.cpp +++ b/lib/gfx/gx.cpp @@ -473,6 +473,7 @@ Range build_uniform(const ShaderInfo& info) noexcept { } buf.append(&tex.texObj.lodBias, 4); } + g_gxState.stateDirty = false; return range; }