#include "shader.hpp" #include "../../gpu.hpp" #include "../common.hpp" #include #include namespace aurora::gfx::model { static logvisor::Module Log("aurora::gfx::model"); static const std::vector* vtxData; static const std::vector* nrmData; static const std::vector>* tex0TcData; static const std::vector>* tcData; static std::optional cachedVtxRange; static std::optional cachedNrmRange; static std::optional cachedPackedTcRange; static std::optional cachedTcRange; static inline void read_vert(ByteBuffer& out, const u8* data) noexcept { size_t offset = 0; for (const auto& type : gx::g_gxState.vtxDesc) { if (type == GX::INDEX8) { const auto v = static_cast(data[offset]); // expand to s16 out.append(&v, 2); ++offset; } else if (type == GX::INDEX16) { const s16 v = metaforce::SBig(*reinterpret_cast(data + offset)); out.append(&v, 2); offset += 2; } } constexpr size_t align = 4; // Sint16x2 if (offset % align != 0) { out.append_zeroes(align - (offset % align)); } } static absl::flat_hash_map> sCachedDisplayLists; void queue_surface(const u8* dlStart, u32 dlSize) noexcept { const auto hash = xxh3_hash_s(dlStart, dlSize, 0); Range vertRange, idxRange; u32 numIndices = 0; auto it = sCachedDisplayLists.find(hash); if (it != sCachedDisplayLists.end()) { const auto& [verts, indices] = it->second; numIndices = indices.size() / 2; vertRange = push_verts(verts.data(), verts.size()); idxRange = push_indices(indices.data(), indices.size()); } else { ByteBuffer vtxBuf; ByteBuffer idxBuf; u8 inVtxSize = 0; u8 outVtxSize = 0; for (const auto& type : gx::g_gxState.vtxDesc) { if (type == GX::NONE || type == GX::DIRECT) { continue; } if (type == GX::INDEX8) { ++inVtxSize; outVtxSize += 2; } else if (type == GX::INDEX16) { inVtxSize += 2; outVtxSize += 2; } else { Log.report(logvisor::Fatal, FMT_STRING("unexpected vtx type {}"), type); unreachable(); } } outVtxSize = ALIGN(outVtxSize, 4); u16 vtxStart = 0; size_t offset = 0; while (offset < dlSize - 6) { const auto header = dlStart[offset]; const auto primitive = static_cast(header & 0xF8); const auto dlVtxCount = metaforce::SBig(*reinterpret_cast(dlStart + offset + 1)); offset += 3; if (primitive == 0) { break; } if (primitive != GX::TRIANGLES && primitive != GX::TRIANGLESTRIP && primitive != GX::TRIANGLEFAN) { Log.report(logvisor::Fatal, FMT_STRING("queue_surface: unsupported primitive type {}"), primitive); unreachable(); } vtxBuf.reserve_extra(dlVtxCount * outVtxSize); if (dlVtxCount > 3 && (primitive == GX::TRIANGLEFAN || primitive == GX::TRIANGLESTRIP)) { idxBuf.reserve_extra(((u32(dlVtxCount) - 3) * 3 + 3) * 2); } else { idxBuf.reserve_extra(dlVtxCount * 2); } u16 curVert = vtxStart; for (u16 v = 0; v < dlVtxCount; ++v) { read_vert(vtxBuf, dlStart + offset); offset += inVtxSize; if (primitive == GX::TRIANGLES || v < 3) { idxBuf.append(&curVert, 2); ++numIndices; } else if (primitive == GX::TRIANGLEFAN) { const std::array idxs{ vtxStart, u16(curVert - 1), curVert, }; idxBuf.append(idxs.data(), 6); numIndices += 3; } else if (primitive == GX::TRIANGLESTRIP) { if ((v & 1) == 0) { const std::array idxs{ u16(curVert - 2), u16(curVert - 1), curVert, }; idxBuf.append(idxs.data(), 6); } else { const std::array idxs{ u16(curVert - 1), u16(curVert - 2), curVert, }; idxBuf.append(idxs.data(), 6); } numIndices += 3; } ++curVert; } vtxStart += dlVtxCount; } vertRange = push_verts(vtxBuf.data(), vtxBuf.size()); idxRange = push_indices(idxBuf.data(), idxBuf.size()); sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf)); } Range sVtxRange, sNrmRange, 
  Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange;
  if (cachedVtxRange) {
    sVtxRange = *cachedVtxRange;
  } else {
    sVtxRange = push_storage(reinterpret_cast<const u8*>(vtxData->data()), vtxData->size() * 16);
    cachedVtxRange = sVtxRange;
  }
  if (cachedNrmRange) {
    sNrmRange = *cachedNrmRange;
  } else {
    sNrmRange = push_storage(reinterpret_cast<const u8*>(nrmData->data()), nrmData->size() * 16);
    cachedNrmRange = sNrmRange;
  }
  if (cachedTcRange) {
    sTcRange = *cachedTcRange;
  } else {
    sTcRange = push_storage(reinterpret_cast<const u8*>(tcData->data()), tcData->size() * 8);
    cachedTcRange = sTcRange;
  }
  if (cachedPackedTcRange) {
    sPackedTcRange = *cachedPackedTcRange;
  } else if (tcData == tex0TcData) {
    // TEX0 and the remaining texcoord sets share one array; reuse the range.
    sPackedTcRange = sTcRange;
  } else {
    sPackedTcRange = push_storage(reinterpret_cast<const u8*>(tex0TcData->data()), tex0TcData->size() * 8);
    cachedPackedTcRange = sPackedTcRange;
  }

  model::PipelineConfig config{};
  populate_pipeline_config(config, GX::TRIANGLES);
  const auto info = gx::build_shader_info(config.shaderConfig);
  const gx::BindGroupRanges ranges{
      .vtxDataRange = sVtxRange,
      .nrmDataRange = sNrmRange,
      .tcDataRange = sTcRange,
      .packedTcDataRange = sPackedTcRange,
  };
  const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
  const auto pipeline = pipeline_ref(config);

  push_draw_command(model::DrawData{
      .pipeline = pipeline,
      .vertRange = vertRange,
      .idxRange = idxRange,
      .dataRanges = ranges,
      .uniformRange = build_uniform(info),
      .indexCount = numIndices,
      .bindGroups = bindGroups,
      .dstAlpha = gx::g_gxState.dstAlpha,
  });
}

State construct_state() { return {}; }

wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] PipelineConfig config) {
  const auto info = build_shader_info(config.shaderConfig); // TODO remove
  const auto shader = build_shader(config.shaderConfig, info);

  // Indexed attributes are packed four at a time as Sint16x4, with a trailing
  // Sint16x2 for a remainder of 1-2 indices (a remainder of 3 widens to a
  // full Sint16x4). This matches the s16 packing and 4-byte padding produced
  // by read_vert; e.g. indexedAttributeCount == 5 yields one Sint16x4 plus
  // one Sint16x2, for an arrayStride of 12.
  std::array<wgpu::VertexAttribute, GX::VA_MAX_ATTR> vtxAttrs{};
  auto [num4xAttr, rem] = std::div(config.shaderConfig.indexedAttributeCount, 4);
  u32 num2xAttr = 0;
  if (rem > 2) {
    ++num4xAttr;
  } else if (rem > 0) {
    ++num2xAttr;
  }

  u32 offset = 0;
  for (u32 i = 0; i < num4xAttr; ++i) {
    vtxAttrs[i] = {
        .format = wgpu::VertexFormat::Sint16x4,
        .offset = offset,
        .shaderLocation = i,
    };
    offset += 8;
  }
  for (u32 i = 0; i < num2xAttr; ++i) {
    const u32 idx = num4xAttr + i;
    vtxAttrs[idx] = {
        .format = wgpu::VertexFormat::Sint16x2,
        .offset = offset,
        .shaderLocation = idx,
    };
    offset += 4;
  }

  const std::array vtxBuffers{wgpu::VertexBufferLayout{
      .arrayStride = offset,
      .stepMode = wgpu::VertexStepMode::Vertex,
      .attributeCount = num4xAttr + num2xAttr,
      .attributes = vtxAttrs.data(),
  }};

  return build_pipeline(config, info, vtxBuffers, shader, "Model Pipeline");
}
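
// Draws one queued surface. The dynamic offsets passed to SetBindGroup must
// appear in the same order as the bindings in the uniform bind group layout:
// uniform data, then the position, normal, packed-texcoord, and texcoord
// storage buffers (the ranges collected in gx::BindGroupRanges above).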
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
  if (!bind_pipeline(data.pipeline, pass)) {
    return;
  }

  const std::array offsets{
      data.uniformRange.offset,
      storage_offset(data.dataRanges.vtxDataRange),
      storage_offset(data.dataRanges.nrmDataRange),
      storage_offset(data.dataRanges.packedTcDataRange),
      storage_offset(data.dataRanges.tcDataRange),
  };
  pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data());
  // Sampler/texture groups are only present for textured surfaces.
  if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) {
    pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup));
    pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup));
  }
  pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size);
  pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint16, data.idxRange.offset, data.idxRange.size);
  if (data.dstAlpha != UINT32_MAX) {
    // GX destination alpha is emulated via the blend constant.
    const wgpu::Color color{0.f, 0.f, 0.f, data.dstAlpha / 255.f};
    pass.SetBlendConstant(&color);
  }
  pass.DrawIndexed(data.indexCount);
}
} // namespace aurora::gfx::model

static absl::flat_hash_map<GX::Attr, aurora::gfx::Range> sCachedRanges; // currently unused

// Remembers the client-side array for an attribute and invalidates its cached
// GPU range so the next queue_surface call re-uploads it. The stride argument
// is accepted for GX API compatibility but unused.
template <typename Vec>
static inline void cache_array(const void* data, Vec*& outPtr, std::optional<aurora::gfx::Range>& outRange, u8 stride) {
  Vec* vecPtr = static_cast<Vec*>(data);
  outPtr = vecPtr;
  outRange.reset();
}

void GXSetArray(GX::Attr attr, const void* data, u8 stride) noexcept {
  using namespace aurora::gfx::model;
  switch (attr) {
  case GX::VA_POS:
    cache_array(data, vtxData, cachedVtxRange, stride);
    break;
  case GX::VA_NRM:
    cache_array(data, nrmData, cachedNrmRange, stride);
    break;
  case GX::VA_TEX0:
    cache_array(data, tex0TcData, cachedPackedTcRange, stride);
    break;
  case GX::VA_TEX1:
    cache_array(data, tcData, cachedTcRange, stride);
    break;
  default:
    Log.report(logvisor::Fatal, FMT_STRING("GXSetArray: invalid attr {}"), attr);
    unreachable();
  }
}

void GXCallDisplayList(const void* data, u32 nbytes) noexcept {
  aurora::gfx::model::queue_surface(static_cast<const u8*>(data), nbytes);
}
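
// Illustrative client-side call sequence (hypothetical buffers; the strides
// match the element sizes queue_surface assumes when uploading): bind each
// attribute array once, then submit display lists whose vertices index into
// those arrays.
//
//   GXSetArray(GX::VA_POS, positions.data(), 16);  // 16-byte vec3 elements
//   GXSetArray(GX::VA_NRM, normals.data(), 16);
//   GXSetArray(GX::VA_TEX0, uvs.data(), 8);        // 8-byte vec2 elements
//   GXCallDisplayList(displayList.data(), u32(displayList.size()));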