diff --git a/Runtime/Graphics/CCubeModel.cpp b/Runtime/Graphics/CCubeModel.cpp index 29ba2a578..9fee6aff1 100644 --- a/Runtime/Graphics/CCubeModel.cpp +++ b/Runtime/Graphics/CCubeModel.cpp @@ -259,8 +259,8 @@ void CCubeModel::EnableShadowMaps(const CTexture& shadowTex, const zeus::CTransf void CCubeModel::DisableShadowMaps() { sRenderModelShadow = false; } void CCubeModel::SetArraysCurrent() { - CGX::SetArray(GX::VA_POS, x0_modelInstance.GetVertexPointer()); - CGX::SetArray(GX::VA_NRM, x0_modelInstance.GetNormalPointer()); + CGX::SetArray(GX::VA_POS, x0_modelInstance.GetVertexPointer(), true); + CGX::SetArray(GX::VA_NRM, x0_modelInstance.GetNormalPointer(), true); SetStaticArraysCurrent(); } @@ -280,8 +280,8 @@ void CCubeModel::SetRenderModelBlack(bool v) { } void CCubeModel::SetSkinningArraysCurrent(TConstVectorRef positions, TConstVectorRef normals) { - CGX::SetArray(GX::VA_POS, positions); - CGX::SetArray(GX::VA_NRM, normals); + CGX::SetArray(GX::VA_POS, positions, false); + CGX::SetArray(GX::VA_NRM, normals, false); // colors unused SetStaticArraysCurrent(); } @@ -294,21 +294,21 @@ void CCubeModel::SetStaticArraysCurrent() { sUsingPackedLightmaps = false; } if (sUsingPackedLightmaps) { - CGX::SetArray(GX::VA_TEX0, packedTexCoords); + CGX::SetArray(GX::VA_TEX0, packedTexCoords, true); } else { - CGX::SetArray(GX::VA_TEX0, texCoords); + CGX::SetArray(GX::VA_TEX0, texCoords, true); } // TexCoord1 is currently used for all remaining - CGX::SetArray(GX::VA_TEX1, texCoords); + CGX::SetArray(GX::VA_TEX1, texCoords, true); CCubeMaterial::KillCachedViewDepState(); } void CCubeModel::SetUsingPackedLightmaps(bool v) { sUsingPackedLightmaps = v; if (v) { - CGX::SetArray(GX::VA_TEX0, x0_modelInstance.GetPackedTCPointer()); + CGX::SetArray(GX::VA_TEX0, x0_modelInstance.GetPackedTCPointer(), true); } else { - CGX::SetArray(GX::VA_TEX0, x0_modelInstance.GetTCPointer()); + CGX::SetArray(GX::VA_TEX0, x0_modelInstance.GetTCPointer(), true); } } diff --git a/Runtime/Graphics/CGX.hpp b/Runtime/Graphics/CGX.hpp index e904f9262..6e8b39674 100644 --- a/Runtime/Graphics/CGX.hpp +++ b/Runtime/Graphics/CGX.hpp @@ -123,9 +123,9 @@ static inline void SetAlphaCompare(GX::Compare comp0, u8 ref0, GX::AlphaOp op, G } template -static inline void SetArray(GX::Attr attr, const std::vector* data) noexcept { +static inline void SetArray(GX::Attr attr, const std::vector* data, bool isStatic) noexcept { if (data != nullptr && sGXState.x0_arrayPtrs[attr - GX::VA_POS] != data) { - GXSetArray(attr, data, sizeof(T)); + GXSetArray(attr, data, isStatic ? 1 : 0); } } diff --git a/aurora/CMakeLists.txt b/aurora/CMakeLists.txt index 2c54e4625..edee18b5a 100644 --- a/aurora/CMakeLists.txt +++ b/aurora/CMakeLists.txt @@ -19,11 +19,8 @@ add_library(aurora STATIC target_compile_definitions(aurora PRIVATE IMGUI_USER_CONFIG="imconfig_user.h") # IMGUI_USE_WCHAR32 target_include_directories(aurora PUBLIC include ../) target_include_directories(aurora PRIVATE ../imgui ../extern/imgui) -target_include_directories(aurora PRIVATE - ../extern/dawn/src - ../extern/dawn/third_party/abseil-cpp - ${CMAKE_CURRENT_BINARY_DIR}/dawn/gen/src) # for hacks :) -target_link_libraries(aurora PRIVATE dawn_native dawncpp webgpu_dawn zeus logvisor SDL2-static xxhash) +target_link_libraries(aurora PRIVATE dawn_native dawncpp webgpu_dawn zeus logvisor SDL2-static xxhash + absl::btree absl::flat_hash_map) if (APPLE) target_compile_definitions(aurora PRIVATE DAWN_ENABLE_BACKEND_METAL) target_sources(aurora PRIVATE lib/dawn/MetalBinding.mm) diff --git a/aurora/lib/gfx/colored_quad/shader.cpp b/aurora/lib/gfx/colored_quad/shader.cpp index c1db158fe..85ce3e67b 100644 --- a/aurora/lib/gfx/colored_quad/shader.cpp +++ b/aurora/lib/gfx/colored_quad/shader.cpp @@ -302,9 +302,9 @@ void render(const State& state, const DrawData& data, const wgpu::RenderPassEnco return; } - const std::array offsets{data.uniformRange.first}; + const std::array offsets{data.uniformRange.offset}; pass.SetBindGroup(0, state.uniformBindGroup, offsets.size(), offsets.data()); - pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.first, data.vertRange.second); + pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size); pass.Draw(4); } } // namespace aurora::gfx::colored_quad diff --git a/aurora/lib/gfx/common.cpp b/aurora/lib/gfx/common.cpp index ef27e5cdd..9ea9a2452 100644 --- a/aurora/lib/gfx/common.cpp +++ b/aurora/lib/gfx/common.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace aurora::gfx { static logvisor::Module Log("aurora::gfx"); @@ -187,10 +187,10 @@ std::mutex g_pipelineMutex; static std::thread g_pipelineThread; static std::atomic_bool g_pipelineThreadEnd; static std::condition_variable g_pipelineCv; -static std::unordered_map g_pipelines; +static absl::flat_hash_map g_pipelines; static std::deque> g_queuedPipelines; -static std::unordered_map g_cachedBindGroups; -static std::unordered_map g_cachedSamplers; +static absl::flat_hash_map g_cachedBindGroups; +static absl::flat_hash_map g_cachedSamplers; std::atomic_uint32_t queuedPipelines; std::atomic_uint32_t createdPipelines; @@ -198,10 +198,12 @@ static ByteBuffer g_verts; static ByteBuffer g_uniforms; static ByteBuffer g_indices; static ByteBuffer g_storage; +static ByteBuffer g_staticStorage; wgpu::Buffer g_vertexBuffer; wgpu::Buffer g_uniformBuffer; wgpu::Buffer g_indexBuffer; wgpu::Buffer g_storageBuffer; +size_t g_staticStorageLastSize = 0; static ShaderState g_state; static PipelineRef g_currentPipeline; @@ -213,7 +215,7 @@ static PipelineRef find_pipeline(PipelineCreateCommand command, NewPipelineCallb bool found = false; { std::scoped_lock guard{g_pipelineMutex}; - found = g_pipelines.find(hash) != g_pipelines.end(); + found = g_pipelines.contains(hash); if (!found) { const auto ref = std::find_if(g_queuedPipelines.begin(), g_queuedPipelines.end(), [=](auto v) { return v.first == hash; }); @@ -364,11 +366,10 @@ static void pipeline_worker() { // std::this_thread::sleep_for(std::chrono::milliseconds{1500}); { std::scoped_lock lock{g_pipelineMutex}; - if (g_pipelines.contains(cb.first)) { + if (!g_pipelines.try_emplace(cb.first, std::move(result)).second) { Log.report(logvisor::Fatal, FMT_STRING("Duplicate pipeline {}"), cb.first); unreachable(); } - g_pipelines[cb.first] = result; g_queuedPipelines.pop_front(); hasMore = !g_queuedPipelines.empty(); } @@ -384,7 +385,7 @@ void initialize() { const wgpu::BufferDescriptor descriptor{ .label = "Shared Uniform Buffer", .usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst, - .size = 134217728, // 128mb + .size = 5242880, // 5mb }; g_uniformBuffer = g_device.CreateBuffer(&descriptor); } @@ -392,7 +393,7 @@ void initialize() { const wgpu::BufferDescriptor descriptor{ .label = "Shared Vertex Buffer", .usage = wgpu::BufferUsage::Vertex | wgpu::BufferUsage::CopyDst, - .size = 16777216, // 16mb + .size = 5242880, // 5mb }; g_vertexBuffer = g_device.CreateBuffer(&descriptor); } @@ -400,7 +401,7 @@ void initialize() { const wgpu::BufferDescriptor descriptor{ .label = "Shared Index Buffer", .usage = wgpu::BufferUsage::Index | wgpu::BufferUsage::CopyDst, - .size = 4194304, // 4mb + .size = 2097152, // 2mb }; g_indexBuffer = g_device.CreateBuffer(&descriptor); } @@ -439,18 +440,33 @@ void shutdown() { } void render(const wgpu::RenderPassEncoder& pass) { - const auto writeBuffer = [](ByteBuffer& buf, wgpu::Buffer& out) { + const auto writeBuffer = [](ByteBuffer& buf, wgpu::Buffer& out, std::string_view label) { const auto size = buf.size(); + // Log.report(logvisor::Info, FMT_STRING("{} buffer usage: {}"), label, size); if (size > 0) { g_queue.WriteBuffer(out, 0, buf.data(), size); buf.clear(); buf.reserve_extra(size); // Reserve size from previous frame } }; - writeBuffer(g_verts, g_vertexBuffer); - writeBuffer(g_uniforms, g_uniformBuffer); - writeBuffer(g_indices, g_indexBuffer); - writeBuffer(g_storage, g_storageBuffer); + writeBuffer(g_verts, g_vertexBuffer, "Vertex"); + writeBuffer(g_uniforms, g_uniformBuffer, "Uniform"); + writeBuffer(g_indices, g_indexBuffer, "Index"); + { + const auto staticSize = g_staticStorage.size(); + if (staticSize > g_staticStorageLastSize) { + g_queue.WriteBuffer(g_storageBuffer, g_staticStorageLastSize, g_staticStorage.data() + g_staticStorageLastSize, + staticSize - g_staticStorageLastSize); + g_staticStorageLastSize = staticSize; + } + const auto size = g_storage.size(); + if (size > 0) { + g_queue.WriteBuffer(g_storageBuffer, staticSize, g_storage.data(), size); + g_storage.clear(); + g_storage.reserve_extra(size); // Reserve size from previous frame + } + // Log.report(logvisor::Info, FMT_STRING("Static storage: {}, storage: {}"), staticSize, size); + } g_currentPipeline = UINT64_MAX; @@ -498,10 +514,11 @@ bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) { return true; } std::lock_guard guard{g_pipelineMutex}; - if (!g_pipelines.contains(ref)) { + const auto it = g_pipelines.find(ref); + if (it == g_pipelines.end()) { return false; } - pass.SetPipeline(g_pipelines[ref]); + pass.SetPipeline(it->second); g_currentPipeline = ref; return true; } @@ -522,7 +539,7 @@ static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, target.append_zeroes(padding); } } - return {begin, begin + length + padding}; + return {static_cast(begin), static_cast(length + padding)}; } static inline Range map(ByteBuffer& target, size_t length, size_t alignment) { size_t padding = 0; @@ -534,7 +551,7 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) { } auto begin = target.size(); target.append_zeroes(length + padding); - return {begin, begin + length + padding}; + return {static_cast(begin), static_cast(length + padding)}; } Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0 /* TODO? */); } Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0 /* TODO? */); } @@ -548,48 +565,57 @@ Range push_storage(const uint8_t* data, size_t length) { g_device.GetLimits(&limits); return push(g_storage, data, length, limits.limits.minStorageBufferOffsetAlignment); } +Range push_static_storage(const uint8_t* data, size_t length) { + wgpu::SupportedLimits limits; + g_device.GetLimits(&limits); + auto range = push(g_staticStorage, data, length, limits.limits.minStorageBufferOffsetAlignment); + range.isStatic = true; + return range; +} std::pair map_verts(size_t length) { const auto range = map(g_verts, length, 0 /* TODO? */); - return {ByteBuffer{g_verts.data() + range.first, range.second - range.first}, range}; + return {ByteBuffer{g_verts.data() + range.offset, range.size}, range}; } std::pair map_indices(size_t length) { const auto range = map(g_indices, length, 0 /* TODO? */); - return {ByteBuffer{g_indices.data() + range.first, range.second - range.first}, range}; + return {ByteBuffer{g_indices.data() + range.offset, range.size}, range}; } std::pair map_uniform(size_t length) { wgpu::SupportedLimits limits; g_device.GetLimits(&limits); const auto range = map(g_uniforms, length, limits.limits.minUniformBufferOffsetAlignment); - return {ByteBuffer{g_uniforms.data() + range.first, range.second - range.first}, range}; + return {ByteBuffer{g_uniforms.data() + range.offset, range.size}, range}; } std::pair map_storage(size_t length) { wgpu::SupportedLimits limits; g_device.GetLimits(&limits); const auto range = map(g_storage, length, limits.limits.minStorageBufferOffsetAlignment); - return {ByteBuffer{g_storage.data() + range.first, range.second - range.first}, range}; + return {ByteBuffer{g_storage.data() + range.offset, range.size}, range}; } BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) { const auto id = xxh3_hash(descriptor); if (!g_cachedBindGroups.contains(id)) { - g_cachedBindGroups[id] = g_device.CreateBindGroup(&descriptor); + g_cachedBindGroups.try_emplace(id, g_device.CreateBindGroup(&descriptor)); } return id; } const wgpu::BindGroup& find_bind_group(BindGroupRef id) { - if (!g_cachedBindGroups.contains(id)) { + const auto it = g_cachedBindGroups.find(id); + if (it == g_cachedBindGroups.end()) { Log.report(logvisor::Fatal, FMT_STRING("get_bind_group: failed to locate {}"), id); unreachable(); } - return g_cachedBindGroups[id]; + return it->second; } const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) { const auto id = xxh3_hash(descriptor); - if (!g_cachedSamplers.contains(id)) { - g_cachedSamplers[id] = g_device.CreateSampler(&descriptor); + auto it = g_cachedSamplers.find(id); + if (it == g_cachedSamplers.end()) { + it = g_cachedSamplers.try_emplace(id, g_device.CreateSampler(&descriptor)).first; } - return g_cachedSamplers[id]; + return it->second; } uint32_t align_uniform(uint32_t value) { diff --git a/aurora/lib/gfx/common.hpp b/aurora/lib/gfx/common.hpp index 8f12aee3c..f43f05b72 100644 --- a/aurora/lib/gfx/common.hpp +++ b/aurora/lib/gfx/common.hpp @@ -126,6 +126,7 @@ extern wgpu::Buffer g_vertexBuffer; extern wgpu::Buffer g_uniformBuffer; extern wgpu::Buffer g_indexBuffer; extern wgpu::Buffer g_storageBuffer; +extern size_t g_staticStorageLastSize; struct TextureRef { wgpu::Texture texture; @@ -149,7 +150,14 @@ using BindGroupRef = uint64_t; using PipelineRef = uint64_t; using SamplerRef = uint64_t; using ShaderRef = uint64_t; -using Range = std::pair; +struct Range { + uint32_t offset; + uint32_t size; + bool isStatic; +}; +static inline uint32_t storage_offset(Range range) { + return range.isStatic ? range.offset : range.offset + g_staticStorageLastSize; +} enum class ShaderType { Aabb, @@ -182,9 +190,22 @@ static inline Range push_uniform(const T& data) { } Range push_storage(const uint8_t* data, size_t length); template +static inline Range push_storage(ArrayRef data) { + return push_storage(reinterpret_cast(data.data()), data.size() * sizeof(T)); +} +template static inline Range push_storage(const T& data) { return push_storage(reinterpret_cast(&data), sizeof(T)); } +Range push_static_storage(const uint8_t* data, size_t length); +template +static inline Range push_static_storage(ArrayRef data) { + return push_static_storage(reinterpret_cast(data.data()), data.size() * sizeof(T)); +} +template +static inline Range push_static_storage(const T& data) { + return push_static_storage(reinterpret_cast(&data), sizeof(T)); +} std::pair map_verts(size_t length); std::pair map_indices(size_t length); std::pair map_uniform(size_t length); diff --git a/aurora/lib/gfx/gx.cpp b/aurora/lib/gfx/gx.cpp index 02e0819a8..427b51791 100644 --- a/aurora/lib/gfx/gx.cpp +++ b/aurora/lib/gfx/gx.cpp @@ -3,7 +3,7 @@ #include "../gpu.hpp" #include "common.hpp" -#include +#include using aurora::gfx::gx::g_gxState; static logvisor::Module Log("aurora::gx"); @@ -537,8 +537,8 @@ Range build_uniform(const ShaderInfo& info) noexcept { return range; } -static std::unordered_map sUniformBindGroupLayouts; -static std::unordered_map> sTextureBindGroupLayouts; +static absl::flat_hash_map sUniformBindGroupLayouts; +static absl::flat_hash_map> sTextureBindGroupLayouts; GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config, const BindGroupRanges& ranges) noexcept { @@ -555,25 +555,25 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi wgpu::BindGroupEntry{ .binding = 1, .buffer = g_storageBuffer, - .size = ranges.vtxDataRange.second - ranges.vtxDataRange.first, + .size = ranges.vtxDataRange.size, }, // Normals wgpu::BindGroupEntry{ .binding = 2, .buffer = g_storageBuffer, - .size = ranges.nrmDataRange.second - ranges.nrmDataRange.first, + .size = ranges.nrmDataRange.size, }, // UVs wgpu::BindGroupEntry{ .binding = 3, .buffer = g_storageBuffer, - .size = ranges.tcDataRange.second - ranges.tcDataRange.first, + .size = ranges.tcDataRange.size, }, // Packed UVs wgpu::BindGroupEntry{ .binding = 4, .buffer = g_storageBuffer, - .size = ranges.packedTcDataRange.second - ranges.packedTcDataRange.first, + .size = ranges.packedTcDataRange.size, }, }; std::array samplerEntries; @@ -622,8 +622,9 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept { GXBindGroupLayouts out; u32 uniformSizeKey = info.uniformSize + (config.denormalizedVertexAttributes ? 0 : 1); - if (sUniformBindGroupLayouts.contains(uniformSizeKey)) { - out.uniformLayout = sUniformBindGroupLayouts[uniformSizeKey]; + const auto uniformIt = sUniformBindGroupLayouts.find(uniformSizeKey); + if (uniformIt != sUniformBindGroupLayouts.end()) { + out.uniformLayout = uniformIt->second; } else { const std::array uniformLayoutEntries{ wgpu::BindGroupLayoutEntry{ @@ -683,8 +684,9 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader } u32 textureCount = info.sampledTextures.count(); - if (sTextureBindGroupLayouts.contains(textureCount)) { - const auto& [sl, tl] = sTextureBindGroupLayouts[textureCount]; + const auto textureIt = sTextureBindGroupLayouts.find(textureCount); + if (textureIt != sTextureBindGroupLayouts.end()) { + const auto& [sl, tl] = textureIt->second; out.samplerLayout = sl; out.textureLayout = tl; } else { @@ -728,7 +730,7 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader } // TODO this is awkward -extern std::unordered_map> g_gxCachedShaders; +extern absl::flat_hash_map> g_gxCachedShaders; void shutdown() noexcept { // TODO we should probably store this all in g_state.gx instead sUniformBindGroupLayouts.clear(); diff --git a/aurora/lib/gfx/gx.hpp b/aurora/lib/gfx/gx.hpp index 39d9dc0e7..a69bde456 100644 --- a/aurora/lib/gfx/gx.hpp +++ b/aurora/lib/gfx/gx.hpp @@ -20,6 +20,7 @@ struct TevPass { Arg b = Default; Arg c = Default; Arg d = Default; + bool operator==(const TevPass&) const = default; }; struct TevOp { GX::TevOp op = GX::TevOp::TEV_ADD; @@ -27,6 +28,7 @@ struct TevOp { GX::TevScale scale = GX::TevScale::CS_SCALE_1; GX::TevRegID outReg = GX::TevRegID::TEVPREV; bool clamp = true; + bool operator==(const TevOp&) const = default; }; struct TevStage { TevPass colorPass; @@ -38,6 +40,7 @@ struct TevStage { GX::TexCoordID texCoordId = GX::TEXCOORD_NULL; GX::TexMapID texMapId = GX::TEXMAP_NULL; GX::ChannelID channelId = GX::COLOR_NULL; + bool operator==(const TevStage&) const = default; }; struct TextureBind { aurora::gfx::TextureHandle handle; @@ -56,6 +59,7 @@ struct ColorChannelConfig { GX::ColorSrc matSrc = GX::SRC_REG; GX::ColorSrc ambSrc = GX::SRC_REG; bool lightingEnabled = false; + bool operator==(const ColorChannelConfig&) const = default; }; // For uniform generation struct ColorChannelState { @@ -72,6 +76,7 @@ struct TcgConfig { GX::TexMtx mtx = GX::IDENTITY; GX::PTTexMtx postMtx = GX::PTIDENTITY; bool normalize = false; + bool operator==(const TcgConfig&) const = default; }; struct FogState { GX::FogType type = GX::FOG_NONE; @@ -129,6 +134,7 @@ struct ShaderConfig { std::optional alphaDiscard; bool denormalizedVertexAttributes = false; bool denormalizedHasNrm = false; // TODO this is a hack + bool operator==(const ShaderConfig&) const = default; }; struct PipelineConfig { ShaderConfig shaderConfig; diff --git a/aurora/lib/gfx/gx_shader.cpp b/aurora/lib/gfx/gx_shader.cpp index aa7e45372..7ba1d9223 100644 --- a/aurora/lib/gfx/gx_shader.cpp +++ b/aurora/lib/gfx/gx_shader.cpp @@ -3,14 +3,17 @@ #include "../gpu.hpp" #include "gx.hpp" -#include +#include namespace aurora::gfx::gx { using namespace fmt::literals; static logvisor::Module Log("aurora::gfx::gx"); -std::unordered_map> g_gxCachedShaders; +absl::flat_hash_map> g_gxCachedShaders; +#ifndef NDEBUG +static absl::flat_hash_map g_gxCachedShaderConfigs; +#endif static std::string color_arg_reg(GX::TevColorArg arg, size_t stageIdx, const TevStage& stage, ShaderInfo& info) { switch (arg) { @@ -346,8 +349,15 @@ static std::string in_uv(u32 idx) { std::pair build_shader(const ShaderConfig& config) noexcept { const auto hash = xxh3_hash(config); - if (g_gxCachedShaders.contains(hash)) { - return g_gxCachedShaders[hash]; + const auto it = g_gxCachedShaders.find(hash); + if (it != g_gxCachedShaders.end()) { +#ifndef NDEBUG + if (g_gxCachedShaderConfigs[hash] != config) { + Log.report(logvisor::Fatal, FMT_STRING("Shader collision!")); + unreachable(); + } +#endif + return it->second; } Log.report(logvisor::Info, FMT_STRING("Shader config (hash {:x}):"), hash); @@ -791,6 +801,9 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 {{ info.uniformSize = align_uniform(info.uniformSize); auto pair = std::make_pair(std::move(shader), info); g_gxCachedShaders.emplace(hash, pair); +#ifndef NDEBUG + g_gxCachedShaderConfigs.emplace(hash, config); +#endif return pair; } diff --git a/aurora/lib/gfx/model/shader.cpp b/aurora/lib/gfx/model/shader.cpp index 3559abea5..273a43da1 100644 --- a/aurora/lib/gfx/model/shader.cpp +++ b/aurora/lib/gfx/model/shader.cpp @@ -3,6 +3,7 @@ #include "../../gpu.hpp" #include "../common.hpp" +#include #include #include @@ -53,11 +54,10 @@ static const std::vector* vtxData; static const std::vector* nrmData; static const std::vector>* tex0TcData; static const std::vector>* tcData; - -// void set_vertex_buffer(const std::vector* data) noexcept { vtxData = data; } -// void set_normal_buffer(const std::vector* norm) noexcept { nrmData = norm; } -// void set_tex0_tc_buffer(const std::vector>* tcs) noexcept { tex0TcData = tcs; } -// void set_tc_buffer(const std::vector>* tcs) noexcept { tcData = tcs; } +static std::optional staticVtxRange; +static std::optional staticNrmRange; +static std::optional staticPackedTcRange; +static std::optional staticTcRange; enum class VertexFormat : u8 { F32F32, @@ -110,69 +110,95 @@ static inline std::pair readVert(const u8* data) noexcept { return {out, offset}; } +static absl::flat_hash_map, std::vector>> sCachedDisplayLists; + void queue_surface(const u8* dlStart, u32 dlSize) noexcept { - // Log.report(logvisor::Info, FMT_STRING("DL size {}"), dlSize); - std::vector verts; - std::vector indices; + const auto hash = xxh3_hash(dlStart, dlSize, 0); + Range vertRange, idxRange; + uint32_t numIndices; + auto it = sCachedDisplayLists.find(hash); + if (it != sCachedDisplayLists.end()) { + const auto& [verts, indices] = it->second; + numIndices = indices.size(); + vertRange = push_verts(ArrayRef{verts}); + idxRange = push_indices(ArrayRef{indices}); + } else { + std::vector verts; + std::vector indices; - size_t offset = 0; - while (offset < dlSize - 6) { - const auto header = dlStart[offset]; - const auto primitive = static_cast(header & 0xF8); - const auto vtxFmt = static_cast(header & 0x3); - const auto vtxCount = metaforce::SBig(*reinterpret_cast(dlStart + offset + 1)); - // Log.report(logvisor::Info, FMT_STRING("DL header prim {}, fmt {}, vtx count {}"), primitive, - // magic_enum::enum_name(vtxFmt), vtxCount); - offset += 3; + size_t offset = 0; + while (offset < dlSize - 6) { + const auto header = dlStart[offset]; + const auto primitive = static_cast(header & 0xF8); + const auto vtxFmt = static_cast(header & 0x3); + const auto vtxCount = metaforce::SBig(*reinterpret_cast(dlStart + offset + 1)); + offset += 3; - if (primitive == 0) { - break; - } - if (primitive != GX::TRIANGLES && primitive != GX::TRIANGLESTRIP && primitive != GX::TRIANGLEFAN) { - Log.report(logvisor::Fatal, FMT_STRING("queue_surface: unsupported primitive type {}"), primitive); - unreachable(); - } - - const u32 idxStart = indices.size(); - const u16 vertsStart = verts.size(); - verts.reserve(vertsStart + vtxCount); - if (vtxCount > 3 && (primitive == GX::TRIANGLEFAN || primitive == GX::TRIANGLESTRIP)) { - indices.reserve(idxStart + (u32(vtxCount) - 3) * 3 + 3); - } else { - indices.reserve(idxStart + vtxCount); - } - auto curVert = vertsStart; - for (int v = 0; v < vtxCount; ++v) { - const auto [vert, read] = readVert(dlStart + offset); - verts.push_back(vert); - offset += read; - if (primitive == GX::TRIANGLES || v < 3) { - // pass - } else if (primitive == GX::TRIANGLEFAN) { - indices.push_back(vertsStart); - indices.push_back(curVert - 1); - } else if (primitive == GX::TRIANGLESTRIP) { - if ((v & 1) == 0) { - indices.push_back(curVert - 2); - indices.push_back(curVert - 1); - } else { - indices.push_back(curVert - 1); - indices.push_back(curVert - 2); - } + if (primitive == 0) { + break; + } + if (primitive != GX::TRIANGLES && primitive != GX::TRIANGLESTRIP && primitive != GX::TRIANGLEFAN) { + Log.report(logvisor::Fatal, FMT_STRING("queue_surface: unsupported primitive type {}"), primitive); + unreachable(); + } + + const u32 idxStart = indices.size(); + const u16 vertsStart = verts.size(); + verts.reserve(vertsStart + vtxCount); + if (vtxCount > 3 && (primitive == GX::TRIANGLEFAN || primitive == GX::TRIANGLESTRIP)) { + indices.reserve(idxStart + (u32(vtxCount) - 3) * 3 + 3); + } else { + indices.reserve(idxStart + vtxCount); + } + auto curVert = vertsStart; + for (int v = 0; v < vtxCount; ++v) { + const auto [vert, read] = readVert(dlStart + offset); + verts.push_back(vert); + offset += read; + if (primitive == GX::TRIANGLES || v < 3) { + // pass + } else if (primitive == GX::TRIANGLEFAN) { + indices.push_back(vertsStart); + indices.push_back(curVert - 1); + } else if (primitive == GX::TRIANGLESTRIP) { + if ((v & 1) == 0) { + indices.push_back(curVert - 2); + indices.push_back(curVert - 1); + } else { + indices.push_back(curVert - 1); + indices.push_back(curVert - 2); + } + } + indices.push_back(curVert); + ++curVert; } - indices.push_back(curVert); - ++curVert; } + + numIndices = indices.size(); + vertRange = push_verts(ArrayRef{verts}); + idxRange = push_indices(ArrayRef{indices}); + sCachedDisplayLists.try_emplace(hash, std::move(verts), std::move(indices)); } - // Log.report(logvisor::Info, FMT_STRING("Read {} verts, {} indices"), verts.size(), indices.size()); - const auto vertRange = push_verts(ArrayRef{verts}); - const auto idxRange = push_indices(ArrayRef{indices}); - const auto sVtxRange = push_storage(reinterpret_cast(vtxData->data()), vtxData->size() * 16); - const auto sNrmRange = push_storage(reinterpret_cast(nrmData->data()), nrmData->size() * 16); - const auto sTcRange = push_storage(reinterpret_cast(tcData->data()), tcData->size() * 8); - Range sPackedTcRange; - if (tcData == tex0TcData) { + Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange; + if (staticVtxRange) { + sVtxRange = *staticVtxRange; + } else { + sVtxRange = push_storage(reinterpret_cast(vtxData->data()), vtxData->size() * 16); + } + if (staticNrmRange) { + sNrmRange = *staticNrmRange; + } else { + sNrmRange = push_storage(reinterpret_cast(nrmData->data()), nrmData->size() * 16); + } + if (staticTcRange) { + sTcRange = *staticTcRange; + } else { + sTcRange = push_storage(reinterpret_cast(tcData->data()), tcData->size() * 8); + } + if (staticPackedTcRange) { + sPackedTcRange = *staticPackedTcRange; + } else if (tcData == tex0TcData) { sPackedTcRange = sTcRange; } else { sPackedTcRange = push_storage(reinterpret_cast(tex0TcData->data()), tex0TcData->size() * 8); @@ -192,12 +218,9 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { .pipeline = pipeline, .vertRange = vertRange, .idxRange = idxRange, - .sVtxRange = sVtxRange, - .sNrmRange = sNrmRange, - .sTcRange = sTcRange, - .sPackedTcRange = sPackedTcRange, + .dataRanges = ranges, .uniformRange = build_uniform(info), - .indexCount = static_cast(indices.size()), + .indexCount = numIndices, .bindGroups = info.bindGroups, }); } @@ -220,36 +243,60 @@ void render(const State& state, const DrawData& data, const wgpu::RenderPassEnco } const std::array offsets{ - data.uniformRange.first, data.sVtxRange.first, data.sNrmRange.first, - data.sTcRange.first, data.sPackedTcRange.first, + data.uniformRange.offset, + storage_offset(data.dataRanges.vtxDataRange), + storage_offset(data.dataRanges.nrmDataRange), + storage_offset(data.dataRanges.tcDataRange), + storage_offset(data.dataRanges.packedTcDataRange), }; pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data()); if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) { pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup)); pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup)); } - pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.first, data.vertRange.second); - pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint32, data.idxRange.first, data.idxRange.second); + pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size); + pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint32, data.idxRange.offset, data.idxRange.size); pass.DrawIndexed(data.indexCount); } } // namespace aurora::gfx::model +static absl::flat_hash_map sCachedRanges; +template +static inline void cache_array(const void* data, Vec*& outPtr, std::optional& outRange, u8 stride) { + Vec* vecPtr = static_cast(data); + outPtr = vecPtr; + if (stride == 1) { + const auto hash = aurora::xxh3_hash(vecPtr->data(), vecPtr->size() * sizeof(typename Vec::value_type), 0); + const auto it = sCachedRanges.find(hash); + if (it != sCachedRanges.end()) { + outRange = it->second; + } else { + const auto range = aurora::gfx::push_static_storage(aurora::ArrayRef{*vecPtr}); + sCachedRanges.try_emplace(hash, range); + outRange = range; + } + } else { + outRange.reset(); + } +} + void GXSetArray(GX::Attr attr, const void* data, u8 stride) noexcept { + using namespace aurora::gfx::model; switch (attr) { case GX::VA_POS: - aurora::gfx::model::vtxData = static_cast*>(data); + cache_array(data, vtxData, staticVtxRange, stride); break; case GX::VA_NRM: - aurora::gfx::model::nrmData = static_cast*>(data); + cache_array(data, nrmData, staticNrmRange, stride); break; case GX::VA_TEX0: - aurora::gfx::model::tex0TcData = static_cast>*>(data); + cache_array(data, tex0TcData, staticPackedTcRange, stride); break; case GX::VA_TEX1: - aurora::gfx::model::tcData = static_cast>*>(data); + cache_array(data, tcData, staticTcRange, stride); break; default: - aurora::gfx::model::Log.report(logvisor::Fatal, FMT_STRING("GXSetArray: invalid attr {}"), attr); + Log.report(logvisor::Fatal, FMT_STRING("GXSetArray: invalid attr {}"), attr); unreachable(); } } diff --git a/aurora/lib/gfx/model/shader.hpp b/aurora/lib/gfx/model/shader.hpp index a2e83bf87..fb280e91e 100644 --- a/aurora/lib/gfx/model/shader.hpp +++ b/aurora/lib/gfx/model/shader.hpp @@ -3,17 +3,12 @@ #include "../common.hpp" #include "../gx.hpp" -#include - namespace aurora::gfx::model { struct DrawData { PipelineRef pipeline; Range vertRange; Range idxRange; - Range sVtxRange; - Range sNrmRange; - Range sTcRange; - Range sPackedTcRange; + gx::BindGroupRanges dataRanges; Range uniformRange; uint32_t indexCount; gx::GXBindGroups bindGroups; diff --git a/aurora/lib/gfx/movie_player/shader.cpp b/aurora/lib/gfx/movie_player/shader.cpp index 4ed948f71..23c025205 100644 --- a/aurora/lib/gfx/movie_player/shader.cpp +++ b/aurora/lib/gfx/movie_player/shader.cpp @@ -239,7 +239,7 @@ void render(const State& state, const DrawData& data, const wgpu::RenderPassEnco pass.SetBindGroup(0, state.uniformBindGroup); pass.SetBindGroup(1, find_bind_group(data.textureBindGroup)); - pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.first, data.vertRange.second); + pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size); pass.Draw(4); } } // namespace aurora::gfx::movie_player diff --git a/aurora/lib/gfx/stream.cpp b/aurora/lib/gfx/stream.cpp index dbea1f940..46f73f025 100644 --- a/aurora/lib/gfx/stream.cpp +++ b/aurora/lib/gfx/stream.cpp @@ -4,8 +4,6 @@ #include "common.hpp" #include "gx.hpp" -#include - namespace aurora::gfx { static logvisor::Module Log("aurora::gfx::stream"); diff --git a/aurora/lib/gfx/stream/shader.cpp b/aurora/lib/gfx/stream/shader.cpp index 61be8fbf4..a6dbbc82b 100644 --- a/aurora/lib/gfx/stream/shader.cpp +++ b/aurora/lib/gfx/stream/shader.cpp @@ -6,10 +6,6 @@ #include #include -namespace aurora::gfx { -extern std::unordered_map g_gxCachedShaders; -} // namespace aurora::gfx - namespace aurora::gfx::stream { static logvisor::Module Log("aurora::gfx::stream"); @@ -66,60 +62,20 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli return build_pipeline(config, info, vertexBuffers, shader, "Stream Pipeline"); } -State construct_state() { - const auto samplerBinding = wgpu::SamplerBindingLayout{ - .type = wgpu::SamplerBindingType::Filtering, - }; - const std::array samplerLayoutEntries{ - wgpu::BindGroupLayoutEntry{ - .binding = 0, - .visibility = wgpu::ShaderStage::Fragment, - .sampler = samplerBinding, - }, - }; - const auto samplerLayoutDescriptor = wgpu::BindGroupLayoutDescriptor{ - .label = "Stream Sampler Bind Group Layout", - .entryCount = samplerLayoutEntries.size(), - .entries = samplerLayoutEntries.data(), - }; - auto samplerLayout = g_device.CreateBindGroupLayout(&samplerLayoutDescriptor); - - const auto textureBinding = wgpu::TextureBindingLayout{ - .sampleType = wgpu::TextureSampleType::Float, - .viewDimension = wgpu::TextureViewDimension::e2D, - }; - const std::array textureLayoutEntries{ - wgpu::BindGroupLayoutEntry{ - .binding = 0, - .visibility = wgpu::ShaderStage::Fragment, - .texture = textureBinding, - }, - }; - const auto textureLayoutDescriptor = wgpu::BindGroupLayoutDescriptor{ - .label = "Stream Texture Bind Group Layout", - .entryCount = textureLayoutEntries.size(), - .entries = textureLayoutEntries.data(), - }; - auto textureLayout = g_device.CreateBindGroupLayout(&textureLayoutDescriptor); - - return { - .samplerLayout = samplerLayout, - .textureLayout = textureLayout, - }; -} +State construct_state() { return {}; } void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { if (!bind_pipeline(data.pipeline, pass)) { return; } - const std::array offsets{data.uniformRange.first}; + const std::array offsets{data.uniformRange.offset}; pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data()); if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) { pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup)); pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup)); } - pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.first, data.vertRange.second); + pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size); pass.Draw(data.vertexCount); } } // namespace aurora::gfx::stream diff --git a/aurora/lib/gfx/stream/shader.hpp b/aurora/lib/gfx/stream/shader.hpp index ba9e7a2a7..7ac66c513 100644 --- a/aurora/lib/gfx/stream/shader.hpp +++ b/aurora/lib/gfx/stream/shader.hpp @@ -3,8 +3,6 @@ #include "../common.hpp" #include "../gx.hpp" -#include - namespace aurora::gfx::stream { struct DrawData { PipelineRef pipeline; @@ -16,19 +14,7 @@ struct DrawData { struct PipelineConfig : public gx::PipelineConfig {}; -struct CachedBindGroup { - wgpu::BindGroupLayout layout; - wgpu::BindGroup bindGroup; - CachedBindGroup(wgpu::BindGroupLayout layout, wgpu::BindGroup&& group) - : layout(std::move(layout)), bindGroup(std::move(group)) {} -}; -struct State { - wgpu::BindGroupLayout samplerLayout; - wgpu::BindGroupLayout textureLayout; - mutable std::unordered_map uniform; - mutable std::unordered_map sampler; - mutable std::unordered_map shaderInfo; -}; +struct State {}; State construct_state(); wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] PipelineConfig config); diff --git a/aurora/lib/gfx/textured_quad/shader.cpp b/aurora/lib/gfx/textured_quad/shader.cpp index cdc24e3c9..30625bfe4 100644 --- a/aurora/lib/gfx/textured_quad/shader.cpp +++ b/aurora/lib/gfx/textured_quad/shader.cpp @@ -380,10 +380,10 @@ void render(const State& state, const DrawData& data, const wgpu::RenderPassEnco return; } - const std::array offsets{data.uniformRange.first}; + const std::array offsets{data.uniformRange.offset}; pass.SetBindGroup(0, state.uniformBindGroup, offsets.size(), offsets.data()); pass.SetBindGroup(1, find_bind_group(data.textureBindGroup)); - pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.first, data.vertRange.second); + pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size); pass.Draw(4); } } // namespace aurora::gfx::textured_quad diff --git a/aurora/lib/input.cpp b/aurora/lib/input.cpp index a55b4d909..106264e92 100644 --- a/aurora/lib/input.cpp +++ b/aurora/lib/input.cpp @@ -2,6 +2,7 @@ #include #include +#include #include namespace aurora::input { @@ -13,7 +14,7 @@ struct GameController { Sint32 m_index = -1; bool m_hasRumble = false; }; -std::unordered_map g_GameControllers; +absl::flat_hash_map g_GameControllers; static std::optional remap_controller_layout(std::string_view mapping) { std::string newMapping; diff --git a/aurora/lib/input.hpp b/aurora/lib/input.hpp index 1a52d0f99..e2fcaf508 100644 --- a/aurora/lib/input.hpp +++ b/aurora/lib/input.hpp @@ -1,11 +1,11 @@ #pragma once -#include #include "aurora/aurora.hpp" #include "SDL_gamecontroller.h" #include "SDL_keyboard.h" #include "SDL_keycode.h" #include "SDL_mouse.h" + namespace aurora::input { Sint32 add_controller(Sint32 which) noexcept; void remove_controller(Uint32 instance) noexcept; @@ -22,4 +22,4 @@ char translate_key(SDL_Keysym sym, SpecialKey& specialSym, ModifierKey& modifier ModifierKey translate_modifiers(Uint16 mods) noexcept; MouseButton translate_mouse_button(Uint8 button) noexcept; MouseButton translate_mouse_button_state(Uint8 state) noexcept; -} // namespace aurora::input \ No newline at end of file +} // namespace aurora::input