diff --git a/Runtime/CMain.cpp b/Runtime/CMain.cpp index 2d5edc62c..971e14fab 100644 --- a/Runtime/CMain.cpp +++ b/Runtime/CMain.cpp @@ -351,8 +351,6 @@ public: } } - OPTICK_FRAME("MainThread"); - // Check if fullscreen has been toggled, if so set the fullscreen cvar accordingly if (m_fullscreenToggleRequested) { m_cvarCommons.m_fullscreen->fromBoolean(!m_cvarCommons.getFullscreen()); diff --git a/aurora/CMakeLists.txt b/aurora/CMakeLists.txt index edee18b5a..37c699f25 100644 --- a/aurora/CMakeLists.txt +++ b/aurora/CMakeLists.txt @@ -19,8 +19,12 @@ add_library(aurora STATIC target_compile_definitions(aurora PRIVATE IMGUI_USER_CONFIG="imconfig_user.h") # IMGUI_USE_WCHAR32 target_include_directories(aurora PUBLIC include ../) target_include_directories(aurora PRIVATE ../imgui ../extern/imgui) +target_include_directories(aurora PRIVATE + ../extern/dawn/src + ../extern/dawn/third_party/abseil-cpp + ${CMAKE_BINARY_DIR}/extern/dawn/gen/src) # for hacks :) target_link_libraries(aurora PRIVATE dawn_native dawncpp webgpu_dawn zeus logvisor SDL2-static xxhash - absl::btree absl::flat_hash_map) + absl::btree absl::flat_hash_map OptickCore) if (APPLE) target_compile_definitions(aurora PRIVATE DAWN_ENABLE_BACKEND_METAL) target_sources(aurora PRIVATE lib/dawn/MetalBinding.mm) diff --git a/aurora/lib/aurora.cpp b/aurora/lib/aurora.cpp index fcaf9f907..e0dffe682 100644 --- a/aurora/lib/aurora.cpp +++ b/aurora/lib/aurora.cpp @@ -280,12 +280,18 @@ void app_run(std::unique_ptr app, Icon icon, int argc, char** argv) g_AppDelegate->onAppWindowResized(size); while (poll_events()) { + OPTICK_FRAME("MainThread"); + imgui::new_frame(g_windowSize); if (!g_AppDelegate->onAppIdle(ImGui::GetIO().DeltaTime)) { break; } - const wgpu::TextureView view = g_swapChain.GetCurrentTextureView(); + wgpu::TextureView view; + { + OPTICK_EVENT("SwapChain GetCurrentTextureView"); + view = g_swapChain.GetCurrentTextureView(); + } gfx::begin_frame(); g_AppDelegate->onAppDraw(); @@ -293,8 +299,12 @@ void 
app_run(std::unique_ptr app, Icon icon, int argc, char** argv) .label = "Redraw encoder", }; auto encoder = g_device.CreateCommandEncoder(&encoderDescriptor); +#if USE_OPTICK + auto prevContext = gpu::begin_cmdlist(); +#endif gfx::end_frame(encoder); { + OPTICK_EVENT("Main Render Pass"); const std::array attachments{ wgpu::RenderPassColorAttachment{ .view = view, @@ -329,6 +339,7 @@ void app_run(std::unique_ptr app, Icon icon, int argc, char** argv) pass.End(); } { + OPTICK_EVENT("ImGui Render Pass"); const std::array attachments{ wgpu::RenderPassColorAttachment{ .view = view, @@ -345,9 +356,19 @@ void app_run(std::unique_ptr app, Icon icon, int argc, char** argv) imgui::render(pass); pass.End(); } - const auto buffer = encoder.Finish(); - g_queue.Submit(1, &buffer); - g_swapChain.Present(); +#if USE_OPTICK + gpu::end_cmdlist(prevContext); +#endif + { + OPTICK_EVENT("Queue Submit"); + const auto buffer = encoder.Finish(); + g_queue.Submit(1, &buffer); + } + { + OPTICK_GPU_FLIP(gpu::get_native_swapchain()); + OPTICK_CATEGORY("Present", Optick::Category::Wait); + g_swapChain.Present(); + } g_AppDelegate->onAppPostDraw(); diff --git a/aurora/lib/dawn/BackendBinding.cpp b/aurora/lib/dawn/BackendBinding.cpp index 6face6d99..a2d1cf4c0 100644 --- a/aurora/lib/dawn/BackendBinding.cpp +++ b/aurora/lib/dawn/BackendBinding.cpp @@ -8,22 +8,23 @@ namespace aurora::gpu::utils { #if defined(DAWN_ENABLE_BACKEND_D3D12) -BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUDevice device); +BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device); #endif #if defined(DAWN_ENABLE_BACKEND_METAL) -BackendBinding* CreateMetalBinding(SDL_Window* window, WGPUDevice device); +BackendBinding* CreateMetalBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device); #endif #if defined(DAWN_ENABLE_BACKEND_NULL) -BackendBinding* CreateNullBinding(SDL_Window* window, WGPUDevice device); +BackendBinding* CreateNullBinding(SDL_Window* window, 
WGPUAdapter adapter, WGPUDevice device); #endif #if defined(DAWN_ENABLE_BACKEND_OPENGL) -BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUDevice device); +BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device); #endif #if defined(DAWN_ENABLE_BACKEND_VULKAN) -BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUDevice device); +BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device); #endif -BackendBinding::BackendBinding(SDL_Window* window, WGPUDevice device) : m_window(window), m_device(device) {} +BackendBinding::BackendBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) +: m_window(window), m_adapter(adapter), m_device(device) {} void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type) { if (type == wgpu::BackendType::OpenGL || type == wgpu::BackendType::OpenGLES) { @@ -45,31 +46,31 @@ void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu: } } -BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUDevice device) { +BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) { switch (type) { #if defined(DAWN_ENABLE_BACKEND_D3D12) case wgpu::BackendType::D3D12: - return CreateD3D12Binding(window, device); + return CreateD3D12Binding(window, adapter, device); #endif #if defined(DAWN_ENABLE_BACKEND_METAL) case wgpu::BackendType::Metal: - return CreateMetalBinding(window, device); + return CreateMetalBinding(window, adapter, device); #endif #if defined(DAWN_ENABLE_BACKEND_NULL) case wgpu::BackendType::Null: - return CreateNullBinding(window, device); + return CreateNullBinding(window, adapter, device); #endif #if defined(DAWN_ENABLE_BACKEND_DESKTOP_GL) case wgpu::BackendType::OpenGL: - return CreateOpenGLBinding(window, device); + return CreateOpenGLBinding(window, adapter, device); #endif #if 
defined(DAWN_ENABLE_BACKEND_OPENGLES) case wgpu::BackendType::OpenGLES: - return CreateOpenGLBinding(window, device); + return CreateOpenGLBinding(window, adapter, device); #endif #if defined(DAWN_ENABLE_BACKEND_VULKAN) case wgpu::BackendType::Vulkan: - return CreateVulkanBinding(window, device); + return CreateVulkanBinding(window, adapter, device); #endif default: return nullptr; diff --git a/aurora/lib/dawn/BackendBinding.hpp b/aurora/lib/dawn/BackendBinding.hpp index bfcae76b3..6ec46ba1b 100644 --- a/aurora/lib/dawn/BackendBinding.hpp +++ b/aurora/lib/dawn/BackendBinding.hpp @@ -2,6 +2,7 @@ #include #include +#include struct SDL_Window; @@ -13,15 +14,20 @@ public: virtual uint64_t GetSwapChainImplementation() = 0; virtual WGPUTextureFormat GetPreferredSwapChainTextureFormat() = 0; +#if USE_OPTICK + virtual void* GetNativeSwapChain() { return nullptr; }; + virtual Optick::GPUContext OptickSetGpuContext() { return {}; }; +#endif protected: - BackendBinding(SDL_Window* window, WGPUDevice device); + BackendBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device); SDL_Window* m_window = nullptr; + WGPUAdapter m_adapter = nullptr; WGPUDevice m_device = nullptr; }; void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type); -BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUDevice device); +BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUAdapter adapter, WGPUDevice device); } // namespace aurora::gpu::utils diff --git a/aurora/lib/dawn/D3D12Binding.cpp b/aurora/lib/dawn/D3D12Binding.cpp index bc347e8f4..00e1a4c13 100644 --- a/aurora/lib/dawn/D3D12Binding.cpp +++ b/aurora/lib/dawn/D3D12Binding.cpp @@ -2,11 +2,21 @@ #include #include +#include + +#if USE_OPTICK +// Internal headers +#include +#include +#define private public +#include +#undef private +#endif namespace aurora::gpu::utils { class D3D12Binding : public BackendBinding { public: - 
D3D12Binding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {} + D3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {} uint64_t GetSwapChainImplementation() override { if (m_swapChainImpl.userData == nullptr) { @@ -22,6 +32,19 @@ public: return dawn::native::d3d12::GetNativeSwapChainPreferredFormat(&m_swapChainImpl); } +#if USE_OPTICK + void* GetNativeSwapChain() override { + auto* impl = static_cast(m_swapChainImpl.userData); + return impl->mSwapChain.Get(); + } + + Optick::GPUContext OptickSetGpuContext() override { + auto* device = dawn::native::d3d12::ToBackend(static_cast(static_cast(m_device))); + auto* commandList = device->GetPendingCommandContext().AcquireSuccess()->GetCommandList(); + return Optick::SetGpuContext({commandList}); + } +#endif + private: DawnSwapChainImplementation m_swapChainImpl{}; @@ -30,8 +53,16 @@ private: SDL_VERSION(&wmInfo.version); SDL_GetWindowWMInfo(m_window, &wmInfo); m_swapChainImpl = dawn::native::d3d12::CreateNativeSwapChainImpl(m_device, wmInfo.info.win.window); +#if USE_OPTICK + auto* device = dawn::native::d3d12::ToBackend(static_cast(static_cast(m_device))); + auto* d3d12Device = device->GetD3D12Device(); + auto* d3d12CommandQueue = device->GetCommandQueue().Get(); + OPTICK_GPU_INIT_D3D12(d3d12Device, &d3d12CommandQueue, 1); +#endif } }; -BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUDevice device) { return new D3D12Binding(window, device); } +BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) { + return new D3D12Binding(window, adapter, device); +} } // namespace aurora::gpu::utils diff --git a/aurora/lib/dawn/MetalBinding.mm b/aurora/lib/dawn/MetalBinding.mm index e48f3be14..e3b974437 100644 --- a/aurora/lib/dawn/MetalBinding.mm +++ b/aurora/lib/dawn/MetalBinding.mm @@ -89,7 +89,7 @@ private: class MetalBinding : public BackendBinding { public: - MetalBinding(SDL_Window 
*window, WGPUDevice device) : BackendBinding(window, device) {} + MetalBinding(SDL_Window *window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {} uint64_t GetSwapChainImplementation() override { if (m_swapChainImpl.userData == nullptr) { @@ -104,5 +104,7 @@ private: DawnSwapChainImplementation m_swapChainImpl{}; }; -BackendBinding *CreateMetalBinding(SDL_Window *window, WGPUDevice device) { return new MetalBinding(window, device); } +BackendBinding *CreateMetalBinding(SDL_Window *window, WGPUAdapter adapter, WGPUDevice device) { + return new MetalBinding(window, adapter, device); +} } // namespace aurora::gpu::utils diff --git a/aurora/lib/dawn/OpenGLBinding.cpp b/aurora/lib/dawn/OpenGLBinding.cpp index 41c379b15..b49e56b5e 100644 --- a/aurora/lib/dawn/OpenGLBinding.cpp +++ b/aurora/lib/dawn/OpenGLBinding.cpp @@ -6,7 +6,7 @@ namespace aurora::gpu::utils { class OpenGLBinding : public BackendBinding { public: - OpenGLBinding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {} + OpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {} uint64_t GetSwapChainImplementation() override { if (m_swapChainImpl.userData == nullptr) { @@ -31,5 +31,7 @@ private: } }; -BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUDevice device) { return new OpenGLBinding(window, device); } +BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) { + return new OpenGLBinding(window, adapter, device); +} } // namespace aurora::gpu::utils diff --git a/aurora/lib/dawn/VulkanBinding.cpp b/aurora/lib/dawn/VulkanBinding.cpp index 065ba7734..374879d75 100644 --- a/aurora/lib/dawn/VulkanBinding.cpp +++ b/aurora/lib/dawn/VulkanBinding.cpp @@ -1,13 +1,24 @@ #include "BackendBinding.hpp" -#include #include +#include + +#if USE_OPTICK +// Internal headers +#include +#include +#define private public +#include +#undef private +#endif + 
+#include #include namespace aurora::gpu::utils { class VulkanBinding : public BackendBinding { public: - VulkanBinding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {} + VulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {} uint64_t GetSwapChainImplementation() override { if (m_swapChainImpl.userData == nullptr) { @@ -23,6 +34,19 @@ public: return dawn::native::vulkan::GetNativeSwapChainPreferredFormat(&m_swapChainImpl); } +#if USE_OPTICK + void* GetNativeSwapChain() override { + auto* impl = static_cast(m_swapChainImpl.userData); + return impl->mSwapChain; + } + + Optick::GPUContext OptickSetGpuContext() override { + auto* device = + dawn::native::vulkan::ToBackend(static_cast(static_cast(m_device))); + return Optick::SetGpuContext({device->GetPendingRecordingContext()->commandBuffer}); + } +#endif + private: DawnSwapChainImplementation m_swapChainImpl{}; @@ -32,8 +56,21 @@ private: assert(false); } m_swapChainImpl = dawn::native::vulkan::CreateNativeSwapChainImpl(m_device, surface); +#if USE_OPTICK + auto* adapter = + dawn::native::vulkan::ToBackend(static_cast(static_cast(m_adapter))); + auto* device = + dawn::native::vulkan::ToBackend(static_cast(static_cast(m_device))); + VkDevice vkDevice = device->GetVkDevice(); + VkPhysicalDevice vkPhysicalDevice = adapter->GetPhysicalDevice(); + VkQueue vkQueue = device->GetQueue(); + uint32_t queueFamily = device->GetGraphicsQueueFamily(); + OPTICK_GPU_INIT_VULKAN(&vkDevice, &vkPhysicalDevice, &vkQueue, &queueFamily, 1, nullptr); +#endif } }; -BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUDevice device) { return new VulkanBinding(window, device); } +BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) { + return new VulkanBinding(window, adapter, device); +} } // namespace aurora::gpu::utils diff --git a/aurora/lib/gfx/colored_quad/shader.cpp 
b/aurora/lib/gfx/colored_quad/shader.cpp index 85ce3e67b..9fdf12b68 100644 --- a/aurora/lib/gfx/colored_quad/shader.cpp +++ b/aurora/lib/gfx/colored_quad/shader.cpp @@ -298,6 +298,7 @@ DrawData make_draw_data_verts(const State& state, CameraFilterType filter_type, } void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { + OPTICK_EVENT(); if (!bind_pipeline(data.pipeline, pass)) { return; } diff --git a/aurora/lib/gfx/common.cpp b/aurora/lib/gfx/common.cpp index 00bb40e02..e785b518a 100644 --- a/aurora/lib/gfx/common.cpp +++ b/aurora/lib/gfx/common.cpp @@ -84,107 +84,44 @@ struct Command { namespace aurora { template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::colored_quad::PipelineConfig& input) { - XXH3_64bits_update(&state, &input.filterType, sizeof(gfx::colored_quad::PipelineConfig::filterType)); - XXH3_64bits_update(&state, &input.zComparison, sizeof(gfx::colored_quad::PipelineConfig::zComparison)); - XXH3_64bits_update(&state, &input.zTest, sizeof(gfx::colored_quad::PipelineConfig::zTest)); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::textured_quad::PipelineConfig& input) { - XXH3_64bits_update(&state, &input.filterType, sizeof(gfx::textured_quad::PipelineConfig::filterType)); - XXH3_64bits_update(&state, &input.zComparison, sizeof(gfx::textured_quad::PipelineConfig::zComparison)); - XXH3_64bits_update(&state, &input.zTest, sizeof(gfx::textured_quad::PipelineConfig::zTest)); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::movie_player::PipelineConfig& input) { - // no-op -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::PipelineConfig& input) { - xxh3_update(state, input.shaderConfig); - XXH3_64bits_update(&state, &input.primitive, sizeof(gfx::gx::PipelineConfig::primitive)); - XXH3_64bits_update(&state, &input.depthFunc, sizeof(gfx::gx::PipelineConfig::depthFunc)); - XXH3_64bits_update(&state, &input.cullMode, 
sizeof(gfx::gx::PipelineConfig::cullMode)); - XXH3_64bits_update(&state, &input.blendMode, sizeof(gfx::gx::PipelineConfig::blendMode)); - XXH3_64bits_update(&state, &input.blendFacSrc, sizeof(gfx::gx::PipelineConfig::blendFacSrc)); - XXH3_64bits_update(&state, &input.blendFacDst, sizeof(gfx::gx::PipelineConfig::blendFacDst)); - XXH3_64bits_update(&state, &input.blendOp, sizeof(gfx::gx::PipelineConfig::blendOp)); - if (input.dstAlpha) { - XXH3_64bits_update(&state, &*input.dstAlpha, sizeof(float)); - } - XXH3_64bits_update(&state, &input.depthCompare, sizeof(gfx::gx::PipelineConfig::depthCompare)); - XXH3_64bits_update(&state, &input.depthUpdate, sizeof(gfx::gx::PipelineConfig::depthUpdate)); - XXH3_64bits_update(&state, &input.alphaUpdate, sizeof(gfx::gx::PipelineConfig::alphaUpdate)); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::stream::PipelineConfig& input) { - xxh3_update(state, input); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::model::PipelineConfig& input) { - xxh3_update(state, input); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::PipelineCreateCommand& input) { - XXH3_64bits_update(&state, &input.type, sizeof(gfx::PipelineCreateCommand::type)); +inline XXH64_hash_t xxh3_hash(const gfx::PipelineCreateCommand& input, XXH64_hash_t seed) { + constexpr auto typeSize = sizeof(gfx::PipelineCreateCommand::type); switch (input.type) { case gfx::ShaderType::Aabb: // TODO break; case gfx::ShaderType::ColoredQuad: - xxh3_update(state, input.coloredQuad); - break; + return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::coloredQuad), seed); case gfx::ShaderType::TexturedQuad: - xxh3_update(state, input.texturedQuad); - break; + return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::texturedQuad), seed); case gfx::ShaderType::MoviePlayer: - xxh3_update(state, input.moviePlayer); - break; + return xxh3_hash_s(&input, typeSize + 
sizeof(gfx::PipelineCreateCommand::moviePlayer), seed); case gfx::ShaderType::Stream: - xxh3_update(state, input.stream); - break; + return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::stream), seed); case gfx::ShaderType::Model: - xxh3_update(state, input.model); - break; + return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::model), seed); } + return 0; } template <> -inline void xxh3_update(XXH3_state_t& state, const wgpu::BindGroupEntry& input) { - XXH3_64bits_update(&state, &input.binding, sizeof(wgpu::BindGroupEntry::binding)); - XXH3_64bits_update(&state, &input.buffer, sizeof(wgpu::BindGroupEntry::buffer)); - XXH3_64bits_update(&state, &input.offset, sizeof(wgpu::BindGroupEntry::offset)); - if (input.buffer != nullptr) { - XXH3_64bits_update(&state, &input.size, sizeof(wgpu::BindGroupEntry::size)); - } - XXH3_64bits_update(&state, &input.sampler, sizeof(wgpu::BindGroupEntry::sampler)); - XXH3_64bits_update(&state, &input.textureView, sizeof(wgpu::BindGroupEntry::textureView)); +inline XXH64_hash_t xxh3_hash(const wgpu::BindGroupDescriptor& input, XXH64_hash_t seed) { + constexpr auto offset = sizeof(void*) * 2; // skip nextInChain, label + const auto hash = xxh3_hash_s(reinterpret_cast(&input) + offset, + sizeof(wgpu::BindGroupDescriptor) - offset - sizeof(void*) /* skip entries */, seed); + // // TODO ensure size is zeroed elsewhere + // for (int i = 0; i < input.entryCount; ++i) { + // const wgpu::BindGroupEntry& entry = input.entries[i]; + // if (!entry.buffer && entry.size != 0) { + // gfx::Log.report(logvisor::Fatal, FMT_STRING("Size != 0! 
{}"), entry.size); + // } + // } + return xxh3_hash_s(input.entries, sizeof(wgpu::BindGroupEntry) * input.entryCount, hash); } template <> -inline void xxh3_update(XXH3_state_t& state, const wgpu::BindGroupDescriptor& input) { - if (input.label != nullptr) { - XXH3_64bits_update(&state, input.label, strlen(input.label)); - } - XXH3_64bits_update(&state, &input.layout, sizeof(wgpu::BindGroupDescriptor::layout)); - XXH3_64bits_update(&state, &input.entryCount, sizeof(wgpu::BindGroupDescriptor::entryCount)); - for (int i = 0; i < input.entryCount; ++i) { - xxh3_update(state, input.entries[i]); - } -} -template <> -inline void xxh3_update(XXH3_state_t& state, const wgpu::SamplerDescriptor& input) { - if (input.label != nullptr) { - XXH3_64bits_update(&state, input.label, strlen(input.label)); - } - XXH3_64bits_update(&state, &input.addressModeU, sizeof(wgpu::SamplerDescriptor::addressModeU)); - XXH3_64bits_update(&state, &input.addressModeV, sizeof(wgpu::SamplerDescriptor::addressModeV)); - XXH3_64bits_update(&state, &input.addressModeW, sizeof(wgpu::SamplerDescriptor::addressModeW)); - XXH3_64bits_update(&state, &input.magFilter, sizeof(wgpu::SamplerDescriptor::magFilter)); - XXH3_64bits_update(&state, &input.minFilter, sizeof(wgpu::SamplerDescriptor::minFilter)); - XXH3_64bits_update(&state, &input.mipmapFilter, sizeof(wgpu::SamplerDescriptor::mipmapFilter)); - XXH3_64bits_update(&state, &input.lodMinClamp, sizeof(wgpu::SamplerDescriptor::lodMinClamp)); - XXH3_64bits_update(&state, &input.lodMaxClamp, sizeof(wgpu::SamplerDescriptor::lodMaxClamp)); - XXH3_64bits_update(&state, &input.compare, sizeof(wgpu::SamplerDescriptor::compare)); - XXH3_64bits_update(&state, &input.maxAnisotropy, sizeof(wgpu::SamplerDescriptor::maxAnisotropy)); +inline XXH64_hash_t xxh3_hash(const wgpu::SamplerDescriptor& input, XXH64_hash_t seed) { + constexpr auto offset = sizeof(void*) * 2; // skip nextInChain, label + return xxh3_hash_s(reinterpret_cast(&input) + offset, + 
sizeof(wgpu::SamplerDescriptor) - offset - 2 /* skip padding */, seed); } } // namespace aurora @@ -219,6 +156,7 @@ static PipelineRef g_currentPipeline; static std::vector g_commands; static PipelineRef find_pipeline(PipelineCreateCommand command, NewPipelineCallback&& cb) { + OPTICK_EVENT(); const auto hash = xxh3_hash(command); bool found = false; { @@ -357,6 +295,7 @@ PipelineRef pipeline_ref(model::PipelineConfig config) { } static void pipeline_worker() { + OPTICK_THREAD("Pipeline Worker"); bool hasMore = false; while (true) { std::pair cb; @@ -387,6 +326,8 @@ static void pipeline_worker() { } void initialize() { + gx::initialize(); + g_pipelineThread = std::thread(pipeline_worker); const auto createBuffer = [](wgpu::Buffer& out, wgpu::BufferUsage usage, uint64_t size, const char* label) { @@ -440,6 +381,7 @@ void shutdown() { static size_t currentStagingBuffer = 0; static bool bufferMapped = false; void map_staging_buffer() { + OPTICK_EVENT(); bufferMapped = false; g_stagingBuffers[currentStagingBuffer].MapAsync( wgpu::MapMode::Write, 0, StagingBufferSize, @@ -456,9 +398,13 @@ void map_staging_buffer() { } void begin_frame() { - while (!bufferMapped) { - g_device.Tick(); + { + OPTICK_EVENT("Wait for buffer mapping"); + while (!bufferMapped) { + g_device.Tick(); + } } + OPTICK_EVENT(); size_t bufferOffset = 0; auto& stagingBuf = g_stagingBuffers[currentStagingBuffer]; const auto mapBuffer = [&](ByteBuffer& buf, uint64_t size) { @@ -472,6 +418,7 @@ void begin_frame() { } void end_frame(const wgpu::CommandEncoder& cmd) { + OPTICK_EVENT(); uint64_t bufferOffset = 0; const auto writeBuffer = [&](ByteBuffer& buf, wgpu::Buffer& out, uint64_t size, std::string_view label) { const auto writeSize = buf.size(); // Only need to copy this many bytes @@ -491,6 +438,7 @@ void end_frame(const wgpu::CommandEncoder& cmd) { } void render(const wgpu::RenderPassEncoder& pass) { + OPTICK_EVENT(); g_currentPipeline = UINT64_MAX; for (const auto& cmd : g_commands) { @@ -533,6 
+481,7 @@ void render(const wgpu::RenderPassEncoder& pass) { } bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) { + OPTICK_EVENT(); if (ref == g_currentPipeline) { return true; } @@ -547,6 +496,7 @@ bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) { } static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, size_t alignment) { + OPTICK_EVENT(); size_t padding = 0; if (alignment != 0) { padding = alignment - length % alignment; @@ -565,6 +515,7 @@ static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, return {static_cast(begin), static_cast(length + padding)}; } static inline Range map(ByteBuffer& target, size_t length, size_t alignment) { + OPTICK_EVENT(); size_t padding = 0; if (alignment != 0) { padding = alignment - length % alignment; @@ -576,19 +527,28 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) { target.append_zeroes(length + padding); return {static_cast(begin), static_cast(length + padding)}; } -Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0 /* TODO? */); } -Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0 /* TODO? */); } +Range push_verts(const uint8_t* data, size_t length) { + OPTICK_EVENT(); + return push(g_verts, data, length, 0 /* TODO? */); +} +Range push_indices(const uint8_t* data, size_t length) { + OPTICK_EVENT(); + return push(g_indices, data, length, 0 /* TODO? 
*/); +} Range push_uniform(const uint8_t* data, size_t length) { + OPTICK_EVENT(); wgpu::SupportedLimits limits; g_device.GetLimits(&limits); return push(g_uniforms, data, length, limits.limits.minUniformBufferOffsetAlignment); } Range push_storage(const uint8_t* data, size_t length) { + OPTICK_EVENT(); wgpu::SupportedLimits limits; g_device.GetLimits(&limits); return push(g_storage, data, length, limits.limits.minStorageBufferOffsetAlignment); } Range push_static_storage(const uint8_t* data, size_t length) { + OPTICK_EVENT(); wgpu::SupportedLimits limits; g_device.GetLimits(&limits); auto range = push(g_staticStorage, data, length, limits.limits.minStorageBufferOffsetAlignment); @@ -596,20 +556,24 @@ Range push_static_storage(const uint8_t* data, size_t length) { return range; } std::pair map_verts(size_t length) { + OPTICK_EVENT(); const auto range = map(g_verts, length, 0 /* TODO? */); return {ByteBuffer{g_verts.data() + range.offset, range.size}, range}; } std::pair map_indices(size_t length) { + OPTICK_EVENT(); const auto range = map(g_indices, length, 0 /* TODO? 
*/); return {ByteBuffer{g_indices.data() + range.offset, range.size}, range}; } std::pair map_uniform(size_t length) { + OPTICK_EVENT(); wgpu::SupportedLimits limits; g_device.GetLimits(&limits); const auto range = map(g_uniforms, length, limits.limits.minUniformBufferOffsetAlignment); return {ByteBuffer{g_uniforms.data() + range.offset, range.size}, range}; } std::pair map_storage(size_t length) { + OPTICK_EVENT(); wgpu::SupportedLimits limits; g_device.GetLimits(&limits); const auto range = map(g_storage, length, limits.limits.minStorageBufferOffsetAlignment); @@ -617,6 +581,7 @@ std::pair map_storage(size_t length) { } BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) { + OPTICK_EVENT(); const auto id = xxh3_hash(descriptor); if (!g_cachedBindGroups.contains(id)) { g_cachedBindGroups.try_emplace(id, g_device.CreateBindGroup(&descriptor)); @@ -624,6 +589,7 @@ BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) { return id; } const wgpu::BindGroup& find_bind_group(BindGroupRef id) { + OPTICK_EVENT(); const auto it = g_cachedBindGroups.find(id); if (it == g_cachedBindGroups.end()) { Log.report(logvisor::Fatal, FMT_STRING("get_bind_group: failed to locate {}"), id); @@ -633,6 +599,7 @@ const wgpu::BindGroup& find_bind_group(BindGroupRef id) { } const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) { + OPTICK_EVENT(); const auto id = xxh3_hash(descriptor); auto it = g_cachedSamplers.find(id); if (it == g_cachedSamplers.end()) { @@ -642,6 +609,7 @@ const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) { } uint32_t align_uniform(uint32_t value) { + OPTICK_EVENT(); wgpu::SupportedLimits limits; g_device.GetLimits(&limits); // TODO cache const auto uniform_alignment = limits.limits.minUniformBufferOffsetAlignment; diff --git a/aurora/lib/gfx/common.hpp b/aurora/lib/gfx/common.hpp index e990ed0d2..f9cd56116 100644 --- a/aurora/lib/gfx/common.hpp +++ b/aurora/lib/gfx/common.hpp @@ -4,20 
+4,32 @@ #include #include -#include +#define XXH_INLINE_ALL +#define XXH_STATIC_LINKING_ONLY +#define XXH_IMPLEMENTATION +#include +#include #ifndef ALIGN #define ALIGN(x, a) (((x) + ((a)-1)) & ~((a)-1)) #endif +#ifdef __GNUC__ +#define PACK(...) __VA_ARGS__ __attribute__((__packed__)) +#endif +#ifdef _MSC_VER +#define PACK(...) __pragma(pack(push, 1)); __VA_ARGS__; __pragma(pack(pop)) +#endif + namespace aurora { template static inline void xxh3_update(XXH3_state_t& state, const T& input); -static inline XXH64_hash_t xxh3_hash(const void* input, size_t len, XXH64_hash_t seed = 0) { +static inline XXH64_hash_t xxh3_hash_s(const void* input, size_t len, XXH64_hash_t seed = 0) { return XXH3_64bits_withSeed(input, len, seed); } template static inline XXH64_hash_t xxh3_hash(const T& input, XXH64_hash_t seed = 0) { + OPTICK_EVENT(); XXH3_state_t state; memset(&state, 0, sizeof(XXH3_state_t)); XXH3_64bits_reset_withSeed(&state, seed); diff --git a/aurora/lib/gfx/gx.cpp b/aurora/lib/gfx/gx.cpp index 427b51791..a214141cf 100644 --- a/aurora/lib/gfx/gx.cpp +++ b/aurora/lib/gfx/gx.cpp @@ -335,6 +335,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GX::Primitive gx_prim, GX: wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info, ArrayRef vtxBuffers, wgpu::ShaderModule shader, zstring_view label) noexcept { + OPTICK_EVENT(); const auto depthStencil = wgpu::DepthStencilState{ .format = g_graphicsConfig.depthFormat, .depthWriteEnabled = config.depthUpdate, @@ -387,6 +388,7 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive, const BindGroupRanges& ranges) noexcept { + OPTICK_EVENT(); for (u8 i = 0; i < g_gxState.numTevStages; ++i) { config.shaderConfig.tevStages[i] = g_gxState.tevStages[i]; } @@ -396,7 +398,9 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit for (u8 i = 0; i < 
g_gxState.numTexGens; ++i) { config.shaderConfig.tcgs[i] = g_gxState.tcgs[i]; } - config.shaderConfig.alphaDiscard = g_gxState.alphaDiscard; + if (g_gxState.alphaDiscard) { + config.shaderConfig.alphaDiscard = g_gxState.alphaDiscard; + } config.shaderConfig.fogType = g_gxState.fog.type; config = { .shaderConfig = config.shaderConfig, @@ -414,6 +418,7 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit }; // TODO separate shader info from build_shader for async { + OPTICK_EVENT("Shader info & bind groups"); std::lock_guard lk{g_pipelineMutex}; auto [_, info] = build_shader(config.shaderConfig); info.bindGroups = build_bind_groups(info, config.shaderConfig, ranges); // TODO this is hack @@ -542,6 +547,7 @@ static absl::flat_hash_map samplerEntries; std::array textureEntries; + { + OPTICK_EVENT("Build texture entries"); + for (u32 texIdx = 0, i = 0; texIdx < info.sampledTextures.size(); ++texIdx) { if (!info.sampledTextures.test(texIdx)) { continue; @@ -597,6 +606,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi }; i++; } + } return { .uniformBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{ .label = "GX Uniform Bind Group", @@ -620,6 +630,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi } GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept { + OPTICK_EVENT(); GXBindGroupLayouts out; u32 uniformSizeKey = info.uniformSize + (config.denormalizedVertexAttributes ? 
0 : 1); const auto uniformIt = sUniformBindGroupLayouts.find(uniformSizeKey); @@ -729,6 +740,10 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader return out; } +void initialize() noexcept { + memset(&g_gxState, 0, sizeof(GXState)); +} + // TODO this is awkward extern absl::flat_hash_map> g_gxCachedShaders; void shutdown() noexcept { diff --git a/aurora/lib/gfx/gx.hpp b/aurora/lib/gfx/gx.hpp index a69bde456..77c385947 100644 --- a/aurora/lib/gfx/gx.hpp +++ b/aurora/lib/gfx/gx.hpp @@ -14,23 +14,23 @@ constexpr u32 MaxTexMtx = 10; constexpr u32 MaxPTTexMtx = 20; constexpr u32 MaxTexCoord = GX::MAX_TEXCOORD; -template +PACK(template struct TevPass { Arg a = Default; Arg b = Default; Arg c = Default; Arg d = Default; bool operator==(const TevPass&) const = default; -}; -struct TevOp { +}); +PACK(struct TevOp { GX::TevOp op = GX::TevOp::TEV_ADD; GX::TevBias bias = GX::TevBias::TB_ZERO; GX::TevScale scale = GX::TevScale::CS_SCALE_1; GX::TevRegID outReg = GX::TevRegID::TEVPREV; bool clamp = true; bool operator==(const TevOp&) const = default; -}; -struct TevStage { +}); +PACK(struct TevStage { TevPass colorPass; TevPass alphaPass; TevOp colorOp; @@ -41,7 +41,7 @@ struct TevStage { GX::TexMapID texMapId = GX::TEXMAP_NULL; GX::ChannelID channelId = GX::COLOR_NULL; bool operator==(const TevStage&) const = default; -}; +}); struct TextureBind { aurora::gfx::TextureHandle handle; metaforce::EClampMode clampMode; @@ -55,37 +55,37 @@ struct TextureBind { operator bool() const noexcept { return handle; } }; // For shader generation -struct ColorChannelConfig { +PACK(struct ColorChannelConfig { GX::ColorSrc matSrc = GX::SRC_REG; GX::ColorSrc ambSrc = GX::SRC_REG; bool lightingEnabled = false; bool operator==(const ColorChannelConfig&) const = default; -}; +}); // For uniform generation -struct ColorChannelState { +PACK(struct ColorChannelState { zeus::CColor matColor = zeus::skClear; zeus::CColor ambColor = zeus::skClear; GX::LightMask lightState; 
-}; +}); using LightVariant = std::variant; // Mat4x4 used instead of Mat4x3 for padding purposes using TexMtxVariant = std::variant, Mat4x4>; -struct TcgConfig { +PACK(struct TcgConfig { GX::TexGenType type = GX::TG_MTX2x4; GX::TexGenSrc src = GX::MAX_TEXGENSRC; GX::TexMtx mtx = GX::IDENTITY; GX::PTTexMtx postMtx = GX::PTIDENTITY; bool normalize = false; bool operator==(const TcgConfig&) const = default; -}; -struct FogState { +}); +PACK(struct FogState { GX::FogType type = GX::FOG_NONE; float startZ = 0.f; float endZ = 0.f; float nearZ = 0.f; float farZ = 0.f; zeus::CColor color; -}; +}); struct GXState { zeus::CMatrix4f mv; @@ -123,10 +123,11 @@ extern GXState g_gxState; static inline Mat4x4 get_combined_matrix() noexcept { return g_gxState.proj * g_gxState.mv; } +void initialize() noexcept; void shutdown() noexcept; const TextureBind& get_texture(GX::TexMapID id) noexcept; -struct ShaderConfig { +PACK(struct ShaderConfig { GX::FogType fogType; std::array, MaxTevStages> tevStages; std::array colorChannels; @@ -135,8 +136,8 @@ struct ShaderConfig { bool denormalizedVertexAttributes = false; bool denormalizedHasNrm = false; // TODO this is a hack bool operator==(const ShaderConfig&) const = default; -}; -struct PipelineConfig { +}); +PACK(struct PipelineConfig { ShaderConfig shaderConfig; GX::Primitive primitive; GX::Compare depthFunc; @@ -146,7 +147,7 @@ struct PipelineConfig { GX::LogicOp blendOp; std::optional dstAlpha; bool depthCompare, depthUpdate, alphaUpdate; -}; +}); struct GXBindGroupLayouts { wgpu::BindGroupLayout uniformLayout; wgpu::BindGroupLayout samplerLayout; @@ -202,70 +203,71 @@ struct DlVert { } // namespace aurora::gfx::gx namespace aurora { -template -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevPass& input) { - XXH3_64bits_update(&state, &input.a, sizeof(Arg)); - XXH3_64bits_update(&state, &input.b, sizeof(Arg)); - XXH3_64bits_update(&state, &input.c, sizeof(Arg)); - XXH3_64bits_update(&state, &input.d, sizeof(Arg)); -} 
+//template +//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevPass& input) { +// XXH3_64bits_update(&state, &input.a, sizeof(Arg)); +// XXH3_64bits_update(&state, &input.b, sizeof(Arg)); +// XXH3_64bits_update(&state, &input.c, sizeof(Arg)); +// XXH3_64bits_update(&state, &input.d, sizeof(Arg)); +//} +//template <> +//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevOp& input) { +// XXH3_64bits_update(&state, &input.op, sizeof(gfx::gx::TevOp::op)); +// XXH3_64bits_update(&state, &input.bias, sizeof(gfx::gx::TevOp::bias)); +// XXH3_64bits_update(&state, &input.scale, sizeof(gfx::gx::TevOp::scale)); +// XXH3_64bits_update(&state, &input.outReg, sizeof(gfx::gx::TevOp::outReg)); +// XXH3_64bits_update(&state, &input.clamp, sizeof(bool)); +//} +//template <> +//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevStage& input) { +// xxh3_update(state, input.colorPass); +// xxh3_update(state, input.alphaPass); +// xxh3_update(state, input.colorOp); +// xxh3_update(state, input.alphaOp); +// XXH3_64bits_update(&state, &input.kcSel, sizeof(gfx::gx::TevStage::kcSel)); +// XXH3_64bits_update(&state, &input.kaSel, sizeof(gfx::gx::TevStage::kaSel)); +// XXH3_64bits_update(&state, &input.texCoordId, sizeof(gfx::gx::TevStage::texCoordId)); +// XXH3_64bits_update(&state, &input.texMapId, sizeof(gfx::gx::TevStage::texMapId)); +// XXH3_64bits_update(&state, &input.channelId, sizeof(gfx::gx::TevStage::channelId)); +//} +//template <> +//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ColorChannelConfig& input) { +// XXH3_64bits_update(&state, &input.lightingEnabled, sizeof(gfx::gx::ColorChannelConfig::lightingEnabled)); +// XXH3_64bits_update(&state, &input.matSrc, sizeof(gfx::gx::ColorChannelConfig::matSrc)); +// if (input.lightingEnabled) { +// // Unused when lighting is disabled +// XXH3_64bits_update(&state, &input.ambSrc, sizeof(gfx::gx::ColorChannelConfig::ambSrc)); +// } +//} +//template <> +//inline void 
xxh3_update(XXH3_state_t& state, const gfx::gx::TcgConfig& input) { +// XXH3_64bits_update(&state, &input.type, sizeof(gfx::gx::TcgConfig::type)); +// XXH3_64bits_update(&state, &input.src, sizeof(gfx::gx::TcgConfig::src)); +// XXH3_64bits_update(&state, &input.mtx, sizeof(gfx::gx::TcgConfig::mtx)); +// XXH3_64bits_update(&state, &input.postMtx, sizeof(gfx::gx::TcgConfig::postMtx)); +// XXH3_64bits_update(&state, &input.normalize, sizeof(gfx::gx::TcgConfig::normalize)); +//} template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevOp& input) { - XXH3_64bits_update(&state, &input.op, sizeof(gfx::gx::TevOp::op)); - XXH3_64bits_update(&state, &input.bias, sizeof(gfx::gx::TevOp::bias)); - XXH3_64bits_update(&state, &input.scale, sizeof(gfx::gx::TevOp::scale)); - XXH3_64bits_update(&state, &input.outReg, sizeof(gfx::gx::TevOp::outReg)); - XXH3_64bits_update(&state, &input.clamp, sizeof(bool)); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevStage& input) { - xxh3_update(state, input.colorPass); - xxh3_update(state, input.alphaPass); - xxh3_update(state, input.colorOp); - xxh3_update(state, input.alphaOp); - XXH3_64bits_update(&state, &input.kcSel, sizeof(gfx::gx::TevStage::kcSel)); - XXH3_64bits_update(&state, &input.kaSel, sizeof(gfx::gx::TevStage::kaSel)); - XXH3_64bits_update(&state, &input.texCoordId, sizeof(gfx::gx::TevStage::texCoordId)); - XXH3_64bits_update(&state, &input.texMapId, sizeof(gfx::gx::TevStage::texMapId)); - XXH3_64bits_update(&state, &input.channelId, sizeof(gfx::gx::TevStage::channelId)); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ColorChannelConfig& input) { - XXH3_64bits_update(&state, &input.lightingEnabled, sizeof(gfx::gx::ColorChannelConfig::lightingEnabled)); - XXH3_64bits_update(&state, &input.matSrc, sizeof(gfx::gx::ColorChannelConfig::matSrc)); - if (input.lightingEnabled) { - // Unused when lighting is disabled - XXH3_64bits_update(&state, &input.ambSrc, 
sizeof(gfx::gx::ColorChannelConfig::ambSrc)); - } -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TcgConfig& input) { - XXH3_64bits_update(&state, &input.type, sizeof(gfx::gx::TcgConfig::type)); - XXH3_64bits_update(&state, &input.src, sizeof(gfx::gx::TcgConfig::src)); - XXH3_64bits_update(&state, &input.mtx, sizeof(gfx::gx::TcgConfig::mtx)); - XXH3_64bits_update(&state, &input.postMtx, sizeof(gfx::gx::TcgConfig::postMtx)); - XXH3_64bits_update(&state, &input.normalize, sizeof(gfx::gx::TcgConfig::normalize)); -} -template <> -inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ShaderConfig& input) { - for (const auto& item : input.tevStages) { - if (!item) { - break; - } - xxh3_update(state, *item); - } - for (const auto& item : input.colorChannels) { - xxh3_update(state, item); - } - for (const auto& item : input.tcgs) { - xxh3_update(state, item); - } - if (input.alphaDiscard) { - XXH3_64bits_update(&state, &*input.alphaDiscard, sizeof(float)); - } - XXH3_64bits_update(&state, &input.denormalizedVertexAttributes, - sizeof(gfx::gx::ShaderConfig::denormalizedVertexAttributes)); - XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(gfx::gx::ShaderConfig::denormalizedHasNrm)); - XXH3_64bits_update(&state, &input.fogType, sizeof(gfx::gx::ShaderConfig::fogType)); +inline XXH64_hash_t xxh3_hash(const gfx::gx::ShaderConfig& input, XXH64_hash_t seed) { + return xxh3_hash_s(&input, sizeof(gfx::gx::ShaderConfig), seed); +// for (const auto& item : input.tevStages) { +// if (!item) { +// break; +// } +// xxh3_update(state, *item); +// } +// for (const auto& item : input.colorChannels) { +// xxh3_update(state, item); +// } +// for (const auto& item : input.tcgs) { +// xxh3_update(state, item); +// } +// if (input.alphaDiscard) { +// XXH3_64bits_update(&state, &*input.alphaDiscard, sizeof(float)); +// } +// XXH3_64bits_update(&state, &input.denormalizedVertexAttributes, +// 
sizeof(gfx::gx::ShaderConfig::denormalizedVertexAttributes)); +// XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(gfx::gx::ShaderConfig::denormalizedHasNrm)); +// XXH3_64bits_update(&state, &input.fogType, sizeof(gfx::gx::ShaderConfig::fogType)); } } // namespace aurora diff --git a/aurora/lib/gfx/gx_shader.cpp b/aurora/lib/gfx/gx_shader.cpp index 5bc0dd78d..46a889e9e 100644 --- a/aurora/lib/gfx/gx_shader.cpp +++ b/aurora/lib/gfx/gx_shader.cpp @@ -359,6 +359,7 @@ std::pair build_shader(const ShaderConfig& confi #endif return it->second; } + OPTICK_EVENT(); Log.report(logvisor::Info, FMT_STRING("Shader config (hash {:x}):"), hash); ShaderInfo info{ @@ -636,7 +637,8 @@ var v_packed_uvs: Vec2Block; } else { vtxOutAttrs += fmt::format(FMT_STRING("\n @location({}) tex{}_uv: vec2;"), locIdx, i); if (tcg.src >= GX::TG_TEX0 && tcg.src <= GX::TG_TEX7) { - vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4({}, 0.0, 1.0);"), i, in_uv(tcg.src - GX::TG_TEX0)); + vtxXfrAttrs += + fmt::format(FMT_STRING("\n var tc{} = vec4({}, 0.0, 1.0);"), i, in_uv(tcg.src - GX::TG_TEX0)); } else if (tcg.src == GX::TG_POS) { vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4(obj_pos.xyz, 1.0);"), i); } else if (tcg.src == GX::TG_NRM) { @@ -663,7 +665,8 @@ var v_packed_uvs: Vec2Block; } else { u32 postMtxIdx = (tcg.postMtx - GX::PTTEXMTX0) / 3; info.usesPTTexMtx.set(postMtxIdx); - vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{0}_proj = ubuf.postmtx{1} * vec4(tc{0}_tmp.xyz, 1.0);"), i, postMtxIdx); + vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{0}_proj = ubuf.postmtx{1} * vec4(tc{0}_tmp.xyz, 1.0);"), + i, postMtxIdx); } vtxXfrAttrs += fmt::format(FMT_STRING("\n out.tex{0}_uv = tc{0}_proj.xy;"), i); fragmentFnPre += fmt::format( @@ -698,7 +701,8 @@ var v_packed_uvs: Vec2Block; if (config.fogType != GX::FOG_NONE) { info.usesFog = true; - uniformPre += "\n" + uniformPre += + "\n" "struct Fog {\n" " color: vec4;\n" " a: f32;\n" @@ -729,7 +733,8 @@ var v_packed_uvs: 
Vec2Block; break; case GX::FOG_PERSP_REVEXP2: case GX::FOG_ORTHO_REVEXP2: - fragmentFn += "\n fogF = 1.0 - fogF;" + fragmentFn += + "\n fogF = 1.0 - fogF;" "\n var fogZ = exp2(-8.0 * fogF * fogF);"; break; default: @@ -796,7 +801,11 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 {{ .nextInChain = &wgslDescriptor, .label = label.c_str(), }; - auto shader = gpu::g_device.CreateShaderModule(&shaderDescriptor); + wgpu::ShaderModule shader; + { + OPTICK_EVENT("Device CreateShaderModule"); + shader = gpu::g_device.CreateShaderModule(&shaderDescriptor); + } info.uniformSize = align_uniform(info.uniformSize); auto pair = std::make_pair(std::move(shader), info); diff --git a/aurora/lib/gfx/model/shader.cpp b/aurora/lib/gfx/model/shader.cpp index 1ed9f2102..c9a946db4 100644 --- a/aurora/lib/gfx/model/shader.cpp +++ b/aurora/lib/gfx/model/shader.cpp @@ -113,7 +113,8 @@ static inline std::pair readVert(const u8* data) noexcept { static absl::flat_hash_map, std::vector>> sCachedDisplayLists; void queue_surface(const u8* dlStart, u32 dlSize) noexcept { - const auto hash = xxh3_hash(dlStart, dlSize, 0); + OPTICK_EVENT(); + const auto hash = xxh3_hash_s(dlStart, dlSize, 0); Range vertRange, idxRange; uint32_t numIndices; auto it = sCachedDisplayLists.find(hash); @@ -123,6 +124,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { vertRange = push_verts(ArrayRef{verts}); idxRange = push_indices(ArrayRef{indices}); } else { + OPTICK_EVENT("Display list translation"); std::vector verts; std::vector indices; @@ -181,30 +183,34 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept { } Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange; - if (staticVtxRange) { - sVtxRange = *staticVtxRange; - } else { - sVtxRange = push_storage(reinterpret_cast(vtxData->data()), vtxData->size() * 16); - } - if (staticNrmRange) { - sNrmRange = *staticNrmRange; - } else { - sNrmRange = push_storage(reinterpret_cast(nrmData->data()), nrmData->size() * 16); - } - if 
(staticTcRange) { - sTcRange = *staticTcRange; - } else { - sTcRange = push_storage(reinterpret_cast(tcData->data()), tcData->size() * 8); - } - if (staticPackedTcRange) { - sPackedTcRange = *staticPackedTcRange; - } else if (tcData == tex0TcData) { - sPackedTcRange = sTcRange; - } else { - sPackedTcRange = push_storage(reinterpret_cast(tex0TcData->data()), tex0TcData->size() * 8); + { + OPTICK_EVENT("Storage push"); + if (staticVtxRange) { + sVtxRange = *staticVtxRange; + } else { + sVtxRange = push_storage(reinterpret_cast(vtxData->data()), vtxData->size() * 16); + } + if (staticNrmRange) { + sNrmRange = *staticNrmRange; + } else { + sNrmRange = push_storage(reinterpret_cast(nrmData->data()), nrmData->size() * 16); + } + if (staticTcRange) { + sTcRange = *staticTcRange; + } else { + sTcRange = push_storage(reinterpret_cast(tcData->data()), tcData->size() * 8); + } + if (staticPackedTcRange) { + sPackedTcRange = *staticPackedTcRange; + } else if (tcData == tex0TcData) { + sPackedTcRange = sTcRange; + } else { + sPackedTcRange = push_storage(reinterpret_cast(tex0TcData->data()), tex0TcData->size() * 8); + } } - model::PipelineConfig config{}; + model::PipelineConfig config; + memset(&config, 0, sizeof(model::PipelineConfig)); const gx::BindGroupRanges ranges{ .vtxDataRange = sVtxRange, .nrmDataRange = sNrmRange, @@ -238,6 +244,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli } void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { + OPTICK_EVENT(); if (!bind_pipeline(data.pipeline, pass)) { return; } @@ -281,6 +288,7 @@ static inline void cache_array(const void* data, Vec*& outPtr, std::optional sStreamState; void stream_begin(GX::Primitive primitive) noexcept { + OPTICK_EVENT(); if (sStreamState) { Log.report(logvisor::Fatal, FMT_STRING("Stream began twice!")); unreachable(); @@ -27,6 +28,7 @@ void stream_begin(GX::Primitive primitive) noexcept { void stream_vertex(metaforce::EStreamFlags 
flags, const zeus::CVector3f& pos, const zeus::CVector3f& nrm, const zeus::CColor& color, const zeus::CVector2f& uv) noexcept { + OPTICK_EVENT(); if (!sStreamState) { Log.report(logvisor::Fatal, FMT_STRING("Stream not started!")); unreachable(); @@ -53,9 +55,11 @@ void stream_vertex(metaforce::EStreamFlags flags, const zeus::CVector3f& pos, co } void stream_end() noexcept { + OPTICK_EVENT(); const auto vertRange = push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size()); - stream::PipelineConfig config{}; + stream::PipelineConfig config; + memset(&config, 0, sizeof(stream::PipelineConfig)); config.shaderConfig.denormalizedVertexAttributes = true; config.shaderConfig.denormalizedHasNrm = sStreamState->flags.IsSet(metaforce::EStreamFlagBits::fHasNormal); const auto info = populate_pipeline_config(config, sStreamState->primitive, {}); diff --git a/aurora/lib/gfx/stream/shader.cpp b/aurora/lib/gfx/stream/shader.cpp index a6dbbc82b..09a05afb1 100644 --- a/aurora/lib/gfx/stream/shader.cpp +++ b/aurora/lib/gfx/stream/shader.cpp @@ -65,6 +65,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli State construct_state() { return {}; } void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { + OPTICK_EVENT(); if (!bind_pipeline(data.pipeline, pass)) { return; } diff --git a/aurora/lib/gfx/stream/shader.hpp b/aurora/lib/gfx/stream/shader.hpp index 7ac66c513..5f9626bbc 100644 --- a/aurora/lib/gfx/stream/shader.hpp +++ b/aurora/lib/gfx/stream/shader.hpp @@ -12,7 +12,7 @@ struct DrawData { gx::GXBindGroups bindGroups; }; -struct PipelineConfig : public gx::PipelineConfig {}; +PACK(struct PipelineConfig : public gx::PipelineConfig {}); struct State {}; diff --git a/aurora/lib/gfx/texture.cpp b/aurora/lib/gfx/texture.cpp index bc583448b..4890941fe 100644 --- a/aurora/lib/gfx/texture.cpp +++ b/aurora/lib/gfx/texture.cpp @@ -40,6 +40,7 @@ static wgpu::Extent3D physical_size(wgpu::Extent3D 
size, TextureFormatInfo info) TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format, ArrayRef data, zstring_view label) noexcept { + OPTICK_EVENT(); auto handle = new_dynamic_texture_2d(width, height, mips, format, label); const TextureRef& ref = *handle.ref; @@ -89,6 +90,7 @@ TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mi TextureHandle new_dynamic_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format, zstring_view label) noexcept { + OPTICK_EVENT(); const auto wgpuFormat = to_wgpu(format); const auto size = wgpu::Extent3D{ .width = width, @@ -120,6 +122,7 @@ TextureHandle new_render_texture(uint32_t width, uint32_t height, uint32_t color // TODO accept mip/layer parameters void write_texture(const TextureHandle& handle, ArrayRef data) noexcept { + OPTICK_EVENT(); const TextureRef& ref = *handle.ref; ByteBuffer buffer; diff --git a/aurora/lib/gfx/texture_convert.cpp b/aurora/lib/gfx/texture_convert.cpp index 8273a4ff8..0ff522388 100644 --- a/aurora/lib/gfx/texture_convert.cpp +++ b/aurora/lib/gfx/texture_convert.cpp @@ -78,6 +78,7 @@ constexpr T bswap16(T val) noexcept { } static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -117,6 +118,7 @@ static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips, } static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -158,6 +160,7 @@ static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips, } ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + 
OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -199,6 +202,7 @@ ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, Array } ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -240,6 +244,7 @@ ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, Array } ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data, RGBA8* palette) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -277,6 +282,7 @@ ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayR } ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data, RGBA8* palette) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -314,6 +320,7 @@ ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayR } ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -354,6 +361,7 @@ ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, Ar } ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -401,6 +409,7 @@ ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, Ar } ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef 
data) { + OPTICK_EVENT(); const size_t texelCount = ComputeMippedTexelCount(width, height, mips); ByteBuffer buf{sizeof(RGBA8) * texelCount}; @@ -445,6 +454,7 @@ ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, Arr } ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); const size_t blockCount = ComputeMippedBlockCountDXT1(width, height, mips); ByteBuffer buf{sizeof(DXT1Block) * blockCount}; @@ -493,6 +503,7 @@ ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, Arra ByteBuffer convert_texture(metaforce::ETexelFormat format, uint32_t width, uint32_t height, uint32_t mips, ArrayRef data) { + OPTICK_EVENT(); switch (format) { case metaforce::ETexelFormat::RGBA8PC: case metaforce::ETexelFormat::R8PC: diff --git a/aurora/lib/gfx/textured_quad/shader.cpp b/aurora/lib/gfx/textured_quad/shader.cpp index 30625bfe4..69d87c502 100644 --- a/aurora/lib/gfx/textured_quad/shader.cpp +++ b/aurora/lib/gfx/textured_quad/shader.cpp @@ -376,6 +376,7 @@ DrawData make_draw_data_verts(const State& state, CameraFilterType filter_type, } void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { + OPTICK_EVENT(); if (!bind_pipeline(data.pipeline, pass)) { return; } diff --git a/aurora/lib/gpu.cpp b/aurora/lib/gpu.cpp index d6de7ac13..7a567f748 100644 --- a/aurora/lib/gpu.cpp +++ b/aurora/lib/gpu.cpp @@ -182,8 +182,8 @@ void initialize(SDL_Window* window) { } g_queue = g_device.GetQueue(); - g_BackendBinding = - std::unique_ptr(utils::CreateBinding(g_backendType, window, g_device.Get())); + g_BackendBinding = std::unique_ptr( + utils::CreateBinding(g_backendType, window, g_Adapter.Get(), g_device.Get())); if (!g_BackendBinding) { Log.report(logvisor::Fatal, FMT_STRING("Unsupported backend {}"), backendName); unreachable(); @@ -237,4 +237,16 @@ void resize_swapchain(uint32_t width, uint32_t height) { g_frameBufferResolved = 
create_render_texture(false); g_depthBuffer = create_depth_texture(); } + +#if USE_OPTICK +void* get_native_swapchain() { return g_BackendBinding->GetNativeSwapChain(); } + +Optick::GPUContext begin_cmdlist() { + return g_BackendBinding->OptickSetGpuContext(); +} + +void end_cmdlist(Optick::GPUContext ctx) { + Optick::SetGpuContext(ctx); +} +#endif } // namespace aurora::gpu diff --git a/aurora/lib/gpu.hpp b/aurora/lib/gpu.hpp index 8704cb13d..dd81694c4 100644 --- a/aurora/lib/gpu.hpp +++ b/aurora/lib/gpu.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #ifdef __GNUC__ [[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); } @@ -52,6 +53,12 @@ extern TextureWithSampler g_depthBuffer; void initialize(SDL_Window* window); void shutdown(); void resize_swapchain(uint32_t width, uint32_t height); + +#if USE_OPTICK +void* get_native_swapchain(); +Optick::GPUContext begin_cmdlist(); +void end_cmdlist(Optick::GPUContext ctx); +#endif } // namespace aurora::gpu namespace aurora::gpu::utils { diff --git a/aurora/lib/imgui.cpp b/aurora/lib/imgui.cpp index 2379f4e7b..b4ba00d17 100644 --- a/aurora/lib/imgui.cpp +++ b/aurora/lib/imgui.cpp @@ -75,6 +75,7 @@ void new_frame(const WindowSize& size) noexcept { } void render(const wgpu::RenderPassEncoder& pass) noexcept { + OPTICK_EVENT(); ImGui::Render(); auto* data = ImGui::GetDrawData(); diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index f27a7643b..6bd31bac2 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -106,17 +106,20 @@ if (DAWN_ENABLE_VULKAN) endif () if (MSVC) target_compile_options(dawn_native PRIVATE /bigobj) -else() +else () target_compile_options(SPIRV-Tools-static PRIVATE -Wno-implicit-fallthrough) target_compile_options(SPIRV-Tools-opt PRIVATE -Wno-implicit-fallthrough) endif () option(OPTICK_ENABLED "Enable profiling with Optick" OFF) set(OPTICK_USE_VULKAN ${DAWN_ENABLE_VULKAN} CACHE BOOL "Built-in support for Vulkan" FORCE) 
+set(OPTICK_USE_D3D12 ${DAWN_ENABLE_D3D12} CACHE BOOL "Built-in support for D3D12" FORCE) set(OPTICK_INSTALL_TARGETS OFF CACHE BOOL "Should optick be installed? Set to OFF if you use add_subdirectory to include Optick." FORCE) add_subdirectory(optick) if (NOT MSVC) target_compile_options(OptickCore PRIVATE -Wno-implicit-fallthrough) +elseif (OPTICK_USE_D3D12) + target_link_libraries(OptickCore PRIVATE dxgi) endif () add_subdirectory(libjpeg-turbo EXCLUDE_FROM_ALL) diff --git a/normalize_submodules.sh b/normalize_submodules.sh index a2ac8e7d4..f40fb8f98 100644 --- a/normalize_submodules.sh +++ b/normalize_submodules.sh @@ -8,7 +8,6 @@ for sub in "extern/amuse" \ "extern/jbus" \ "extern/kabufuda" \ "extern/nod" \ - "extern/xxhash" \ "extern/zeus"; do if [ -d $sub ]; then pushd $sub > /dev/null