aurora: Integrate Optick & hashing performance improvements

This commit is contained in:
Luke Street 2022-03-16 19:23:52 -04:00
parent b4e242b88d
commit 32c2ff5498
29 changed files with 402 additions and 242 deletions

View File

@ -351,8 +351,6 @@ public:
} }
} }
OPTICK_FRAME("MainThread");
// Check if fullscreen has been toggled, if so set the fullscreen cvar accordingly // Check if fullscreen has been toggled, if so set the fullscreen cvar accordingly
if (m_fullscreenToggleRequested) { if (m_fullscreenToggleRequested) {
m_cvarCommons.m_fullscreen->fromBoolean(!m_cvarCommons.getFullscreen()); m_cvarCommons.m_fullscreen->fromBoolean(!m_cvarCommons.getFullscreen());

View File

@ -19,8 +19,12 @@ add_library(aurora STATIC
target_compile_definitions(aurora PRIVATE IMGUI_USER_CONFIG="imconfig_user.h") # IMGUI_USE_WCHAR32 target_compile_definitions(aurora PRIVATE IMGUI_USER_CONFIG="imconfig_user.h") # IMGUI_USE_WCHAR32
target_include_directories(aurora PUBLIC include ../) target_include_directories(aurora PUBLIC include ../)
target_include_directories(aurora PRIVATE ../imgui ../extern/imgui) target_include_directories(aurora PRIVATE ../imgui ../extern/imgui)
# Add Dawn's own source tree, its bundled abseil-cpp, and Dawn's generated
# sources (under the build directory) as private include paths for aurora.
# NOTE(review): presumably required so aurora's backend bindings can include
# Dawn's internal, non-public headers for the profiler integration — confirm.
target_include_directories(aurora PRIVATE
../extern/dawn/src
../extern/dawn/third_party/abseil-cpp
${CMAKE_BINARY_DIR}/extern/dawn/gen/src) # for hacks :)
target_link_libraries(aurora PRIVATE dawn_native dawncpp webgpu_dawn zeus logvisor SDL2-static xxhash target_link_libraries(aurora PRIVATE dawn_native dawncpp webgpu_dawn zeus logvisor SDL2-static xxhash
absl::btree absl::flat_hash_map) absl::btree absl::flat_hash_map OptickCore)
if (APPLE) if (APPLE)
target_compile_definitions(aurora PRIVATE DAWN_ENABLE_BACKEND_METAL) target_compile_definitions(aurora PRIVATE DAWN_ENABLE_BACKEND_METAL)
target_sources(aurora PRIVATE lib/dawn/MetalBinding.mm) target_sources(aurora PRIVATE lib/dawn/MetalBinding.mm)

View File

@ -280,12 +280,18 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
g_AppDelegate->onAppWindowResized(size); g_AppDelegate->onAppWindowResized(size);
while (poll_events()) { while (poll_events()) {
OPTICK_FRAME("MainThread");
imgui::new_frame(g_windowSize); imgui::new_frame(g_windowSize);
if (!g_AppDelegate->onAppIdle(ImGui::GetIO().DeltaTime)) { if (!g_AppDelegate->onAppIdle(ImGui::GetIO().DeltaTime)) {
break; break;
} }
const wgpu::TextureView view = g_swapChain.GetCurrentTextureView(); wgpu::TextureView view;
{
OPTICK_EVENT("SwapChain GetCurrentTextureView");
view = g_swapChain.GetCurrentTextureView();
}
gfx::begin_frame(); gfx::begin_frame();
g_AppDelegate->onAppDraw(); g_AppDelegate->onAppDraw();
@ -293,8 +299,12 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
.label = "Redraw encoder", .label = "Redraw encoder",
}; };
auto encoder = g_device.CreateCommandEncoder(&encoderDescriptor); auto encoder = g_device.CreateCommandEncoder(&encoderDescriptor);
#if USE_OPTICK
auto prevContext = gpu::begin_cmdlist();
#endif
gfx::end_frame(encoder); gfx::end_frame(encoder);
{ {
OPTICK_EVENT("Main Render Pass");
const std::array attachments{ const std::array attachments{
wgpu::RenderPassColorAttachment{ wgpu::RenderPassColorAttachment{
.view = view, .view = view,
@ -329,6 +339,7 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
pass.End(); pass.End();
} }
{ {
OPTICK_EVENT("ImGui Render Pass");
const std::array attachments{ const std::array attachments{
wgpu::RenderPassColorAttachment{ wgpu::RenderPassColorAttachment{
.view = view, .view = view,
@ -345,9 +356,19 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
imgui::render(pass); imgui::render(pass);
pass.End(); pass.End();
} }
const auto buffer = encoder.Finish(); #if USE_OPTICK
g_queue.Submit(1, &buffer); gpu::end_cmdlist(prevContext);
g_swapChain.Present(); #endif
{
OPTICK_EVENT("Queue Submit");
const auto buffer = encoder.Finish();
g_queue.Submit(1, &buffer);
}
{
OPTICK_GPU_FLIP(gpu::get_native_swapchain());
OPTICK_CATEGORY("Present", Optick::Category::Wait);
g_swapChain.Present();
}
g_AppDelegate->onAppPostDraw(); g_AppDelegate->onAppPostDraw();

View File

@ -8,22 +8,23 @@
namespace aurora::gpu::utils { namespace aurora::gpu::utils {
#if defined(DAWN_ENABLE_BACKEND_D3D12) #if defined(DAWN_ENABLE_BACKEND_D3D12)
BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUDevice device); BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_METAL) #if defined(DAWN_ENABLE_BACKEND_METAL)
BackendBinding* CreateMetalBinding(SDL_Window* window, WGPUDevice device); BackendBinding* CreateMetalBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_NULL) #if defined(DAWN_ENABLE_BACKEND_NULL)
BackendBinding* CreateNullBinding(SDL_Window* window, WGPUDevice device); BackendBinding* CreateNullBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_OPENGL) #if defined(DAWN_ENABLE_BACKEND_OPENGL)
BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUDevice device); BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_VULKAN) #if defined(DAWN_ENABLE_BACKEND_VULKAN)
BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUDevice device); BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif #endif
BackendBinding::BackendBinding(SDL_Window* window, WGPUDevice device) : m_window(window), m_device(device) {} BackendBinding::BackendBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device)
: m_window(window), m_adapter(adapter), m_device(device) {}
void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type) { void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type) {
if (type == wgpu::BackendType::OpenGL || type == wgpu::BackendType::OpenGLES) { if (type == wgpu::BackendType::OpenGL || type == wgpu::BackendType::OpenGLES) {
@ -45,31 +46,31 @@ void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu:
} }
} }
BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUDevice device) { BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
switch (type) { switch (type) {
#if defined(DAWN_ENABLE_BACKEND_D3D12) #if defined(DAWN_ENABLE_BACKEND_D3D12)
case wgpu::BackendType::D3D12: case wgpu::BackendType::D3D12:
return CreateD3D12Binding(window, device); return CreateD3D12Binding(window, adapter, device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_METAL) #if defined(DAWN_ENABLE_BACKEND_METAL)
case wgpu::BackendType::Metal: case wgpu::BackendType::Metal:
return CreateMetalBinding(window, device); return CreateMetalBinding(window, adapter, device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_NULL) #if defined(DAWN_ENABLE_BACKEND_NULL)
case wgpu::BackendType::Null: case wgpu::BackendType::Null:
return CreateNullBinding(window, device); return CreateNullBinding(window, adapter, device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_DESKTOP_GL) #if defined(DAWN_ENABLE_BACKEND_DESKTOP_GL)
case wgpu::BackendType::OpenGL: case wgpu::BackendType::OpenGL:
return CreateOpenGLBinding(window, device); return CreateOpenGLBinding(window, adapter, device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_OPENGLES) #if defined(DAWN_ENABLE_BACKEND_OPENGLES)
case wgpu::BackendType::OpenGLES: case wgpu::BackendType::OpenGLES:
return CreateOpenGLBinding(window, device); return CreateOpenGLBinding(window, adapter, device);
#endif #endif
#if defined(DAWN_ENABLE_BACKEND_VULKAN) #if defined(DAWN_ENABLE_BACKEND_VULKAN)
case wgpu::BackendType::Vulkan: case wgpu::BackendType::Vulkan:
return CreateVulkanBinding(window, device); return CreateVulkanBinding(window, adapter, device);
#endif #endif
default: default:
return nullptr; return nullptr;

View File

@ -2,6 +2,7 @@
#include <dawn/native/DawnNative.h> #include <dawn/native/DawnNative.h>
#include <dawn/webgpu_cpp.h> #include <dawn/webgpu_cpp.h>
#include <optick.h>
struct SDL_Window; struct SDL_Window;
@ -13,15 +14,20 @@ public:
virtual uint64_t GetSwapChainImplementation() = 0; virtual uint64_t GetSwapChainImplementation() = 0;
virtual WGPUTextureFormat GetPreferredSwapChainTextureFormat() = 0; virtual WGPUTextureFormat GetPreferredSwapChainTextureFormat() = 0;
#if USE_OPTICK
  // Profiler hooks, compiled only when USE_OPTICK is enabled. Backends that
  // support GPU profiling override these; the defaults report "nothing".

  /// Native swap chain handle for the profiler. The base implementation has
  /// none to offer and returns nullptr.
  virtual void* GetNativeSwapChain() {
    return nullptr;
  }

  /// Backend hook for setting Optick's GPU context. The base implementation
  /// does not touch Optick and returns a default-constructed context.
  virtual Optick::GPUContext OptickSetGpuContext() {
    return Optick::GPUContext{};
  }
#endif
protected: protected:
BackendBinding(SDL_Window* window, WGPUDevice device); BackendBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
SDL_Window* m_window = nullptr; SDL_Window* m_window = nullptr;
WGPUAdapter m_adapter = nullptr;
WGPUDevice m_device = nullptr; WGPUDevice m_device = nullptr;
}; };
void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type); void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type);
BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUDevice device); BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
} // namespace aurora::gpu::utils } // namespace aurora::gpu::utils

View File

@ -2,11 +2,21 @@
#include <SDL_syswm.h> #include <SDL_syswm.h>
#include <dawn/native/D3D12Backend.h> #include <dawn/native/D3D12Backend.h>
#include <optick.h>
#if USE_OPTICK
// Internal headers
#include <dawn/native/d3d12/CommandBufferD3D12.h>
#include <dawn/native/d3d12/DeviceD3D12.h>
#define private public
#include <dawn/native/d3d12/NativeSwapChainImplD3D12.h>
#undef private
#endif
namespace aurora::gpu::utils { namespace aurora::gpu::utils {
class D3D12Binding : public BackendBinding { class D3D12Binding : public BackendBinding {
public: public:
D3D12Binding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {} D3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override { uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) { if (m_swapChainImpl.userData == nullptr) {
@ -22,6 +32,19 @@ public:
return dawn::native::d3d12::GetNativeSwapChainPreferredFormat(&m_swapChainImpl); return dawn::native::d3d12::GetNativeSwapChainPreferredFormat(&m_swapChainImpl);
} }
#if USE_OPTICK
/// Returns the swap chain object held by Dawn's D3D12 native swap chain
/// implementation as an opaque pointer, or nullptr when that implementation
/// has not been created yet.
void* GetNativeSwapChain() override {
  // m_swapChainImpl is value-initialized, so userData stays null until the
  // swap chain implementation is created. Guard instead of dereferencing null.
  auto* impl = static_cast<dawn::native::d3d12::NativeSwapChainImpl*>(m_swapChainImpl.userData);
  if (impl == nullptr) {
    return nullptr;
  }
  // mSwapChain is reached through the `#define private public` include hack
  // used for NativeSwapChainImplD3D12.h in this file.
  return impl->mSwapChain.Get();
}
/// Passes the command list of Dawn's pending D3D12 command context to
/// Optick::SetGpuContext and returns that call's result.
Optick::GPUContext OptickSetGpuContext() override {
  // WGPUDevice is an opaque handle; route it through void* to obtain Dawn's
  // DeviceBase, then downcast to the D3D12 backend device.
  void* const opaqueDevice = m_device;
  auto* const backendDevice = dawn::native::d3d12::ToBackend(static_cast<dawn::native::DeviceBase*>(opaqueDevice));
  auto* const recordingContext = backendDevice->GetPendingCommandContext().AcquireSuccess();
  return Optick::SetGpuContext({recordingContext->GetCommandList()});
}
#endif
private: private:
DawnSwapChainImplementation m_swapChainImpl{}; DawnSwapChainImplementation m_swapChainImpl{};
@ -30,8 +53,16 @@ private:
SDL_VERSION(&wmInfo.version); SDL_VERSION(&wmInfo.version);
SDL_GetWindowWMInfo(m_window, &wmInfo); SDL_GetWindowWMInfo(m_window, &wmInfo);
m_swapChainImpl = dawn::native::d3d12::CreateNativeSwapChainImpl(m_device, wmInfo.info.win.window); m_swapChainImpl = dawn::native::d3d12::CreateNativeSwapChainImpl(m_device, wmInfo.info.win.window);
#if USE_OPTICK
auto* device = dawn::native::d3d12::ToBackend(static_cast<dawn::native::DeviceBase*>(static_cast<void*>(m_device)));
auto* d3d12Device = device->GetD3D12Device();
auto* d3d12CommandQueue = device->GetCommandQueue().Get();
OPTICK_GPU_INIT_D3D12(d3d12Device, &d3d12CommandQueue, 1);
#endif
} }
}; };
BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUDevice device) { return new D3D12Binding(window, device); } BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
return new D3D12Binding(window, adapter, device);
}
} // namespace aurora::gpu::utils } // namespace aurora::gpu::utils

View File

@ -89,7 +89,7 @@ private:
class MetalBinding : public BackendBinding { class MetalBinding : public BackendBinding {
public: public:
MetalBinding(SDL_Window *window, WGPUDevice device) : BackendBinding(window, device) {} MetalBinding(SDL_Window *window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override { uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) { if (m_swapChainImpl.userData == nullptr) {
@ -104,5 +104,7 @@ private:
DawnSwapChainImplementation m_swapChainImpl{}; DawnSwapChainImplementation m_swapChainImpl{};
}; };
BackendBinding *CreateMetalBinding(SDL_Window *window, WGPUDevice device) { return new MetalBinding(window, device); } BackendBinding *CreateMetalBinding(SDL_Window *window, WGPUAdapter adapter, WGPUDevice device) {
return new MetalBinding(window, adapter, device);
}
} // namespace aurora::gpu::utils } // namespace aurora::gpu::utils

View File

@ -6,7 +6,7 @@
namespace aurora::gpu::utils { namespace aurora::gpu::utils {
class OpenGLBinding : public BackendBinding { class OpenGLBinding : public BackendBinding {
public: public:
OpenGLBinding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {} OpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override { uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) { if (m_swapChainImpl.userData == nullptr) {
@ -31,5 +31,7 @@ private:
} }
}; };
BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUDevice device) { return new OpenGLBinding(window, device); } BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
return new OpenGLBinding(window, adapter, device);
}
} // namespace aurora::gpu::utils } // namespace aurora::gpu::utils

View File

@ -1,13 +1,24 @@
#include "BackendBinding.hpp" #include "BackendBinding.hpp"
#include <SDL_vulkan.h>
#include <cassert> #include <cassert>
#include <optick.h>
#if USE_OPTICK
// Internal headers
#include <dawn/native/vulkan/AdapterVk.h>
#include <dawn/native/vulkan/DeviceVk.h>
#define private public
#include <dawn/native/vulkan/NativeSwapChainImplVk.h>
#undef private
#endif
#include <SDL_vulkan.h>
#include <dawn/native/VulkanBackend.h> #include <dawn/native/VulkanBackend.h>
namespace aurora::gpu::utils { namespace aurora::gpu::utils {
class VulkanBinding : public BackendBinding { class VulkanBinding : public BackendBinding {
public: public:
VulkanBinding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {} VulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override { uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) { if (m_swapChainImpl.userData == nullptr) {
@ -23,6 +34,19 @@ public:
return dawn::native::vulkan::GetNativeSwapChainPreferredFormat(&m_swapChainImpl); return dawn::native::vulkan::GetNativeSwapChainPreferredFormat(&m_swapChainImpl);
} }
#if USE_OPTICK
/// Returns the swap chain handle held by Dawn's Vulkan native swap chain
/// implementation as an opaque pointer, or nullptr when that implementation
/// has not been created yet.
void* GetNativeSwapChain() override {
  // m_swapChainImpl is value-initialized, so userData stays null until the
  // swap chain implementation is created. Guard instead of dereferencing null.
  auto* impl = static_cast<dawn::native::vulkan::NativeSwapChainImpl*>(m_swapChainImpl.userData);
  if (impl == nullptr) {
    return nullptr;
  }
  // mSwapChain is reached through the `#define private public` include hack
  // used for NativeSwapChainImplVk.h in this file.
  return impl->mSwapChain;
}
/// Passes the command buffer of Dawn's pending Vulkan recording context to
/// Optick::SetGpuContext and returns that call's result.
Optick::GPUContext OptickSetGpuContext() override {
  // WGPUDevice is an opaque handle; route it through void* to obtain Dawn's
  // DeviceBase, then downcast to the Vulkan backend device.
  void* const opaqueDevice = m_device;
  auto* const backendDevice = dawn::native::vulkan::ToBackend(static_cast<dawn::native::DeviceBase*>(opaqueDevice));
  auto* const recordingContext = backendDevice->GetPendingRecordingContext();
  return Optick::SetGpuContext({recordingContext->commandBuffer});
}
#endif
private: private:
DawnSwapChainImplementation m_swapChainImpl{}; DawnSwapChainImplementation m_swapChainImpl{};
@ -32,8 +56,21 @@ private:
assert(false); assert(false);
} }
m_swapChainImpl = dawn::native::vulkan::CreateNativeSwapChainImpl(m_device, surface); m_swapChainImpl = dawn::native::vulkan::CreateNativeSwapChainImpl(m_device, surface);
#if USE_OPTICK
auto* adapter =
dawn::native::vulkan::ToBackend(static_cast<dawn::native::AdapterBase*>(static_cast<void*>(m_adapter)));
auto* device =
dawn::native::vulkan::ToBackend(static_cast<dawn::native::DeviceBase*>(static_cast<void*>(m_device)));
VkDevice vkDevice = device->GetVkDevice();
VkPhysicalDevice vkPhysicalDevice = adapter->GetPhysicalDevice();
VkQueue vkQueue = device->GetQueue();
uint32_t queueFamily = device->GetGraphicsQueueFamily();
OPTICK_GPU_INIT_VULKAN(&vkDevice, &vkPhysicalDevice, &vkQueue, &queueFamily, 1, nullptr);
#endif
} }
}; };
BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUDevice device) { return new VulkanBinding(window, device); } BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
return new VulkanBinding(window, adapter, device);
}
} // namespace aurora::gpu::utils } // namespace aurora::gpu::utils

View File

@ -298,6 +298,7 @@ DrawData make_draw_data_verts(const State& state, CameraFilterType filter_type,
} }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) { if (!bind_pipeline(data.pipeline, pass)) {
return; return;
} }

View File

@ -84,107 +84,44 @@ struct Command {
namespace aurora { namespace aurora {
template <> template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::colored_quad::PipelineConfig& input) { inline XXH64_hash_t xxh3_hash(const gfx::PipelineCreateCommand& input, XXH64_hash_t seed) {
XXH3_64bits_update(&state, &input.filterType, sizeof(gfx::colored_quad::PipelineConfig::filterType)); constexpr auto typeSize = sizeof(gfx::PipelineCreateCommand::type);
XXH3_64bits_update(&state, &input.zComparison, sizeof(gfx::colored_quad::PipelineConfig::zComparison));
XXH3_64bits_update(&state, &input.zTest, sizeof(gfx::colored_quad::PipelineConfig::zTest));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::textured_quad::PipelineConfig& input) {
XXH3_64bits_update(&state, &input.filterType, sizeof(gfx::textured_quad::PipelineConfig::filterType));
XXH3_64bits_update(&state, &input.zComparison, sizeof(gfx::textured_quad::PipelineConfig::zComparison));
XXH3_64bits_update(&state, &input.zTest, sizeof(gfx::textured_quad::PipelineConfig::zTest));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::movie_player::PipelineConfig& input) {
// no-op
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::PipelineConfig& input) {
xxh3_update(state, input.shaderConfig);
XXH3_64bits_update(&state, &input.primitive, sizeof(gfx::gx::PipelineConfig::primitive));
XXH3_64bits_update(&state, &input.depthFunc, sizeof(gfx::gx::PipelineConfig::depthFunc));
XXH3_64bits_update(&state, &input.cullMode, sizeof(gfx::gx::PipelineConfig::cullMode));
XXH3_64bits_update(&state, &input.blendMode, sizeof(gfx::gx::PipelineConfig::blendMode));
XXH3_64bits_update(&state, &input.blendFacSrc, sizeof(gfx::gx::PipelineConfig::blendFacSrc));
XXH3_64bits_update(&state, &input.blendFacDst, sizeof(gfx::gx::PipelineConfig::blendFacDst));
XXH3_64bits_update(&state, &input.blendOp, sizeof(gfx::gx::PipelineConfig::blendOp));
if (input.dstAlpha) {
XXH3_64bits_update(&state, &*input.dstAlpha, sizeof(float));
}
XXH3_64bits_update(&state, &input.depthCompare, sizeof(gfx::gx::PipelineConfig::depthCompare));
XXH3_64bits_update(&state, &input.depthUpdate, sizeof(gfx::gx::PipelineConfig::depthUpdate));
XXH3_64bits_update(&state, &input.alphaUpdate, sizeof(gfx::gx::PipelineConfig::alphaUpdate));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::stream::PipelineConfig& input) {
xxh3_update<gfx::gx::PipelineConfig>(state, input);
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::model::PipelineConfig& input) {
xxh3_update<gfx::gx::PipelineConfig>(state, input);
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::PipelineCreateCommand& input) {
XXH3_64bits_update(&state, &input.type, sizeof(gfx::PipelineCreateCommand::type));
switch (input.type) { switch (input.type) {
case gfx::ShaderType::Aabb: case gfx::ShaderType::Aabb:
// TODO // TODO
break; break;
case gfx::ShaderType::ColoredQuad: case gfx::ShaderType::ColoredQuad:
xxh3_update(state, input.coloredQuad); return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::coloredQuad), seed);
break;
case gfx::ShaderType::TexturedQuad: case gfx::ShaderType::TexturedQuad:
xxh3_update(state, input.texturedQuad); return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::texturedQuad), seed);
break;
case gfx::ShaderType::MoviePlayer: case gfx::ShaderType::MoviePlayer:
xxh3_update(state, input.moviePlayer); return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::moviePlayer), seed);
break;
case gfx::ShaderType::Stream: case gfx::ShaderType::Stream:
xxh3_update(state, input.stream); return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::stream), seed);
break;
case gfx::ShaderType::Model: case gfx::ShaderType::Model:
xxh3_update(state, input.model); return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::model), seed);
break;
} }
return 0;
} }
template <> template <>
inline void xxh3_update(XXH3_state_t& state, const wgpu::BindGroupEntry& input) { inline XXH64_hash_t xxh3_hash(const wgpu::BindGroupDescriptor& input, XXH64_hash_t seed) {
XXH3_64bits_update(&state, &input.binding, sizeof(wgpu::BindGroupEntry::binding)); constexpr auto offset = sizeof(void*) * 2; // skip nextInChain, label
XXH3_64bits_update(&state, &input.buffer, sizeof(wgpu::BindGroupEntry::buffer)); const auto hash = xxh3_hash_s(reinterpret_cast<const u8*>(&input) + offset,
XXH3_64bits_update(&state, &input.offset, sizeof(wgpu::BindGroupEntry::offset)); sizeof(wgpu::BindGroupDescriptor) - offset - sizeof(void*) /* skip entries */, seed);
if (input.buffer != nullptr) { // // TODO ensure size is zeroed elsewhere
XXH3_64bits_update(&state, &input.size, sizeof(wgpu::BindGroupEntry::size)); // for (int i = 0; i < input.entryCount; ++i) {
} // const wgpu::BindGroupEntry& entry = input.entries[i];
XXH3_64bits_update(&state, &input.sampler, sizeof(wgpu::BindGroupEntry::sampler)); // if (!entry.buffer && entry.size != 0) {
XXH3_64bits_update(&state, &input.textureView, sizeof(wgpu::BindGroupEntry::textureView)); // gfx::Log.report(logvisor::Fatal, FMT_STRING("Size != 0! {}"), entry.size);
// }
// }
return xxh3_hash_s(input.entries, sizeof(wgpu::BindGroupEntry) * input.entryCount, hash);
} }
template <> template <>
inline void xxh3_update(XXH3_state_t& state, const wgpu::BindGroupDescriptor& input) { inline XXH64_hash_t xxh3_hash(const wgpu::SamplerDescriptor& input, XXH64_hash_t seed) {
if (input.label != nullptr) { constexpr auto offset = sizeof(void*) * 2; // skip nextInChain, label
XXH3_64bits_update(&state, input.label, strlen(input.label)); return xxh3_hash_s(reinterpret_cast<const u8*>(&input) + offset,
} sizeof(wgpu::SamplerDescriptor) - offset - 2 /* skip padding */, seed);
XXH3_64bits_update(&state, &input.layout, sizeof(wgpu::BindGroupDescriptor::layout));
XXH3_64bits_update(&state, &input.entryCount, sizeof(wgpu::BindGroupDescriptor::entryCount));
for (int i = 0; i < input.entryCount; ++i) {
xxh3_update(state, input.entries[i]);
}
}
template <>
inline void xxh3_update(XXH3_state_t& state, const wgpu::SamplerDescriptor& input) {
if (input.label != nullptr) {
XXH3_64bits_update(&state, input.label, strlen(input.label));
}
XXH3_64bits_update(&state, &input.addressModeU, sizeof(wgpu::SamplerDescriptor::addressModeU));
XXH3_64bits_update(&state, &input.addressModeV, sizeof(wgpu::SamplerDescriptor::addressModeV));
XXH3_64bits_update(&state, &input.addressModeW, sizeof(wgpu::SamplerDescriptor::addressModeW));
XXH3_64bits_update(&state, &input.magFilter, sizeof(wgpu::SamplerDescriptor::magFilter));
XXH3_64bits_update(&state, &input.minFilter, sizeof(wgpu::SamplerDescriptor::minFilter));
XXH3_64bits_update(&state, &input.mipmapFilter, sizeof(wgpu::SamplerDescriptor::mipmapFilter));
XXH3_64bits_update(&state, &input.lodMinClamp, sizeof(wgpu::SamplerDescriptor::lodMinClamp));
XXH3_64bits_update(&state, &input.lodMaxClamp, sizeof(wgpu::SamplerDescriptor::lodMaxClamp));
XXH3_64bits_update(&state, &input.compare, sizeof(wgpu::SamplerDescriptor::compare));
XXH3_64bits_update(&state, &input.maxAnisotropy, sizeof(wgpu::SamplerDescriptor::maxAnisotropy));
} }
} // namespace aurora } // namespace aurora
@ -219,6 +156,7 @@ static PipelineRef g_currentPipeline;
static std::vector<Command> g_commands; static std::vector<Command> g_commands;
static PipelineRef find_pipeline(PipelineCreateCommand command, NewPipelineCallback&& cb) { static PipelineRef find_pipeline(PipelineCreateCommand command, NewPipelineCallback&& cb) {
OPTICK_EVENT();
const auto hash = xxh3_hash(command); const auto hash = xxh3_hash(command);
bool found = false; bool found = false;
{ {
@ -357,6 +295,7 @@ PipelineRef pipeline_ref(model::PipelineConfig config) {
} }
static void pipeline_worker() { static void pipeline_worker() {
OPTICK_THREAD("Pipeline Worker");
bool hasMore = false; bool hasMore = false;
while (true) { while (true) {
std::pair<PipelineRef, NewPipelineCallback> cb; std::pair<PipelineRef, NewPipelineCallback> cb;
@ -387,6 +326,8 @@ static void pipeline_worker() {
} }
void initialize() { void initialize() {
gx::initialize();
g_pipelineThread = std::thread(pipeline_worker); g_pipelineThread = std::thread(pipeline_worker);
const auto createBuffer = [](wgpu::Buffer& out, wgpu::BufferUsage usage, uint64_t size, const char* label) { const auto createBuffer = [](wgpu::Buffer& out, wgpu::BufferUsage usage, uint64_t size, const char* label) {
@ -440,6 +381,7 @@ void shutdown() {
static size_t currentStagingBuffer = 0; static size_t currentStagingBuffer = 0;
static bool bufferMapped = false; static bool bufferMapped = false;
void map_staging_buffer() { void map_staging_buffer() {
OPTICK_EVENT();
bufferMapped = false; bufferMapped = false;
g_stagingBuffers[currentStagingBuffer].MapAsync( g_stagingBuffers[currentStagingBuffer].MapAsync(
wgpu::MapMode::Write, 0, StagingBufferSize, wgpu::MapMode::Write, 0, StagingBufferSize,
@ -456,9 +398,13 @@ void map_staging_buffer() {
} }
void begin_frame() { void begin_frame() {
while (!bufferMapped) { {
g_device.Tick(); OPTICK_EVENT("Wait for buffer mapping");
while (!bufferMapped) {
g_device.Tick();
}
} }
OPTICK_EVENT();
size_t bufferOffset = 0; size_t bufferOffset = 0;
auto& stagingBuf = g_stagingBuffers[currentStagingBuffer]; auto& stagingBuf = g_stagingBuffers[currentStagingBuffer];
const auto mapBuffer = [&](ByteBuffer& buf, uint64_t size) { const auto mapBuffer = [&](ByteBuffer& buf, uint64_t size) {
@ -472,6 +418,7 @@ void begin_frame() {
} }
void end_frame(const wgpu::CommandEncoder& cmd) { void end_frame(const wgpu::CommandEncoder& cmd) {
OPTICK_EVENT();
uint64_t bufferOffset = 0; uint64_t bufferOffset = 0;
const auto writeBuffer = [&](ByteBuffer& buf, wgpu::Buffer& out, uint64_t size, std::string_view label) { const auto writeBuffer = [&](ByteBuffer& buf, wgpu::Buffer& out, uint64_t size, std::string_view label) {
const auto writeSize = buf.size(); // Only need to copy this many bytes const auto writeSize = buf.size(); // Only need to copy this many bytes
@ -491,6 +438,7 @@ void end_frame(const wgpu::CommandEncoder& cmd) {
} }
void render(const wgpu::RenderPassEncoder& pass) { void render(const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
g_currentPipeline = UINT64_MAX; g_currentPipeline = UINT64_MAX;
for (const auto& cmd : g_commands) { for (const auto& cmd : g_commands) {
@ -533,6 +481,7 @@ void render(const wgpu::RenderPassEncoder& pass) {
} }
bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) { bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (ref == g_currentPipeline) { if (ref == g_currentPipeline) {
return true; return true;
} }
@ -547,6 +496,7 @@ bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) {
} }
static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, size_t alignment) { static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, size_t alignment) {
OPTICK_EVENT();
size_t padding = 0; size_t padding = 0;
if (alignment != 0) { if (alignment != 0) {
padding = alignment - length % alignment; padding = alignment - length % alignment;
@ -565,6 +515,7 @@ static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length,
return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)}; return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
} }
static inline Range map(ByteBuffer& target, size_t length, size_t alignment) { static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
OPTICK_EVENT();
size_t padding = 0; size_t padding = 0;
if (alignment != 0) { if (alignment != 0) {
padding = alignment - length % alignment; padding = alignment - length % alignment;
@ -576,19 +527,28 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
target.append_zeroes(length + padding); target.append_zeroes(length + padding);
return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)}; return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
} }
Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0 /* TODO? */); } Range push_verts(const uint8_t* data, size_t length) {
Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0 /* TODO? */); } OPTICK_EVENT();
return push(g_verts, data, length, 0 /* TODO? */);
}
Range push_indices(const uint8_t* data, size_t length) {
OPTICK_EVENT();
return push(g_indices, data, length, 0 /* TODO? */);
}
Range push_uniform(const uint8_t* data, size_t length) { Range push_uniform(const uint8_t* data, size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits; wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); g_device.GetLimits(&limits);
return push(g_uniforms, data, length, limits.limits.minUniformBufferOffsetAlignment); return push(g_uniforms, data, length, limits.limits.minUniformBufferOffsetAlignment);
} }
Range push_storage(const uint8_t* data, size_t length) { Range push_storage(const uint8_t* data, size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits; wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); g_device.GetLimits(&limits);
return push(g_storage, data, length, limits.limits.minStorageBufferOffsetAlignment); return push(g_storage, data, length, limits.limits.minStorageBufferOffsetAlignment);
} }
Range push_static_storage(const uint8_t* data, size_t length) { Range push_static_storage(const uint8_t* data, size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits; wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); g_device.GetLimits(&limits);
auto range = push(g_staticStorage, data, length, limits.limits.minStorageBufferOffsetAlignment); auto range = push(g_staticStorage, data, length, limits.limits.minStorageBufferOffsetAlignment);
@ -596,20 +556,24 @@ Range push_static_storage(const uint8_t* data, size_t length) {
return range; return range;
} }
std::pair<ByteBuffer, Range> map_verts(size_t length) { std::pair<ByteBuffer, Range> map_verts(size_t length) {
OPTICK_EVENT();
const auto range = map(g_verts, length, 0 /* TODO? */); const auto range = map(g_verts, length, 0 /* TODO? */);
return {ByteBuffer{g_verts.data() + range.offset, range.size}, range}; return {ByteBuffer{g_verts.data() + range.offset, range.size}, range};
} }
std::pair<ByteBuffer, Range> map_indices(size_t length) { std::pair<ByteBuffer, Range> map_indices(size_t length) {
OPTICK_EVENT();
const auto range = map(g_indices, length, 0 /* TODO? */); const auto range = map(g_indices, length, 0 /* TODO? */);
return {ByteBuffer{g_indices.data() + range.offset, range.size}, range}; return {ByteBuffer{g_indices.data() + range.offset, range.size}, range};
} }
std::pair<ByteBuffer, Range> map_uniform(size_t length) { std::pair<ByteBuffer, Range> map_uniform(size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits; wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); g_device.GetLimits(&limits);
const auto range = map(g_uniforms, length, limits.limits.minUniformBufferOffsetAlignment); const auto range = map(g_uniforms, length, limits.limits.minUniformBufferOffsetAlignment);
return {ByteBuffer{g_uniforms.data() + range.offset, range.size}, range}; return {ByteBuffer{g_uniforms.data() + range.offset, range.size}, range};
} }
std::pair<ByteBuffer, Range> map_storage(size_t length) { std::pair<ByteBuffer, Range> map_storage(size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits; wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); g_device.GetLimits(&limits);
const auto range = map(g_storage, length, limits.limits.minStorageBufferOffsetAlignment); const auto range = map(g_storage, length, limits.limits.minStorageBufferOffsetAlignment);
@ -617,6 +581,7 @@ std::pair<ByteBuffer, Range> map_storage(size_t length) {
} }
BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) { BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) {
OPTICK_EVENT();
const auto id = xxh3_hash(descriptor); const auto id = xxh3_hash(descriptor);
if (!g_cachedBindGroups.contains(id)) { if (!g_cachedBindGroups.contains(id)) {
g_cachedBindGroups.try_emplace(id, g_device.CreateBindGroup(&descriptor)); g_cachedBindGroups.try_emplace(id, g_device.CreateBindGroup(&descriptor));
@ -624,6 +589,7 @@ BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) {
return id; return id;
} }
const wgpu::BindGroup& find_bind_group(BindGroupRef id) { const wgpu::BindGroup& find_bind_group(BindGroupRef id) {
OPTICK_EVENT();
const auto it = g_cachedBindGroups.find(id); const auto it = g_cachedBindGroups.find(id);
if (it == g_cachedBindGroups.end()) { if (it == g_cachedBindGroups.end()) {
Log.report(logvisor::Fatal, FMT_STRING("get_bind_group: failed to locate {}"), id); Log.report(logvisor::Fatal, FMT_STRING("get_bind_group: failed to locate {}"), id);
@ -633,6 +599,7 @@ const wgpu::BindGroup& find_bind_group(BindGroupRef id) {
} }
const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) { const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) {
OPTICK_EVENT();
const auto id = xxh3_hash(descriptor); const auto id = xxh3_hash(descriptor);
auto it = g_cachedSamplers.find(id); auto it = g_cachedSamplers.find(id);
if (it == g_cachedSamplers.end()) { if (it == g_cachedSamplers.end()) {
@ -642,6 +609,7 @@ const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) {
} }
uint32_t align_uniform(uint32_t value) { uint32_t align_uniform(uint32_t value) {
OPTICK_EVENT();
wgpu::SupportedLimits limits; wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); // TODO cache g_device.GetLimits(&limits); // TODO cache
const auto uniform_alignment = limits.limits.minUniformBufferOffsetAlignment; const auto uniform_alignment = limits.limits.minUniformBufferOffsetAlignment;

View File

@ -4,20 +4,32 @@
#include <utility> #include <utility>
#include <dawn/webgpu_cpp.h> #include <dawn/webgpu_cpp.h>
#include <xxhash_impl.h> #define XXH_INLINE_ALL
#define XXH_STATIC_LINKING_ONLY
#define XXH_IMPLEMENTATION
#include <xxhash.h>
#include <optick.h>
#ifndef ALIGN #ifndef ALIGN
#define ALIGN(x, a) (((x) + ((a)-1)) & ~((a)-1)) #define ALIGN(x, a) (((x) + ((a)-1)) & ~((a)-1))
#endif #endif
#ifdef __GNUC__
#define PACK(...) __VA_ARGS__ __attribute__((__packed__))
#endif
#ifdef _MSC_VER
#define PACK(...) __pragma(pack(push, 1)); __VA_ARGS__; __pragma(pack(pop))
#endif
namespace aurora { namespace aurora {
template <typename T> template <typename T>
static inline void xxh3_update(XXH3_state_t& state, const T& input); static inline void xxh3_update(XXH3_state_t& state, const T& input);
static inline XXH64_hash_t xxh3_hash(const void* input, size_t len, XXH64_hash_t seed = 0) { static inline XXH64_hash_t xxh3_hash_s(const void* input, size_t len, XXH64_hash_t seed = 0) {
return XXH3_64bits_withSeed(input, len, seed); return XXH3_64bits_withSeed(input, len, seed);
} }
template <typename T> template <typename T>
static inline XXH64_hash_t xxh3_hash(const T& input, XXH64_hash_t seed = 0) { static inline XXH64_hash_t xxh3_hash(const T& input, XXH64_hash_t seed = 0) {
OPTICK_EVENT();
XXH3_state_t state; XXH3_state_t state;
memset(&state, 0, sizeof(XXH3_state_t)); memset(&state, 0, sizeof(XXH3_state_t));
XXH3_64bits_reset_withSeed(&state, seed); XXH3_64bits_reset_withSeed(&state, seed);

View File

@ -335,6 +335,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GX::Primitive gx_prim, GX:
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info, wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader, ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
zstring_view label) noexcept { zstring_view label) noexcept {
OPTICK_EVENT();
const auto depthStencil = wgpu::DepthStencilState{ const auto depthStencil = wgpu::DepthStencilState{
.format = g_graphicsConfig.depthFormat, .format = g_graphicsConfig.depthFormat,
.depthWriteEnabled = config.depthUpdate, .depthWriteEnabled = config.depthUpdate,
@ -387,6 +388,7 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive, ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive,
const BindGroupRanges& ranges) noexcept { const BindGroupRanges& ranges) noexcept {
OPTICK_EVENT();
for (u8 i = 0; i < g_gxState.numTevStages; ++i) { for (u8 i = 0; i < g_gxState.numTevStages; ++i) {
config.shaderConfig.tevStages[i] = g_gxState.tevStages[i]; config.shaderConfig.tevStages[i] = g_gxState.tevStages[i];
} }
@ -396,7 +398,9 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit
for (u8 i = 0; i < g_gxState.numTexGens; ++i) { for (u8 i = 0; i < g_gxState.numTexGens; ++i) {
config.shaderConfig.tcgs[i] = g_gxState.tcgs[i]; config.shaderConfig.tcgs[i] = g_gxState.tcgs[i];
} }
config.shaderConfig.alphaDiscard = g_gxState.alphaDiscard; if (g_gxState.alphaDiscard) {
config.shaderConfig.alphaDiscard = g_gxState.alphaDiscard;
}
config.shaderConfig.fogType = g_gxState.fog.type; config.shaderConfig.fogType = g_gxState.fog.type;
config = { config = {
.shaderConfig = config.shaderConfig, .shaderConfig = config.shaderConfig,
@ -414,6 +418,7 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit
}; };
// TODO separate shader info from build_shader for async // TODO separate shader info from build_shader for async
{ {
OPTICK_EVENT("Shader info & bind groups");
std::lock_guard lk{g_pipelineMutex}; std::lock_guard lk{g_pipelineMutex};
auto [_, info] = build_shader(config.shaderConfig); auto [_, info] = build_shader(config.shaderConfig);
info.bindGroups = build_bind_groups(info, config.shaderConfig, ranges); // TODO this is hack info.bindGroups = build_bind_groups(info, config.shaderConfig, ranges); // TODO this is hack
@ -542,6 +547,7 @@ static absl::flat_hash_map<u32, std::pair<wgpu::BindGroupLayout, wgpu::BindGroup
GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config, GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config,
const BindGroupRanges& ranges) noexcept { const BindGroupRanges& ranges) noexcept {
OPTICK_EVENT();
const auto layouts = build_bind_group_layouts(info, config); const auto layouts = build_bind_group_layouts(info, config);
u32 textureCount = info.sampledTextures.count(); u32 textureCount = info.sampledTextures.count();
@ -578,6 +584,9 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
}; };
std::array<wgpu::BindGroupEntry, MaxTextures> samplerEntries; std::array<wgpu::BindGroupEntry, MaxTextures> samplerEntries;
std::array<wgpu::BindGroupEntry, MaxTextures> textureEntries; std::array<wgpu::BindGroupEntry, MaxTextures> textureEntries;
{
OPTICK_EVENT("Build texture entries");
for (u32 texIdx = 0, i = 0; texIdx < info.sampledTextures.size(); ++texIdx) { for (u32 texIdx = 0, i = 0; texIdx < info.sampledTextures.size(); ++texIdx) {
if (!info.sampledTextures.test(texIdx)) { if (!info.sampledTextures.test(texIdx)) {
continue; continue;
@ -597,6 +606,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
}; };
i++; i++;
} }
}
return { return {
.uniformBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{ .uniformBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
.label = "GX Uniform Bind Group", .label = "GX Uniform Bind Group",
@ -620,6 +630,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
} }
GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept { GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept {
OPTICK_EVENT();
GXBindGroupLayouts out; GXBindGroupLayouts out;
u32 uniformSizeKey = info.uniformSize + (config.denormalizedVertexAttributes ? 0 : 1); u32 uniformSizeKey = info.uniformSize + (config.denormalizedVertexAttributes ? 0 : 1);
const auto uniformIt = sUniformBindGroupLayouts.find(uniformSizeKey); const auto uniformIt = sUniformBindGroupLayouts.find(uniformSizeKey);
@ -729,6 +740,10 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
return out; return out;
} }
void initialize() noexcept {
memset(&g_gxState, 0, sizeof(GXState));
}
// TODO this is awkward // TODO this is awkward
extern absl::flat_hash_map<ShaderRef, std::pair<wgpu::ShaderModule, gx::ShaderInfo>> g_gxCachedShaders; extern absl::flat_hash_map<ShaderRef, std::pair<wgpu::ShaderModule, gx::ShaderInfo>> g_gxCachedShaders;
void shutdown() noexcept { void shutdown() noexcept {

View File

@ -14,23 +14,23 @@ constexpr u32 MaxTexMtx = 10;
constexpr u32 MaxPTTexMtx = 20; constexpr u32 MaxPTTexMtx = 20;
constexpr u32 MaxTexCoord = GX::MAX_TEXCOORD; constexpr u32 MaxTexCoord = GX::MAX_TEXCOORD;
template <typename Arg, Arg Default> PACK(template <typename Arg, Arg Default>
struct TevPass { struct TevPass {
Arg a = Default; Arg a = Default;
Arg b = Default; Arg b = Default;
Arg c = Default; Arg c = Default;
Arg d = Default; Arg d = Default;
bool operator==(const TevPass&) const = default; bool operator==(const TevPass&) const = default;
}; });
struct TevOp { PACK(struct TevOp {
GX::TevOp op = GX::TevOp::TEV_ADD; GX::TevOp op = GX::TevOp::TEV_ADD;
GX::TevBias bias = GX::TevBias::TB_ZERO; GX::TevBias bias = GX::TevBias::TB_ZERO;
GX::TevScale scale = GX::TevScale::CS_SCALE_1; GX::TevScale scale = GX::TevScale::CS_SCALE_1;
GX::TevRegID outReg = GX::TevRegID::TEVPREV; GX::TevRegID outReg = GX::TevRegID::TEVPREV;
bool clamp = true; bool clamp = true;
bool operator==(const TevOp&) const = default; bool operator==(const TevOp&) const = default;
}; });
struct TevStage { PACK(struct TevStage {
TevPass<GX::TevColorArg, GX::CC_ZERO> colorPass; TevPass<GX::TevColorArg, GX::CC_ZERO> colorPass;
TevPass<GX::TevAlphaArg, GX::CA_ZERO> alphaPass; TevPass<GX::TevAlphaArg, GX::CA_ZERO> alphaPass;
TevOp colorOp; TevOp colorOp;
@ -41,7 +41,7 @@ struct TevStage {
GX::TexMapID texMapId = GX::TEXMAP_NULL; GX::TexMapID texMapId = GX::TEXMAP_NULL;
GX::ChannelID channelId = GX::COLOR_NULL; GX::ChannelID channelId = GX::COLOR_NULL;
bool operator==(const TevStage&) const = default; bool operator==(const TevStage&) const = default;
}; });
struct TextureBind { struct TextureBind {
aurora::gfx::TextureHandle handle; aurora::gfx::TextureHandle handle;
metaforce::EClampMode clampMode; metaforce::EClampMode clampMode;
@ -55,37 +55,37 @@ struct TextureBind {
operator bool() const noexcept { return handle; } operator bool() const noexcept { return handle; }
}; };
// For shader generation // For shader generation
struct ColorChannelConfig { PACK(struct ColorChannelConfig {
GX::ColorSrc matSrc = GX::SRC_REG; GX::ColorSrc matSrc = GX::SRC_REG;
GX::ColorSrc ambSrc = GX::SRC_REG; GX::ColorSrc ambSrc = GX::SRC_REG;
bool lightingEnabled = false; bool lightingEnabled = false;
bool operator==(const ColorChannelConfig&) const = default; bool operator==(const ColorChannelConfig&) const = default;
}; });
// For uniform generation // For uniform generation
struct ColorChannelState { PACK(struct ColorChannelState {
zeus::CColor matColor = zeus::skClear; zeus::CColor matColor = zeus::skClear;
zeus::CColor ambColor = zeus::skClear; zeus::CColor ambColor = zeus::skClear;
GX::LightMask lightState; GX::LightMask lightState;
}; });
using LightVariant = std::variant<std::monostate, Light, zeus::CColor>; using LightVariant = std::variant<std::monostate, Light, zeus::CColor>;
// Mat4x4 used instead of Mat4x3 for padding purposes // Mat4x4 used instead of Mat4x3 for padding purposes
using TexMtxVariant = std::variant<std::monostate, Mat4x2<float>, Mat4x4<float>>; using TexMtxVariant = std::variant<std::monostate, Mat4x2<float>, Mat4x4<float>>;
struct TcgConfig { PACK(struct TcgConfig {
GX::TexGenType type = GX::TG_MTX2x4; GX::TexGenType type = GX::TG_MTX2x4;
GX::TexGenSrc src = GX::MAX_TEXGENSRC; GX::TexGenSrc src = GX::MAX_TEXGENSRC;
GX::TexMtx mtx = GX::IDENTITY; GX::TexMtx mtx = GX::IDENTITY;
GX::PTTexMtx postMtx = GX::PTIDENTITY; GX::PTTexMtx postMtx = GX::PTIDENTITY;
bool normalize = false; bool normalize = false;
bool operator==(const TcgConfig&) const = default; bool operator==(const TcgConfig&) const = default;
}; });
struct FogState { PACK(struct FogState {
GX::FogType type = GX::FOG_NONE; GX::FogType type = GX::FOG_NONE;
float startZ = 0.f; float startZ = 0.f;
float endZ = 0.f; float endZ = 0.f;
float nearZ = 0.f; float nearZ = 0.f;
float farZ = 0.f; float farZ = 0.f;
zeus::CColor color; zeus::CColor color;
}; });
struct GXState { struct GXState {
zeus::CMatrix4f mv; zeus::CMatrix4f mv;
@ -123,10 +123,11 @@ extern GXState g_gxState;
static inline Mat4x4<float> get_combined_matrix() noexcept { return g_gxState.proj * g_gxState.mv; } static inline Mat4x4<float> get_combined_matrix() noexcept { return g_gxState.proj * g_gxState.mv; }
void initialize() noexcept;
void shutdown() noexcept; void shutdown() noexcept;
const TextureBind& get_texture(GX::TexMapID id) noexcept; const TextureBind& get_texture(GX::TexMapID id) noexcept;
struct ShaderConfig { PACK(struct ShaderConfig {
GX::FogType fogType; GX::FogType fogType;
std::array<std::optional<TevStage>, MaxTevStages> tevStages; std::array<std::optional<TevStage>, MaxTevStages> tevStages;
std::array<ColorChannelConfig, MaxColorChannels> colorChannels; std::array<ColorChannelConfig, MaxColorChannels> colorChannels;
@ -135,8 +136,8 @@ struct ShaderConfig {
bool denormalizedVertexAttributes = false; bool denormalizedVertexAttributes = false;
bool denormalizedHasNrm = false; // TODO this is a hack bool denormalizedHasNrm = false; // TODO this is a hack
bool operator==(const ShaderConfig&) const = default; bool operator==(const ShaderConfig&) const = default;
}; });
struct PipelineConfig { PACK(struct PipelineConfig {
ShaderConfig shaderConfig; ShaderConfig shaderConfig;
GX::Primitive primitive; GX::Primitive primitive;
GX::Compare depthFunc; GX::Compare depthFunc;
@ -146,7 +147,7 @@ struct PipelineConfig {
GX::LogicOp blendOp; GX::LogicOp blendOp;
std::optional<float> dstAlpha; std::optional<float> dstAlpha;
bool depthCompare, depthUpdate, alphaUpdate; bool depthCompare, depthUpdate, alphaUpdate;
}; });
struct GXBindGroupLayouts { struct GXBindGroupLayouts {
wgpu::BindGroupLayout uniformLayout; wgpu::BindGroupLayout uniformLayout;
wgpu::BindGroupLayout samplerLayout; wgpu::BindGroupLayout samplerLayout;
@ -202,70 +203,71 @@ struct DlVert {
} // namespace aurora::gfx::gx } // namespace aurora::gfx::gx
namespace aurora { namespace aurora {
template <typename Arg, Arg Default> //template <typename Arg, Arg Default>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevPass<Arg, Default>& input) { //inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevPass<Arg, Default>& input) {
XXH3_64bits_update(&state, &input.a, sizeof(Arg)); // XXH3_64bits_update(&state, &input.a, sizeof(Arg));
XXH3_64bits_update(&state, &input.b, sizeof(Arg)); // XXH3_64bits_update(&state, &input.b, sizeof(Arg));
XXH3_64bits_update(&state, &input.c, sizeof(Arg)); // XXH3_64bits_update(&state, &input.c, sizeof(Arg));
XXH3_64bits_update(&state, &input.d, sizeof(Arg)); // XXH3_64bits_update(&state, &input.d, sizeof(Arg));
} //}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevOp& input) {
// XXH3_64bits_update(&state, &input.op, sizeof(gfx::gx::TevOp::op));
// XXH3_64bits_update(&state, &input.bias, sizeof(gfx::gx::TevOp::bias));
// XXH3_64bits_update(&state, &input.scale, sizeof(gfx::gx::TevOp::scale));
// XXH3_64bits_update(&state, &input.outReg, sizeof(gfx::gx::TevOp::outReg));
// XXH3_64bits_update(&state, &input.clamp, sizeof(bool));
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevStage& input) {
// xxh3_update(state, input.colorPass);
// xxh3_update(state, input.alphaPass);
// xxh3_update(state, input.colorOp);
// xxh3_update(state, input.alphaOp);
// XXH3_64bits_update(&state, &input.kcSel, sizeof(gfx::gx::TevStage::kcSel));
// XXH3_64bits_update(&state, &input.kaSel, sizeof(gfx::gx::TevStage::kaSel));
// XXH3_64bits_update(&state, &input.texCoordId, sizeof(gfx::gx::TevStage::texCoordId));
// XXH3_64bits_update(&state, &input.texMapId, sizeof(gfx::gx::TevStage::texMapId));
// XXH3_64bits_update(&state, &input.channelId, sizeof(gfx::gx::TevStage::channelId));
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ColorChannelConfig& input) {
// XXH3_64bits_update(&state, &input.lightingEnabled, sizeof(gfx::gx::ColorChannelConfig::lightingEnabled));
// XXH3_64bits_update(&state, &input.matSrc, sizeof(gfx::gx::ColorChannelConfig::matSrc));
// if (input.lightingEnabled) {
// // Unused when lighting is disabled
// XXH3_64bits_update(&state, &input.ambSrc, sizeof(gfx::gx::ColorChannelConfig::ambSrc));
// }
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TcgConfig& input) {
// XXH3_64bits_update(&state, &input.type, sizeof(gfx::gx::TcgConfig::type));
// XXH3_64bits_update(&state, &input.src, sizeof(gfx::gx::TcgConfig::src));
// XXH3_64bits_update(&state, &input.mtx, sizeof(gfx::gx::TcgConfig::mtx));
// XXH3_64bits_update(&state, &input.postMtx, sizeof(gfx::gx::TcgConfig::postMtx));
// XXH3_64bits_update(&state, &input.normalize, sizeof(gfx::gx::TcgConfig::normalize));
//}
template <> template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevOp& input) { inline XXH64_hash_t xxh3_hash(const gfx::gx::ShaderConfig& input, XXH64_hash_t seed) {
XXH3_64bits_update(&state, &input.op, sizeof(gfx::gx::TevOp::op)); return xxh3_hash_s(&input, sizeof(gfx::gx::ShaderConfig), seed);
XXH3_64bits_update(&state, &input.bias, sizeof(gfx::gx::TevOp::bias)); // for (const auto& item : input.tevStages) {
XXH3_64bits_update(&state, &input.scale, sizeof(gfx::gx::TevOp::scale)); // if (!item) {
XXH3_64bits_update(&state, &input.outReg, sizeof(gfx::gx::TevOp::outReg)); // break;
XXH3_64bits_update(&state, &input.clamp, sizeof(bool)); // }
} // xxh3_update(state, *item);
template <> // }
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevStage& input) { // for (const auto& item : input.colorChannels) {
xxh3_update(state, input.colorPass); // xxh3_update(state, item);
xxh3_update(state, input.alphaPass); // }
xxh3_update(state, input.colorOp); // for (const auto& item : input.tcgs) {
xxh3_update(state, input.alphaOp); // xxh3_update(state, item);
XXH3_64bits_update(&state, &input.kcSel, sizeof(gfx::gx::TevStage::kcSel)); // }
XXH3_64bits_update(&state, &input.kaSel, sizeof(gfx::gx::TevStage::kaSel)); // if (input.alphaDiscard) {
XXH3_64bits_update(&state, &input.texCoordId, sizeof(gfx::gx::TevStage::texCoordId)); // XXH3_64bits_update(&state, &*input.alphaDiscard, sizeof(float));
XXH3_64bits_update(&state, &input.texMapId, sizeof(gfx::gx::TevStage::texMapId)); // }
XXH3_64bits_update(&state, &input.channelId, sizeof(gfx::gx::TevStage::channelId)); // XXH3_64bits_update(&state, &input.denormalizedVertexAttributes,
} // sizeof(gfx::gx::ShaderConfig::denormalizedVertexAttributes));
template <> // XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(gfx::gx::ShaderConfig::denormalizedHasNrm));
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ColorChannelConfig& input) { // XXH3_64bits_update(&state, &input.fogType, sizeof(gfx::gx::ShaderConfig::fogType));
XXH3_64bits_update(&state, &input.lightingEnabled, sizeof(gfx::gx::ColorChannelConfig::lightingEnabled));
XXH3_64bits_update(&state, &input.matSrc, sizeof(gfx::gx::ColorChannelConfig::matSrc));
if (input.lightingEnabled) {
// Unused when lighting is disabled
XXH3_64bits_update(&state, &input.ambSrc, sizeof(gfx::gx::ColorChannelConfig::ambSrc));
}
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TcgConfig& input) {
XXH3_64bits_update(&state, &input.type, sizeof(gfx::gx::TcgConfig::type));
XXH3_64bits_update(&state, &input.src, sizeof(gfx::gx::TcgConfig::src));
XXH3_64bits_update(&state, &input.mtx, sizeof(gfx::gx::TcgConfig::mtx));
XXH3_64bits_update(&state, &input.postMtx, sizeof(gfx::gx::TcgConfig::postMtx));
XXH3_64bits_update(&state, &input.normalize, sizeof(gfx::gx::TcgConfig::normalize));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ShaderConfig& input) {
for (const auto& item : input.tevStages) {
if (!item) {
break;
}
xxh3_update(state, *item);
}
for (const auto& item : input.colorChannels) {
xxh3_update(state, item);
}
for (const auto& item : input.tcgs) {
xxh3_update(state, item);
}
if (input.alphaDiscard) {
XXH3_64bits_update(&state, &*input.alphaDiscard, sizeof(float));
}
XXH3_64bits_update(&state, &input.denormalizedVertexAttributes,
sizeof(gfx::gx::ShaderConfig::denormalizedVertexAttributes));
XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(gfx::gx::ShaderConfig::denormalizedHasNrm));
XXH3_64bits_update(&state, &input.fogType, sizeof(gfx::gx::ShaderConfig::fogType));
} }
} // namespace aurora } // namespace aurora

View File

@ -359,6 +359,7 @@ std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& confi
#endif #endif
return it->second; return it->second;
} }
OPTICK_EVENT();
Log.report(logvisor::Info, FMT_STRING("Shader config (hash {:x}):"), hash); Log.report(logvisor::Info, FMT_STRING("Shader config (hash {:x}):"), hash);
ShaderInfo info{ ShaderInfo info{
@ -636,7 +637,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
} else { } else {
vtxOutAttrs += fmt::format(FMT_STRING("\n @location({}) tex{}_uv: vec2<f32>;"), locIdx, i); vtxOutAttrs += fmt::format(FMT_STRING("\n @location({}) tex{}_uv: vec2<f32>;"), locIdx, i);
if (tcg.src >= GX::TG_TEX0 && tcg.src <= GX::TG_TEX7) { if (tcg.src >= GX::TG_TEX0 && tcg.src <= GX::TG_TEX7) {
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4<f32>({}, 0.0, 1.0);"), i, in_uv(tcg.src - GX::TG_TEX0)); vtxXfrAttrs +=
fmt::format(FMT_STRING("\n var tc{} = vec4<f32>({}, 0.0, 1.0);"), i, in_uv(tcg.src - GX::TG_TEX0));
} else if (tcg.src == GX::TG_POS) { } else if (tcg.src == GX::TG_POS) {
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4<f32>(obj_pos.xyz, 1.0);"), i); vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4<f32>(obj_pos.xyz, 1.0);"), i);
} else if (tcg.src == GX::TG_NRM) { } else if (tcg.src == GX::TG_NRM) {
@ -663,7 +665,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
} else { } else {
u32 postMtxIdx = (tcg.postMtx - GX::PTTEXMTX0) / 3; u32 postMtxIdx = (tcg.postMtx - GX::PTTEXMTX0) / 3;
info.usesPTTexMtx.set(postMtxIdx); info.usesPTTexMtx.set(postMtxIdx);
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{0}_proj = ubuf.postmtx{1} * vec4<f32>(tc{0}_tmp.xyz, 1.0);"), i, postMtxIdx); vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{0}_proj = ubuf.postmtx{1} * vec4<f32>(tc{0}_tmp.xyz, 1.0);"),
i, postMtxIdx);
} }
vtxXfrAttrs += fmt::format(FMT_STRING("\n out.tex{0}_uv = tc{0}_proj.xy;"), i); vtxXfrAttrs += fmt::format(FMT_STRING("\n out.tex{0}_uv = tc{0}_proj.xy;"), i);
fragmentFnPre += fmt::format( fragmentFnPre += fmt::format(
@ -698,7 +701,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
if (config.fogType != GX::FOG_NONE) { if (config.fogType != GX::FOG_NONE) {
info.usesFog = true; info.usesFog = true;
uniformPre += "\n" uniformPre +=
"\n"
"struct Fog {\n" "struct Fog {\n"
" color: vec4<f32>;\n" " color: vec4<f32>;\n"
" a: f32;\n" " a: f32;\n"
@ -729,7 +733,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
break; break;
case GX::FOG_PERSP_REVEXP2: case GX::FOG_PERSP_REVEXP2:
case GX::FOG_ORTHO_REVEXP2: case GX::FOG_ORTHO_REVEXP2:
fragmentFn += "\n fogF = 1.0 - fogF;" fragmentFn +=
"\n fogF = 1.0 - fogF;"
"\n var fogZ = exp2(-8.0 * fogF * fogF);"; "\n var fogZ = exp2(-8.0 * fogF * fogF);";
break; break;
default: default:
@ -796,7 +801,11 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {{
.nextInChain = &wgslDescriptor, .nextInChain = &wgslDescriptor,
.label = label.c_str(), .label = label.c_str(),
}; };
auto shader = gpu::g_device.CreateShaderModule(&shaderDescriptor); wgpu::ShaderModule shader;
{
OPTICK_EVENT("Device CreateShaderModule");
shader = gpu::g_device.CreateShaderModule(&shaderDescriptor);
}
info.uniformSize = align_uniform(info.uniformSize); info.uniformSize = align_uniform(info.uniformSize);
auto pair = std::make_pair(std::move(shader), info); auto pair = std::make_pair(std::move(shader), info);

View File

@ -113,7 +113,8 @@ static inline std::pair<gx::DlVert, size_t> readVert(const u8* data) noexcept {
static absl::flat_hash_map<XXH64_hash_t, std::pair<std::vector<gx::DlVert>, std::vector<u32>>> sCachedDisplayLists; static absl::flat_hash_map<XXH64_hash_t, std::pair<std::vector<gx::DlVert>, std::vector<u32>>> sCachedDisplayLists;
void queue_surface(const u8* dlStart, u32 dlSize) noexcept { void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
const auto hash = xxh3_hash(dlStart, dlSize, 0); OPTICK_EVENT();
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange; Range vertRange, idxRange;
uint32_t numIndices; uint32_t numIndices;
auto it = sCachedDisplayLists.find(hash); auto it = sCachedDisplayLists.find(hash);
@ -123,6 +124,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
vertRange = push_verts(ArrayRef{verts}); vertRange = push_verts(ArrayRef{verts});
idxRange = push_indices(ArrayRef{indices}); idxRange = push_indices(ArrayRef{indices});
} else { } else {
OPTICK_EVENT("Display list translation");
std::vector<gx::DlVert> verts; std::vector<gx::DlVert> verts;
std::vector<u32> indices; std::vector<u32> indices;
@ -181,30 +183,34 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
} }
Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange; Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange;
if (staticVtxRange) { {
sVtxRange = *staticVtxRange; OPTICK_EVENT("Storage push");
} else { if (staticVtxRange) {
sVtxRange = push_storage(reinterpret_cast<const uint8_t*>(vtxData->data()), vtxData->size() * 16); sVtxRange = *staticVtxRange;
} } else {
if (staticNrmRange) { sVtxRange = push_storage(reinterpret_cast<const uint8_t*>(vtxData->data()), vtxData->size() * 16);
sNrmRange = *staticNrmRange; }
} else { if (staticNrmRange) {
sNrmRange = push_storage(reinterpret_cast<const uint8_t*>(nrmData->data()), nrmData->size() * 16); sNrmRange = *staticNrmRange;
} } else {
if (staticTcRange) { sNrmRange = push_storage(reinterpret_cast<const uint8_t*>(nrmData->data()), nrmData->size() * 16);
sTcRange = *staticTcRange; }
} else { if (staticTcRange) {
sTcRange = push_storage(reinterpret_cast<const uint8_t*>(tcData->data()), tcData->size() * 8); sTcRange = *staticTcRange;
} } else {
if (staticPackedTcRange) { sTcRange = push_storage(reinterpret_cast<const uint8_t*>(tcData->data()), tcData->size() * 8);
sPackedTcRange = *staticPackedTcRange; }
} else if (tcData == tex0TcData) { if (staticPackedTcRange) {
sPackedTcRange = sTcRange; sPackedTcRange = *staticPackedTcRange;
} else { } else if (tcData == tex0TcData) {
sPackedTcRange = push_storage(reinterpret_cast<const uint8_t*>(tex0TcData->data()), tex0TcData->size() * 8); sPackedTcRange = sTcRange;
} else {
sPackedTcRange = push_storage(reinterpret_cast<const uint8_t*>(tex0TcData->data()), tex0TcData->size() * 8);
}
} }
model::PipelineConfig config{}; model::PipelineConfig config;
memset(&config, 0, sizeof(model::PipelineConfig));
const gx::BindGroupRanges ranges{ const gx::BindGroupRanges ranges{
.vtxDataRange = sVtxRange, .vtxDataRange = sVtxRange,
.nrmDataRange = sNrmRange, .nrmDataRange = sNrmRange,
@ -238,6 +244,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli
} }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) { if (!bind_pipeline(data.pipeline, pass)) {
return; return;
} }
@ -281,6 +288,7 @@ static inline void cache_array(const void* data, Vec*& outPtr, std::optional<aur
} }
void GXSetArray(GX::Attr attr, const void* data, u8 stride) noexcept { void GXSetArray(GX::Attr attr, const void* data, u8 stride) noexcept {
OPTICK_EVENT();
using namespace aurora::gfx::model; using namespace aurora::gfx::model;
switch (attr) { switch (attr) {
case GX::VA_POS: case GX::VA_POS:

View File

@ -14,7 +14,7 @@ struct DrawData {
gx::GXBindGroups bindGroups; gx::GXBindGroups bindGroups;
}; };
struct PipelineConfig : gx::PipelineConfig {}; PACK(struct PipelineConfig : gx::PipelineConfig {});
struct State {}; struct State {};

View File

@ -233,6 +233,7 @@ DrawData make_draw_data(const State& state, const TextureHandle& tex_y, const Te
} }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) { if (!bind_pipeline(data.pipeline, pass)) {
return; return;
} }

View File

@ -18,6 +18,7 @@ struct SStreamState {
static std::optional<SStreamState> sStreamState; static std::optional<SStreamState> sStreamState;
void stream_begin(GX::Primitive primitive) noexcept { void stream_begin(GX::Primitive primitive) noexcept {
OPTICK_EVENT();
if (sStreamState) { if (sStreamState) {
Log.report(logvisor::Fatal, FMT_STRING("Stream began twice!")); Log.report(logvisor::Fatal, FMT_STRING("Stream began twice!"));
unreachable(); unreachable();
@ -27,6 +28,7 @@ void stream_begin(GX::Primitive primitive) noexcept {
void stream_vertex(metaforce::EStreamFlags flags, const zeus::CVector3f& pos, const zeus::CVector3f& nrm, void stream_vertex(metaforce::EStreamFlags flags, const zeus::CVector3f& pos, const zeus::CVector3f& nrm,
const zeus::CColor& color, const zeus::CVector2f& uv) noexcept { const zeus::CColor& color, const zeus::CVector2f& uv) noexcept {
OPTICK_EVENT();
if (!sStreamState) { if (!sStreamState) {
Log.report(logvisor::Fatal, FMT_STRING("Stream not started!")); Log.report(logvisor::Fatal, FMT_STRING("Stream not started!"));
unreachable(); unreachable();
@ -53,9 +55,11 @@ void stream_vertex(metaforce::EStreamFlags flags, const zeus::CVector3f& pos, co
} }
void stream_end() noexcept { void stream_end() noexcept {
OPTICK_EVENT();
const auto vertRange = push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size()); const auto vertRange = push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
stream::PipelineConfig config{}; stream::PipelineConfig config;
memset(&config, 0, sizeof(stream::PipelineConfig));
config.shaderConfig.denormalizedVertexAttributes = true; config.shaderConfig.denormalizedVertexAttributes = true;
config.shaderConfig.denormalizedHasNrm = sStreamState->flags.IsSet(metaforce::EStreamFlagBits::fHasNormal); config.shaderConfig.denormalizedHasNrm = sStreamState->flags.IsSet(metaforce::EStreamFlagBits::fHasNormal);
const auto info = populate_pipeline_config(config, sStreamState->primitive, {}); const auto info = populate_pipeline_config(config, sStreamState->primitive, {});

View File

@ -65,6 +65,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli
State construct_state() { return {}; } State construct_state() { return {}; }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) { if (!bind_pipeline(data.pipeline, pass)) {
return; return;
} }

View File

@ -12,7 +12,7 @@ struct DrawData {
gx::GXBindGroups bindGroups; gx::GXBindGroups bindGroups;
}; };
struct PipelineConfig : public gx::PipelineConfig {}; PACK(struct PipelineConfig : public gx::PipelineConfig {});
struct State {}; struct State {};

View File

@ -40,6 +40,7 @@ static wgpu::Extent3D physical_size(wgpu::Extent3D size, TextureFormatInfo info)
TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format, TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format,
ArrayRef<uint8_t> data, zstring_view label) noexcept { ArrayRef<uint8_t> data, zstring_view label) noexcept {
OPTICK_EVENT();
auto handle = new_dynamic_texture_2d(width, height, mips, format, label); auto handle = new_dynamic_texture_2d(width, height, mips, format, label);
const TextureRef& ref = *handle.ref; const TextureRef& ref = *handle.ref;
@ -89,6 +90,7 @@ TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mi
TextureHandle new_dynamic_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format, TextureHandle new_dynamic_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format,
zstring_view label) noexcept { zstring_view label) noexcept {
OPTICK_EVENT();
const auto wgpuFormat = to_wgpu(format); const auto wgpuFormat = to_wgpu(format);
const auto size = wgpu::Extent3D{ const auto size = wgpu::Extent3D{
.width = width, .width = width,
@ -120,6 +122,7 @@ TextureHandle new_render_texture(uint32_t width, uint32_t height, uint32_t color
// TODO accept mip/layer parameters // TODO accept mip/layer parameters
void write_texture(const TextureHandle& handle, ArrayRef<uint8_t> data) noexcept { void write_texture(const TextureHandle& handle, ArrayRef<uint8_t> data) noexcept {
OPTICK_EVENT();
const TextureRef& ref = *handle.ref; const TextureRef& ref = *handle.ref;
ByteBuffer buffer; ByteBuffer buffer;

View File

@ -78,6 +78,7 @@ constexpr T bswap16(T val) noexcept {
} }
static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -117,6 +118,7 @@ static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips,
} }
static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -158,6 +160,7 @@ static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips,
} }
ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -199,6 +202,7 @@ ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, Array
} }
ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -240,6 +244,7 @@ ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, Array
} }
ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data, RGBA8* palette) { ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data, RGBA8* palette) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -277,6 +282,7 @@ ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayR
} }
ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data, RGBA8* palette) { ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data, RGBA8* palette) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -314,6 +320,7 @@ ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayR
} }
ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -354,6 +361,7 @@ ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, Ar
} }
ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
size_t texelCount = ComputeMippedTexelCount(width, height, mips); size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -401,6 +409,7 @@ ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, Ar
} }
ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips); const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount}; ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -445,6 +454,7 @@ ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, Arr
} }
ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) { ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t blockCount = ComputeMippedBlockCountDXT1(width, height, mips); const size_t blockCount = ComputeMippedBlockCountDXT1(width, height, mips);
ByteBuffer buf{sizeof(DXT1Block) * blockCount}; ByteBuffer buf{sizeof(DXT1Block) * blockCount};
@ -493,6 +503,7 @@ ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, Arra
ByteBuffer convert_texture(metaforce::ETexelFormat format, uint32_t width, uint32_t height, uint32_t mips, ByteBuffer convert_texture(metaforce::ETexelFormat format, uint32_t width, uint32_t height, uint32_t mips,
ArrayRef<uint8_t> data) { ArrayRef<uint8_t> data) {
OPTICK_EVENT();
switch (format) { switch (format) {
case metaforce::ETexelFormat::RGBA8PC: case metaforce::ETexelFormat::RGBA8PC:
case metaforce::ETexelFormat::R8PC: case metaforce::ETexelFormat::R8PC:

View File

@ -376,6 +376,7 @@ DrawData make_draw_data_verts(const State& state, CameraFilterType filter_type,
} }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) { void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) { if (!bind_pipeline(data.pipeline, pass)) {
return; return;
} }

View File

@ -182,8 +182,8 @@ void initialize(SDL_Window* window) {
} }
g_queue = g_device.GetQueue(); g_queue = g_device.GetQueue();
g_BackendBinding = g_BackendBinding = std::unique_ptr<utils::BackendBinding>(
std::unique_ptr<utils::BackendBinding>(utils::CreateBinding(g_backendType, window, g_device.Get())); utils::CreateBinding(g_backendType, window, g_Adapter.Get(), g_device.Get()));
if (!g_BackendBinding) { if (!g_BackendBinding) {
Log.report(logvisor::Fatal, FMT_STRING("Unsupported backend {}"), backendName); Log.report(logvisor::Fatal, FMT_STRING("Unsupported backend {}"), backendName);
unreachable(); unreachable();
@ -237,4 +237,16 @@ void resize_swapchain(uint32_t width, uint32_t height) {
g_frameBufferResolved = create_render_texture(false); g_frameBufferResolved = create_render_texture(false);
g_depthBuffer = create_depth_texture(); g_depthBuffer = create_depth_texture();
} }
#if USE_OPTICK
// Returns the value of GetNativeSwapChain() from the global backend binding as an
// untyped pointer.
// NOTE(review): g_BackendBinding is dereferenced without a null check; presumably
// this is only called after initialize() has created the binding — confirm.
void* get_native_swapchain() { return g_BackendBinding->GetNativeSwapChain(); }
// Calls OptickSetGpuContext() on the global backend binding and returns the
// Optick::GPUContext it produces.
// NOTE(review): presumably the returned value is the previously active GPU context,
// meant to be handed to end_cmdlist() afterwards — confirm against the
// BackendBinding implementation.
Optick::GPUContext begin_cmdlist() {
return g_BackendBinding->OptickSetGpuContext();
}
// Passes ctx straight to Optick::SetGpuContext; any value that call returns is
// discarded.
// NOTE(review): presumably ctx is the context obtained from begin_cmdlist(), so this
// restores the prior GPU context — confirm.
void end_cmdlist(Optick::GPUContext ctx) {
Optick::SetGpuContext(ctx);
}
#endif
} // namespace aurora::gpu } // namespace aurora::gpu

View File

@ -3,6 +3,7 @@
#include <array> #include <array>
#include <cstdint> #include <cstdint>
#include <dawn/webgpu_cpp.h> #include <dawn/webgpu_cpp.h>
#include <optick.h>
#ifdef __GNUC__ #ifdef __GNUC__
[[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); } [[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); }
@ -52,6 +53,12 @@ extern TextureWithSampler g_depthBuffer;
void initialize(SDL_Window* window); void initialize(SDL_Window* window);
void shutdown(); void shutdown();
void resize_swapchain(uint32_t width, uint32_t height); void resize_swapchain(uint32_t width, uint32_t height);
#if USE_OPTICK
void* get_native_swapchain();
Optick::GPUContext begin_cmdlist();
void end_cmdlist(Optick::GPUContext ctx);
#endif
} // namespace aurora::gpu } // namespace aurora::gpu
namespace aurora::gpu::utils { namespace aurora::gpu::utils {

View File

@ -75,6 +75,7 @@ void new_frame(const WindowSize& size) noexcept {
} }
void render(const wgpu::RenderPassEncoder& pass) noexcept { void render(const wgpu::RenderPassEncoder& pass) noexcept {
OPTICK_EVENT();
ImGui::Render(); ImGui::Render();
auto* data = ImGui::GetDrawData(); auto* data = ImGui::GetDrawData();

View File

@ -106,17 +106,20 @@ if (DAWN_ENABLE_VULKAN)
endif () endif ()
if (MSVC) if (MSVC)
target_compile_options(dawn_native PRIVATE /bigobj) target_compile_options(dawn_native PRIVATE /bigobj)
else() else ()
target_compile_options(SPIRV-Tools-static PRIVATE -Wno-implicit-fallthrough) target_compile_options(SPIRV-Tools-static PRIVATE -Wno-implicit-fallthrough)
target_compile_options(SPIRV-Tools-opt PRIVATE -Wno-implicit-fallthrough) target_compile_options(SPIRV-Tools-opt PRIVATE -Wno-implicit-fallthrough)
endif () endif ()
option(OPTICK_ENABLED "Enable profiling with Optick" OFF) option(OPTICK_ENABLED "Enable profiling with Optick" OFF)
set(OPTICK_USE_VULKAN ${DAWN_ENABLE_VULKAN} CACHE BOOL "Built-in support for Vulkan" FORCE) set(OPTICK_USE_VULKAN ${DAWN_ENABLE_VULKAN} CACHE BOOL "Built-in support for Vulkan" FORCE)
# Force Optick's D3D12 toggle to follow DAWN_ENABLE_D3D12. The cache docstring
# previously read "Vulkan" (copied from the OPTICK_USE_VULKAN line).
set(OPTICK_USE_D3D12 ${DAWN_ENABLE_D3D12} CACHE BOOL "Built-in support for DirectX 12" FORCE)
set(OPTICK_INSTALL_TARGETS OFF CACHE BOOL "Should optick be installed? Set to OFF if you use add_subdirectory to include Optick." FORCE) set(OPTICK_INSTALL_TARGETS OFF CACHE BOOL "Should optick be installed? Set to OFF if you use add_subdirectory to include Optick." FORCE)
add_subdirectory(optick) add_subdirectory(optick)
if (NOT MSVC) if (NOT MSVC)
target_compile_options(OptickCore PRIVATE -Wno-implicit-fallthrough) target_compile_options(OptickCore PRIVATE -Wno-implicit-fallthrough)
elseif (OPTICK_USE_D3D12)
target_link_libraries(OptickCore PRIVATE dxgi)
endif () endif ()
add_subdirectory(libjpeg-turbo EXCLUDE_FROM_ALL) add_subdirectory(libjpeg-turbo EXCLUDE_FROM_ALL)

View File

@ -8,7 +8,6 @@ for sub in "extern/amuse" \
"extern/jbus" \ "extern/jbus" \
"extern/kabufuda" \ "extern/kabufuda" \
"extern/nod" \ "extern/nod" \
"extern/xxhash" \
"extern/zeus"; do "extern/zeus"; do
if [ -d $sub ]; then if [ -d $sub ]; then
pushd $sub > /dev/null pushd $sub > /dev/null