aurora: Integrate Optick & hashing performance improvements

This commit is contained in:
Luke Street 2022-03-16 19:23:52 -04:00
parent b4e242b88d
commit 32c2ff5498
29 changed files with 402 additions and 242 deletions

View File

@ -351,8 +351,6 @@ public:
}
}
OPTICK_FRAME("MainThread");
// Check if fullscreen has been toggled, if so set the fullscreen cvar accordingly
if (m_fullscreenToggleRequested) {
m_cvarCommons.m_fullscreen->fromBoolean(!m_cvarCommons.getFullscreen());

View File

@ -19,8 +19,12 @@ add_library(aurora STATIC
target_compile_definitions(aurora PRIVATE IMGUI_USER_CONFIG="imconfig_user.h") # IMGUI_USE_WCHAR32
target_include_directories(aurora PUBLIC include ../)
target_include_directories(aurora PRIVATE ../imgui ../extern/imgui)
target_include_directories(aurora PRIVATE
../extern/dawn/src
../extern/dawn/third_party/abseil-cpp
${CMAKE_BINARY_DIR}/extern/dawn/gen/src) # for hacks :)
target_link_libraries(aurora PRIVATE dawn_native dawncpp webgpu_dawn zeus logvisor SDL2-static xxhash
absl::btree absl::flat_hash_map)
absl::btree absl::flat_hash_map OptickCore)
if (APPLE)
target_compile_definitions(aurora PRIVATE DAWN_ENABLE_BACKEND_METAL)
target_sources(aurora PRIVATE lib/dawn/MetalBinding.mm)

View File

@ -280,12 +280,18 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
g_AppDelegate->onAppWindowResized(size);
while (poll_events()) {
OPTICK_FRAME("MainThread");
imgui::new_frame(g_windowSize);
if (!g_AppDelegate->onAppIdle(ImGui::GetIO().DeltaTime)) {
break;
}
const wgpu::TextureView view = g_swapChain.GetCurrentTextureView();
wgpu::TextureView view;
{
OPTICK_EVENT("SwapChain GetCurrentTextureView");
view = g_swapChain.GetCurrentTextureView();
}
gfx::begin_frame();
g_AppDelegate->onAppDraw();
@ -293,8 +299,12 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
.label = "Redraw encoder",
};
auto encoder = g_device.CreateCommandEncoder(&encoderDescriptor);
#if USE_OPTICK
auto prevContext = gpu::begin_cmdlist();
#endif
gfx::end_frame(encoder);
{
OPTICK_EVENT("Main Render Pass");
const std::array attachments{
wgpu::RenderPassColorAttachment{
.view = view,
@ -329,6 +339,7 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
pass.End();
}
{
OPTICK_EVENT("ImGui Render Pass");
const std::array attachments{
wgpu::RenderPassColorAttachment{
.view = view,
@ -345,9 +356,19 @@ void app_run(std::unique_ptr<AppDelegate> app, Icon icon, int argc, char** argv)
imgui::render(pass);
pass.End();
}
const auto buffer = encoder.Finish();
g_queue.Submit(1, &buffer);
g_swapChain.Present();
#if USE_OPTICK
gpu::end_cmdlist(prevContext);
#endif
{
OPTICK_EVENT("Queue Submit");
const auto buffer = encoder.Finish();
g_queue.Submit(1, &buffer);
}
{
OPTICK_GPU_FLIP(gpu::get_native_swapchain());
OPTICK_CATEGORY("Present", Optick::Category::Wait);
g_swapChain.Present();
}
g_AppDelegate->onAppPostDraw();

View File

@ -8,22 +8,23 @@
namespace aurora::gpu::utils {
#if defined(DAWN_ENABLE_BACKEND_D3D12)
BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUDevice device);
BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif
#if defined(DAWN_ENABLE_BACKEND_METAL)
BackendBinding* CreateMetalBinding(SDL_Window* window, WGPUDevice device);
BackendBinding* CreateMetalBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif
#if defined(DAWN_ENABLE_BACKEND_NULL)
BackendBinding* CreateNullBinding(SDL_Window* window, WGPUDevice device);
BackendBinding* CreateNullBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif
#if defined(DAWN_ENABLE_BACKEND_OPENGL)
BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUDevice device);
BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif
#if defined(DAWN_ENABLE_BACKEND_VULKAN)
BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUDevice device);
BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
#endif
BackendBinding::BackendBinding(SDL_Window* window, WGPUDevice device) : m_window(window), m_device(device) {}
BackendBinding::BackendBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device)
: m_window(window), m_adapter(adapter), m_device(device) {}
void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type) {
if (type == wgpu::BackendType::OpenGL || type == wgpu::BackendType::OpenGLES) {
@ -45,31 +46,31 @@ void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu:
}
}
BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUDevice device) {
BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
switch (type) {
#if defined(DAWN_ENABLE_BACKEND_D3D12)
case wgpu::BackendType::D3D12:
return CreateD3D12Binding(window, device);
return CreateD3D12Binding(window, adapter, device);
#endif
#if defined(DAWN_ENABLE_BACKEND_METAL)
case wgpu::BackendType::Metal:
return CreateMetalBinding(window, device);
return CreateMetalBinding(window, adapter, device);
#endif
#if defined(DAWN_ENABLE_BACKEND_NULL)
case wgpu::BackendType::Null:
return CreateNullBinding(window, device);
return CreateNullBinding(window, adapter, device);
#endif
#if defined(DAWN_ENABLE_BACKEND_DESKTOP_GL)
case wgpu::BackendType::OpenGL:
return CreateOpenGLBinding(window, device);
return CreateOpenGLBinding(window, adapter, device);
#endif
#if defined(DAWN_ENABLE_BACKEND_OPENGLES)
case wgpu::BackendType::OpenGLES:
return CreateOpenGLBinding(window, device);
return CreateOpenGLBinding(window, adapter, device);
#endif
#if defined(DAWN_ENABLE_BACKEND_VULKAN)
case wgpu::BackendType::Vulkan:
return CreateVulkanBinding(window, device);
return CreateVulkanBinding(window, adapter, device);
#endif
default:
return nullptr;

View File

@ -2,6 +2,7 @@
#include <dawn/native/DawnNative.h>
#include <dawn/webgpu_cpp.h>
#include <optick.h>
struct SDL_Window;
@ -13,15 +14,20 @@ public:
virtual uint64_t GetSwapChainImplementation() = 0;
virtual WGPUTextureFormat GetPreferredSwapChainTextureFormat() = 0;
#if USE_OPTICK
  // Profiler hook: the backend's native swap chain handle.
  // The base implementation has none to offer and returns nullptr.
  virtual void* GetNativeSwapChain() { return nullptr; }

  // Profiler hook: hands the backend's current command list/buffer to Optick.
  // The base implementation returns a value-initialized context.
  virtual Optick::GPUContext OptickSetGpuContext() { return {}; }
#endif
protected:
BackendBinding(SDL_Window* window, WGPUDevice device);
BackendBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
SDL_Window* m_window = nullptr;
WGPUAdapter m_adapter = nullptr;
WGPUDevice m_device = nullptr;
};
void DiscoverAdapter(dawn::native::Instance* instance, SDL_Window* window, wgpu::BackendType type);
BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUDevice device);
BackendBinding* CreateBinding(wgpu::BackendType type, SDL_Window* window, WGPUAdapter adapter, WGPUDevice device);
} // namespace aurora::gpu::utils

View File

@ -2,11 +2,21 @@
#include <SDL_syswm.h>
#include <dawn/native/D3D12Backend.h>
#include <optick.h>
#if USE_OPTICK
// Internal headers
#include <dawn/native/d3d12/CommandBufferD3D12.h>
#include <dawn/native/d3d12/DeviceD3D12.h>
#define private public
#include <dawn/native/d3d12/NativeSwapChainImplD3D12.h>
#undef private
#endif
namespace aurora::gpu::utils {
class D3D12Binding : public BackendBinding {
public:
D3D12Binding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {}
D3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) {
@ -22,6 +32,19 @@ public:
return dawn::native::d3d12::GetNativeSwapChainPreferredFormat(&m_swapChainImpl);
}
#if USE_OPTICK
// Returns the swap chain object held by Dawn's D3D12 native swap chain
// implementation, or nullptr when that implementation has not been created yet.
void* GetNativeSwapChain() override {
  // userData stays null until the swap chain implementation is created
  // (GetSwapChainImplementation tests the same field), so do not dereference
  // it blindly. nullptr matches what the base class reports.
  if (m_swapChainImpl.userData == nullptr) {
    return nullptr;
  }
  auto* impl = static_cast<dawn::native::d3d12::NativeSwapChainImpl*>(m_swapChainImpl.userData);
  return impl->mSwapChain.Get();
}
// Passes the command list of Dawn's pending D3D12 command context to Optick
// and returns whatever Optick::SetGpuContext yields.
Optick::GPUContext OptickSetGpuContext() override {
  // WGPUDevice is reinterpreted as Dawn's internal DeviceBase, then narrowed
  // to the D3D12 backend device.
  auto* baseDevice = static_cast<dawn::native::DeviceBase*>(static_cast<void*>(m_device));
  auto* backendDevice = dawn::native::d3d12::ToBackend(baseDevice);
  auto* recordingContext = backendDevice->GetPendingCommandContext().AcquireSuccess();
  return Optick::SetGpuContext({recordingContext->GetCommandList()});
}
#endif
private:
DawnSwapChainImplementation m_swapChainImpl{};
@ -30,8 +53,16 @@ private:
SDL_VERSION(&wmInfo.version);
SDL_GetWindowWMInfo(m_window, &wmInfo);
m_swapChainImpl = dawn::native::d3d12::CreateNativeSwapChainImpl(m_device, wmInfo.info.win.window);
#if USE_OPTICK
auto* device = dawn::native::d3d12::ToBackend(static_cast<dawn::native::DeviceBase*>(static_cast<void*>(m_device)));
auto* d3d12Device = device->GetD3D12Device();
auto* d3d12CommandQueue = device->GetCommandQueue().Get();
OPTICK_GPU_INIT_D3D12(d3d12Device, &d3d12CommandQueue, 1);
#endif
}
};
BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUDevice device) { return new D3D12Binding(window, device); }
// Factory for the Direct3D 12 binding. The result is allocated with `new` and
// returned as a raw pointer to the BackendBinding base.
BackendBinding* CreateD3D12Binding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
  BackendBinding* binding = new D3D12Binding(window, adapter, device);
  return binding;
}
} // namespace aurora::gpu::utils

View File

@ -89,7 +89,7 @@ private:
class MetalBinding : public BackendBinding {
public:
MetalBinding(SDL_Window *window, WGPUDevice device) : BackendBinding(window, device) {}
MetalBinding(SDL_Window *window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) {
@ -104,5 +104,7 @@ private:
DawnSwapChainImplementation m_swapChainImpl{};
};
BackendBinding *CreateMetalBinding(SDL_Window *window, WGPUDevice device) { return new MetalBinding(window, device); }
// Factory for the Metal binding. The result is allocated with `new` and
// returned as a raw pointer to the BackendBinding base.
BackendBinding *CreateMetalBinding(SDL_Window *window, WGPUAdapter adapter, WGPUDevice device) {
  BackendBinding *binding = new MetalBinding(window, adapter, device);
  return binding;
}
} // namespace aurora::gpu::utils

View File

@ -6,7 +6,7 @@
namespace aurora::gpu::utils {
class OpenGLBinding : public BackendBinding {
public:
OpenGLBinding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {}
OpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) {
@ -31,5 +31,7 @@ private:
}
};
BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUDevice device) { return new OpenGLBinding(window, device); }
// Factory for the OpenGL binding. The result is allocated with `new` and
// returned as a raw pointer to the BackendBinding base.
BackendBinding* CreateOpenGLBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
  BackendBinding* binding = new OpenGLBinding(window, adapter, device);
  return binding;
}
} // namespace aurora::gpu::utils

View File

@ -1,13 +1,24 @@
#include "BackendBinding.hpp"
#include <SDL_vulkan.h>
#include <cassert>
#include <optick.h>
#if USE_OPTICK
// Internal headers
#include <dawn/native/vulkan/AdapterVk.h>
#include <dawn/native/vulkan/DeviceVk.h>
#define private public
#include <dawn/native/vulkan/NativeSwapChainImplVk.h>
#undef private
#endif
#include <SDL_vulkan.h>
#include <dawn/native/VulkanBackend.h>
namespace aurora::gpu::utils {
class VulkanBinding : public BackendBinding {
public:
VulkanBinding(SDL_Window* window, WGPUDevice device) : BackendBinding(window, device) {}
VulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) : BackendBinding(window, adapter, device) {}
uint64_t GetSwapChainImplementation() override {
if (m_swapChainImpl.userData == nullptr) {
@ -23,6 +34,19 @@ public:
return dawn::native::vulkan::GetNativeSwapChainPreferredFormat(&m_swapChainImpl);
}
#if USE_OPTICK
// Returns the swap chain handle held by Dawn's Vulkan native swap chain
// implementation, or nullptr when that implementation has not been created yet.
void* GetNativeSwapChain() override {
  // userData stays null until the swap chain implementation is created
  // (GetSwapChainImplementation tests the same field), so do not dereference
  // it blindly. nullptr matches what the base class reports.
  if (m_swapChainImpl.userData == nullptr) {
    return nullptr;
  }
  auto* impl = static_cast<dawn::native::vulkan::NativeSwapChainImpl*>(m_swapChainImpl.userData);
  return impl->mSwapChain;
}
// Passes the command buffer of Dawn's pending Vulkan recording context to
// Optick and returns whatever Optick::SetGpuContext yields.
Optick::GPUContext OptickSetGpuContext() override {
  // WGPUDevice is reinterpreted as Dawn's internal DeviceBase, then narrowed
  // to the Vulkan backend device.
  auto* baseDevice = static_cast<dawn::native::DeviceBase*>(static_cast<void*>(m_device));
  auto* backendDevice = dawn::native::vulkan::ToBackend(baseDevice);
  auto* recordingContext = backendDevice->GetPendingRecordingContext();
  return Optick::SetGpuContext({recordingContext->commandBuffer});
}
#endif
private:
DawnSwapChainImplementation m_swapChainImpl{};
@ -32,8 +56,21 @@ private:
assert(false);
}
m_swapChainImpl = dawn::native::vulkan::CreateNativeSwapChainImpl(m_device, surface);
#if USE_OPTICK
auto* adapter =
dawn::native::vulkan::ToBackend(static_cast<dawn::native::AdapterBase*>(static_cast<void*>(m_adapter)));
auto* device =
dawn::native::vulkan::ToBackend(static_cast<dawn::native::DeviceBase*>(static_cast<void*>(m_device)));
VkDevice vkDevice = device->GetVkDevice();
VkPhysicalDevice vkPhysicalDevice = adapter->GetPhysicalDevice();
VkQueue vkQueue = device->GetQueue();
uint32_t queueFamily = device->GetGraphicsQueueFamily();
OPTICK_GPU_INIT_VULKAN(&vkDevice, &vkPhysicalDevice, &vkQueue, &queueFamily, 1, nullptr);
#endif
}
};
BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUDevice device) { return new VulkanBinding(window, device); }
// Factory for the Vulkan binding. The result is allocated with `new` and
// returned as a raw pointer to the BackendBinding base.
BackendBinding* CreateVulkanBinding(SDL_Window* window, WGPUAdapter adapter, WGPUDevice device) {
  BackendBinding* binding = new VulkanBinding(window, adapter, device);
  return binding;
}
} // namespace aurora::gpu::utils

View File

@ -298,6 +298,7 @@ DrawData make_draw_data_verts(const State& state, CameraFilterType filter_type,
}
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) {
return;
}

View File

@ -84,107 +84,44 @@ struct Command {
namespace aurora {
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::colored_quad::PipelineConfig& input) {
XXH3_64bits_update(&state, &input.filterType, sizeof(gfx::colored_quad::PipelineConfig::filterType));
XXH3_64bits_update(&state, &input.zComparison, sizeof(gfx::colored_quad::PipelineConfig::zComparison));
XXH3_64bits_update(&state, &input.zTest, sizeof(gfx::colored_quad::PipelineConfig::zTest));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::textured_quad::PipelineConfig& input) {
XXH3_64bits_update(&state, &input.filterType, sizeof(gfx::textured_quad::PipelineConfig::filterType));
XXH3_64bits_update(&state, &input.zComparison, sizeof(gfx::textured_quad::PipelineConfig::zComparison));
XXH3_64bits_update(&state, &input.zTest, sizeof(gfx::textured_quad::PipelineConfig::zTest));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::movie_player::PipelineConfig& input) {
// no-op
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::PipelineConfig& input) {
xxh3_update(state, input.shaderConfig);
XXH3_64bits_update(&state, &input.primitive, sizeof(gfx::gx::PipelineConfig::primitive));
XXH3_64bits_update(&state, &input.depthFunc, sizeof(gfx::gx::PipelineConfig::depthFunc));
XXH3_64bits_update(&state, &input.cullMode, sizeof(gfx::gx::PipelineConfig::cullMode));
XXH3_64bits_update(&state, &input.blendMode, sizeof(gfx::gx::PipelineConfig::blendMode));
XXH3_64bits_update(&state, &input.blendFacSrc, sizeof(gfx::gx::PipelineConfig::blendFacSrc));
XXH3_64bits_update(&state, &input.blendFacDst, sizeof(gfx::gx::PipelineConfig::blendFacDst));
XXH3_64bits_update(&state, &input.blendOp, sizeof(gfx::gx::PipelineConfig::blendOp));
if (input.dstAlpha) {
XXH3_64bits_update(&state, &*input.dstAlpha, sizeof(float));
}
XXH3_64bits_update(&state, &input.depthCompare, sizeof(gfx::gx::PipelineConfig::depthCompare));
XXH3_64bits_update(&state, &input.depthUpdate, sizeof(gfx::gx::PipelineConfig::depthUpdate));
XXH3_64bits_update(&state, &input.alphaUpdate, sizeof(gfx::gx::PipelineConfig::alphaUpdate));
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::stream::PipelineConfig& input) {
xxh3_update<gfx::gx::PipelineConfig>(state, input);
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::model::PipelineConfig& input) {
xxh3_update<gfx::gx::PipelineConfig>(state, input);
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::PipelineCreateCommand& input) {
XXH3_64bits_update(&state, &input.type, sizeof(gfx::PipelineCreateCommand::type));
inline XXH64_hash_t xxh3_hash(const gfx::PipelineCreateCommand& input, XXH64_hash_t seed) {
constexpr auto typeSize = sizeof(gfx::PipelineCreateCommand::type);
switch (input.type) {
case gfx::ShaderType::Aabb:
// TODO
break;
case gfx::ShaderType::ColoredQuad:
xxh3_update(state, input.coloredQuad);
break;
return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::coloredQuad), seed);
case gfx::ShaderType::TexturedQuad:
xxh3_update(state, input.texturedQuad);
break;
return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::texturedQuad), seed);
case gfx::ShaderType::MoviePlayer:
xxh3_update(state, input.moviePlayer);
break;
return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::moviePlayer), seed);
case gfx::ShaderType::Stream:
xxh3_update(state, input.stream);
break;
return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::stream), seed);
case gfx::ShaderType::Model:
xxh3_update(state, input.model);
break;
return xxh3_hash_s(&input, typeSize + sizeof(gfx::PipelineCreateCommand::model), seed);
}
return 0;
}
template <>
inline void xxh3_update(XXH3_state_t& state, const wgpu::BindGroupEntry& input) {
XXH3_64bits_update(&state, &input.binding, sizeof(wgpu::BindGroupEntry::binding));
XXH3_64bits_update(&state, &input.buffer, sizeof(wgpu::BindGroupEntry::buffer));
XXH3_64bits_update(&state, &input.offset, sizeof(wgpu::BindGroupEntry::offset));
if (input.buffer != nullptr) {
XXH3_64bits_update(&state, &input.size, sizeof(wgpu::BindGroupEntry::size));
}
XXH3_64bits_update(&state, &input.sampler, sizeof(wgpu::BindGroupEntry::sampler));
XXH3_64bits_update(&state, &input.textureView, sizeof(wgpu::BindGroupEntry::textureView));
// Hashes a bind group descriptor from its raw bytes in two steps: first the
// fixed-size part between the two leading pointers and the trailing `entries`
// pointer, then the entry array itself, seeded with the first digest.
// NOTE(review): any padding bytes inside the hashed spans take part in the
// hash — confirm callers produce deterministic (zeroed) padding.
inline XXH64_hash_t xxh3_hash(const wgpu::BindGroupDescriptor& input, XXH64_hash_t seed) {
  constexpr auto headerSkip = sizeof(void*) * 2; // skip nextInChain, label
  constexpr auto trailerSkip = sizeof(void*);    // skip entries
  const auto* fixedPart = reinterpret_cast<const u8*>(&input) + headerSkip;
  const auto fixedHash =
      xxh3_hash_s(fixedPart, sizeof(wgpu::BindGroupDescriptor) - headerSkip - trailerSkip, seed);
  // // TODO ensure size is zeroed elsewhere
  // for (int i = 0; i < input.entryCount; ++i) {
  //   const wgpu::BindGroupEntry& entry = input.entries[i];
  //   if (!entry.buffer && entry.size != 0) {
  //     gfx::Log.report(logvisor::Fatal, FMT_STRING("Size != 0! {}"), entry.size);
  //   }
  // }
  const auto entriesLength = sizeof(wgpu::BindGroupEntry) * input.entryCount;
  return xxh3_hash_s(input.entries, entriesLength, fixedHash);
}
template <>
inline void xxh3_update(XXH3_state_t& state, const wgpu::BindGroupDescriptor& input) {
if (input.label != nullptr) {
XXH3_64bits_update(&state, input.label, strlen(input.label));
}
XXH3_64bits_update(&state, &input.layout, sizeof(wgpu::BindGroupDescriptor::layout));
XXH3_64bits_update(&state, &input.entryCount, sizeof(wgpu::BindGroupDescriptor::entryCount));
for (int i = 0; i < input.entryCount; ++i) {
xxh3_update(state, input.entries[i]);
}
}
template <>
inline void xxh3_update(XXH3_state_t& state, const wgpu::SamplerDescriptor& input) {
if (input.label != nullptr) {
XXH3_64bits_update(&state, input.label, strlen(input.label));
}
XXH3_64bits_update(&state, &input.addressModeU, sizeof(wgpu::SamplerDescriptor::addressModeU));
XXH3_64bits_update(&state, &input.addressModeV, sizeof(wgpu::SamplerDescriptor::addressModeV));
XXH3_64bits_update(&state, &input.addressModeW, sizeof(wgpu::SamplerDescriptor::addressModeW));
XXH3_64bits_update(&state, &input.magFilter, sizeof(wgpu::SamplerDescriptor::magFilter));
XXH3_64bits_update(&state, &input.minFilter, sizeof(wgpu::SamplerDescriptor::minFilter));
XXH3_64bits_update(&state, &input.mipmapFilter, sizeof(wgpu::SamplerDescriptor::mipmapFilter));
XXH3_64bits_update(&state, &input.lodMinClamp, sizeof(wgpu::SamplerDescriptor::lodMinClamp));
XXH3_64bits_update(&state, &input.lodMaxClamp, sizeof(wgpu::SamplerDescriptor::lodMaxClamp));
XXH3_64bits_update(&state, &input.compare, sizeof(wgpu::SamplerDescriptor::compare));
XXH3_64bits_update(&state, &input.maxAnisotropy, sizeof(wgpu::SamplerDescriptor::maxAnisotropy));
// Hashes a sampler descriptor from its raw bytes, leaving out the two leading
// pointers and the last two bytes of the struct.
inline XXH64_hash_t xxh3_hash(const wgpu::SamplerDescriptor& input, XXH64_hash_t seed) {
  constexpr auto headerSkip = sizeof(void*) * 2; // skip nextInChain, label
  constexpr size_t tailPadding = 2;              // skip padding
  constexpr auto length = sizeof(wgpu::SamplerDescriptor) - headerSkip - tailPadding;
  const auto* begin = reinterpret_cast<const u8*>(&input) + headerSkip;
  return xxh3_hash_s(begin, length, seed);
}
} // namespace aurora
@ -219,6 +156,7 @@ static PipelineRef g_currentPipeline;
static std::vector<Command> g_commands;
static PipelineRef find_pipeline(PipelineCreateCommand command, NewPipelineCallback&& cb) {
OPTICK_EVENT();
const auto hash = xxh3_hash(command);
bool found = false;
{
@ -357,6 +295,7 @@ PipelineRef pipeline_ref(model::PipelineConfig config) {
}
static void pipeline_worker() {
OPTICK_THREAD("Pipeline Worker");
bool hasMore = false;
while (true) {
std::pair<PipelineRef, NewPipelineCallback> cb;
@ -387,6 +326,8 @@ static void pipeline_worker() {
}
void initialize() {
gx::initialize();
g_pipelineThread = std::thread(pipeline_worker);
const auto createBuffer = [](wgpu::Buffer& out, wgpu::BufferUsage usage, uint64_t size, const char* label) {
@ -440,6 +381,7 @@ void shutdown() {
static size_t currentStagingBuffer = 0;
static bool bufferMapped = false;
void map_staging_buffer() {
OPTICK_EVENT();
bufferMapped = false;
g_stagingBuffers[currentStagingBuffer].MapAsync(
wgpu::MapMode::Write, 0, StagingBufferSize,
@ -456,9 +398,13 @@ void map_staging_buffer() {
}
void begin_frame() {
while (!bufferMapped) {
g_device.Tick();
{
OPTICK_EVENT("Wait for buffer mapping");
while (!bufferMapped) {
g_device.Tick();
}
}
OPTICK_EVENT();
size_t bufferOffset = 0;
auto& stagingBuf = g_stagingBuffers[currentStagingBuffer];
const auto mapBuffer = [&](ByteBuffer& buf, uint64_t size) {
@ -472,6 +418,7 @@ void begin_frame() {
}
void end_frame(const wgpu::CommandEncoder& cmd) {
OPTICK_EVENT();
uint64_t bufferOffset = 0;
const auto writeBuffer = [&](ByteBuffer& buf, wgpu::Buffer& out, uint64_t size, std::string_view label) {
const auto writeSize = buf.size(); // Only need to copy this many bytes
@ -491,6 +438,7 @@ void end_frame(const wgpu::CommandEncoder& cmd) {
}
void render(const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
g_currentPipeline = UINT64_MAX;
for (const auto& cmd : g_commands) {
@ -533,6 +481,7 @@ void render(const wgpu::RenderPassEncoder& pass) {
}
bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (ref == g_currentPipeline) {
return true;
}
@ -547,6 +496,7 @@ bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass) {
}
static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, size_t alignment) {
OPTICK_EVENT();
size_t padding = 0;
if (alignment != 0) {
padding = alignment - length % alignment;
@ -565,6 +515,7 @@ static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length,
return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
}
static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
OPTICK_EVENT();
size_t padding = 0;
if (alignment != 0) {
padding = alignment - length % alignment;
@ -576,19 +527,28 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
target.append_zeroes(length + padding);
return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
}
Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0 /* TODO? */); }
Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0 /* TODO? */); }
// Appends `length` bytes from `data` to the vertex buffer (g_verts) with no
// alignment requested and returns the range reported by push().
Range push_verts(const uint8_t* data, size_t length) {
  OPTICK_EVENT();
  constexpr size_t alignment = 0; /* TODO? */
  return push(g_verts, data, length, alignment);
}
// Appends `length` bytes from `data` to the index buffer (g_indices) with no
// alignment requested and returns the range reported by push().
Range push_indices(const uint8_t* data, size_t length) {
  OPTICK_EVENT();
  constexpr size_t alignment = 0; /* TODO? */
  return push(g_indices, data, length, alignment);
}
// Appends `length` bytes from `data` to the uniform buffer (g_uniforms), using
// the device's minUniformBufferOffsetAlignment limit as the alignment.
Range push_uniform(const uint8_t* data, size_t length) {
  OPTICK_EVENT();
  // The limits are queried from the device on every call.
  wgpu::SupportedLimits supported;
  g_device.GetLimits(&supported);
  const auto alignment = supported.limits.minUniformBufferOffsetAlignment;
  return push(g_uniforms, data, length, alignment);
}
// Appends `length` bytes from `data` to the storage buffer (g_storage), using
// the device's minStorageBufferOffsetAlignment limit as the alignment.
Range push_storage(const uint8_t* data, size_t length) {
  OPTICK_EVENT();
  // The limits are queried from the device on every call.
  wgpu::SupportedLimits supported;
  g_device.GetLimits(&supported);
  const auto alignment = supported.limits.minStorageBufferOffsetAlignment;
  return push(g_storage, data, length, alignment);
}
Range push_static_storage(const uint8_t* data, size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits;
g_device.GetLimits(&limits);
auto range = push(g_staticStorage, data, length, limits.limits.minStorageBufferOffsetAlignment);
@ -596,20 +556,24 @@ Range push_static_storage(const uint8_t* data, size_t length) {
return range;
}
// Reserves `length` bytes in the vertex buffer (g_verts) via map() and returns
// a ByteBuffer built over the reserved span together with its range.
std::pair<ByteBuffer, Range> map_verts(size_t length) {
  OPTICK_EVENT();
  const auto span = map(g_verts, length, 0 /* TODO? */);
  auto* const base = g_verts.data() + span.offset;
  return {ByteBuffer{base, span.size}, span};
}
// Reserves `length` bytes in the index buffer (g_indices) via map() and returns
// a ByteBuffer built over the reserved span together with its range.
std::pair<ByteBuffer, Range> map_indices(size_t length) {
  OPTICK_EVENT();
  const auto span = map(g_indices, length, 0 /* TODO? */);
  auto* const base = g_indices.data() + span.offset;
  return {ByteBuffer{base, span.size}, span};
}
std::pair<ByteBuffer, Range> map_uniform(size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits;
g_device.GetLimits(&limits);
const auto range = map(g_uniforms, length, limits.limits.minUniformBufferOffsetAlignment);
return {ByteBuffer{g_uniforms.data() + range.offset, range.size}, range};
}
std::pair<ByteBuffer, Range> map_storage(size_t length) {
OPTICK_EVENT();
wgpu::SupportedLimits limits;
g_device.GetLimits(&limits);
const auto range = map(g_storage, length, limits.limits.minStorageBufferOffsetAlignment);
@ -617,6 +581,7 @@ std::pair<ByteBuffer, Range> map_storage(size_t length) {
}
BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) {
OPTICK_EVENT();
const auto id = xxh3_hash(descriptor);
if (!g_cachedBindGroups.contains(id)) {
g_cachedBindGroups.try_emplace(id, g_device.CreateBindGroup(&descriptor));
@ -624,6 +589,7 @@ BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) {
return id;
}
const wgpu::BindGroup& find_bind_group(BindGroupRef id) {
OPTICK_EVENT();
const auto it = g_cachedBindGroups.find(id);
if (it == g_cachedBindGroups.end()) {
Log.report(logvisor::Fatal, FMT_STRING("get_bind_group: failed to locate {}"), id);
@ -633,6 +599,7 @@ const wgpu::BindGroup& find_bind_group(BindGroupRef id) {
}
const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) {
OPTICK_EVENT();
const auto id = xxh3_hash(descriptor);
auto it = g_cachedSamplers.find(id);
if (it == g_cachedSamplers.end()) {
@ -642,6 +609,7 @@ const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) {
}
uint32_t align_uniform(uint32_t value) {
OPTICK_EVENT();
wgpu::SupportedLimits limits;
g_device.GetLimits(&limits); // TODO cache
const auto uniform_alignment = limits.limits.minUniformBufferOffsetAlignment;

View File

@ -4,20 +4,32 @@
#include <utility>
#include <dawn/webgpu_cpp.h>
#include <xxhash_impl.h>
#define XXH_INLINE_ALL
#define XXH_STATIC_LINKING_ONLY
#define XXH_IMPLEMENTATION
#include <xxhash.h>
#include <optick.h>
// ALIGN(x, a): rounds x up to the next multiple of a. Because it works by
// masking, a must be a power of two.
// The mask is built in the common type of x and a. Building it in a's own type
// would zero-extend a narrower unsigned mask and clear the upper bits of a
// wider x (e.g. a 64-bit offset aligned with a 32-bit alignment).
#ifndef ALIGN
#define ALIGN(x, a) (((x) + ((a)-1)) & ~(static_cast<decltype((x) + (a))>(a) - 1))
#endif
// PACK(decl): wraps a struct/class declaration so that it is laid out without
// padding between members. The declaration is passed variadically so that
// commas inside it (e.g. template parameter lists) survive macro expansion.
// Exactly one definition is selected; unsupported compilers fail here with a
// clear message instead of at the first use of PACK.
#if defined(__GNUC__)
#define PACK(...) __VA_ARGS__ __attribute__((__packed__))
#elif defined(_MSC_VER)
#define PACK(...) __pragma(pack(push, 1)); __VA_ARGS__; __pragma(pack(pop))
#else
#error "PACK is not implemented for this compiler"
#endif
namespace aurora {
template <typename T>
static inline void xxh3_update(XXH3_state_t& state, const T& input);
static inline XXH64_hash_t xxh3_hash(const void* input, size_t len, XXH64_hash_t seed = 0) {
// Seeded XXH3 64-bit hash over `len` raw bytes starting at `input`.
static inline XXH64_hash_t xxh3_hash_s(const void* input, size_t len, XXH64_hash_t seed = 0) {
  const XXH64_hash_t digest = XXH3_64bits_withSeed(input, len, seed);
  return digest;
}
template <typename T>
static inline XXH64_hash_t xxh3_hash(const T& input, XXH64_hash_t seed = 0) {
OPTICK_EVENT();
XXH3_state_t state;
memset(&state, 0, sizeof(XXH3_state_t));
XXH3_64bits_reset_withSeed(&state, seed);

View File

@ -335,6 +335,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GX::Primitive gx_prim, GX:
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
zstring_view label) noexcept {
OPTICK_EVENT();
const auto depthStencil = wgpu::DepthStencilState{
.format = g_graphicsConfig.depthFormat,
.depthWriteEnabled = config.depthUpdate,
@ -387,6 +388,7 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive,
const BindGroupRanges& ranges) noexcept {
OPTICK_EVENT();
for (u8 i = 0; i < g_gxState.numTevStages; ++i) {
config.shaderConfig.tevStages[i] = g_gxState.tevStages[i];
}
@ -396,7 +398,9 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit
for (u8 i = 0; i < g_gxState.numTexGens; ++i) {
config.shaderConfig.tcgs[i] = g_gxState.tcgs[i];
}
config.shaderConfig.alphaDiscard = g_gxState.alphaDiscard;
if (g_gxState.alphaDiscard) {
config.shaderConfig.alphaDiscard = g_gxState.alphaDiscard;
}
config.shaderConfig.fogType = g_gxState.fog.type;
config = {
.shaderConfig = config.shaderConfig,
@ -414,6 +418,7 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit
};
// TODO separate shader info from build_shader for async
{
OPTICK_EVENT("Shader info & bind groups");
std::lock_guard lk{g_pipelineMutex};
auto [_, info] = build_shader(config.shaderConfig);
info.bindGroups = build_bind_groups(info, config.shaderConfig, ranges); // TODO this is hack
@ -542,6 +547,7 @@ static absl::flat_hash_map<u32, std::pair<wgpu::BindGroupLayout, wgpu::BindGroup
GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config,
const BindGroupRanges& ranges) noexcept {
OPTICK_EVENT();
const auto layouts = build_bind_group_layouts(info, config);
u32 textureCount = info.sampledTextures.count();
@ -578,6 +584,9 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
};
std::array<wgpu::BindGroupEntry, MaxTextures> samplerEntries;
std::array<wgpu::BindGroupEntry, MaxTextures> textureEntries;
{
OPTICK_EVENT("Build texture entries");
for (u32 texIdx = 0, i = 0; texIdx < info.sampledTextures.size(); ++texIdx) {
if (!info.sampledTextures.test(texIdx)) {
continue;
@ -597,6 +606,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
};
i++;
}
}
return {
.uniformBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
.label = "GX Uniform Bind Group",
@ -620,6 +630,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
}
GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept {
OPTICK_EVENT();
GXBindGroupLayouts out;
u32 uniformSizeKey = info.uniformSize + (config.denormalizedVertexAttributes ? 0 : 1);
const auto uniformIt = sUniformBindGroupLayouts.find(uniformSizeKey);
@ -729,6 +740,10 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
return out;
}
// Resets the global GX state by zero-filling its entire storage.
// NOTE(review): this overwrites any default member initializers of GXState and
// is only well-defined if GXState is trivially copyable — confirm.
void initialize() noexcept {
  memset(&g_gxState, 0, sizeof g_gxState);
}
// TODO this is awkward
extern absl::flat_hash_map<ShaderRef, std::pair<wgpu::ShaderModule, gx::ShaderInfo>> g_gxCachedShaders;
void shutdown() noexcept {

View File

@ -14,23 +14,23 @@ constexpr u32 MaxTexMtx = 10;
constexpr u32 MaxPTTexMtx = 20;
constexpr u32 MaxTexCoord = GX::MAX_TEXCOORD;
template <typename Arg, Arg Default>
PACK(template <typename Arg, Arg Default>
struct TevPass {
Arg a = Default;
Arg b = Default;
Arg c = Default;
Arg d = Default;
bool operator==(const TevPass&) const = default;
};
struct TevOp {
});
PACK(struct TevOp {
GX::TevOp op = GX::TevOp::TEV_ADD;
GX::TevBias bias = GX::TevBias::TB_ZERO;
GX::TevScale scale = GX::TevScale::CS_SCALE_1;
GX::TevRegID outReg = GX::TevRegID::TEVPREV;
bool clamp = true;
bool operator==(const TevOp&) const = default;
};
struct TevStage {
});
PACK(struct TevStage {
TevPass<GX::TevColorArg, GX::CC_ZERO> colorPass;
TevPass<GX::TevAlphaArg, GX::CA_ZERO> alphaPass;
TevOp colorOp;
@ -41,7 +41,7 @@ struct TevStage {
GX::TexMapID texMapId = GX::TEXMAP_NULL;
GX::ChannelID channelId = GX::COLOR_NULL;
bool operator==(const TevStage&) const = default;
};
});
struct TextureBind {
aurora::gfx::TextureHandle handle;
metaforce::EClampMode clampMode;
@ -55,37 +55,37 @@ struct TextureBind {
operator bool() const noexcept { return handle; }
};
// For shader generation
struct ColorChannelConfig {
PACK(struct ColorChannelConfig {
GX::ColorSrc matSrc = GX::SRC_REG;
GX::ColorSrc ambSrc = GX::SRC_REG;
bool lightingEnabled = false;
bool operator==(const ColorChannelConfig&) const = default;
};
});
// For uniform generation
struct ColorChannelState {
PACK(struct ColorChannelState {
zeus::CColor matColor = zeus::skClear;
zeus::CColor ambColor = zeus::skClear;
GX::LightMask lightState;
};
});
using LightVariant = std::variant<std::monostate, Light, zeus::CColor>;
// Mat4x4 used instead of Mat4x3 for padding purposes
using TexMtxVariant = std::variant<std::monostate, Mat4x2<float>, Mat4x4<float>>;
struct TcgConfig {
PACK(struct TcgConfig {
GX::TexGenType type = GX::TG_MTX2x4;
GX::TexGenSrc src = GX::MAX_TEXGENSRC;
GX::TexMtx mtx = GX::IDENTITY;
GX::PTTexMtx postMtx = GX::PTIDENTITY;
bool normalize = false;
bool operator==(const TcgConfig&) const = default;
};
struct FogState {
});
PACK(struct FogState {
GX::FogType type = GX::FOG_NONE;
float startZ = 0.f;
float endZ = 0.f;
float nearZ = 0.f;
float farZ = 0.f;
zeus::CColor color;
};
});
struct GXState {
zeus::CMatrix4f mv;
@ -123,10 +123,11 @@ extern GXState g_gxState;
static inline Mat4x4<float> get_combined_matrix() noexcept { return g_gxState.proj * g_gxState.mv; }
void initialize() noexcept;
void shutdown() noexcept;
const TextureBind& get_texture(GX::TexMapID id) noexcept;
struct ShaderConfig {
PACK(struct ShaderConfig {
GX::FogType fogType;
std::array<std::optional<TevStage>, MaxTevStages> tevStages;
std::array<ColorChannelConfig, MaxColorChannels> colorChannels;
@ -135,8 +136,8 @@ struct ShaderConfig {
bool denormalizedVertexAttributes = false;
bool denormalizedHasNrm = false; // TODO this is a hack
bool operator==(const ShaderConfig&) const = default;
};
struct PipelineConfig {
});
PACK(struct PipelineConfig {
ShaderConfig shaderConfig;
GX::Primitive primitive;
GX::Compare depthFunc;
@ -146,7 +147,7 @@ struct PipelineConfig {
GX::LogicOp blendOp;
std::optional<float> dstAlpha;
bool depthCompare, depthUpdate, alphaUpdate;
};
});
struct GXBindGroupLayouts {
wgpu::BindGroupLayout uniformLayout;
wgpu::BindGroupLayout samplerLayout;
@ -202,70 +203,71 @@ struct DlVert {
} // namespace aurora::gfx::gx
namespace aurora {
// Streams the four TEV pass arguments (a, b, c, d — in that order) into the
// XXH3 state, sizeof(Arg) bytes each.
template <typename Arg, Arg Default>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevPass<Arg, Default>& input) {
  // Order matters: it determines the resulting hash value.
  const Arg* const args[] = {&input.a, &input.b, &input.c, &input.d};
  for (const Arg* arg : args) {
    XXH3_64bits_update(&state, arg, sizeof(Arg));
  }
}
//template <typename Arg, Arg Default>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevPass<Arg, Default>& input) {
// XXH3_64bits_update(&state, &input.a, sizeof(Arg));
// XXH3_64bits_update(&state, &input.b, sizeof(Arg));
// XXH3_64bits_update(&state, &input.c, sizeof(Arg));
// XXH3_64bits_update(&state, &input.d, sizeof(Arg));
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevOp& input) {
// XXH3_64bits_update(&state, &input.op, sizeof(gfx::gx::TevOp::op));
// XXH3_64bits_update(&state, &input.bias, sizeof(gfx::gx::TevOp::bias));
// XXH3_64bits_update(&state, &input.scale, sizeof(gfx::gx::TevOp::scale));
// XXH3_64bits_update(&state, &input.outReg, sizeof(gfx::gx::TevOp::outReg));
// XXH3_64bits_update(&state, &input.clamp, sizeof(bool));
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevStage& input) {
// xxh3_update(state, input.colorPass);
// xxh3_update(state, input.alphaPass);
// xxh3_update(state, input.colorOp);
// xxh3_update(state, input.alphaOp);
// XXH3_64bits_update(&state, &input.kcSel, sizeof(gfx::gx::TevStage::kcSel));
// XXH3_64bits_update(&state, &input.kaSel, sizeof(gfx::gx::TevStage::kaSel));
// XXH3_64bits_update(&state, &input.texCoordId, sizeof(gfx::gx::TevStage::texCoordId));
// XXH3_64bits_update(&state, &input.texMapId, sizeof(gfx::gx::TevStage::texMapId));
// XXH3_64bits_update(&state, &input.channelId, sizeof(gfx::gx::TevStage::channelId));
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ColorChannelConfig& input) {
// XXH3_64bits_update(&state, &input.lightingEnabled, sizeof(gfx::gx::ColorChannelConfig::lightingEnabled));
// XXH3_64bits_update(&state, &input.matSrc, sizeof(gfx::gx::ColorChannelConfig::matSrc));
// if (input.lightingEnabled) {
// // Unused when lighting is disabled
// XXH3_64bits_update(&state, &input.ambSrc, sizeof(gfx::gx::ColorChannelConfig::ambSrc));
// }
//}
//template <>
//inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TcgConfig& input) {
// XXH3_64bits_update(&state, &input.type, sizeof(gfx::gx::TcgConfig::type));
// XXH3_64bits_update(&state, &input.src, sizeof(gfx::gx::TcgConfig::src));
// XXH3_64bits_update(&state, &input.mtx, sizeof(gfx::gx::TcgConfig::mtx));
// XXH3_64bits_update(&state, &input.postMtx, sizeof(gfx::gx::TcgConfig::postMtx));
// XXH3_64bits_update(&state, &input.normalize, sizeof(gfx::gx::TcgConfig::normalize));
//}
// Streams every TevOp field (op, bias, scale, outReg, clamp — in that order)
// into the XXH3 state.
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevOp& input) {
  // Feeds the bytes of one field; the length is taken from the pointee type.
  const auto feed = [&state](const auto* field) { XXH3_64bits_update(&state, field, sizeof(*field)); };
  feed(&input.op);
  feed(&input.bias);
  feed(&input.scale);
  feed(&input.outReg);
  feed(&input.clamp);
}
// Streams a TEV stage into the XXH3 state: first the color/alpha passes and
// ops through their own xxh3_update overloads, then the remaining scalar
// selectors and ids.
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TevStage& input) {
  xxh3_update(state, input.colorPass);
  xxh3_update(state, input.alphaPass);
  xxh3_update(state, input.colorOp);
  xxh3_update(state, input.alphaOp);

  // Feeds the bytes of one field; the length is taken from the pointee type.
  const auto feed = [&state](const auto* field) { XXH3_64bits_update(&state, field, sizeof(*field)); };
  feed(&input.kcSel);
  feed(&input.kaSel);
  feed(&input.texCoordId);
  feed(&input.texMapId);
  feed(&input.channelId);
}
// Streams a color channel configuration into the XXH3 state. lightingEnabled
// and matSrc always contribute; ambSrc contributes only when lighting is on.
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ColorChannelConfig& input) {
  // Feeds the bytes of one field; the length is taken from the pointee type.
  const auto feed = [&state](const auto* field) { XXH3_64bits_update(&state, field, sizeof(*field)); };
  feed(&input.lightingEnabled);
  feed(&input.matSrc);
  if (!input.lightingEnabled) {
    // ambSrc is unused when lighting is disabled, so it is kept out of the hash.
    return;
  }
  feed(&input.ambSrc);
}
// Streams every TcgConfig field (type, src, mtx, postMtx, normalize — in that
// order) into the XXH3 state.
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::TcgConfig& input) {
  // Feeds the bytes of one field; the length is taken from the pointee type.
  const auto feed = [&state](const auto* field) { XXH3_64bits_update(&state, field, sizeof(*field)); };
  feed(&input.type);
  feed(&input.src);
  feed(&input.mtx);
  feed(&input.postMtx);
  feed(&input.normalize);
}
template <>
inline void xxh3_update(XXH3_state_t& state, const gfx::gx::ShaderConfig& input) {
for (const auto& item : input.tevStages) {
if (!item) {
break;
}
xxh3_update(state, *item);
}
for (const auto& item : input.colorChannels) {
xxh3_update(state, item);
}
for (const auto& item : input.tcgs) {
xxh3_update(state, item);
}
if (input.alphaDiscard) {
XXH3_64bits_update(&state, &*input.alphaDiscard, sizeof(float));
}
XXH3_64bits_update(&state, &input.denormalizedVertexAttributes,
sizeof(gfx::gx::ShaderConfig::denormalizedVertexAttributes));
XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(gfx::gx::ShaderConfig::denormalizedHasNrm));
XXH3_64bits_update(&state, &input.fogType, sizeof(gfx::gx::ShaderConfig::fogType));
// Hashes a ShaderConfig by passing the object's raw storage
// (sizeof(gfx::gx::ShaderConfig) bytes starting at &input) and the given seed
// to xxh3_hash_s, and returns that result.
// NOTE(review): because the whole object representation is hashed, any padding
// bytes and the storage of disengaged std::optional members take part in the
// hash; this presumably relies on callers zero-filling the config before
// populating it — confirm.
inline XXH64_hash_t xxh3_hash(const gfx::gx::ShaderConfig& input, XXH64_hash_t seed) {
return xxh3_hash_s(&input, sizeof(gfx::gx::ShaderConfig), seed);
// The commented-out code below is inactive and refers to a `state` variable
// that is not in scope here; it looks like the earlier field-by-field hashing
// kept for reference.
// for (const auto& item : input.tevStages) {
// if (!item) {
// break;
// }
// xxh3_update(state, *item);
// }
// for (const auto& item : input.colorChannels) {
// xxh3_update(state, item);
// }
// for (const auto& item : input.tcgs) {
// xxh3_update(state, item);
// }
// if (input.alphaDiscard) {
// XXH3_64bits_update(&state, &*input.alphaDiscard, sizeof(float));
// }
// XXH3_64bits_update(&state, &input.denormalizedVertexAttributes,
// sizeof(gfx::gx::ShaderConfig::denormalizedVertexAttributes));
// XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(gfx::gx::ShaderConfig::denormalizedHasNrm));
// XXH3_64bits_update(&state, &input.fogType, sizeof(gfx::gx::ShaderConfig::fogType));
}
} // namespace aurora

View File

@ -359,6 +359,7 @@ std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& confi
#endif
return it->second;
}
OPTICK_EVENT();
Log.report(logvisor::Info, FMT_STRING("Shader config (hash {:x}):"), hash);
ShaderInfo info{
@ -636,7 +637,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
} else {
vtxOutAttrs += fmt::format(FMT_STRING("\n @location({}) tex{}_uv: vec2<f32>;"), locIdx, i);
if (tcg.src >= GX::TG_TEX0 && tcg.src <= GX::TG_TEX7) {
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4<f32>({}, 0.0, 1.0);"), i, in_uv(tcg.src - GX::TG_TEX0));
vtxXfrAttrs +=
fmt::format(FMT_STRING("\n var tc{} = vec4<f32>({}, 0.0, 1.0);"), i, in_uv(tcg.src - GX::TG_TEX0));
} else if (tcg.src == GX::TG_POS) {
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{} = vec4<f32>(obj_pos.xyz, 1.0);"), i);
} else if (tcg.src == GX::TG_NRM) {
@ -663,7 +665,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
} else {
u32 postMtxIdx = (tcg.postMtx - GX::PTTEXMTX0) / 3;
info.usesPTTexMtx.set(postMtxIdx);
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{0}_proj = ubuf.postmtx{1} * vec4<f32>(tc{0}_tmp.xyz, 1.0);"), i, postMtxIdx);
vtxXfrAttrs += fmt::format(FMT_STRING("\n var tc{0}_proj = ubuf.postmtx{1} * vec4<f32>(tc{0}_tmp.xyz, 1.0);"),
i, postMtxIdx);
}
vtxXfrAttrs += fmt::format(FMT_STRING("\n out.tex{0}_uv = tc{0}_proj.xy;"), i);
fragmentFnPre += fmt::format(
@ -698,7 +701,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
if (config.fogType != GX::FOG_NONE) {
info.usesFog = true;
uniformPre += "\n"
uniformPre +=
"\n"
"struct Fog {\n"
" color: vec4<f32>;\n"
" a: f32;\n"
@ -729,7 +733,8 @@ var<storage, read> v_packed_uvs: Vec2Block;
break;
case GX::FOG_PERSP_REVEXP2:
case GX::FOG_ORTHO_REVEXP2:
fragmentFn += "\n fogF = 1.0 - fogF;"
fragmentFn +=
"\n fogF = 1.0 - fogF;"
"\n var fogZ = exp2(-8.0 * fogF * fogF);";
break;
default:
@ -796,7 +801,11 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {{
.nextInChain = &wgslDescriptor,
.label = label.c_str(),
};
auto shader = gpu::g_device.CreateShaderModule(&shaderDescriptor);
wgpu::ShaderModule shader;
{
OPTICK_EVENT("Device CreateShaderModule");
shader = gpu::g_device.CreateShaderModule(&shaderDescriptor);
}
info.uniformSize = align_uniform(info.uniformSize);
auto pair = std::make_pair(std::move(shader), info);

View File

@ -113,7 +113,8 @@ static inline std::pair<gx::DlVert, size_t> readVert(const u8* data) noexcept {
static absl::flat_hash_map<XXH64_hash_t, std::pair<std::vector<gx::DlVert>, std::vector<u32>>> sCachedDisplayLists;
void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
const auto hash = xxh3_hash(dlStart, dlSize, 0);
OPTICK_EVENT();
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange;
uint32_t numIndices;
auto it = sCachedDisplayLists.find(hash);
@ -123,6 +124,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
vertRange = push_verts(ArrayRef{verts});
idxRange = push_indices(ArrayRef{indices});
} else {
OPTICK_EVENT("Display list translation");
std::vector<gx::DlVert> verts;
std::vector<u32> indices;
@ -181,30 +183,34 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
}
Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange;
if (staticVtxRange) {
sVtxRange = *staticVtxRange;
} else {
sVtxRange = push_storage(reinterpret_cast<const uint8_t*>(vtxData->data()), vtxData->size() * 16);
}
if (staticNrmRange) {
sNrmRange = *staticNrmRange;
} else {
sNrmRange = push_storage(reinterpret_cast<const uint8_t*>(nrmData->data()), nrmData->size() * 16);
}
if (staticTcRange) {
sTcRange = *staticTcRange;
} else {
sTcRange = push_storage(reinterpret_cast<const uint8_t*>(tcData->data()), tcData->size() * 8);
}
if (staticPackedTcRange) {
sPackedTcRange = *staticPackedTcRange;
} else if (tcData == tex0TcData) {
sPackedTcRange = sTcRange;
} else {
sPackedTcRange = push_storage(reinterpret_cast<const uint8_t*>(tex0TcData->data()), tex0TcData->size() * 8);
{
OPTICK_EVENT("Storage push");
if (staticVtxRange) {
sVtxRange = *staticVtxRange;
} else {
sVtxRange = push_storage(reinterpret_cast<const uint8_t*>(vtxData->data()), vtxData->size() * 16);
}
if (staticNrmRange) {
sNrmRange = *staticNrmRange;
} else {
sNrmRange = push_storage(reinterpret_cast<const uint8_t*>(nrmData->data()), nrmData->size() * 16);
}
if (staticTcRange) {
sTcRange = *staticTcRange;
} else {
sTcRange = push_storage(reinterpret_cast<const uint8_t*>(tcData->data()), tcData->size() * 8);
}
if (staticPackedTcRange) {
sPackedTcRange = *staticPackedTcRange;
} else if (tcData == tex0TcData) {
sPackedTcRange = sTcRange;
} else {
sPackedTcRange = push_storage(reinterpret_cast<const uint8_t*>(tex0TcData->data()), tex0TcData->size() * 8);
}
}
model::PipelineConfig config{};
model::PipelineConfig config;
memset(&config, 0, sizeof(model::PipelineConfig));
const gx::BindGroupRanges ranges{
.vtxDataRange = sVtxRange,
.nrmDataRange = sNrmRange,
@ -238,6 +244,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli
}
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) {
return;
}
@ -281,6 +288,7 @@ static inline void cache_array(const void* data, Vec*& outPtr, std::optional<aur
}
void GXSetArray(GX::Attr attr, const void* data, u8 stride) noexcept {
OPTICK_EVENT();
using namespace aurora::gfx::model;
switch (attr) {
case GX::VA_POS:

View File

@ -14,7 +14,7 @@ struct DrawData {
gx::GXBindGroups bindGroups;
};
struct PipelineConfig : gx::PipelineConfig {};
// Pipeline configuration for this renderer; derives from gx::PipelineConfig
// and adds no members of its own.
// NOTE(review): PACK(...) is presumably a project macro controlling struct
// packing — confirm its definition.
PACK(struct PipelineConfig : gx::PipelineConfig {});
struct State {};

View File

@ -233,6 +233,7 @@ DrawData make_draw_data(const State& state, const TextureHandle& tex_y, const Te
}
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) {
return;
}

View File

@ -18,6 +18,7 @@ struct SStreamState {
static std::optional<SStreamState> sStreamState;
void stream_begin(GX::Primitive primitive) noexcept {
OPTICK_EVENT();
if (sStreamState) {
Log.report(logvisor::Fatal, FMT_STRING("Stream began twice!"));
unreachable();
@ -27,6 +28,7 @@ void stream_begin(GX::Primitive primitive) noexcept {
void stream_vertex(metaforce::EStreamFlags flags, const zeus::CVector3f& pos, const zeus::CVector3f& nrm,
const zeus::CColor& color, const zeus::CVector2f& uv) noexcept {
OPTICK_EVENT();
if (!sStreamState) {
Log.report(logvisor::Fatal, FMT_STRING("Stream not started!"));
unreachable();
@ -53,9 +55,11 @@ void stream_vertex(metaforce::EStreamFlags flags, const zeus::CVector3f& pos, co
}
void stream_end() noexcept {
OPTICK_EVENT();
const auto vertRange = push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
stream::PipelineConfig config{};
stream::PipelineConfig config;
memset(&config, 0, sizeof(stream::PipelineConfig));
config.shaderConfig.denormalizedVertexAttributes = true;
config.shaderConfig.denormalizedHasNrm = sStreamState->flags.IsSet(metaforce::EStreamFlagBits::fHasNormal);
const auto info = populate_pipeline_config(config, sStreamState->primitive, {});

View File

@ -65,6 +65,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] Pipeli
State construct_state() { return {}; }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) {
return;
}

View File

@ -12,7 +12,7 @@ struct DrawData {
gx::GXBindGroups bindGroups;
};
struct PipelineConfig : public gx::PipelineConfig {};
// Pipeline configuration for this renderer; publicly derives from
// gx::PipelineConfig and adds no members of its own.
// NOTE(review): PACK(...) is presumably a project macro controlling struct
// packing — confirm its definition.
PACK(struct PipelineConfig : public gx::PipelineConfig {});
struct State {};

View File

@ -40,6 +40,7 @@ static wgpu::Extent3D physical_size(wgpu::Extent3D size, TextureFormatInfo info)
TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format,
ArrayRef<uint8_t> data, zstring_view label) noexcept {
OPTICK_EVENT();
auto handle = new_dynamic_texture_2d(width, height, mips, format, label);
const TextureRef& ref = *handle.ref;
@ -89,6 +90,7 @@ TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mi
TextureHandle new_dynamic_texture_2d(uint32_t width, uint32_t height, uint32_t mips, metaforce::ETexelFormat format,
zstring_view label) noexcept {
OPTICK_EVENT();
const auto wgpuFormat = to_wgpu(format);
const auto size = wgpu::Extent3D{
.width = width,
@ -120,6 +122,7 @@ TextureHandle new_render_texture(uint32_t width, uint32_t height, uint32_t color
// TODO accept mip/layer parameters
void write_texture(const TextureHandle& handle, ArrayRef<uint8_t> data) noexcept {
OPTICK_EVENT();
const TextureRef& ref = *handle.ref;
ByteBuffer buffer;

View File

@ -78,6 +78,7 @@ constexpr T bswap16(T val) noexcept {
}
static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -117,6 +118,7 @@ static ByteBuffer BuildI4FromGCN(uint32_t width, uint32_t height, uint32_t mips,
}
static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -158,6 +160,7 @@ static ByteBuffer BuildI8FromGCN(uint32_t width, uint32_t height, uint32_t mips,
}
ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -199,6 +202,7 @@ ByteBuffer BuildIA4FromGCN(uint32_t width, uint32_t height, uint32_t mips, Array
}
ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -240,6 +244,7 @@ ByteBuffer BuildIA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, Array
}
ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data, RGBA8* palette) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -277,6 +282,7 @@ ByteBuffer BuildC4FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayR
}
ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data, RGBA8* palette) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -314,6 +320,7 @@ ByteBuffer BuildC8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayR
}
ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -354,6 +361,7 @@ ByteBuffer BuildRGB565FromGCN(uint32_t width, uint32_t height, uint32_t mips, Ar
}
ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -401,6 +409,7 @@ ByteBuffer BuildRGB5A3FromGCN(uint32_t width, uint32_t height, uint32_t mips, Ar
}
ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t texelCount = ComputeMippedTexelCount(width, height, mips);
ByteBuffer buf{sizeof(RGBA8) * texelCount};
@ -445,6 +454,7 @@ ByteBuffer BuildRGBA8FromGCN(uint32_t width, uint32_t height, uint32_t mips, Arr
}
ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, ArrayRef<uint8_t> data) {
OPTICK_EVENT();
const size_t blockCount = ComputeMippedBlockCountDXT1(width, height, mips);
ByteBuffer buf{sizeof(DXT1Block) * blockCount};
@ -493,6 +503,7 @@ ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mips, Arra
ByteBuffer convert_texture(metaforce::ETexelFormat format, uint32_t width, uint32_t height, uint32_t mips,
ArrayRef<uint8_t> data) {
OPTICK_EVENT();
switch (format) {
case metaforce::ETexelFormat::RGBA8PC:
case metaforce::ETexelFormat::R8PC:

View File

@ -376,6 +376,7 @@ DrawData make_draw_data_verts(const State& state, CameraFilterType filter_type,
}
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
OPTICK_EVENT();
if (!bind_pipeline(data.pipeline, pass)) {
return;
}

View File

@ -182,8 +182,8 @@ void initialize(SDL_Window* window) {
}
g_queue = g_device.GetQueue();
g_BackendBinding =
std::unique_ptr<utils::BackendBinding>(utils::CreateBinding(g_backendType, window, g_device.Get()));
g_BackendBinding = std::unique_ptr<utils::BackendBinding>(
utils::CreateBinding(g_backendType, window, g_Adapter.Get(), g_device.Get()));
if (!g_BackendBinding) {
Log.report(logvisor::Fatal, FMT_STRING("Unsupported backend {}"), backendName);
unreachable();
@ -237,4 +237,16 @@ void resize_swapchain(uint32_t width, uint32_t height) {
g_frameBufferResolved = create_render_texture(false);
g_depthBuffer = create_depth_texture();
}
#if USE_OPTICK
// Optick GPU-profiling helpers; compiled only when USE_OPTICK is enabled.

// Returns the native swap chain pointer reported by the active backend binding.
void* get_native_swapchain() {
  auto& binding = *g_BackendBinding;
  return binding.GetNativeSwapChain();
}

// Asks the backend binding to set its Optick GPU context and returns the
// Optick::GPUContext the binding hands back (presumably the previously active
// context, meant to be passed to end_cmdlist — confirm).
Optick::GPUContext begin_cmdlist() {
  auto& binding = *g_BackendBinding;
  return binding.OptickSetGpuContext();
}

// Makes `ctx` the current Optick GPU context.
void end_cmdlist(Optick::GPUContext ctx) { Optick::SetGpuContext(ctx); }
#endif
} // namespace aurora::gpu

View File

@ -3,6 +3,7 @@
#include <array>
#include <cstdint>
#include <dawn/webgpu_cpp.h>
#include <optick.h>
#ifdef __GNUC__
[[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); }
@ -52,6 +53,12 @@ extern TextureWithSampler g_depthBuffer;
void initialize(SDL_Window* window);
void shutdown();
void resize_swapchain(uint32_t width, uint32_t height);
#if USE_OPTICK
// Optick GPU-profiling hooks; declared only when USE_OPTICK is enabled.
// Returns the backend's native swap chain as an untyped pointer.
void* get_native_swapchain();
// Begins an Optick-instrumented command list and returns an Optick::GPUContext
// (presumably the context to hand back to end_cmdlist — confirm).
Optick::GPUContext begin_cmdlist();
// Ends the instrumented command list, taking the Optick::GPUContext `ctx`.
void end_cmdlist(Optick::GPUContext ctx);
#endif
} // namespace aurora::gpu
namespace aurora::gpu::utils {

View File

@ -75,6 +75,7 @@ void new_frame(const WindowSize& size) noexcept {
}
void render(const wgpu::RenderPassEncoder& pass) noexcept {
OPTICK_EVENT();
ImGui::Render();
auto* data = ImGui::GetDrawData();

View File

@ -106,17 +106,20 @@ if (DAWN_ENABLE_VULKAN)
endif ()
if (MSVC)
target_compile_options(dawn_native PRIVATE /bigobj)
else()
else ()
target_compile_options(SPIRV-Tools-static PRIVATE -Wno-implicit-fallthrough)
target_compile_options(SPIRV-Tools-opt PRIVATE -Wno-implicit-fallthrough)
endif ()
# Optick profiler. Profiling itself is opt-in (OPTICK_ENABLED); Optick's
# built-in GPU backends are forced to follow the backends Dawn was configured with.
option(OPTICK_ENABLED "Enable profiling with Optick" OFF)
set(OPTICK_USE_VULKAN ${DAWN_ENABLE_VULKAN} CACHE BOOL "Built-in support for Vulkan" FORCE)
# The docstring previously said "Vulkan" here (copy-paste); it describes the D3D12 switch.
set(OPTICK_USE_D3D12 ${DAWN_ENABLE_D3D12} CACHE BOOL "Built-in support for DirectX 12" FORCE)
set(OPTICK_INSTALL_TARGETS OFF CACHE BOOL "Should optick be installed? Set to OFF if you use add_subdirectory to include Optick." FORCE)
add_subdirectory(optick)
if (NOT MSVC)
  target_compile_options(OptickCore PRIVATE -Wno-implicit-fallthrough)
elseif (OPTICK_USE_D3D12)
  # Under MSVC with the D3D12 backend enabled, OptickCore is linked against dxgi.
  target_link_libraries(OptickCore PRIVATE dxgi)
endif ()
add_subdirectory(libjpeg-turbo EXCLUDE_FROM_ALL)

View File

@ -8,7 +8,6 @@ for sub in "extern/amuse" \
"extern/jbus" \
"extern/kabufuda" \
"extern/nod" \
"extern/xxhash" \
"extern/zeus"; do
if [ -d $sub ]; then
pushd $sub > /dev/null