Fix bind group caching

This commit is contained in:
Luke Street 2025-04-04 01:59:30 -06:00
parent 8a5e3bcdc7
commit 1016fbb36d
6 changed files with 152 additions and 103 deletions

1
.gitmodules vendored
View File

@ -2,6 +2,7 @@
path = extern/dawn
url = https://dawn.googlesource.com/dawn
branch = main
update = none
[submodule "extern/SDL"]
path = extern/SDL
url = https://github.com/libsdl-org/SDL.git

View File

@ -60,7 +60,7 @@ void GXLoadTexMtxImm(const void* mtx_, u32 id, GXTexMtxType type) {
CHECK((id >= GX_TEXMTX0 && id <= GX_IDENTITY) || (id >= GX_PTTEXMTX0 && id <= GX_PTIDENTITY), "invalid tex mtx {}",
static_cast<int>(id));
if (id >= GX_PTTEXMTX0) {
CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", type);
CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", static_cast<int>(type));
const auto idx = (id - GX_PTTEXMTX0) / 3;
#ifdef AURORA_NATIVE_MATRIX
const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);

View File

@ -6,12 +6,14 @@
#include "stream/shader.hpp"
#include "texture.hpp"
#include <absl/container/flat_hash_map.h>
#include <condition_variable>
#include <deque>
#include <fstream>
#include <thread>
#include <mutex>
#include <thread>
#include <variant>
#include <absl/container/flat_hash_map.h>
#include <magic_enum.hpp>
namespace aurora::gfx {
@ -50,37 +52,37 @@ enum class CommandType {
SetScissor,
Draw,
};
/// Payload for a viewport command: the rectangle (left, top, width, height)
/// together with the znear/zfar pair.
struct SetViewportCommand {
  float left;
  float top;
  float width;
  float height;
  float znear;
  float zfar;

  /// Exact member-wise comparison; no tolerance is applied to the floats.
  bool operator==(const SetViewportCommand& rhs) const {
    if (left != rhs.left || top != rhs.top) {
      return false;
    }
    if (width != rhs.width || height != rhs.height) {
      return false;
    }
    return znear == rhs.znear && zfar == rhs.zfar;
  }

  /// Negation of operator==.
  bool operator!=(const SetViewportCommand& rhs) const { return !operator==(rhs); }
};
/// Payload for a scissor command: origin (x, y) and extent (w, h).
struct SetScissorCommand {
  uint32_t x;
  uint32_t y;
  uint32_t w;
  uint32_t h;

  /// Member-wise equality over all four fields.
  bool operator==(const SetScissorCommand& rhs) const {
    if (x != rhs.x || y != rhs.y) {
      return false;
    }
    return w == rhs.w && h == rhs.h;
  }

  /// Negation of operator==.
  bool operator!=(const SetScissorCommand& rhs) const { return !operator==(rhs); }
};
struct Command {
CommandType type;
#ifdef AURORA_GFX_DEBUG_GROUPS
std::vector<std::string> debugGroupStack;
#endif
union Data {
struct SetViewportCommand {
float left;
float top;
float width;
float height;
float znear;
float zfar;
bool operator==(const SetViewportCommand& rhs) const {
return left == rhs.left && top == rhs.top && width == rhs.width && height == rhs.height && znear == rhs.znear &&
zfar == rhs.zfar;
}
bool operator!=(const SetViewportCommand& rhs) const { return !(*this == rhs); }
} setViewport;
struct SetScissorCommand {
uint32_t x;
uint32_t y;
uint32_t w;
uint32_t h;
bool operator==(const SetScissorCommand& rhs) const {
return x == rhs.x && y == rhs.y && w == rhs.w && h == rhs.h;
}
bool operator!=(const SetScissorCommand& rhs) const { return !(*this == rhs); }
} setScissor;
SetViewportCommand setViewport;
SetScissorCommand setScissor;
ShaderDrawCommand draw;
} data;
};
@ -92,14 +94,14 @@ namespace aurora {
// the structure definition, which could easily change with Dawn updates.
// Hashes a bind group descriptor in two steps: first the raw descriptor bytes that lie
// between the skipped leading fields and the trailing entries pointer, then the contents
// of the entries array, seeded with the result of the first step.
template <>
inline HashType xxh3_hash(const wgpu::BindGroupDescriptor& input, HashType seed) {
constexpr auto offset = sizeof(void*) * 3; // skip nextInChain, label
constexpr auto offset = offsetof(wgpu::BindGroupDescriptor, layout); // skip nextInChain, label
// The length excludes one pointer's worth of bytes at the end so the entries pointer
// value itself never contributes to the hash.
const auto hash = xxh3_hash_s(reinterpret_cast<const u8*>(&input) + offset,
sizeof(wgpu::BindGroupDescriptor) - offset - sizeof(void*) /* skip entries */, seed);
// Fold in entryCount entries by content, chaining from the hash computed above.
return xxh3_hash_s(input.entries, sizeof(wgpu::BindGroupEntry) * input.entryCount, hash);
}
// Hashes the raw bytes of a sampler descriptor that follow the skipped leading fields,
// leaving off the final 2 bytes of the struct (marked below as padding).
template <>
inline HashType xxh3_hash(const wgpu::SamplerDescriptor& input, HashType seed) {
constexpr auto offset = sizeof(void*) * 3; // skip nextInChain, label
constexpr auto offset = offsetof(wgpu::SamplerDescriptor, addressModeU); // skip nextInChain, label
return xxh3_hash_s(reinterpret_cast<const u8*>(&input) + offset,
sizeof(wgpu::SamplerDescriptor) - offset - 2 /* skip padding */, seed);
}
@ -230,6 +232,7 @@ static inline void push_command(CommandType type, const Command::Data& data) {
.data = data,
});
}
static inline Command& get_last_draw_command(ShaderType type) {
CHECK(g_currentRenderPass != UINT32_MAX, "No last command");
auto& last = g_renderPasses[g_currentRenderPass].commands.back();
@ -247,17 +250,18 @@ static void push_draw_command(ShaderDrawCommand data) {
++g_drawCallCount;
}
static Command::Data::SetViewportCommand g_cachedViewport;
static SetViewportCommand g_cachedViewport;
void set_viewport(float left, float top, float width, float height, float znear, float zfar) noexcept {
Command::Data::SetViewportCommand cmd{left, top, width, height, znear, zfar};
SetViewportCommand cmd{left, top, width, height, znear, zfar};
if (cmd != g_cachedViewport) {
push_command(CommandType::SetViewport, Command::Data{.setViewport = cmd});
g_cachedViewport = cmd;
}
}
static Command::Data::SetScissorCommand g_cachedScissor;
static SetScissorCommand g_cachedScissor;
void set_scissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h) noexcept {
Command::Data::SetScissorCommand cmd{x, y, w, h};
SetScissorCommand cmd{x, y, w, h};
if (cmd != g_cachedScissor) {
push_command(CommandType::SetScissor, Command::Data{.setScissor = cmd});
g_cachedScissor = cmd;
@ -278,10 +282,12 @@ template <>
const stream::State& get_state() {
return g_state.stream;
}
// Stream specialization: tags the draw data as a Stream shader draw and forwards it
// to the ShaderDrawCommand overload.
template <>
void push_draw_command(stream::DrawData data) {
  const ShaderDrawCommand command{.type = ShaderType::Stream, .stream = data};
  push_draw_command(command);
}
template <>
void merge_draw_command(stream::DrawData data) {
auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
@ -294,6 +300,7 @@ void merge_draw_command(stream::DrawData data) {
last.indexCount += data.indexCount;
++g_mergedDrawCallCount;
}
template <>
PipelineRef pipeline_ref(stream::PipelineConfig config) {
return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); });
@ -303,6 +310,7 @@ template <>
void push_draw_command(model::DrawData data) {
push_draw_command(ShaderDrawCommand{.type = ShaderType::Model, .model = data});
}
template <>
PipelineRef pipeline_ref(model::PipelineConfig config) {
return find_pipeline(ShaderType::Model, config, [=]() { return create_pipeline(g_state.model, config); });
@ -792,7 +800,8 @@ BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) {
#endif
return id;
}
const wgpu::BindGroup& find_bind_group(BindGroupRef id) {
wgpu::BindGroup find_bind_group(BindGroupRef id) {
#ifdef EMSCRIPTEN
return g_cachedBindGroups[id];
#else
@ -802,7 +811,7 @@ const wgpu::BindGroup& find_bind_group(BindGroupRef id) {
#endif
}
const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor) {
wgpu::Sampler sampler_ref(const wgpu::SamplerDescriptor& descriptor) {
const auto id = xxh3_hash(descriptor);
auto it = g_cachedSamplers.find(id);
if (it == g_cachedSamplers.end()) {

View File

@ -27,6 +27,27 @@ static inline HashType xxh3_hash(const T& input, HashType seed = 0) {
return xxh3_hash_s(&input, sizeof(T), seed);
}
/// Incremental XXH3 64-bit hasher: feed data with update(), then read the result with digest().
class Hasher {
public:
  /// Initializes the streaming state and resets it with `seed`.
  explicit Hasher(XXH64_hash_t seed = 0) {
    XXH3_INITSTATE(&state);
    XXH3_64bits_reset_withSeed(&state, seed);
  }

  /// Feeds `size` raw bytes starting at `data` into the hash.
  void update(const void* data, size_t size) { XXH3_64bits_update(&state, data, size); }

  /// Feeds the object representation (all sizeof(T) bytes) of `data` into the hash.
  template <typename T>
  void update(const T& data) {
    // Hashing raw bytes is only meaningful when equal values always have equal bytes,
    // so reject types that fail this trait (e.g. ones with padding) at compile time.
    static_assert(std::has_unique_object_representations_v<T>,
                  "Hasher::update requires a type with unique object representations");
    update(&data, sizeof(T));
  }

  /// Returns the hash of everything fed so far. The state is only read, so this is
  /// const and more data may still be added afterwards.
  [[nodiscard]] XXH64_hash_t digest() const { return XXH3_64bits_digest(&state); }

private:
  XXH3_state_t state;
};
class ByteBuffer {
public:
ByteBuffer() noexcept = default;
@ -211,9 +232,9 @@ PipelineRef pipeline_ref(PipelineConfig config);
bool bind_pipeline(PipelineRef ref, const wgpu::RenderPassEncoder& pass);
BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor);
const wgpu::BindGroup& find_bind_group(BindGroupRef id);
wgpu::BindGroup find_bind_group(BindGroupRef id);
const wgpu::Sampler& sampler_ref(const wgpu::SamplerDescriptor& descriptor);
wgpu::Sampler sampler_ref(const wgpu::SamplerDescriptor& descriptor);
uint32_t align_uniform(uint32_t value);

View File

@ -461,29 +461,28 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
const BindGroupRanges& ranges) noexcept {
const auto layouts = build_bind_group_layouts(info, config);
std::array<wgpu::BindGroupEntry, GX_VA_MAX_ATTR + 1> uniformEntries{
wgpu::BindGroupEntry{
.binding = 0,
.buffer = g_uniformBuffer,
.size = info.uniformSize,
},
};
std::array<wgpu::BindGroupEntry, GX_VA_MAX_ATTR + 1> uniformEntries;
memset(&uniformEntries, 0, sizeof(uniformEntries));
uniformEntries[0].binding = 0;
uniformEntries[0].buffer = g_uniformBuffer;
uniformEntries[0].size = info.uniformSize;
u32 uniformBindIdx = 1;
for (u32 i = 0; i < GX_VA_MAX_ATTR; ++i) {
const Range& range = ranges.vaRanges[i];
if (range.size <= 0) {
continue;
}
uniformEntries[uniformBindIdx] = wgpu::BindGroupEntry{
.binding = uniformBindIdx,
.buffer = g_storageBuffer,
.size = range.size,
};
wgpu::BindGroupEntry& entry = uniformEntries[uniformBindIdx];
entry.binding = uniformBindIdx;
entry.buffer = g_storageBuffer;
entry.size = range.size;
++uniformBindIdx;
}
std::array<wgpu::BindGroupEntry, MaxTextures> samplerEntries;
std::array<wgpu::BindGroupEntry, MaxTextures * 2> textureEntries;
memset(&samplerEntries, 0, sizeof(samplerEntries));
memset(&textureEntries, 0, sizeof(textureEntries));
u32 samplerCount = 0;
u32 textureCount = 0;
for (u32 i = 0; i < info.sampledTextures.size(); ++i) {
@ -492,15 +491,15 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
}
const auto& tex = g_gxState.textures[i];
CHECK(tex, "unbound texture {}", i);
samplerEntries[samplerCount] = {
.binding = samplerCount,
.sampler = sampler_ref(tex.get_descriptor()),
};
wgpu::BindGroupEntry& samplerEntry = samplerEntries[samplerCount];
samplerEntry.binding = samplerCount;
samplerEntry.size = wgpu::kWholeSize;
samplerEntry.sampler = sampler_ref(tex.get_descriptor());
++samplerCount;
textureEntries[textureCount] = {
.binding = textureCount,
.textureView = tex.texObj.ref->view,
};
wgpu::BindGroupEntry& textureEntry = textureEntries[textureCount];
textureEntry.binding = textureCount;
textureEntry.size = wgpu::kWholeSize;
textureEntry.textureView = tex.texObj.ref->view;
++textureCount;
// Load palette
const auto& texConfig = config.textureConfig[i];
@ -508,41 +507,48 @@ GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& confi
u32 tlut = tex.texObj.tlut;
CHECK(tlut >= GX_TLUT0 && tlut <= GX_BIGTLUT3, "tlut out of bounds {}", tlut);
CHECK(g_gxState.tluts[tlut].ref, "tlut unbound {}", tlut);
textureEntries[textureCount] = {
.binding = textureCount,
.textureView = g_gxState.tluts[tlut].ref->view,
};
wgpu::BindGroupEntry& tlutEntry = textureEntries[textureCount];
tlutEntry.binding = textureCount;
tlutEntry.size = wgpu::kWholeSize;
tlutEntry.textureView = g_gxState.tluts[tlut].ref->view;
++textureCount;
}
}
const wgpu::BindGroupDescriptor uniformBindGroupDescriptor{
.label = "GX Uniform Bind Group",
.layout = layouts.uniformLayout,
.entryCount = uniformBindIdx,
.entries = uniformEntries.data(),
};
const wgpu::BindGroupDescriptor samplerBindGroupDescriptor{
.label = "GX Sampler Bind Group",
.layout = layouts.samplerLayout,
.entryCount = samplerCount,
.entries = samplerEntries.data(),
};
const wgpu::BindGroupDescriptor textureBindGroupDescriptor{
.label = "GX Texture Bind Group",
.layout = layouts.textureLayout,
.entryCount = textureCount,
.entries = textureEntries.data(),
};
return {
.uniformBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
.label = "GX Uniform Bind Group",
.layout = layouts.uniformLayout,
.entryCount = uniformBindIdx,
.entries = uniformEntries.data(),
}),
.samplerBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
.label = "GX Sampler Bind Group",
.layout = layouts.samplerLayout,
.entryCount = samplerCount,
.entries = samplerEntries.data(),
}),
.textureBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
.label = "GX Texture Bind Group",
.layout = layouts.textureLayout,
.entryCount = textureCount,
.entries = textureEntries.data(),
}),
.uniformBindGroup = bind_group_ref(uniformBindGroupDescriptor),
.samplerBindGroup = bind_group_ref(samplerBindGroupDescriptor),
.textureBindGroup = bind_group_ref(textureBindGroupDescriptor),
};
}
GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept {
GXBindGroupLayouts out;
u32 uniformSizeKey = info.uniformSize + (config.indexedAttributeCount > 0 ? 1 : 0);
const auto uniformIt = sUniformBindGroupLayouts.find(uniformSizeKey);
if (uniformIt != sUniformBindGroupLayouts.end()) {
out.uniformLayout = uniformIt->second;
Hasher uniformHasher;
uniformHasher.update(info.uniformSize);
uniformHasher.update(config.attrMapping);
const auto uniformLayoutHash = uniformHasher.digest();
auto it = sUniformBindGroupLayouts.find(uniformLayoutHash);
if (it != sUniformBindGroupLayouts.end()) {
out.uniformLayout = it->second;
} else {
std::array<wgpu::BindGroupLayoutEntry, GX_VA_MAX_ATTR + 1> uniformLayoutEntries{
wgpu::BindGroupLayoutEntry{
@ -577,16 +583,20 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
.entries = uniformLayoutEntries.data(),
};
out.uniformLayout = g_device.CreateBindGroupLayout(&uniformLayoutDescriptor);
// sUniformBindGroupLayouts.try_emplace(uniformSizeKey, out.uniformLayout);
sUniformBindGroupLayouts[uniformLayoutHash] = out.uniformLayout;
}
Hasher textureHasher;
textureHasher.update(info.sampledTextures);
textureHasher.update(config.textureConfig);
const auto textureLayoutHash = textureHasher.digest();
auto it2 = sTextureBindGroupLayouts.find(textureLayoutHash);
if (it2 != sTextureBindGroupLayouts.end()) {
out.samplerLayout = it2->second.first;
out.textureLayout = it2->second.second;
return out;
}
// u32 textureCount = info.sampledTextures.count();
// const auto textureIt = sTextureBindGroupLayouts.find(textureCount);
// if (textureIt != sTextureBindGroupLayouts.end()) {
// const auto& [sl, tl] = textureIt->second;
// out.samplerLayout = sl;
// out.textureLayout = tl;
// } else {
u32 numSamplers = 0;
u32 numTextures = 0;
std::array<wgpu::BindGroupLayoutEntry, MaxTextures> samplerEntries;
@ -655,8 +665,7 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
};
out.textureLayout = g_device.CreateBindGroupLayout(&descriptor);
}
// sTextureBindGroupLayouts.try_emplace(textureCount, out.samplerLayout, out.textureLayout);
// }
sTextureBindGroupLayouts[textureLayoutHash] = {out.samplerLayout, out.textureLayout};
return out;
}

View File

@ -1,25 +1,35 @@
#include "common.hpp"
#include "../webgpu/gpu.hpp"
#include "../internal.hpp"
#include "../webgpu/gpu.hpp"
#include "aurora/aurora.h"
#include "texture.hpp"
#include "texture_convert.hpp"
#include <algorithm>
#include <cstdint>
#include <memory>
#include <utility>
#include <fmt/format.h>
#include <magic_enum.hpp>
#include <webgpu/webgpu_cpp.h>
namespace aurora::gfx {
static Module Log("aurora::gfx");
using webgpu::g_device;
using webgpu::g_queue;
namespace {
Module Log("aurora::gfx");
// Per-format block description returned by format_info().
struct TextureFormatInfo {
// physical_size() rounds a texture's width up to a multiple of this value.
uint8_t blockWidth;
// physical_size() rounds a texture's height up to a multiple of this value.
uint8_t blockHeight;
// NOTE(review): presumably the size of one block in bytes — confirm against the code that reads it.
uint8_t blockSize;
// Set by format_info(); presumably marks block-compressed formats — confirm against its users.
bool compressed;
};
static TextureFormatInfo format_info(wgpu::TextureFormat format) {
TextureFormatInfo format_info(wgpu::TextureFormat format) {
switch (format) {
DEFAULT_FATAL("unimplemented texture format {}", magic_enum::enum_name(format));
case wgpu::TextureFormat::R8Unorm:
@ -33,11 +43,13 @@ static TextureFormatInfo format_info(wgpu::TextureFormat format) {
return {4, 4, 8, true};
}
}
static wgpu::Extent3D physical_size(wgpu::Extent3D size, TextureFormatInfo info) {
wgpu::Extent3D physical_size(wgpu::Extent3D size, TextureFormatInfo info) {
const uint32_t width = ((size.width + info.blockWidth - 1) / info.blockWidth) * info.blockWidth;
const uint32_t height = ((size.height + info.blockHeight - 1) / info.blockHeight) * info.blockHeight;
return {width, height, size.depthOrArrayLayers};
return {.width = width, .height = height, .depthOrArrayLayers = size.depthOrArrayLayers};
}
} // namespace
TextureHandle new_static_texture_2d(uint32_t width, uint32_t height, uint32_t mips, u32 format, ArrayRef<uint8_t> data,
const char* label) noexcept {
@ -112,7 +124,6 @@ TextureHandle new_dynamic_texture_2d(uint32_t width, uint32_t height, uint32_t m
.format = wgpuFormat,
.dimension = wgpu::TextureViewDimension::e2D,
.mipLevelCount = mips,
.arrayLayerCount = WGPU_ARRAY_LAYER_COUNT_UNDEFINED,
};
auto texture = g_device.CreateTexture(&textureDescriptor);
auto textureView = texture.CreateView(&textureViewDescriptor);
@ -141,8 +152,6 @@ TextureHandle new_render_texture(uint32_t width, uint32_t height, u32 fmt, const
.label = viewLabel.c_str(),
.format = wgpuFormat,
.dimension = wgpu::TextureViewDimension::e2D,
.mipLevelCount = WGPU_MIP_LEVEL_COUNT_UNDEFINED,
.arrayLayerCount = WGPU_ARRAY_LAYER_COUNT_UNDEFINED,
};
auto texture = g_device.CreateTexture(&textureDescriptor);
auto textureView = texture.CreateView(&textureViewDescriptor);