From 2ea4aac08088599caefb42a8d6cdf64c6fff498a Mon Sep 17 00:00:00 2001 From: Austin Eng Date: Sat, 25 Feb 2023 02:10:21 +0000 Subject: [PATCH] Support higher limits for maxDynamicUniform/StorageBuffers The higher tier currently supports all D3D12 devices, all Metal, and most Vulkan devices. Bug: dawn:685 Change-Id: I5bcb778b92a073c9c1af943acee193073c0741ff Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121101 Kokoro: Kokoro Commit-Queue: Austin Eng Reviewed-by: Corentin Wallez --- src/dawn/common/Constants.h | 4 - src/dawn/native/Adapter.cpp | 6 - src/dawn/native/BindGroupLayout.cpp | 3 +- src/dawn/native/BindGroupTracker.h | 30 +--- src/dawn/native/BindingInfo.cpp | 15 +- src/dawn/native/BindingInfo.h | 11 +- src/dawn/native/Limits.cpp | 11 +- src/dawn/native/PipelineLayout.cpp | 2 +- src/dawn/native/d3d12/AdapterD3D12.cpp | 51 +++--- src/dawn/native/d3d12/BindGroupD3D12.h | 3 +- src/dawn/native/d3d12/CommandBufferD3D12.cpp | 7 +- src/dawn/native/d3d12/PipelineLayoutD3D12.cpp | 4 +- src/dawn/native/d3d12/PipelineLayoutD3D12.h | 5 +- src/dawn/native/metal/BackendMTL.mm | 8 +- src/dawn/native/metal/CommandBufferMTL.mm | 12 +- src/dawn/native/opengl/CommandBufferGL.cpp | 11 +- src/dawn/native/vulkan/CommandBufferVk.cpp | 10 +- src/dawn/tests/end2end/MaxLimitTests.cpp | 152 ++++++++++++++++++ .../tests/end2end/PipelineLayoutTests.cpp | 3 +- .../validation/BindGroupValidationTests.cpp | 18 ++- 20 files changed, 247 insertions(+), 119 deletions(-) diff --git a/src/dawn/common/Constants.h b/src/dawn/common/Constants.h index da7e001d70..ca4c3b6b88 100644 --- a/src/dawn/common/Constants.h +++ b/src/dawn/common/Constants.h @@ -38,10 +38,6 @@ static constexpr uint32_t kMaxStorageBuffersPerShaderStage = 8; static constexpr uint32_t kMaxStorageTexturesPerShaderStage = 4; static constexpr uint32_t kMaxUniformBuffersPerShaderStage = 12; -// Per pipeline layout limits -static constexpr uint32_t kMaxDynamicUniformBuffersPerPipelineLayout = 8u; -static constexpr 
uint32_t kMaxDynamicStorageBuffersPerPipelineLayout = 4u; - // Indirect command sizes static constexpr uint64_t kDispatchIndirectSize = 3 * sizeof(uint32_t); static constexpr uint64_t kDrawIndirectSize = 4 * sizeof(uint32_t); diff --git a/src/dawn/native/Adapter.cpp b/src/dawn/native/Adapter.cpp index 77773764e2..2fbdf18dd8 100644 --- a/src/dawn/native/Adapter.cpp +++ b/src/dawn/native/Adapter.cpp @@ -70,12 +70,6 @@ MaybeError AdapterBase::Initialize() { std::min(mLimits.v1.maxStorageTexturesPerShaderStage, kMaxStorageTexturesPerShaderStage); mLimits.v1.maxUniformBuffersPerShaderStage = std::min(mLimits.v1.maxUniformBuffersPerShaderStage, kMaxUniformBuffersPerShaderStage); - mLimits.v1.maxDynamicUniformBuffersPerPipelineLayout = - std::min(mLimits.v1.maxDynamicUniformBuffersPerPipelineLayout, - kMaxDynamicUniformBuffersPerPipelineLayout); - mLimits.v1.maxDynamicStorageBuffersPerPipelineLayout = - std::min(mLimits.v1.maxDynamicStorageBuffersPerPipelineLayout, - kMaxDynamicStorageBuffersPerPipelineLayout); return {}; } diff --git a/src/dawn/native/BindGroupLayout.cpp b/src/dawn/native/BindGroupLayout.cpp index 125faa26f2..0f9f868234 100644 --- a/src/dawn/native/BindGroupLayout.cpp +++ b/src/dawn/native/BindGroupLayout.cpp @@ -271,7 +271,8 @@ MaybeError ValidateBindGroupLayoutDescriptor(DeviceBase* device, bindingsSet.insert(bindingNumber); } - DAWN_TRY_CONTEXT(ValidateBindingCounts(bindingCounts), "validating binding counts"); + DAWN_TRY_CONTEXT(ValidateBindingCounts(device->GetLimits(), bindingCounts), + "validating binding counts"); return {}; } diff --git a/src/dawn/native/BindGroupTracker.h b/src/dawn/native/BindGroupTracker.h index cd8254c98a..feccd59869 100644 --- a/src/dawn/native/BindGroupTracker.h +++ b/src/dawn/native/BindGroupTracker.h @@ -15,6 +15,7 @@ #ifndef SRC_DAWN_NATIVE_BINDGROUPTRACKER_H_ #define SRC_DAWN_NATIVE_BINDGROUPTRACKER_H_ +#include #include #include @@ -54,8 +55,9 @@ class BindGroupTrackerBase { } mBindGroups[index] = bindGroup; - 
mDynamicOffsetCounts[index] = dynamicOffsetCount; - SetDynamicOffsets(mDynamicOffsets[index].data(), dynamicOffsetCount, dynamicOffsets); + mDynamicOffsets[index].resize(BindingIndex(dynamicOffsetCount)); + std::copy(dynamicOffsets, dynamicOffsets + dynamicOffsetCount, + mDynamicOffsets[index].begin()); } void OnSetPipeline(PipelineBase* pipeline) { mPipelineLayout = pipeline->GetLayout(); } @@ -105,10 +107,7 @@ class BindGroupTrackerBase { BindGroupLayoutMask mDirtyBindGroupsObjectChangedOrIsDynamic = 0; BindGroupLayoutMask mBindGroupLayoutsMask = 0; ityp::array mBindGroups = {}; - ityp::array mDynamicOffsetCounts = {}; - ityp::array, - kMaxBindGroups> + ityp::array, kMaxBindGroups> mDynamicOffsets = {}; // |mPipelineLayout| is the current pipeline layout set on the command buffer. @@ -116,25 +115,6 @@ class BindGroupTrackerBase { // to the bind group bindings. PipelineLayoutBase* mPipelineLayout = nullptr; PipelineLayoutBase* mLastAppliedPipelineLayout = nullptr; - - private: - // We have two overloads here because offsets in Vulkan are uint32_t but uint64_t - // in other backends. 
- static void SetDynamicOffsets(uint64_t* data, - uint32_t dynamicOffsetCount, - uint32_t* dynamicOffsets) { - for (uint32_t i = 0; i < dynamicOffsetCount; ++i) { - data[i] = static_cast(dynamicOffsets[i]); - } - } - - static void SetDynamicOffsets(uint32_t* data, - uint32_t dynamicOffsetCount, - uint32_t* dynamicOffsets) { - if (dynamicOffsetCount > 0) { - memcpy(data, dynamicOffsets, sizeof(uint32_t) * dynamicOffsetCount); - } - } }; } // namespace dawn::native diff --git a/src/dawn/native/BindingInfo.cpp b/src/dawn/native/BindingInfo.cpp index 1d4b60d396..ba969373be 100644 --- a/src/dawn/native/BindingInfo.cpp +++ b/src/dawn/native/BindingInfo.cpp @@ -15,6 +15,7 @@ #include "dawn/native/BindingInfo.h" #include "dawn/native/ChainUtils_autogen.h" +#include "dawn/native/Limits.h" namespace dawn::native { @@ -93,18 +94,22 @@ void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts& } } -MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts) { +MaybeError ValidateBindingCounts(const CombinedLimits& limits, const BindingCounts& bindingCounts) { DAWN_INVALID_IF( - bindingCounts.dynamicUniformBufferCount > kMaxDynamicUniformBuffersPerPipelineLayout, + bindingCounts.dynamicUniformBufferCount > + limits.v1.maxDynamicUniformBuffersPerPipelineLayout, "The number of dynamic uniform buffers (%u) exceeds the maximum per-pipeline-layout " "limit (%u).", - bindingCounts.dynamicUniformBufferCount, kMaxDynamicUniformBuffersPerPipelineLayout); + bindingCounts.dynamicUniformBufferCount, + limits.v1.maxDynamicUniformBuffersPerPipelineLayout); DAWN_INVALID_IF( - bindingCounts.dynamicStorageBufferCount > kMaxDynamicStorageBuffersPerPipelineLayout, + bindingCounts.dynamicStorageBufferCount > + limits.v1.maxDynamicStorageBuffersPerPipelineLayout, "The number of dynamic storage buffers (%u) exceeds the maximum per-pipeline-layout " "limit (%u).", - bindingCounts.dynamicStorageBufferCount, kMaxDynamicStorageBuffersPerPipelineLayout); + 
bindingCounts.dynamicStorageBufferCount, + limits.v1.maxDynamicStorageBuffersPerPipelineLayout); for (SingleShaderStage stage : IterateStages(kAllStages)) { DAWN_INVALID_IF( diff --git a/src/dawn/native/BindingInfo.h b/src/dawn/native/BindingInfo.h index 9d32b05e08..8ebf5b891e 100644 --- a/src/dawn/native/BindingInfo.h +++ b/src/dawn/native/BindingInfo.h @@ -29,13 +29,6 @@ namespace dawn::native { -// Not a real WebGPU limit, but the sum of the two limits is useful for internal optimizations. -static constexpr uint32_t kMaxDynamicBuffersPerPipelineLayout = - kMaxDynamicUniformBuffersPerPipelineLayout + kMaxDynamicStorageBuffersPerPipelineLayout; - -static constexpr BindingIndex kMaxDynamicBuffersPerPipelineLayoutTyped = - BindingIndex(kMaxDynamicBuffersPerPipelineLayout); - // Not a real WebGPU limit, but used to optimize parts of Dawn which expect valid usage of the // API. There should never be more bindings than the max per stage, for each stage. static constexpr uint32_t kMaxBindingsPerPipelineLayout = @@ -87,9 +80,11 @@ struct BindingCounts { PerStage perStage; }; +struct CombinedLimits; + void IncrementBindingCounts(BindingCounts* bindingCounts, const BindGroupLayoutEntry& entry); void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts& rhs); -MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts); +MaybeError ValidateBindingCounts(const CombinedLimits& limits, const BindingCounts& bindingCounts); // For buffer size validation using RequiredBufferSizes = ityp::array, kMaxBindGroups>; diff --git a/src/dawn/native/Limits.cpp b/src/dawn/native/Limits.cpp index 433ab3c6e4..a17c355592 100644 --- a/src/dawn/native/Limits.cpp +++ b/src/dawn/native/Limits.cpp @@ -32,6 +32,13 @@ #define LIMITS_MAX_BUFFER_SIZE(X) \ X(Maximum, maxBufferSize, 0x10000000, 0x40000000, 0x80000000) +// Tiers for limits related to resource bindings. +// TODO(crbug.com/dawn/685): Define these better. 
For now, use two tiers where one +// offers slightly better than default limits. +#define LIMITS_RESOURCE_BINDINGS(X) \ + X(Maximum, maxDynamicUniformBuffersPerPipelineLayout, 8, 10) \ + X(Maximum, maxDynamicStorageBuffersPerPipelineLayout, 4, 8) \ + // TODO(crbug.com/dawn/685): // These limits don't have tiers yet. Define two tiers with the same values since the macros // in this file expect more than one tier. @@ -42,8 +49,6 @@ X(Maximum, maxTextureArrayLayers, 256, 256) \ X(Maximum, maxBindGroups, 4, 4) \ X(Maximum, maxBindingsPerBindGroup, 640, 640) \ - X(Maximum, maxDynamicUniformBuffersPerPipelineLayout, 8, 8) \ - X(Maximum, maxDynamicStorageBuffersPerPipelineLayout, 4, 4) \ X(Maximum, maxSampledTexturesPerShaderStage, 16, 16) \ X(Maximum, maxSamplersPerShaderStage, 16, 16) \ X(Maximum, maxStorageBuffersPerShaderStage, 8, 8) \ @@ -71,12 +76,14 @@ X(LIMITS_WORKGROUP_STORAGE_SIZE) \ X(LIMITS_STORAGE_BUFFER_BINDING_SIZE) \ X(LIMITS_MAX_BUFFER_SIZE) \ + X(LIMITS_RESOURCE_BINDINGS) \ X(LIMITS_OTHER) #define LIMITS(X) \ LIMITS_WORKGROUP_STORAGE_SIZE(X) \ LIMITS_STORAGE_BUFFER_BINDING_SIZE(X) \ LIMITS_MAX_BUFFER_SIZE(X) \ + LIMITS_RESOURCE_BINDINGS(X) \ LIMITS_OTHER(X) namespace dawn::native { diff --git a/src/dawn/native/PipelineLayout.cpp b/src/dawn/native/PipelineLayout.cpp index 39372be306..93e915f308 100644 --- a/src/dawn/native/PipelineLayout.cpp +++ b/src/dawn/native/PipelineLayout.cpp @@ -50,7 +50,7 @@ MaybeError ValidatePipelineLayoutDescriptor(DeviceBase* device, descriptor->bindGroupLayouts[i]->GetBindingCountInfo()); } - DAWN_TRY(ValidateBindingCounts(bindingCounts)); + DAWN_TRY(ValidateBindingCounts(device->GetLimits(), bindingCounts)); return {}; } diff --git a/src/dawn/native/d3d12/AdapterD3D12.cpp b/src/dawn/native/d3d12/AdapterD3D12.cpp index 4381234ea6..d05061e5b4 100644 --- a/src/dawn/native/d3d12/AdapterD3D12.cpp +++ b/src/dawn/native/d3d12/AdapterD3D12.cpp @@ -269,36 +269,45 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* 
limits) { // CBVs/UAVs/SRVs for bind group are a root descriptor table // - (maxBindGroups) // Samplers for each bind group are a root descriptor table - // - (2 * maxDynamicBuffers) - // Each dynamic buffer is a root descriptor + // - dynamic uniform buffers - root descriptor + // - dynamic storage buffers - root descriptor plus a root constant for the size // RESERVED: // - 3 = max of: // - 2 root constants for the baseVertex/baseInstance constants. // - 3 root constants for num workgroups X, Y, Z - // - 4 root constants (kMaxDynamicStorageBuffersPerPipelineLayout) for dynamic storage - // buffer lengths. - static constexpr uint32_t kReservedSlots = 7; + static constexpr uint32_t kReservedSlots = 3; + + // Costs: + // - bind group: 2 = 1 cbv/uav/srv table + 1 sampler table + // - dynamic uniform buffer: 2 slots for a root descriptor + // - dynamic storage buffer: 3 slots for a root descriptor + root constant // Available slots after base limits considered. uint32_t availableRootSignatureSlots = - kMaxRootSignatureSize - kReservedSlots - - 2 * (limits->v1.maxBindGroups + limits->v1.maxDynamicUniformBuffersPerPipelineLayout + - limits->v1.maxDynamicStorageBuffersPerPipelineLayout); + kMaxRootSignatureSize - kReservedSlots - 2 * limits->v1.maxBindGroups - + 2 * limits->v1.maxDynamicUniformBuffersPerPipelineLayout - + 3 * limits->v1.maxDynamicStorageBuffersPerPipelineLayout; - // Because we need either: - // - 1 cbv/uav/srv table + 1 sampler table - // - 2 slots for a root descriptor - uint32_t availableDynamicBufferOrBindGroup = availableRootSignatureSlots / 2; + while (availableRootSignatureSlots >= 2) { + // Start by incrementing maxDynamicStorageBuffersPerPipelineLayout since the + // default is just 4 and developers likely want more. This scheme currently + // gets us to 8. 
+ if (availableRootSignatureSlots >= 3) { + limits->v1.maxDynamicStorageBuffersPerPipelineLayout += 1; + availableRootSignatureSlots -= 3; + } + if (availableRootSignatureSlots >= 2) { + limits->v1.maxBindGroups += 1; + availableRootSignatureSlots -= 2; + } + if (availableRootSignatureSlots >= 2) { + limits->v1.maxDynamicUniformBuffersPerPipelineLayout += 1; + availableRootSignatureSlots -= 2; + } + } - // We can either have a bind group, a dyn uniform buffer or a dyn storage buffer. - // Distribute evenly. - limits->v1.maxBindGroups += availableDynamicBufferOrBindGroup / 3; - limits->v1.maxDynamicUniformBuffersPerPipelineLayout += availableDynamicBufferOrBindGroup / 3; - limits->v1.maxDynamicStorageBuffersPerPipelineLayout += - (availableDynamicBufferOrBindGroup - 2 * (availableDynamicBufferOrBindGroup / 3)); - - ASSERT(2 * (limits->v1.maxBindGroups + limits->v1.maxDynamicUniformBuffersPerPipelineLayout + - limits->v1.maxDynamicStorageBuffersPerPipelineLayout) <= + ASSERT(2 * limits->v1.maxBindGroups + 2 * limits->v1.maxDynamicUniformBuffersPerPipelineLayout + + 3 * limits->v1.maxDynamicStorageBuffersPerPipelineLayout <= kMaxRootSignatureSize - kReservedSlots); // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-attributes-numthreads diff --git a/src/dawn/native/d3d12/BindGroupD3D12.h b/src/dawn/native/d3d12/BindGroupD3D12.h index 243374a0bf..3faafba6ca 100644 --- a/src/dawn/native/d3d12/BindGroupD3D12.h +++ b/src/dawn/native/d3d12/BindGroupD3D12.h @@ -47,8 +47,7 @@ class BindGroup final : public BindGroupBase, public PlacementAllocated { void SetSamplerAllocationEntry(Ref entry); - using DynamicStorageBufferLengths = - ityp::stack_vec; + using DynamicStorageBufferLengths = ityp::stack_vec; const DynamicStorageBufferLengths& GetDynamicStorageBufferLengths() const; private: diff --git a/src/dawn/native/d3d12/CommandBufferD3D12.cpp b/src/dawn/native/d3d12/CommandBufferD3D12.cpp index ba8cfa1474..1ddead198a 100644 --- 
a/src/dawn/native/d3d12/CommandBufferD3D12.cpp +++ b/src/dawn/native/d3d12/CommandBufferD3D12.cpp @@ -442,7 +442,7 @@ class BindGroupStateTracker : public BindGroupTrackerBase { for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) { BindGroup* group = ToBackend(mBindGroups[index]); ApplyBindGroup(commandList, ToBackend(mPipelineLayout), index, group, - mDynamicOffsetCounts[index], mDynamicOffsets[index].data()); + mDynamicOffsets[index]); } AfterApply(); @@ -484,10 +484,7 @@ class BindGroupStateTracker : public BindGroupTrackerBase { const PipelineLayout* pipelineLayout, BindGroupIndex index, BindGroup* group, - uint32_t dynamicOffsetCountIn, - const uint64_t* dynamicOffsetsIn) { - ityp::span dynamicOffsets(dynamicOffsetsIn, - BindingIndex(dynamicOffsetCountIn)); + const ityp::vector& dynamicOffsets) { ASSERT(dynamicOffsets.size() == group->GetLayout()->GetDynamicBufferCount()); // Usually, the application won't set the same offsets many times, diff --git a/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp b/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp index aca85e83f8..aaab8c6262 100644 --- a/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp +++ b/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp @@ -139,6 +139,7 @@ MaybeError PipelineLayout::Initialize() { // Init root descriptors in root signatures for dynamic buffer bindings. // These are packed at the beginning of the layout binding info. 
+ mDynamicRootParameterIndices[group].resize(bindGroupLayout->GetDynamicBufferCount()); for (BindingIndex dynamicBindingIndex{0}; dynamicBindingIndex < bindGroupLayout->GetDynamicBufferCount(); ++dynamicBindingIndex) { @@ -224,8 +225,6 @@ MaybeError PipelineLayout::Initialize() { ASSERT(mDynamicStorageBufferLengthInfo[group].bindingAndRegisterOffsets.size() == bgl->GetBindingCountInfo().dynamicStorageBufferCount); } - ASSERT(dynamicStorageBufferLengthsShaderRegisterOffset <= - kMaxDynamicStorageBuffersPerPipelineLayout); if (dynamicStorageBufferLengthsShaderRegisterOffset > 0) { D3D12_ROOT_PARAMETER dynamicStorageBufferLengthConstants{}; @@ -322,7 +321,6 @@ PipelineLayout::GetDynamicStorageBufferLengthInfo() const { uint32_t PipelineLayout::GetDynamicRootParameterIndex(BindGroupIndex group, BindingIndex bindingIndex) const { ASSERT(group < kMaxBindGroupsTyped); - ASSERT(bindingIndex < kMaxDynamicBuffersPerPipelineLayoutTyped); ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).buffer.hasDynamicOffset); ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).visibility != wgpu::ShaderStage::None); diff --git a/src/dawn/native/d3d12/PipelineLayoutD3D12.h b/src/dawn/native/d3d12/PipelineLayoutD3D12.h index 204741265c..4d30722bfc 100644 --- a/src/dawn/native/d3d12/PipelineLayoutD3D12.h +++ b/src/dawn/native/d3d12/PipelineLayoutD3D12.h @@ -19,6 +19,7 @@ #include "dawn/common/Constants.h" #include "dawn/common/ityp_array.h" +#include "dawn/common/ityp_vector.h" #include "dawn/native/BindingInfo.h" #include "dawn/native/PipelineLayout.h" #include "dawn/native/d3d12/d3d12_platform.h" @@ -91,9 +92,7 @@ class PipelineLayout final : public PipelineLayoutBase { ityp::array mCbvUavSrvRootParameterInfo; ityp::array mSamplerRootParameterInfo; - ityp::array, - kMaxBindGroups> + ityp::array, kMaxBindGroups> mDynamicRootParameterIndices; DynamicStorageBufferLengthInfo mDynamicStorageBufferLengthInfo; uint32_t mFirstIndexOffsetParameterIndex; diff --git 
a/src/dawn/native/metal/BackendMTL.mm b/src/dawn/native/metal/BackendMTL.mm index c2811c180a..5a1b166aa8 100644 --- a/src/dawn/native/metal/BackendMTL.mm +++ b/src/dawn/native/metal/BackendMTL.mm @@ -716,10 +716,10 @@ class Adapter : public AdapterBase { // buffers, 128 textures, and 16 samplers. Mac GPU families // with tier 2 argument buffers support 500000 buffers and // textures, and 1024 unique samplers - limits->v1.maxDynamicUniformBuffersPerPipelineLayout = - limits->v1.maxUniformBuffersPerShaderStage; - limits->v1.maxDynamicStorageBuffersPerPipelineLayout = - limits->v1.maxStorageBuffersPerShaderStage; + // Without argument buffers, we have slots [0 -> 29], inclusive, which is 30 total. + // 8 are used by maxVertexBuffers. + limits->v1.maxDynamicUniformBuffersPerPipelineLayout = 11u; + limits->v1.maxDynamicStorageBuffersPerPipelineLayout = 11u; // The WebGPU limit is the limit across all vertex buffers, combined. limits->v1.maxVertexAttributes = diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm index 5f81387b2d..a0a83ee572 100644 --- a/src/dawn/native/metal/CommandBufferMTL.mm +++ b/src/dawn/native/metal/CommandBufferMTL.mm @@ -442,8 +442,7 @@ class BindGroupTracker : public BindGroupTrackerBase { void Apply(Encoder encoder) { BeforeApply(); for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) { - ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), - mDynamicOffsetCounts[index], mDynamicOffsets[index].data(), + ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), mDynamicOffsets[index], ToBackend(mPipelineLayout)); } AfterApply(); @@ -458,11 +457,8 @@ class BindGroupTracker : public BindGroupTrackerBase { id compute, BindGroupIndex index, BindGroup* group, - uint32_t dynamicOffsetCount, - uint64_t* dynamicOffsets, + const ityp::vector& dynamicOffsets, PipelineLayout* pipelineLayout) { - uint32_t currentDynamicBufferIndex = 0; - // TODO(crbug.com/dawn/854): 
Maintain buffers and offsets arrays in BindGroup // so that we only have to do one setVertexBuffers and one setFragmentBuffers // call here. @@ -504,8 +500,8 @@ class BindGroupTracker : public BindGroupTrackerBase { // TODO(crbug.com/dawn/854): Record bound buffer status to use // setBufferOffset to achieve better performance. if (bindingInfo.buffer.hasDynamicOffset) { - offset += dynamicOffsets[currentDynamicBufferIndex]; - currentDynamicBufferIndex++; + // Dynamic buffers are packed at the front of BindingIndices. + offset += dynamicOffsets[bindingIndex]; } if (hasVertStage) { diff --git a/src/dawn/native/opengl/CommandBufferGL.cpp b/src/dawn/native/opengl/CommandBufferGL.cpp index f363ba9c7c..d8a32e4430 100644 --- a/src/dawn/native/opengl/CommandBufferGL.cpp +++ b/src/dawn/native/opengl/CommandBufferGL.cpp @@ -231,8 +231,7 @@ class BindGroupTracker : public BindGroupTrackerBase { void Apply(const OpenGLFunctions& gl) { BeforeApply(); for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) { - ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsetCounts[index], - mDynamicOffsets[index].data()); + ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsets[index]); } AfterApply(); } @@ -241,10 +240,8 @@ class BindGroupTracker : public BindGroupTrackerBase { void ApplyBindGroup(const OpenGLFunctions& gl, BindGroupIndex index, BindGroupBase* group, - uint32_t dynamicOffsetCount, - uint64_t* dynamicOffsets) { + const ityp::vector& dynamicOffsets) { const auto& indices = ToBackend(mPipelineLayout)->GetBindingIndexInfo()[index]; - uint32_t currentDynamicOffsetIndex = 0; for (BindingIndex bindingIndex{0}; bindingIndex < group->GetLayout()->GetBindingCount(); ++bindingIndex) { @@ -268,8 +265,8 @@ class BindGroupTracker : public BindGroupTrackerBase { GLuint offset = binding.offset; if (bindingInfo.buffer.hasDynamicOffset) { - offset += dynamicOffsets[currentDynamicOffsetIndex]; - ++currentDynamicOffsetIndex; + // Dynamic buffers are 
packed at the front of BindingIndices. + offset += dynamicOffsets[bindingIndex]; } GLenum target; diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp index 6121e9c9d2..54e197d5c9 100644 --- a/src/dawn/native/vulkan/CommandBufferVk.cpp +++ b/src/dawn/native/vulkan/CommandBufferVk.cpp @@ -141,12 +141,12 @@ class DescriptorSetTracker : public BindGroupTrackerBase { BeforeApply(); for (BindGroupIndex dirtyIndex : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) { VkDescriptorSet set = ToBackend(mBindGroups[dirtyIndex])->GetHandle(); + uint32_t count = static_cast(mDynamicOffsets[dirtyIndex].size()); const uint32_t* dynamicOffset = - mDynamicOffsetCounts[dirtyIndex] > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr; - device->fn.CmdBindDescriptorSets(recordingContext->commandBuffer, bindPoint, - ToBackend(mPipelineLayout)->GetHandle(), - static_cast(dirtyIndex), 1, &*set, - mDynamicOffsetCounts[dirtyIndex], dynamicOffset); + count > 0 ? 
mDynamicOffsets[dirtyIndex].data() : nullptr; + device->fn.CmdBindDescriptorSets( + recordingContext->commandBuffer, bindPoint, ToBackend(mPipelineLayout)->GetHandle(), + static_cast(dirtyIndex), 1, &*set, count, dynamicOffset); } AfterApply(); } diff --git a/src/dawn/tests/end2end/MaxLimitTests.cpp b/src/dawn/tests/end2end/MaxLimitTests.cpp index 19dd5d36f6..2e3320e4a6 100644 --- a/src/dawn/tests/end2end/MaxLimitTests.cpp +++ b/src/dawn/tests/end2end/MaxLimitTests.cpp @@ -15,10 +15,12 @@ #include #include #include +#include #include "dawn/common/Math.h" #include "dawn/common/Platform.h" #include "dawn/tests/DawnTest.h" +#include "dawn/utils/ComboRenderPipelineDescriptor.h" #include "dawn/utils/WGPUHelpers.h" class MaxLimitTests : public DawnTest { @@ -240,6 +242,156 @@ TEST_P(MaxLimitTests, MaxBufferBindingSize) { } } +// Test using the maximum number of dynamic uniform and storage buffers +TEST_P(MaxLimitTests, MaxDynamicBuffers) { + wgpu::Limits limits = GetSupportedLimits().limits; + + std::vector bglEntries; + std::vector bgEntries; + + // Binding number counter which is bumped as we create bind group layout + // entries. + uint32_t bindingNumber = 1u; + + // Lambda to create a buffer. The binding number is written at an offset of + // 256 bytes. The test binds at a 256-byte dynamic offset and checks that the + // contents of the buffer are equal to the binding number. + std::vector bufferData(1 + 256 / sizeof(uint32_t)); + auto MakeBuffer = [&](wgpu::BufferUsage usage) { + *bufferData.rbegin() = bindingNumber; + return utils::CreateBufferFromData(device, bufferData.data(), + sizeof(uint32_t) * bufferData.size(), usage); + }; + + // Create as many dynamic uniform buffers as the limits allow. 
+ for (uint32_t i = 0u; i < limits.maxDynamicUniformBuffersPerPipelineLayout && + i < 2 * limits.maxUniformBuffersPerShaderStage; + ++i) { + wgpu::Buffer buffer = MakeBuffer(wgpu::BufferUsage::Uniform); + + bglEntries.push_back(utils::BindingLayoutEntryInitializationHelper{ + bindingNumber, + // When we surpass the per-stage limit, switch to the fragment shader. + i < limits.maxUniformBuffersPerShaderStage ? wgpu::ShaderStage::Vertex + : wgpu::ShaderStage::Fragment, + wgpu::BufferBindingType::Uniform, true}); + bgEntries.push_back( + utils::BindingInitializationHelper(bindingNumber, buffer, 0, sizeof(uint32_t)) + .GetAsBinding()); + + ++bindingNumber; + } + + // Create as many dynamic storage buffers as the limits allow. + for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout && + i < 2 * limits.maxStorageBuffersPerShaderStage; + ++i) { + wgpu::Buffer buffer = MakeBuffer(wgpu::BufferUsage::Storage); + + bglEntries.push_back(utils::BindingLayoutEntryInitializationHelper{ + bindingNumber, + // When we surpass the per-stage limit, switch to the fragment shader. + i < limits.maxStorageBuffersPerShaderStage ? wgpu::ShaderStage::Vertex + : wgpu::ShaderStage::Fragment, + wgpu::BufferBindingType::ReadOnlyStorage, true}); + bgEntries.push_back( + utils::BindingInitializationHelper(bindingNumber, buffer, 0, sizeof(uint32_t)) + .GetAsBinding()); + + ++bindingNumber; + } + + // Create the bind group layout. + wgpu::BindGroupLayoutDescriptor bglDesc; + bglDesc.entryCount = static_cast(bglEntries.size()); + bglDesc.entries = bglEntries.data(); + wgpu::BindGroupLayout bgl = device.CreateBindGroupLayout(&bglDesc); + + // Create the bind group. + wgpu::BindGroupDescriptor bgDesc; + bgDesc.layout = bgl; + bgDesc.entryCount = static_cast(bgEntries.size()); + bgDesc.entries = bgEntries.data(); + wgpu::BindGroup bindGroup = device.CreateBindGroup(&bgDesc); + + // Generate binding declarations at the top of the shader. 
+ std::ostringstream wgslShader; + for (const auto& binding : bglEntries) { + if (binding.buffer.type == wgpu::BufferBindingType::Uniform) { + wgslShader << "@group(0) @binding(" << binding.binding << ") var b" + << binding.binding << ": u32;\n"; + } else if (binding.buffer.type == wgpu::BufferBindingType::ReadOnlyStorage) { + wgslShader << "@group(0) @binding(" << binding.binding << ") var b" + << binding.binding << ": u32;\n"; + } + } + + // Generate a vertex shader which rasterizes primitives outside the viewport + // if the bound buffer contents are not expected. + wgslShader << "@vertex fn vert_main() -> @builtin(position) vec4f {\n"; + for (const auto& binding : bglEntries) { + if (binding.visibility == wgpu::ShaderStage::Vertex) { + // If the value is not what is expected, return a vertex that will be clipped. + wgslShader << " if (b" << binding.binding << " != " << binding.binding + << "u) { return vec4f(10.0, 10.0, 10.0, 1.0); }\n"; + } + } + wgslShader << " return vec4f(0.0, 0.0, 0.5, 1.0);\n"; + wgslShader << "}\n"; + + // Generate a fragment shader which discards fragments if the bound buffer + // contents are not expected. + wgslShader << "@fragment fn frag_main() -> @location(0) u32 {\n"; + for (const auto& binding : bglEntries) { + if (binding.visibility == wgpu::ShaderStage::Fragment) { + // If the value is not what is expected, discard. + wgslShader << " if (b" << binding.binding << " != " << binding.binding + << "u) { discard; }\n"; + } + } + wgslShader << " return 1u;\n"; + wgslShader << "}\n"; + + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, wgslShader.str().c_str()); + + // Create a render target. Its contents will be 1 if the test passes. 
+ wgpu::TextureDescriptor renderTargetDesc; + renderTargetDesc.size = {1, 1}; + renderTargetDesc.format = wgpu::TextureFormat::R8Uint; + renderTargetDesc.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::RenderAttachment; + wgpu::Texture renderTarget = device.CreateTexture(&renderTargetDesc); + + utils::ComboRenderPipelineDescriptor pipelineDesc; + pipelineDesc.layout = utils::MakePipelineLayout(device, {bgl}); + pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList; + pipelineDesc.vertex.module = shaderModule; + pipelineDesc.vertex.entryPoint = "vert_main"; + pipelineDesc.cFragment.module = shaderModule; + pipelineDesc.cFragment.entryPoint = "frag_main"; + pipelineDesc.cTargets[0].format = renderTargetDesc.format; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDesc); + + utils::ComboRenderPassDescriptor rpDesc({renderTarget.CreateView()}); + rpDesc.cColorAttachments[0].clearValue = {}; + rpDesc.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear; + rpDesc.cColorAttachments[0].storeOp = wgpu::StoreOp::Store; + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&rpDesc); + + // Bind the bind group with all resources at a 256-byte dynamic offset, and draw. 
+ std::vector dynamicOffsets(bglEntries.size(), 256u); + pass.SetBindGroup(0, bindGroup, dynamicOffsets.size(), dynamicOffsets.data()); + pass.SetPipeline(pipeline); + pass.Draw(1); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + uint32_t expected = 1u; + EXPECT_TEXTURE_EQ(&expected, renderTarget, {0, 0}, {1, 1}); +} + DAWN_INSTANTIATE_TEST(MaxLimitTests, D3D12Backend(), MetalBackend(), diff --git a/src/dawn/tests/end2end/PipelineLayoutTests.cpp b/src/dawn/tests/end2end/PipelineLayoutTests.cpp index b9a61fb060..2b5a9c79f2 100644 --- a/src/dawn/tests/end2end/PipelineLayoutTests.cpp +++ b/src/dawn/tests/end2end/PipelineLayoutTests.cpp @@ -28,7 +28,8 @@ TEST_P(PipelineLayoutTests, DynamicBuffersOverflow) { wgpu::BindGroupLayout bglA; { std::vector entries; - for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; i++) { + for (uint32_t i = 0; + i < GetSupportedLimits().limits.maxDynamicStorageBuffersPerPipelineLayout; i++) { wgpu::BindGroupLayoutEntry entry; entry.binding = i; entry.visibility = wgpu::ShaderStage::Compute; diff --git a/src/dawn/tests/unittests/validation/BindGroupValidationTests.cpp b/src/dawn/tests/unittests/validation/BindGroupValidationTests.cpp index 2b4fdb0719..4e8c069d36 100644 --- a/src/dawn/tests/unittests/validation/BindGroupValidationTests.cpp +++ b/src/dawn/tests/unittests/validation/BindGroupValidationTests.cpp @@ -1476,22 +1476,24 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) { std::vector maxStorageDB; std::vector maxReadonlyStorageDB; + wgpu::Limits limits = GetSupportedLimits().limits; + // In this test, we use all the same shader stage. Ensure that this does not exceed the // per-stage limit. 
- static_assert(kMaxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage); - static_assert(kMaxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage); + ASSERT(limits.maxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage); + ASSERT(limits.maxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage); - for (uint32_t i = 0; i < kMaxDynamicUniformBuffersPerPipelineLayout; ++i) { + for (uint32_t i = 0; i < limits.maxDynamicUniformBuffersPerPipelineLayout; ++i) { maxUniformDB.push_back(utils::BindingLayoutEntryInitializationHelper( i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform, true)); } - for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) { + for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout; ++i) { maxStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper( i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage, true)); } - for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) { + for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout; ++i) { maxReadonlyStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper( i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::ReadOnlyStorage, true)); } @@ -1561,7 +1563,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) { // Check dynamic uniform buffers exceed maximum in bind group layout. { maxUniformDB.push_back(utils::BindingLayoutEntryInitializationHelper( - kMaxDynamicUniformBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment, + limits.maxDynamicUniformBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Uniform, true)); TestCreateBindGroupLayout(maxUniformDB.data(), maxUniformDB.size(), false); } @@ -1569,7 +1571,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) { // Check dynamic storage buffers exceed maximum in bind group layout. 
{ maxStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper( - kMaxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment, + limits.maxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Storage, true)); TestCreateBindGroupLayout(maxStorageDB.data(), maxStorageDB.size(), false); } @@ -1577,7 +1579,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) { // Check dynamic readonly storage buffers exceed maximum in bind group layout. { maxReadonlyStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper( - kMaxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment, + limits.maxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::ReadOnlyStorage, true)); TestCreateBindGroupLayout(maxReadonlyStorageDB.data(), maxReadonlyStorageDB.size(), false); }