Support higher limits for maxDynamicUniform/StorageBuffers

The higher tier is currently supported on all D3D12 devices, all Metal
devices, and most Vulkan devices.

Bug: dawn:685
Change-Id: I5bcb778b92a073c9c1af943acee193073c0741ff
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121101
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Austin Eng 2023-02-25 02:10:21 +00:00 committed by Dawn LUCI CQ
parent 8ef7311413
commit 2ea4aac080
20 changed files with 247 additions and 119 deletions

View File

@ -38,10 +38,6 @@ static constexpr uint32_t kMaxStorageBuffersPerShaderStage = 8;
static constexpr uint32_t kMaxStorageTexturesPerShaderStage = 4;
static constexpr uint32_t kMaxUniformBuffersPerShaderStage = 12;
// Per pipeline layout limits
static constexpr uint32_t kMaxDynamicUniformBuffersPerPipelineLayout = 8u;
static constexpr uint32_t kMaxDynamicStorageBuffersPerPipelineLayout = 4u;
// Indirect command sizes
static constexpr uint64_t kDispatchIndirectSize = 3 * sizeof(uint32_t);
static constexpr uint64_t kDrawIndirectSize = 4 * sizeof(uint32_t);

View File

@ -70,12 +70,6 @@ MaybeError AdapterBase::Initialize() {
std::min(mLimits.v1.maxStorageTexturesPerShaderStage, kMaxStorageTexturesPerShaderStage);
mLimits.v1.maxUniformBuffersPerShaderStage =
std::min(mLimits.v1.maxUniformBuffersPerShaderStage, kMaxUniformBuffersPerShaderStage);
mLimits.v1.maxDynamicUniformBuffersPerPipelineLayout =
std::min(mLimits.v1.maxDynamicUniformBuffersPerPipelineLayout,
kMaxDynamicUniformBuffersPerPipelineLayout);
mLimits.v1.maxDynamicStorageBuffersPerPipelineLayout =
std::min(mLimits.v1.maxDynamicStorageBuffersPerPipelineLayout,
kMaxDynamicStorageBuffersPerPipelineLayout);
return {};
}

View File

@ -271,7 +271,8 @@ MaybeError ValidateBindGroupLayoutDescriptor(DeviceBase* device,
bindingsSet.insert(bindingNumber);
}
DAWN_TRY_CONTEXT(ValidateBindingCounts(bindingCounts), "validating binding counts");
DAWN_TRY_CONTEXT(ValidateBindingCounts(device->GetLimits(), bindingCounts),
"validating binding counts");
return {};
}

View File

@ -15,6 +15,7 @@
#ifndef SRC_DAWN_NATIVE_BINDGROUPTRACKER_H_
#define SRC_DAWN_NATIVE_BINDGROUPTRACKER_H_
#include <algorithm>
#include <array>
#include <bitset>
@ -54,8 +55,9 @@ class BindGroupTrackerBase {
}
mBindGroups[index] = bindGroup;
mDynamicOffsetCounts[index] = dynamicOffsetCount;
SetDynamicOffsets(mDynamicOffsets[index].data(), dynamicOffsetCount, dynamicOffsets);
mDynamicOffsets[index].resize(BindingIndex(dynamicOffsetCount));
std::copy(dynamicOffsets, dynamicOffsets + dynamicOffsetCount,
mDynamicOffsets[index].begin());
}
void OnSetPipeline(PipelineBase* pipeline) { mPipelineLayout = pipeline->GetLayout(); }
@ -105,10 +107,7 @@ class BindGroupTrackerBase {
BindGroupLayoutMask mDirtyBindGroupsObjectChangedOrIsDynamic = 0;
BindGroupLayoutMask mBindGroupLayoutsMask = 0;
ityp::array<BindGroupIndex, BindGroupBase*, kMaxBindGroups> mBindGroups = {};
ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mDynamicOffsetCounts = {};
ityp::array<BindGroupIndex,
std::array<DynamicOffset, kMaxDynamicBuffersPerPipelineLayout>,
kMaxBindGroups>
ityp::array<BindGroupIndex, ityp::vector<BindingIndex, DynamicOffset>, kMaxBindGroups>
mDynamicOffsets = {};
// |mPipelineLayout| is the current pipeline layout set on the command buffer.
@ -116,25 +115,6 @@ class BindGroupTrackerBase {
// to the bind group bindings.
PipelineLayoutBase* mPipelineLayout = nullptr;
PipelineLayoutBase* mLastAppliedPipelineLayout = nullptr;
private:
// We have two overloads here because offsets in Vulkan are uint32_t but uint64_t
// in other backends.
static void SetDynamicOffsets(uint64_t* data,
uint32_t dynamicOffsetCount,
uint32_t* dynamicOffsets) {
for (uint32_t i = 0; i < dynamicOffsetCount; ++i) {
data[i] = static_cast<uint64_t>(dynamicOffsets[i]);
}
}
static void SetDynamicOffsets(uint32_t* data,
uint32_t dynamicOffsetCount,
uint32_t* dynamicOffsets) {
if (dynamicOffsetCount > 0) {
memcpy(data, dynamicOffsets, sizeof(uint32_t) * dynamicOffsetCount);
}
}
};
} // namespace dawn::native

View File

@ -15,6 +15,7 @@
#include "dawn/native/BindingInfo.h"
#include "dawn/native/ChainUtils_autogen.h"
#include "dawn/native/Limits.h"
namespace dawn::native {
@ -93,18 +94,22 @@ void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts&
}
}
MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts) {
MaybeError ValidateBindingCounts(const CombinedLimits& limits, const BindingCounts& bindingCounts) {
DAWN_INVALID_IF(
bindingCounts.dynamicUniformBufferCount > kMaxDynamicUniformBuffersPerPipelineLayout,
bindingCounts.dynamicUniformBufferCount >
limits.v1.maxDynamicUniformBuffersPerPipelineLayout,
"The number of dynamic uniform buffers (%u) exceeds the maximum per-pipeline-layout "
"limit (%u).",
bindingCounts.dynamicUniformBufferCount, kMaxDynamicUniformBuffersPerPipelineLayout);
bindingCounts.dynamicUniformBufferCount,
limits.v1.maxDynamicUniformBuffersPerPipelineLayout);
DAWN_INVALID_IF(
bindingCounts.dynamicStorageBufferCount > kMaxDynamicStorageBuffersPerPipelineLayout,
bindingCounts.dynamicStorageBufferCount >
limits.v1.maxDynamicStorageBuffersPerPipelineLayout,
"The number of dynamic storage buffers (%u) exceeds the maximum per-pipeline-layout "
"limit (%u).",
bindingCounts.dynamicStorageBufferCount, kMaxDynamicStorageBuffersPerPipelineLayout);
bindingCounts.dynamicStorageBufferCount,
limits.v1.maxDynamicStorageBuffersPerPipelineLayout);
for (SingleShaderStage stage : IterateStages(kAllStages)) {
DAWN_INVALID_IF(

View File

@ -29,13 +29,6 @@
namespace dawn::native {
// Not a real WebGPU limit, but the sum of the two limits is useful for internal optimizations.
static constexpr uint32_t kMaxDynamicBuffersPerPipelineLayout =
kMaxDynamicUniformBuffersPerPipelineLayout + kMaxDynamicStorageBuffersPerPipelineLayout;
static constexpr BindingIndex kMaxDynamicBuffersPerPipelineLayoutTyped =
BindingIndex(kMaxDynamicBuffersPerPipelineLayout);
// Not a real WebGPU limit, but used to optimize parts of Dawn which expect valid usage of the
// API. There should never be more bindings than the max per stage, for each stage.
static constexpr uint32_t kMaxBindingsPerPipelineLayout =
@ -87,9 +80,11 @@ struct BindingCounts {
PerStage<PerStageBindingCounts> perStage;
};
struct CombinedLimits;
void IncrementBindingCounts(BindingCounts* bindingCounts, const BindGroupLayoutEntry& entry);
void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts& rhs);
MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts);
MaybeError ValidateBindingCounts(const CombinedLimits& limits, const BindingCounts& bindingCounts);
// For buffer size validation
using RequiredBufferSizes = ityp::array<BindGroupIndex, std::vector<uint64_t>, kMaxBindGroups>;

View File

@ -32,6 +32,13 @@
#define LIMITS_MAX_BUFFER_SIZE(X) \
X(Maximum, maxBufferSize, 0x10000000, 0x40000000, 0x80000000)
// Tiers for limits related to resource bindings.
// TODO(crbug.com/dawn/685): Define these better. For now, use two tiers where one
// offers slightly better than default limits.
#define LIMITS_RESOURCE_BINDINGS(X) \
X(Maximum, maxDynamicUniformBuffersPerPipelineLayout, 8, 10) \
X(Maximum, maxDynamicStorageBuffersPerPipelineLayout, 4, 8) \
// TODO(crbug.com/dawn/685):
// These limits don't have tiers yet. Define two tiers with the same values since the macros
// in this file expect more than one tier.
@ -42,8 +49,6 @@
X(Maximum, maxTextureArrayLayers, 256, 256) \
X(Maximum, maxBindGroups, 4, 4) \
X(Maximum, maxBindingsPerBindGroup, 640, 640) \
X(Maximum, maxDynamicUniformBuffersPerPipelineLayout, 8, 8) \
X(Maximum, maxDynamicStorageBuffersPerPipelineLayout, 4, 4) \
X(Maximum, maxSampledTexturesPerShaderStage, 16, 16) \
X(Maximum, maxSamplersPerShaderStage, 16, 16) \
X(Maximum, maxStorageBuffersPerShaderStage, 8, 8) \
@ -71,12 +76,14 @@
X(LIMITS_WORKGROUP_STORAGE_SIZE) \
X(LIMITS_STORAGE_BUFFER_BINDING_SIZE) \
X(LIMITS_MAX_BUFFER_SIZE) \
X(LIMITS_RESOURCE_BINDINGS) \
X(LIMITS_OTHER)
#define LIMITS(X) \
LIMITS_WORKGROUP_STORAGE_SIZE(X) \
LIMITS_STORAGE_BUFFER_BINDING_SIZE(X) \
LIMITS_MAX_BUFFER_SIZE(X) \
LIMITS_RESOURCE_BINDINGS(X) \
LIMITS_OTHER(X)
namespace dawn::native {

View File

@ -50,7 +50,7 @@ MaybeError ValidatePipelineLayoutDescriptor(DeviceBase* device,
descriptor->bindGroupLayouts[i]->GetBindingCountInfo());
}
DAWN_TRY(ValidateBindingCounts(bindingCounts));
DAWN_TRY(ValidateBindingCounts(device->GetLimits(), bindingCounts));
return {};
}

View File

@ -269,36 +269,45 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
// CBVs/UAVs/SRVs for bind group are a root descriptor table
// - (maxBindGroups)
// Samplers for each bind group are a root descriptor table
// - (2 * maxDynamicBuffers)
// Each dynamic buffer is a root descriptor
// - dynamic uniform buffers - root descriptor
// - dynamic storage buffers - root descriptor plus a root constant for the size
// RESERVED:
// - 3 = max of:
// - 2 root constants for the baseVertex/baseInstance constants.
// - 3 root constants for num workgroups X, Y, Z
// - 4 root constants (kMaxDynamicStorageBuffersPerPipelineLayout) for dynamic storage
// buffer lengths.
static constexpr uint32_t kReservedSlots = 7;
static constexpr uint32_t kReservedSlots = 3;
// Costs:
// - bind group: 2 = 1 cbv/uav/srv table + 1 sampler table
// - dynamic uniform buffer: 2 slots for a root descriptor
// - dynamic storage buffer: 3 slots for a root descriptor + root constant
// Available slots after base limits considered.
uint32_t availableRootSignatureSlots =
kMaxRootSignatureSize - kReservedSlots -
2 * (limits->v1.maxBindGroups + limits->v1.maxDynamicUniformBuffersPerPipelineLayout +
limits->v1.maxDynamicStorageBuffersPerPipelineLayout);
kMaxRootSignatureSize - kReservedSlots - 2 * limits->v1.maxBindGroups -
2 * limits->v1.maxDynamicUniformBuffersPerPipelineLayout -
3 * limits->v1.maxDynamicStorageBuffersPerPipelineLayout;
// Because we need either:
// - 1 cbv/uav/srv table + 1 sampler table
// - 2 slots for a root descriptor
uint32_t availableDynamicBufferOrBindGroup = availableRootSignatureSlots / 2;
while (availableRootSignatureSlots >= 2) {
// Start by incrementing maxDynamicStorageBuffersPerPipelineLayout since the
// default is just 4 and developers likely want more. This scheme currently
// gets us to 8.
if (availableRootSignatureSlots >= 3) {
limits->v1.maxDynamicStorageBuffersPerPipelineLayout += 1;
availableRootSignatureSlots -= 3;
}
if (availableRootSignatureSlots >= 2) {
limits->v1.maxBindGroups += 1;
availableRootSignatureSlots -= 2;
}
if (availableRootSignatureSlots >= 2) {
limits->v1.maxDynamicUniformBuffersPerPipelineLayout += 1;
availableRootSignatureSlots -= 2;
}
}
// We can either have a bind group, a dyn uniform buffer or a dyn storage buffer.
// Distribute evenly.
limits->v1.maxBindGroups += availableDynamicBufferOrBindGroup / 3;
limits->v1.maxDynamicUniformBuffersPerPipelineLayout += availableDynamicBufferOrBindGroup / 3;
limits->v1.maxDynamicStorageBuffersPerPipelineLayout +=
(availableDynamicBufferOrBindGroup - 2 * (availableDynamicBufferOrBindGroup / 3));
ASSERT(2 * (limits->v1.maxBindGroups + limits->v1.maxDynamicUniformBuffersPerPipelineLayout +
limits->v1.maxDynamicStorageBuffersPerPipelineLayout) <=
ASSERT(2 * limits->v1.maxBindGroups + 2 * limits->v1.maxDynamicUniformBuffersPerPipelineLayout +
3 * limits->v1.maxDynamicStorageBuffersPerPipelineLayout <=
kMaxRootSignatureSize - kReservedSlots);
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-attributes-numthreads

View File

@ -47,8 +47,7 @@ class BindGroup final : public BindGroupBase, public PlacementAllocated {
void SetSamplerAllocationEntry(Ref<SamplerHeapCacheEntry> entry);
using DynamicStorageBufferLengths =
ityp::stack_vec<uint32_t, uint32_t, kMaxDynamicStorageBuffersPerPipelineLayout>;
using DynamicStorageBufferLengths = ityp::stack_vec<uint32_t, uint32_t, 4u>;
const DynamicStorageBufferLengths& GetDynamicStorageBufferLengths() const;
private:

View File

@ -442,7 +442,7 @@ class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
BindGroup* group = ToBackend(mBindGroups[index]);
ApplyBindGroup(commandList, ToBackend(mPipelineLayout), index, group,
mDynamicOffsetCounts[index], mDynamicOffsets[index].data());
mDynamicOffsets[index]);
}
AfterApply();
@ -484,10 +484,7 @@ class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
const PipelineLayout* pipelineLayout,
BindGroupIndex index,
BindGroup* group,
uint32_t dynamicOffsetCountIn,
const uint64_t* dynamicOffsetsIn) {
ityp::span<BindingIndex, const uint64_t> dynamicOffsets(dynamicOffsetsIn,
BindingIndex(dynamicOffsetCountIn));
const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
ASSERT(dynamicOffsets.size() == group->GetLayout()->GetDynamicBufferCount());
// Usually, the application won't set the same offsets many times,

View File

@ -139,6 +139,7 @@ MaybeError PipelineLayout::Initialize() {
// Init root descriptors in root signatures for dynamic buffer bindings.
// These are packed at the beginning of the layout binding info.
mDynamicRootParameterIndices[group].resize(bindGroupLayout->GetDynamicBufferCount());
for (BindingIndex dynamicBindingIndex{0};
dynamicBindingIndex < bindGroupLayout->GetDynamicBufferCount();
++dynamicBindingIndex) {
@ -224,8 +225,6 @@ MaybeError PipelineLayout::Initialize() {
ASSERT(mDynamicStorageBufferLengthInfo[group].bindingAndRegisterOffsets.size() ==
bgl->GetBindingCountInfo().dynamicStorageBufferCount);
}
ASSERT(dynamicStorageBufferLengthsShaderRegisterOffset <=
kMaxDynamicStorageBuffersPerPipelineLayout);
if (dynamicStorageBufferLengthsShaderRegisterOffset > 0) {
D3D12_ROOT_PARAMETER dynamicStorageBufferLengthConstants{};
@ -322,7 +321,6 @@ PipelineLayout::GetDynamicStorageBufferLengthInfo() const {
uint32_t PipelineLayout::GetDynamicRootParameterIndex(BindGroupIndex group,
BindingIndex bindingIndex) const {
ASSERT(group < kMaxBindGroupsTyped);
ASSERT(bindingIndex < kMaxDynamicBuffersPerPipelineLayoutTyped);
ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).buffer.hasDynamicOffset);
ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).visibility !=
wgpu::ShaderStage::None);

View File

@ -19,6 +19,7 @@
#include "dawn/common/Constants.h"
#include "dawn/common/ityp_array.h"
#include "dawn/common/ityp_vector.h"
#include "dawn/native/BindingInfo.h"
#include "dawn/native/PipelineLayout.h"
#include "dawn/native/d3d12/d3d12_platform.h"
@ -91,9 +92,7 @@ class PipelineLayout final : public PipelineLayoutBase {
ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mCbvUavSrvRootParameterInfo;
ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mSamplerRootParameterInfo;
ityp::array<BindGroupIndex,
ityp::array<BindingIndex, uint32_t, kMaxDynamicBuffersPerPipelineLayout>,
kMaxBindGroups>
ityp::array<BindGroupIndex, ityp::vector<BindingIndex, uint32_t>, kMaxBindGroups>
mDynamicRootParameterIndices;
DynamicStorageBufferLengthInfo mDynamicStorageBufferLengthInfo;
uint32_t mFirstIndexOffsetParameterIndex;

View File

@ -716,10 +716,10 @@ class Adapter : public AdapterBase {
// buffers, 128 textures, and 16 samplers. Mac GPU families
// with tier 2 argument buffers support 500000 buffers and
// textures, and 1024 unique samplers
limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
limits->v1.maxUniformBuffersPerShaderStage;
limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
limits->v1.maxStorageBuffersPerShaderStage;
// Without argument buffers, we have slots [0 -> 29], inclusive, which is 30 total.
// 8 are used by maxVertexBuffers.
limits->v1.maxDynamicUniformBuffersPerPipelineLayout = 11u;
limits->v1.maxDynamicStorageBuffersPerPipelineLayout = 11u;
// The WebGPU limit is the limit across all vertex buffers, combined.
limits->v1.maxVertexAttributes =

View File

@ -442,8 +442,7 @@ class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
void Apply(Encoder encoder) {
BeforeApply();
for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]),
mDynamicOffsetCounts[index], mDynamicOffsets[index].data(),
ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), mDynamicOffsets[index],
ToBackend(mPipelineLayout));
}
AfterApply();
@ -458,11 +457,8 @@ class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
id<MTLComputeCommandEncoder> compute,
BindGroupIndex index,
BindGroup* group,
uint32_t dynamicOffsetCount,
uint64_t* dynamicOffsets,
const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets,
PipelineLayout* pipelineLayout) {
uint32_t currentDynamicBufferIndex = 0;
// TODO(crbug.com/dawn/854): Maintain buffers and offsets arrays in BindGroup
// so that we only have to do one setVertexBuffers and one setFragmentBuffers
// call here.
@ -504,8 +500,8 @@ class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
// TODO(crbug.com/dawn/854): Record bound buffer status to use
// setBufferOffset to achieve better performance.
if (bindingInfo.buffer.hasDynamicOffset) {
offset += dynamicOffsets[currentDynamicBufferIndex];
currentDynamicBufferIndex++;
// Dynamic buffers are packed at the front of BindingIndices.
offset += dynamicOffsets[bindingIndex];
}
if (hasVertStage) {

View File

@ -231,8 +231,7 @@ class BindGroupTracker : public BindGroupTrackerBase<false, uint64_t> {
void Apply(const OpenGLFunctions& gl) {
BeforeApply();
for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsetCounts[index],
mDynamicOffsets[index].data());
ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsets[index]);
}
AfterApply();
}
@ -241,10 +240,8 @@ class BindGroupTracker : public BindGroupTrackerBase<false, uint64_t> {
void ApplyBindGroup(const OpenGLFunctions& gl,
BindGroupIndex index,
BindGroupBase* group,
uint32_t dynamicOffsetCount,
uint64_t* dynamicOffsets) {
const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
const auto& indices = ToBackend(mPipelineLayout)->GetBindingIndexInfo()[index];
uint32_t currentDynamicOffsetIndex = 0;
for (BindingIndex bindingIndex{0}; bindingIndex < group->GetLayout()->GetBindingCount();
++bindingIndex) {
@ -268,8 +265,8 @@ class BindGroupTracker : public BindGroupTrackerBase<false, uint64_t> {
GLuint offset = binding.offset;
if (bindingInfo.buffer.hasDynamicOffset) {
offset += dynamicOffsets[currentDynamicOffsetIndex];
++currentDynamicOffsetIndex;
// Dynamic buffers are packed at the front of BindingIndices.
offset += dynamicOffsets[bindingIndex];
}
GLenum target;

View File

@ -141,12 +141,12 @@ class DescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
BeforeApply();
for (BindGroupIndex dirtyIndex : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
VkDescriptorSet set = ToBackend(mBindGroups[dirtyIndex])->GetHandle();
uint32_t count = static_cast<uint32_t>(mDynamicOffsets[dirtyIndex].size());
const uint32_t* dynamicOffset =
mDynamicOffsetCounts[dirtyIndex] > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr;
device->fn.CmdBindDescriptorSets(recordingContext->commandBuffer, bindPoint,
ToBackend(mPipelineLayout)->GetHandle(),
static_cast<uint32_t>(dirtyIndex), 1, &*set,
mDynamicOffsetCounts[dirtyIndex], dynamicOffset);
count > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr;
device->fn.CmdBindDescriptorSets(
recordingContext->commandBuffer, bindPoint, ToBackend(mPipelineLayout)->GetHandle(),
static_cast<uint32_t>(dirtyIndex), 1, &*set, count, dynamicOffset);
}
AfterApply();
}

View File

@ -15,10 +15,12 @@
#include <algorithm>
#include <limits>
#include <string>
#include <vector>
#include "dawn/common/Math.h"
#include "dawn/common/Platform.h"
#include "dawn/tests/DawnTest.h"
#include "dawn/utils/ComboRenderPipelineDescriptor.h"
#include "dawn/utils/WGPUHelpers.h"
class MaxLimitTests : public DawnTest {
@ -240,6 +242,156 @@ TEST_P(MaxLimitTests, MaxBufferBindingSize) {
}
}
// Test using the maximum number of dynamic uniform and storage buffers
TEST_P(MaxLimitTests, MaxDynamicBuffers) {
wgpu::Limits limits = GetSupportedLimits().limits;
std::vector<wgpu::BindGroupLayoutEntry> bglEntries;
std::vector<wgpu::BindGroupEntry> bgEntries;
// Binding number counter which is bumped as we create bind group layout
// entries.
uint32_t bindingNumber = 1u;
// Lambda to create a buffer. The binding number is written at an offset of
// 256 bytes. The test binds at a 256-byte dynamic offset and checks that the
// contents of the buffer are equal to the binding number.
std::vector<uint32_t> bufferData(1 + 256 / sizeof(uint32_t));
auto MakeBuffer = [&](wgpu::BufferUsage usage) {
*bufferData.rbegin() = bindingNumber;
return utils::CreateBufferFromData(device, bufferData.data(),
sizeof(uint32_t) * bufferData.size(), usage);
};
// Create as many dynamic uniform buffers as the limits allow.
for (uint32_t i = 0u; i < limits.maxDynamicUniformBuffersPerPipelineLayout &&
i < 2 * limits.maxUniformBuffersPerShaderStage;
++i) {
wgpu::Buffer buffer = MakeBuffer(wgpu::BufferUsage::Uniform);
bglEntries.push_back(utils::BindingLayoutEntryInitializationHelper{
bindingNumber,
// When we surpass the per-stage limit, switch to the fragment shader.
i < limits.maxUniformBuffersPerShaderStage ? wgpu::ShaderStage::Vertex
: wgpu::ShaderStage::Fragment,
wgpu::BufferBindingType::Uniform, true});
bgEntries.push_back(
utils::BindingInitializationHelper(bindingNumber, buffer, 0, sizeof(uint32_t))
.GetAsBinding());
++bindingNumber;
}
// Create as many dynamic storage buffers as the limits allow.
for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout &&
i < 2 * limits.maxStorageBuffersPerShaderStage;
++i) {
wgpu::Buffer buffer = MakeBuffer(wgpu::BufferUsage::Storage);
bglEntries.push_back(utils::BindingLayoutEntryInitializationHelper{
bindingNumber,
// When we surpass the per-stage limit, switch to the fragment shader.
i < limits.maxStorageBuffersPerShaderStage ? wgpu::ShaderStage::Vertex
: wgpu::ShaderStage::Fragment,
wgpu::BufferBindingType::ReadOnlyStorage, true});
bgEntries.push_back(
utils::BindingInitializationHelper(bindingNumber, buffer, 0, sizeof(uint32_t))
.GetAsBinding());
++bindingNumber;
}
// Create the bind group layout.
wgpu::BindGroupLayoutDescriptor bglDesc;
bglDesc.entryCount = static_cast<uint32_t>(bglEntries.size());
bglDesc.entries = bglEntries.data();
wgpu::BindGroupLayout bgl = device.CreateBindGroupLayout(&bglDesc);
// Create the bind group.
wgpu::BindGroupDescriptor bgDesc;
bgDesc.layout = bgl;
bgDesc.entryCount = static_cast<uint32_t>(bgEntries.size());
bgDesc.entries = bgEntries.data();
wgpu::BindGroup bindGroup = device.CreateBindGroup(&bgDesc);
// Generate binding declarations at the top of the shader.
std::ostringstream wgslShader;
for (const auto& binding : bglEntries) {
if (binding.buffer.type == wgpu::BufferBindingType::Uniform) {
wgslShader << "@group(0) @binding(" << binding.binding << ") var<uniform> b"
<< binding.binding << ": u32;\n";
} else if (binding.buffer.type == wgpu::BufferBindingType::ReadOnlyStorage) {
wgslShader << "@group(0) @binding(" << binding.binding << ") var<storage, read> b"
<< binding.binding << ": u32;\n";
}
}
// Generate a vertex shader which rasterizes primitives outside the viewport
// if the bound buffer contents are not expected.
wgslShader << "@vertex fn vert_main() -> @builtin(position) vec4f {\n";
for (const auto& binding : bglEntries) {
if (binding.visibility == wgpu::ShaderStage::Vertex) {
// If the value is not what is expected, return a vertex that will be clipped.
wgslShader << " if (b" << binding.binding << " != " << binding.binding
<< "u) { return vec4f(10.0, 10.0, 10.0, 1.0); }\n";
}
}
wgslShader << " return vec4f(0.0, 0.0, 0.5, 1.0);\n";
wgslShader << "}\n";
// Generate a fragment shader which discards fragments if the bound buffer
// contents are not expected.
wgslShader << "@fragment fn frag_main() -> @location(0) u32 {\n";
for (const auto& binding : bglEntries) {
if (binding.visibility == wgpu::ShaderStage::Fragment) {
// If the value is not what is expected, discard.
wgslShader << " if (b" << binding.binding << " != " << binding.binding
<< "u) { discard; }\n";
}
}
wgslShader << " return 1u;\n";
wgslShader << "}\n";
wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, wgslShader.str().c_str());
// Create a render target. Its contents will be 1 if the test passes.
wgpu::TextureDescriptor renderTargetDesc;
renderTargetDesc.size = {1, 1};
renderTargetDesc.format = wgpu::TextureFormat::R8Uint;
renderTargetDesc.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::RenderAttachment;
wgpu::Texture renderTarget = device.CreateTexture(&renderTargetDesc);
utils::ComboRenderPipelineDescriptor pipelineDesc;
pipelineDesc.layout = utils::MakePipelineLayout(device, {bgl});
pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
pipelineDesc.vertex.module = shaderModule;
pipelineDesc.vertex.entryPoint = "vert_main";
pipelineDesc.cFragment.module = shaderModule;
pipelineDesc.cFragment.entryPoint = "frag_main";
pipelineDesc.cTargets[0].format = renderTargetDesc.format;
wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDesc);
utils::ComboRenderPassDescriptor rpDesc({renderTarget.CreateView()});
rpDesc.cColorAttachments[0].clearValue = {};
rpDesc.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear;
rpDesc.cColorAttachments[0].storeOp = wgpu::StoreOp::Store;
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&rpDesc);
// Bind the bind group with all resources at a 256-byte dynamic offset, and draw.
std::vector<uint32_t> dynamicOffsets(bglEntries.size(), 256u);
pass.SetBindGroup(0, bindGroup, dynamicOffsets.size(), dynamicOffsets.data());
pass.SetPipeline(pipeline);
pass.Draw(1);
pass.End();
wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands);
uint32_t expected = 1u;
EXPECT_TEXTURE_EQ(&expected, renderTarget, {0, 0}, {1, 1});
}
DAWN_INSTANTIATE_TEST(MaxLimitTests,
D3D12Backend(),
MetalBackend(),

View File

@ -28,7 +28,8 @@ TEST_P(PipelineLayoutTests, DynamicBuffersOverflow) {
wgpu::BindGroupLayout bglA;
{
std::vector<wgpu::BindGroupLayoutEntry> entries;
for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; i++) {
for (uint32_t i = 0;
i < GetSupportedLimits().limits.maxDynamicStorageBuffersPerPipelineLayout; i++) {
wgpu::BindGroupLayoutEntry entry;
entry.binding = i;
entry.visibility = wgpu::ShaderStage::Compute;

View File

@ -1476,22 +1476,24 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
std::vector<wgpu::BindGroupLayoutEntry> maxStorageDB;
std::vector<wgpu::BindGroupLayoutEntry> maxReadonlyStorageDB;
wgpu::Limits limits = GetSupportedLimits().limits;
// In this test, we use all the same shader stage. Ensure that this does not exceed the
// per-stage limit.
static_assert(kMaxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage);
static_assert(kMaxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage);
ASSERT(limits.maxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage);
ASSERT(limits.maxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage);
for (uint32_t i = 0; i < kMaxDynamicUniformBuffersPerPipelineLayout; ++i) {
for (uint32_t i = 0; i < limits.maxDynamicUniformBuffersPerPipelineLayout; ++i) {
maxUniformDB.push_back(utils::BindingLayoutEntryInitializationHelper(
i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform, true));
}
for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) {
for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout; ++i) {
maxStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage, true));
}
for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) {
for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout; ++i) {
maxReadonlyStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::ReadOnlyStorage, true));
}
@ -1561,7 +1563,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
// Check dynamic uniform buffers exceed maximum in bind group layout.
{
maxUniformDB.push_back(utils::BindingLayoutEntryInitializationHelper(
kMaxDynamicUniformBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
limits.maxDynamicUniformBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
wgpu::BufferBindingType::Uniform, true));
TestCreateBindGroupLayout(maxUniformDB.data(), maxUniformDB.size(), false);
}
@ -1569,7 +1571,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
// Check dynamic storage buffers exceed maximum in bind group layout.
{
maxStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
kMaxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
limits.maxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
wgpu::BufferBindingType::Storage, true));
TestCreateBindGroupLayout(maxStorageDB.data(), maxStorageDB.size(), false);
}
@ -1577,7 +1579,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
// Check dynamic readonly storage buffers exceed maximum in bind group layout.
{
maxReadonlyStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
kMaxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
limits.maxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
wgpu::BufferBindingType::ReadOnlyStorage, true));
TestCreateBindGroupLayout(maxReadonlyStorageDB.data(), maxReadonlyStorageDB.size(), false);
}