Support higher limits for maxDynamicUniform/StorageBuffers
The higher tier is currently supported by all D3D12 devices, all Metal devices, and most Vulkan devices. Bug: dawn:685 Change-Id: I5bcb778b92a073c9c1af943acee193073c0741ff Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121101 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Austin Eng <enga@chromium.org> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
parent
8ef7311413
commit
2ea4aac080
|
@ -38,10 +38,6 @@ static constexpr uint32_t kMaxStorageBuffersPerShaderStage = 8;
|
|||
static constexpr uint32_t kMaxStorageTexturesPerShaderStage = 4;
|
||||
static constexpr uint32_t kMaxUniformBuffersPerShaderStage = 12;
|
||||
|
||||
// Per pipeline layout limits
|
||||
static constexpr uint32_t kMaxDynamicUniformBuffersPerPipelineLayout = 8u;
|
||||
static constexpr uint32_t kMaxDynamicStorageBuffersPerPipelineLayout = 4u;
|
||||
|
||||
// Indirect command sizes
|
||||
static constexpr uint64_t kDispatchIndirectSize = 3 * sizeof(uint32_t);
|
||||
static constexpr uint64_t kDrawIndirectSize = 4 * sizeof(uint32_t);
|
||||
|
|
|
@ -70,12 +70,6 @@ MaybeError AdapterBase::Initialize() {
|
|||
std::min(mLimits.v1.maxStorageTexturesPerShaderStage, kMaxStorageTexturesPerShaderStage);
|
||||
mLimits.v1.maxUniformBuffersPerShaderStage =
|
||||
std::min(mLimits.v1.maxUniformBuffersPerShaderStage, kMaxUniformBuffersPerShaderStage);
|
||||
mLimits.v1.maxDynamicUniformBuffersPerPipelineLayout =
|
||||
std::min(mLimits.v1.maxDynamicUniformBuffersPerPipelineLayout,
|
||||
kMaxDynamicUniformBuffersPerPipelineLayout);
|
||||
mLimits.v1.maxDynamicStorageBuffersPerPipelineLayout =
|
||||
std::min(mLimits.v1.maxDynamicStorageBuffersPerPipelineLayout,
|
||||
kMaxDynamicStorageBuffersPerPipelineLayout);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -271,7 +271,8 @@ MaybeError ValidateBindGroupLayoutDescriptor(DeviceBase* device,
|
|||
bindingsSet.insert(bindingNumber);
|
||||
}
|
||||
|
||||
DAWN_TRY_CONTEXT(ValidateBindingCounts(bindingCounts), "validating binding counts");
|
||||
DAWN_TRY_CONTEXT(ValidateBindingCounts(device->GetLimits(), bindingCounts),
|
||||
"validating binding counts");
|
||||
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#ifndef SRC_DAWN_NATIVE_BINDGROUPTRACKER_H_
|
||||
#define SRC_DAWN_NATIVE_BINDGROUPTRACKER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
|
||||
|
@ -54,8 +55,9 @@ class BindGroupTrackerBase {
|
|||
}
|
||||
|
||||
mBindGroups[index] = bindGroup;
|
||||
mDynamicOffsetCounts[index] = dynamicOffsetCount;
|
||||
SetDynamicOffsets(mDynamicOffsets[index].data(), dynamicOffsetCount, dynamicOffsets);
|
||||
mDynamicOffsets[index].resize(BindingIndex(dynamicOffsetCount));
|
||||
std::copy(dynamicOffsets, dynamicOffsets + dynamicOffsetCount,
|
||||
mDynamicOffsets[index].begin());
|
||||
}
|
||||
|
||||
void OnSetPipeline(PipelineBase* pipeline) { mPipelineLayout = pipeline->GetLayout(); }
|
||||
|
@ -105,10 +107,7 @@ class BindGroupTrackerBase {
|
|||
BindGroupLayoutMask mDirtyBindGroupsObjectChangedOrIsDynamic = 0;
|
||||
BindGroupLayoutMask mBindGroupLayoutsMask = 0;
|
||||
ityp::array<BindGroupIndex, BindGroupBase*, kMaxBindGroups> mBindGroups = {};
|
||||
ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mDynamicOffsetCounts = {};
|
||||
ityp::array<BindGroupIndex,
|
||||
std::array<DynamicOffset, kMaxDynamicBuffersPerPipelineLayout>,
|
||||
kMaxBindGroups>
|
||||
ityp::array<BindGroupIndex, ityp::vector<BindingIndex, DynamicOffset>, kMaxBindGroups>
|
||||
mDynamicOffsets = {};
|
||||
|
||||
// |mPipelineLayout| is the current pipeline layout set on the command buffer.
|
||||
|
@ -116,25 +115,6 @@ class BindGroupTrackerBase {
|
|||
// to the bind group bindings.
|
||||
PipelineLayoutBase* mPipelineLayout = nullptr;
|
||||
PipelineLayoutBase* mLastAppliedPipelineLayout = nullptr;
|
||||
|
||||
private:
|
||||
// We have two overloads here because offsets in Vulkan are uint32_t but uint64_t
|
||||
// in other backends.
|
||||
static void SetDynamicOffsets(uint64_t* data,
|
||||
uint32_t dynamicOffsetCount,
|
||||
uint32_t* dynamicOffsets) {
|
||||
for (uint32_t i = 0; i < dynamicOffsetCount; ++i) {
|
||||
data[i] = static_cast<uint64_t>(dynamicOffsets[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void SetDynamicOffsets(uint32_t* data,
|
||||
uint32_t dynamicOffsetCount,
|
||||
uint32_t* dynamicOffsets) {
|
||||
if (dynamicOffsetCount > 0) {
|
||||
memcpy(data, dynamicOffsets, sizeof(uint32_t) * dynamicOffsetCount);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace dawn::native
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "dawn/native/BindingInfo.h"
|
||||
|
||||
#include "dawn/native/ChainUtils_autogen.h"
|
||||
#include "dawn/native/Limits.h"
|
||||
|
||||
namespace dawn::native {
|
||||
|
||||
|
@ -93,18 +94,22 @@ void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts&
|
|||
}
|
||||
}
|
||||
|
||||
MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts) {
|
||||
MaybeError ValidateBindingCounts(const CombinedLimits& limits, const BindingCounts& bindingCounts) {
|
||||
DAWN_INVALID_IF(
|
||||
bindingCounts.dynamicUniformBufferCount > kMaxDynamicUniformBuffersPerPipelineLayout,
|
||||
bindingCounts.dynamicUniformBufferCount >
|
||||
limits.v1.maxDynamicUniformBuffersPerPipelineLayout,
|
||||
"The number of dynamic uniform buffers (%u) exceeds the maximum per-pipeline-layout "
|
||||
"limit (%u).",
|
||||
bindingCounts.dynamicUniformBufferCount, kMaxDynamicUniformBuffersPerPipelineLayout);
|
||||
bindingCounts.dynamicUniformBufferCount,
|
||||
limits.v1.maxDynamicUniformBuffersPerPipelineLayout);
|
||||
|
||||
DAWN_INVALID_IF(
|
||||
bindingCounts.dynamicStorageBufferCount > kMaxDynamicStorageBuffersPerPipelineLayout,
|
||||
bindingCounts.dynamicStorageBufferCount >
|
||||
limits.v1.maxDynamicStorageBuffersPerPipelineLayout,
|
||||
"The number of dynamic storage buffers (%u) exceeds the maximum per-pipeline-layout "
|
||||
"limit (%u).",
|
||||
bindingCounts.dynamicStorageBufferCount, kMaxDynamicStorageBuffersPerPipelineLayout);
|
||||
bindingCounts.dynamicStorageBufferCount,
|
||||
limits.v1.maxDynamicStorageBuffersPerPipelineLayout);
|
||||
|
||||
for (SingleShaderStage stage : IterateStages(kAllStages)) {
|
||||
DAWN_INVALID_IF(
|
||||
|
|
|
@ -29,13 +29,6 @@
|
|||
|
||||
namespace dawn::native {
|
||||
|
||||
// Not a real WebGPU limit, but the sum of the two limits is useful for internal optimizations.
|
||||
static constexpr uint32_t kMaxDynamicBuffersPerPipelineLayout =
|
||||
kMaxDynamicUniformBuffersPerPipelineLayout + kMaxDynamicStorageBuffersPerPipelineLayout;
|
||||
|
||||
static constexpr BindingIndex kMaxDynamicBuffersPerPipelineLayoutTyped =
|
||||
BindingIndex(kMaxDynamicBuffersPerPipelineLayout);
|
||||
|
||||
// Not a real WebGPU limit, but used to optimize parts of Dawn which expect valid usage of the
|
||||
// API. There should never be more bindings than the max per stage, for each stage.
|
||||
static constexpr uint32_t kMaxBindingsPerPipelineLayout =
|
||||
|
@ -87,9 +80,11 @@ struct BindingCounts {
|
|||
PerStage<PerStageBindingCounts> perStage;
|
||||
};
|
||||
|
||||
struct CombinedLimits;
|
||||
|
||||
void IncrementBindingCounts(BindingCounts* bindingCounts, const BindGroupLayoutEntry& entry);
|
||||
void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts& rhs);
|
||||
MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts);
|
||||
MaybeError ValidateBindingCounts(const CombinedLimits& limits, const BindingCounts& bindingCounts);
|
||||
|
||||
// For buffer size validation
|
||||
using RequiredBufferSizes = ityp::array<BindGroupIndex, std::vector<uint64_t>, kMaxBindGroups>;
|
||||
|
|
|
@ -32,6 +32,13 @@
|
|||
#define LIMITS_MAX_BUFFER_SIZE(X) \
|
||||
X(Maximum, maxBufferSize, 0x10000000, 0x40000000, 0x80000000)
|
||||
|
||||
// Tiers for limits related to resource bindings.
|
||||
// TODO(crbug.com/dawn/685): Define these better. For now, use two tiers where one
|
||||
// offers slightly better than default limits.
|
||||
#define LIMITS_RESOURCE_BINDINGS(X) \
|
||||
X(Maximum, maxDynamicUniformBuffersPerPipelineLayout, 8, 10) \
|
||||
X(Maximum, maxDynamicStorageBuffersPerPipelineLayout, 4, 8) \
|
||||
|
||||
// TODO(crbug.com/dawn/685):
|
||||
// These limits don't have tiers yet. Define two tiers with the same values since the macros
|
||||
// in this file expect more than one tier.
|
||||
|
@ -42,8 +49,6 @@
|
|||
X(Maximum, maxTextureArrayLayers, 256, 256) \
|
||||
X(Maximum, maxBindGroups, 4, 4) \
|
||||
X(Maximum, maxBindingsPerBindGroup, 640, 640) \
|
||||
X(Maximum, maxDynamicUniformBuffersPerPipelineLayout, 8, 8) \
|
||||
X(Maximum, maxDynamicStorageBuffersPerPipelineLayout, 4, 4) \
|
||||
X(Maximum, maxSampledTexturesPerShaderStage, 16, 16) \
|
||||
X(Maximum, maxSamplersPerShaderStage, 16, 16) \
|
||||
X(Maximum, maxStorageBuffersPerShaderStage, 8, 8) \
|
||||
|
@ -71,12 +76,14 @@
|
|||
X(LIMITS_WORKGROUP_STORAGE_SIZE) \
|
||||
X(LIMITS_STORAGE_BUFFER_BINDING_SIZE) \
|
||||
X(LIMITS_MAX_BUFFER_SIZE) \
|
||||
X(LIMITS_RESOURCE_BINDINGS) \
|
||||
X(LIMITS_OTHER)
|
||||
|
||||
#define LIMITS(X) \
|
||||
LIMITS_WORKGROUP_STORAGE_SIZE(X) \
|
||||
LIMITS_STORAGE_BUFFER_BINDING_SIZE(X) \
|
||||
LIMITS_MAX_BUFFER_SIZE(X) \
|
||||
LIMITS_RESOURCE_BINDINGS(X) \
|
||||
LIMITS_OTHER(X)
|
||||
|
||||
namespace dawn::native {
|
||||
|
|
|
@ -50,7 +50,7 @@ MaybeError ValidatePipelineLayoutDescriptor(DeviceBase* device,
|
|||
descriptor->bindGroupLayouts[i]->GetBindingCountInfo());
|
||||
}
|
||||
|
||||
DAWN_TRY(ValidateBindingCounts(bindingCounts));
|
||||
DAWN_TRY(ValidateBindingCounts(device->GetLimits(), bindingCounts));
|
||||
return {};
|
||||
}
|
||||
|
||||
|
|
|
@ -269,36 +269,45 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
|||
// CBVs/UAVs/SRVs for bind group are a root descriptor table
|
||||
// - (maxBindGroups)
|
||||
// Samplers for each bind group are a root descriptor table
|
||||
// - (2 * maxDynamicBuffers)
|
||||
// Each dynamic buffer is a root descriptor
|
||||
// - dynamic uniform buffers - root descriptor
|
||||
// - dynamic storage buffers - root descriptor plus a root constant for the size
|
||||
// RESERVED:
|
||||
// - 3 = max of:
|
||||
// - 2 root constants for the baseVertex/baseInstance constants.
|
||||
// - 3 root constants for num workgroups X, Y, Z
|
||||
// - 4 root constants (kMaxDynamicStorageBuffersPerPipelineLayout) for dynamic storage
|
||||
// buffer lengths.
|
||||
static constexpr uint32_t kReservedSlots = 7;
|
||||
static constexpr uint32_t kReservedSlots = 3;
|
||||
|
||||
// Costs:
|
||||
// - bind group: 2 = 1 cbv/uav/srv table + 1 sampler table
|
||||
// - dynamic uniform buffer: 2 slots for a root descriptor
|
||||
// - dynamic storage buffer: 3 slots for a root descriptor + root constant
|
||||
|
||||
// Available slots after base limits considered.
|
||||
uint32_t availableRootSignatureSlots =
|
||||
kMaxRootSignatureSize - kReservedSlots -
|
||||
2 * (limits->v1.maxBindGroups + limits->v1.maxDynamicUniformBuffersPerPipelineLayout +
|
||||
limits->v1.maxDynamicStorageBuffersPerPipelineLayout);
|
||||
kMaxRootSignatureSize - kReservedSlots - 2 * limits->v1.maxBindGroups -
|
||||
2 * limits->v1.maxDynamicUniformBuffersPerPipelineLayout -
|
||||
3 * limits->v1.maxDynamicStorageBuffersPerPipelineLayout;
|
||||
|
||||
// Because we need either:
|
||||
// - 1 cbv/uav/srv table + 1 sampler table
|
||||
// - 2 slots for a root descriptor
|
||||
uint32_t availableDynamicBufferOrBindGroup = availableRootSignatureSlots / 2;
|
||||
while (availableRootSignatureSlots >= 2) {
|
||||
// Start by incrementing maxDynamicStorageBuffersPerPipelineLayout since the
|
||||
// default is just 4 and developers likely want more. This scheme currently
|
||||
// gets us to 8.
|
||||
if (availableRootSignatureSlots >= 3) {
|
||||
limits->v1.maxDynamicStorageBuffersPerPipelineLayout += 1;
|
||||
availableRootSignatureSlots -= 3;
|
||||
}
|
||||
if (availableRootSignatureSlots >= 2) {
|
||||
limits->v1.maxBindGroups += 1;
|
||||
availableRootSignatureSlots -= 2;
|
||||
}
|
||||
if (availableRootSignatureSlots >= 2) {
|
||||
limits->v1.maxDynamicUniformBuffersPerPipelineLayout += 1;
|
||||
availableRootSignatureSlots -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// We can either have a bind group, a dyn uniform buffer or a dyn storage buffer.
|
||||
// Distribute evenly.
|
||||
limits->v1.maxBindGroups += availableDynamicBufferOrBindGroup / 3;
|
||||
limits->v1.maxDynamicUniformBuffersPerPipelineLayout += availableDynamicBufferOrBindGroup / 3;
|
||||
limits->v1.maxDynamicStorageBuffersPerPipelineLayout +=
|
||||
(availableDynamicBufferOrBindGroup - 2 * (availableDynamicBufferOrBindGroup / 3));
|
||||
|
||||
ASSERT(2 * (limits->v1.maxBindGroups + limits->v1.maxDynamicUniformBuffersPerPipelineLayout +
|
||||
limits->v1.maxDynamicStorageBuffersPerPipelineLayout) <=
|
||||
ASSERT(2 * limits->v1.maxBindGroups + 2 * limits->v1.maxDynamicUniformBuffersPerPipelineLayout +
|
||||
3 * limits->v1.maxDynamicStorageBuffersPerPipelineLayout <=
|
||||
kMaxRootSignatureSize - kReservedSlots);
|
||||
|
||||
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-attributes-numthreads
|
||||
|
|
|
@ -47,8 +47,7 @@ class BindGroup final : public BindGroupBase, public PlacementAllocated {
|
|||
|
||||
void SetSamplerAllocationEntry(Ref<SamplerHeapCacheEntry> entry);
|
||||
|
||||
using DynamicStorageBufferLengths =
|
||||
ityp::stack_vec<uint32_t, uint32_t, kMaxDynamicStorageBuffersPerPipelineLayout>;
|
||||
using DynamicStorageBufferLengths = ityp::stack_vec<uint32_t, uint32_t, 4u>;
|
||||
const DynamicStorageBufferLengths& GetDynamicStorageBufferLengths() const;
|
||||
|
||||
private:
|
||||
|
|
|
@ -442,7 +442,7 @@ class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
|
|||
for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
|
||||
BindGroup* group = ToBackend(mBindGroups[index]);
|
||||
ApplyBindGroup(commandList, ToBackend(mPipelineLayout), index, group,
|
||||
mDynamicOffsetCounts[index], mDynamicOffsets[index].data());
|
||||
mDynamicOffsets[index]);
|
||||
}
|
||||
|
||||
AfterApply();
|
||||
|
@ -484,10 +484,7 @@ class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
|
|||
const PipelineLayout* pipelineLayout,
|
||||
BindGroupIndex index,
|
||||
BindGroup* group,
|
||||
uint32_t dynamicOffsetCountIn,
|
||||
const uint64_t* dynamicOffsetsIn) {
|
||||
ityp::span<BindingIndex, const uint64_t> dynamicOffsets(dynamicOffsetsIn,
|
||||
BindingIndex(dynamicOffsetCountIn));
|
||||
const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
|
||||
ASSERT(dynamicOffsets.size() == group->GetLayout()->GetDynamicBufferCount());
|
||||
|
||||
// Usually, the application won't set the same offsets many times,
|
||||
|
|
|
@ -139,6 +139,7 @@ MaybeError PipelineLayout::Initialize() {
|
|||
|
||||
// Init root descriptors in root signatures for dynamic buffer bindings.
|
||||
// These are packed at the beginning of the layout binding info.
|
||||
mDynamicRootParameterIndices[group].resize(bindGroupLayout->GetDynamicBufferCount());
|
||||
for (BindingIndex dynamicBindingIndex{0};
|
||||
dynamicBindingIndex < bindGroupLayout->GetDynamicBufferCount();
|
||||
++dynamicBindingIndex) {
|
||||
|
@ -224,8 +225,6 @@ MaybeError PipelineLayout::Initialize() {
|
|||
ASSERT(mDynamicStorageBufferLengthInfo[group].bindingAndRegisterOffsets.size() ==
|
||||
bgl->GetBindingCountInfo().dynamicStorageBufferCount);
|
||||
}
|
||||
ASSERT(dynamicStorageBufferLengthsShaderRegisterOffset <=
|
||||
kMaxDynamicStorageBuffersPerPipelineLayout);
|
||||
|
||||
if (dynamicStorageBufferLengthsShaderRegisterOffset > 0) {
|
||||
D3D12_ROOT_PARAMETER dynamicStorageBufferLengthConstants{};
|
||||
|
@ -322,7 +321,6 @@ PipelineLayout::GetDynamicStorageBufferLengthInfo() const {
|
|||
uint32_t PipelineLayout::GetDynamicRootParameterIndex(BindGroupIndex group,
|
||||
BindingIndex bindingIndex) const {
|
||||
ASSERT(group < kMaxBindGroupsTyped);
|
||||
ASSERT(bindingIndex < kMaxDynamicBuffersPerPipelineLayoutTyped);
|
||||
ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).buffer.hasDynamicOffset);
|
||||
ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).visibility !=
|
||||
wgpu::ShaderStage::None);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include "dawn/common/Constants.h"
|
||||
#include "dawn/common/ityp_array.h"
|
||||
#include "dawn/common/ityp_vector.h"
|
||||
#include "dawn/native/BindingInfo.h"
|
||||
#include "dawn/native/PipelineLayout.h"
|
||||
#include "dawn/native/d3d12/d3d12_platform.h"
|
||||
|
@ -91,9 +92,7 @@ class PipelineLayout final : public PipelineLayoutBase {
|
|||
|
||||
ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mCbvUavSrvRootParameterInfo;
|
||||
ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mSamplerRootParameterInfo;
|
||||
ityp::array<BindGroupIndex,
|
||||
ityp::array<BindingIndex, uint32_t, kMaxDynamicBuffersPerPipelineLayout>,
|
||||
kMaxBindGroups>
|
||||
ityp::array<BindGroupIndex, ityp::vector<BindingIndex, uint32_t>, kMaxBindGroups>
|
||||
mDynamicRootParameterIndices;
|
||||
DynamicStorageBufferLengthInfo mDynamicStorageBufferLengthInfo;
|
||||
uint32_t mFirstIndexOffsetParameterIndex;
|
||||
|
|
|
@ -716,10 +716,10 @@ class Adapter : public AdapterBase {
|
|||
// buffers, 128 textures, and 16 samplers. Mac GPU families
|
||||
// with tier 2 argument buffers support 500000 buffers and
|
||||
// textures, and 1024 unique samplers
|
||||
limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
|
||||
limits->v1.maxUniformBuffersPerShaderStage;
|
||||
limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
|
||||
limits->v1.maxStorageBuffersPerShaderStage;
|
||||
// Without argument buffers, we have slots [0 -> 29], inclusive, which is 30 total.
|
||||
// 8 are used by maxVertexBuffers.
|
||||
limits->v1.maxDynamicUniformBuffersPerPipelineLayout = 11u;
|
||||
limits->v1.maxDynamicStorageBuffersPerPipelineLayout = 11u;
|
||||
|
||||
// The WebGPU limit is the limit across all vertex buffers, combined.
|
||||
limits->v1.maxVertexAttributes =
|
||||
|
|
|
@ -442,8 +442,7 @@ class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
|
|||
void Apply(Encoder encoder) {
|
||||
BeforeApply();
|
||||
for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
|
||||
ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]),
|
||||
mDynamicOffsetCounts[index], mDynamicOffsets[index].data(),
|
||||
ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), mDynamicOffsets[index],
|
||||
ToBackend(mPipelineLayout));
|
||||
}
|
||||
AfterApply();
|
||||
|
@ -458,11 +457,8 @@ class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
|
|||
id<MTLComputeCommandEncoder> compute,
|
||||
BindGroupIndex index,
|
||||
BindGroup* group,
|
||||
uint32_t dynamicOffsetCount,
|
||||
uint64_t* dynamicOffsets,
|
||||
const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets,
|
||||
PipelineLayout* pipelineLayout) {
|
||||
uint32_t currentDynamicBufferIndex = 0;
|
||||
|
||||
// TODO(crbug.com/dawn/854): Maintain buffers and offsets arrays in BindGroup
|
||||
// so that we only have to do one setVertexBuffers and one setFragmentBuffers
|
||||
// call here.
|
||||
|
@ -504,8 +500,8 @@ class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
|
|||
// TODO(crbug.com/dawn/854): Record bound buffer status to use
|
||||
// setBufferOffset to achieve better performance.
|
||||
if (bindingInfo.buffer.hasDynamicOffset) {
|
||||
offset += dynamicOffsets[currentDynamicBufferIndex];
|
||||
currentDynamicBufferIndex++;
|
||||
// Dynamic buffers are packed at the front of BindingIndices.
|
||||
offset += dynamicOffsets[bindingIndex];
|
||||
}
|
||||
|
||||
if (hasVertStage) {
|
||||
|
|
|
@ -231,8 +231,7 @@ class BindGroupTracker : public BindGroupTrackerBase<false, uint64_t> {
|
|||
void Apply(const OpenGLFunctions& gl) {
|
||||
BeforeApply();
|
||||
for (BindGroupIndex index : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
|
||||
ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsetCounts[index],
|
||||
mDynamicOffsets[index].data());
|
||||
ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsets[index]);
|
||||
}
|
||||
AfterApply();
|
||||
}
|
||||
|
@ -241,10 +240,8 @@ class BindGroupTracker : public BindGroupTrackerBase<false, uint64_t> {
|
|||
void ApplyBindGroup(const OpenGLFunctions& gl,
|
||||
BindGroupIndex index,
|
||||
BindGroupBase* group,
|
||||
uint32_t dynamicOffsetCount,
|
||||
uint64_t* dynamicOffsets) {
|
||||
const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
|
||||
const auto& indices = ToBackend(mPipelineLayout)->GetBindingIndexInfo()[index];
|
||||
uint32_t currentDynamicOffsetIndex = 0;
|
||||
|
||||
for (BindingIndex bindingIndex{0}; bindingIndex < group->GetLayout()->GetBindingCount();
|
||||
++bindingIndex) {
|
||||
|
@ -268,8 +265,8 @@ class BindGroupTracker : public BindGroupTrackerBase<false, uint64_t> {
|
|||
GLuint offset = binding.offset;
|
||||
|
||||
if (bindingInfo.buffer.hasDynamicOffset) {
|
||||
offset += dynamicOffsets[currentDynamicOffsetIndex];
|
||||
++currentDynamicOffsetIndex;
|
||||
// Dynamic buffers are packed at the front of BindingIndices.
|
||||
offset += dynamicOffsets[bindingIndex];
|
||||
}
|
||||
|
||||
GLenum target;
|
||||
|
|
|
@ -141,12 +141,12 @@ class DescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
|
|||
BeforeApply();
|
||||
for (BindGroupIndex dirtyIndex : IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
|
||||
VkDescriptorSet set = ToBackend(mBindGroups[dirtyIndex])->GetHandle();
|
||||
uint32_t count = static_cast<uint32_t>(mDynamicOffsets[dirtyIndex].size());
|
||||
const uint32_t* dynamicOffset =
|
||||
mDynamicOffsetCounts[dirtyIndex] > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr;
|
||||
device->fn.CmdBindDescriptorSets(recordingContext->commandBuffer, bindPoint,
|
||||
ToBackend(mPipelineLayout)->GetHandle(),
|
||||
static_cast<uint32_t>(dirtyIndex), 1, &*set,
|
||||
mDynamicOffsetCounts[dirtyIndex], dynamicOffset);
|
||||
count > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr;
|
||||
device->fn.CmdBindDescriptorSets(
|
||||
recordingContext->commandBuffer, bindPoint, ToBackend(mPipelineLayout)->GetHandle(),
|
||||
static_cast<uint32_t>(dirtyIndex), 1, &*set, count, dynamicOffset);
|
||||
}
|
||||
AfterApply();
|
||||
}
|
||||
|
|
|
@ -15,10 +15,12 @@
|
|||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "dawn/common/Math.h"
|
||||
#include "dawn/common/Platform.h"
|
||||
#include "dawn/tests/DawnTest.h"
|
||||
#include "dawn/utils/ComboRenderPipelineDescriptor.h"
|
||||
#include "dawn/utils/WGPUHelpers.h"
|
||||
|
||||
class MaxLimitTests : public DawnTest {
|
||||
|
@ -240,6 +242,156 @@ TEST_P(MaxLimitTests, MaxBufferBindingSize) {
|
|||
}
|
||||
}
|
||||
|
||||
// Test using the maximum number of dynamic uniform and storage buffers
|
||||
TEST_P(MaxLimitTests, MaxDynamicBuffers) {
|
||||
wgpu::Limits limits = GetSupportedLimits().limits;
|
||||
|
||||
std::vector<wgpu::BindGroupLayoutEntry> bglEntries;
|
||||
std::vector<wgpu::BindGroupEntry> bgEntries;
|
||||
|
||||
// Binding number counter which is bumped as we create bind group layout
|
||||
// entries.
|
||||
uint32_t bindingNumber = 1u;
|
||||
|
||||
// Lambda to create a buffer. The binding number is written at an offset of
|
||||
// 256 bytes. The test binds at a 256-byte dynamic offset and checks that the
|
||||
// contents of the buffer are equal to the binding number.
|
||||
std::vector<uint32_t> bufferData(1 + 256 / sizeof(uint32_t));
|
||||
auto MakeBuffer = [&](wgpu::BufferUsage usage) {
|
||||
*bufferData.rbegin() = bindingNumber;
|
||||
return utils::CreateBufferFromData(device, bufferData.data(),
|
||||
sizeof(uint32_t) * bufferData.size(), usage);
|
||||
};
|
||||
|
||||
// Create as many dynamic uniform buffers as the limits allow.
|
||||
for (uint32_t i = 0u; i < limits.maxDynamicUniformBuffersPerPipelineLayout &&
|
||||
i < 2 * limits.maxUniformBuffersPerShaderStage;
|
||||
++i) {
|
||||
wgpu::Buffer buffer = MakeBuffer(wgpu::BufferUsage::Uniform);
|
||||
|
||||
bglEntries.push_back(utils::BindingLayoutEntryInitializationHelper{
|
||||
bindingNumber,
|
||||
// When we surpass the per-stage limit, switch to the fragment shader.
|
||||
i < limits.maxUniformBuffersPerShaderStage ? wgpu::ShaderStage::Vertex
|
||||
: wgpu::ShaderStage::Fragment,
|
||||
wgpu::BufferBindingType::Uniform, true});
|
||||
bgEntries.push_back(
|
||||
utils::BindingInitializationHelper(bindingNumber, buffer, 0, sizeof(uint32_t))
|
||||
.GetAsBinding());
|
||||
|
||||
++bindingNumber;
|
||||
}
|
||||
|
||||
// Create as many dynamic storage buffers as the limits allow.
|
||||
for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout &&
|
||||
i < 2 * limits.maxStorageBuffersPerShaderStage;
|
||||
++i) {
|
||||
wgpu::Buffer buffer = MakeBuffer(wgpu::BufferUsage::Storage);
|
||||
|
||||
bglEntries.push_back(utils::BindingLayoutEntryInitializationHelper{
|
||||
bindingNumber,
|
||||
// When we surpass the per-stage limit, switch to the fragment shader.
|
||||
i < limits.maxStorageBuffersPerShaderStage ? wgpu::ShaderStage::Vertex
|
||||
: wgpu::ShaderStage::Fragment,
|
||||
wgpu::BufferBindingType::ReadOnlyStorage, true});
|
||||
bgEntries.push_back(
|
||||
utils::BindingInitializationHelper(bindingNumber, buffer, 0, sizeof(uint32_t))
|
||||
.GetAsBinding());
|
||||
|
||||
++bindingNumber;
|
||||
}
|
||||
|
||||
// Create the bind group layout.
|
||||
wgpu::BindGroupLayoutDescriptor bglDesc;
|
||||
bglDesc.entryCount = static_cast<uint32_t>(bglEntries.size());
|
||||
bglDesc.entries = bglEntries.data();
|
||||
wgpu::BindGroupLayout bgl = device.CreateBindGroupLayout(&bglDesc);
|
||||
|
||||
// Create the bind group.
|
||||
wgpu::BindGroupDescriptor bgDesc;
|
||||
bgDesc.layout = bgl;
|
||||
bgDesc.entryCount = static_cast<uint32_t>(bgEntries.size());
|
||||
bgDesc.entries = bgEntries.data();
|
||||
wgpu::BindGroup bindGroup = device.CreateBindGroup(&bgDesc);
|
||||
|
||||
// Generate binding declarations at the top of the the shader.
|
||||
std::ostringstream wgslShader;
|
||||
for (const auto& binding : bglEntries) {
|
||||
if (binding.buffer.type == wgpu::BufferBindingType::Uniform) {
|
||||
wgslShader << "@group(0) @binding(" << binding.binding << ") var<uniform> b"
|
||||
<< binding.binding << ": u32;\n";
|
||||
} else if (binding.buffer.type == wgpu::BufferBindingType::ReadOnlyStorage) {
|
||||
wgslShader << "@group(0) @binding(" << binding.binding << ") var<storage, read> b"
|
||||
<< binding.binding << ": u32;\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Generate a vertex shader which rasterizes primitives outside the viewport
|
||||
// if the bound buffer contents are not expected.
|
||||
wgslShader << "@vertex fn vert_main() -> @builtin(position) vec4f {\n";
|
||||
for (const auto& binding : bglEntries) {
|
||||
if (binding.visibility == wgpu::ShaderStage::Vertex) {
|
||||
// If the value is not what is expected, return a vertex that will be clipped.
|
||||
wgslShader << " if (b" << binding.binding << " != " << binding.binding
|
||||
<< "u) { return vec4f(10.0, 10.0, 10.0, 1.0); }\n";
|
||||
}
|
||||
}
|
||||
wgslShader << " return vec4f(0.0, 0.0, 0.5, 1.0);\n";
|
||||
wgslShader << "}\n";
|
||||
|
||||
// Generate a fragment shader which discards fragments if the bound buffer
|
||||
// contents are not expected.
|
||||
wgslShader << "@fragment fn frag_main() -> @location(0) u32 {\n";
|
||||
for (const auto& binding : bglEntries) {
|
||||
if (binding.visibility == wgpu::ShaderStage::Fragment) {
|
||||
// If the value is not what is expected, discard.
|
||||
wgslShader << " if (b" << binding.binding << " != " << binding.binding
|
||||
<< "u) { discard; }\n";
|
||||
}
|
||||
}
|
||||
wgslShader << " return 1u;\n";
|
||||
wgslShader << "}\n";
|
||||
|
||||
wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, wgslShader.str().c_str());
|
||||
|
||||
// Create a render target. Its contents will be 1 if the test passes.
|
||||
wgpu::TextureDescriptor renderTargetDesc;
|
||||
renderTargetDesc.size = {1, 1};
|
||||
renderTargetDesc.format = wgpu::TextureFormat::R8Uint;
|
||||
renderTargetDesc.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::RenderAttachment;
|
||||
wgpu::Texture renderTarget = device.CreateTexture(&renderTargetDesc);
|
||||
|
||||
utils::ComboRenderPipelineDescriptor pipelineDesc;
|
||||
pipelineDesc.layout = utils::MakePipelineLayout(device, {bgl});
|
||||
pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
|
||||
pipelineDesc.vertex.module = shaderModule;
|
||||
pipelineDesc.vertex.entryPoint = "vert_main";
|
||||
pipelineDesc.cFragment.module = shaderModule;
|
||||
pipelineDesc.cFragment.entryPoint = "frag_main";
|
||||
pipelineDesc.cTargets[0].format = renderTargetDesc.format;
|
||||
wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDesc);
|
||||
|
||||
utils::ComboRenderPassDescriptor rpDesc({renderTarget.CreateView()});
|
||||
rpDesc.cColorAttachments[0].clearValue = {};
|
||||
rpDesc.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear;
|
||||
rpDesc.cColorAttachments[0].storeOp = wgpu::StoreOp::Store;
|
||||
|
||||
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
|
||||
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&rpDesc);
|
||||
|
||||
// Bind the bind group with all resources at a 256-byte dynamic offset, and draw.
|
||||
std::vector<uint32_t> dynamicOffsets(bglEntries.size(), 256u);
|
||||
pass.SetBindGroup(0, bindGroup, dynamicOffsets.size(), dynamicOffsets.data());
|
||||
pass.SetPipeline(pipeline);
|
||||
pass.Draw(1);
|
||||
pass.End();
|
||||
wgpu::CommandBuffer commands = encoder.Finish();
|
||||
queue.Submit(1, &commands);
|
||||
|
||||
uint32_t expected = 1u;
|
||||
EXPECT_TEXTURE_EQ(&expected, renderTarget, {0, 0}, {1, 1});
|
||||
}
|
||||
|
||||
DAWN_INSTANTIATE_TEST(MaxLimitTests,
|
||||
D3D12Backend(),
|
||||
MetalBackend(),
|
||||
|
|
|
@ -28,7 +28,8 @@ TEST_P(PipelineLayoutTests, DynamicBuffersOverflow) {
|
|||
wgpu::BindGroupLayout bglA;
|
||||
{
|
||||
std::vector<wgpu::BindGroupLayoutEntry> entries;
|
||||
for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; i++) {
|
||||
for (uint32_t i = 0;
|
||||
i < GetSupportedLimits().limits.maxDynamicStorageBuffersPerPipelineLayout; i++) {
|
||||
wgpu::BindGroupLayoutEntry entry;
|
||||
entry.binding = i;
|
||||
entry.visibility = wgpu::ShaderStage::Compute;
|
||||
|
|
|
@ -1476,22 +1476,24 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
|
|||
std::vector<wgpu::BindGroupLayoutEntry> maxStorageDB;
|
||||
std::vector<wgpu::BindGroupLayoutEntry> maxReadonlyStorageDB;
|
||||
|
||||
wgpu::Limits limits = GetSupportedLimits().limits;
|
||||
|
||||
// In this test, we use all the same shader stage. Ensure that this does not exceed the
|
||||
// per-stage limit.
|
||||
static_assert(kMaxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage);
|
||||
static_assert(kMaxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage);
|
||||
ASSERT(limits.maxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage);
|
||||
ASSERT(limits.maxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage);
|
||||
|
||||
for (uint32_t i = 0; i < kMaxDynamicUniformBuffersPerPipelineLayout; ++i) {
|
||||
for (uint32_t i = 0; i < limits.maxDynamicUniformBuffersPerPipelineLayout; ++i) {
|
||||
maxUniformDB.push_back(utils::BindingLayoutEntryInitializationHelper(
|
||||
i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform, true));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) {
|
||||
for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout; ++i) {
|
||||
maxStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
|
||||
i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage, true));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) {
|
||||
for (uint32_t i = 0; i < limits.maxDynamicStorageBuffersPerPipelineLayout; ++i) {
|
||||
maxReadonlyStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
|
||||
i, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::ReadOnlyStorage, true));
|
||||
}
|
||||
|
@ -1561,7 +1563,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
|
|||
// Check dynamic uniform buffers exceed maximum in bind group layout.
|
||||
{
|
||||
maxUniformDB.push_back(utils::BindingLayoutEntryInitializationHelper(
|
||||
kMaxDynamicUniformBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
|
||||
limits.maxDynamicUniformBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
|
||||
wgpu::BufferBindingType::Uniform, true));
|
||||
TestCreateBindGroupLayout(maxUniformDB.data(), maxUniformDB.size(), false);
|
||||
}
|
||||
|
@ -1569,7 +1571,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
|
|||
// Check dynamic storage buffers exceed maximum in bind group layout.
|
||||
{
|
||||
maxStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
|
||||
kMaxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
|
||||
limits.maxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
|
||||
wgpu::BufferBindingType::Storage, true));
|
||||
TestCreateBindGroupLayout(maxStorageDB.data(), maxStorageDB.size(), false);
|
||||
}
|
||||
|
@ -1577,7 +1579,7 @@ TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
|
|||
// Check dynamic readonly storage buffers exceed maximum in bind group layout.
|
||||
{
|
||||
maxReadonlyStorageDB.push_back(utils::BindingLayoutEntryInitializationHelper(
|
||||
kMaxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
|
||||
limits.maxDynamicStorageBuffersPerPipelineLayout, wgpu::ShaderStage::Fragment,
|
||||
wgpu::BufferBindingType::ReadOnlyStorage, true));
|
||||
TestCreateBindGroupLayout(maxReadonlyStorageDB.data(), maxReadonlyStorageDB.size(), false);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue