Remove kMaxBindingsPerGroup limit
kMaxBindingsPerGroup is not a real WebGPU limit. Code in Dawn that used it now uses
dynamically sized vectors, optimistic stack_vecs, or a different limit such as
kMaxDynamicBuffersPerPipelineLayout. This CL introduces kMaxOptimalBindingsPerGroup = 32,
which is used to size the static portion of stack_vecs, but the value is tentative.

Bug: dawn:443
Change-Id: I08e06bed6531bed8b4365f36cf2fc0579ac5f180
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23502
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Stephen White <senorblanco@chromium.org>
parent 8b441db771
commit b31f5e717e
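The change replaces fixed, kMaxBindingsPerGroup-sized arrays with containers sized by the actual binding count: plain ityp::vector where the count is only known at runtime, and "optimistic" stack_vecs whose inline storage covers the common case (up to kMaxOptimalBindingsPerGroup = 32 bindings) and which fall back to the heap only beyond that. The sketch below is a minimal illustration of that pattern; the class name, members, and constructor-only sizing are assumptions for this example, not Dawn's actual ityp::stack_vec implementation.

// Illustrative sketch only (assumed names, not Dawn's real ityp::stack_vec): a small
// vector that keeps up to kInlineCapacity elements in inline storage and only touches
// the heap for larger sizes. Dawn sizes the inline part with kMaxOptimalBindingsPerGroup.
#include <array>
#include <cstddef>
#include <vector>

template <typename T, size_t kInlineCapacity>
class OptimisticVector {
  public:
    explicit OptimisticVector(size_t size) : mSize(size) {
        if (size > kInlineCapacity) {
            mHeap.resize(size);      // Rare case: more elements than the inline storage holds.
            mData = mHeap.data();
        } else {
            mData = mInline.data();  // Common case: no heap allocation at all.
        }
    }
    T& operator[](size_t i) { return mData[i]; }
    size_t size() const { return mSize; }

  private:
    std::array<T, kInlineCapacity> mInline = {};
    std::vector<T> mHeap;
    T* mData = nullptr;
    size_t mSize = 0;
};

// Usage in the spirit of the Vulkan changes below, sized from a layout's binding count:
// OptimisticVector<VkWriteDescriptorSet, 32> writes(bindingCount);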
@@ -18,8 +18,6 @@
 #include <cstdint>
 
 static constexpr uint32_t kMaxBindGroups = 4u;
-// TODO(cwallez@chromium.org): investigate bindgroup limits
-static constexpr uint32_t kMaxBindingsPerGroup = 24u;
 static constexpr uint32_t kMaxVertexAttributes = 16u;
 // Vulkan has a standalone limit named maxVertexInputAttributeOffset (2047u at least) for vertex
 // attribute offset. The limit might be meaningless because Vulkan has another limit named
@@ -178,8 +178,9 @@ namespace dawn_native {
         }
 
         const BindGroupLayoutBase::BindingMap& bindingMap = descriptor->layout->GetBindingMap();
+        ASSERT(bindingMap.size() <= kMaxBindingsPerPipelineLayout);
 
-        ityp::bitset<BindingIndex, kMaxBindingsPerGroup> bindingsSet;
+        ityp::bitset<BindingIndex, kMaxBindingsPerPipelineLayout> bindingsSet;
         for (uint32_t i = 0; i < descriptor->entryCount; ++i) {
             const BindGroupEntry& entry = descriptor->entries[i];
@@ -16,6 +16,7 @@
 #define DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
 
 #include "common/ityp_bitset.h"
+#include "common/ityp_stack_vec.h"
 #include "dawn_native/BindGroup.h"
 #include "dawn_native/BindGroupTracker.h"
 #include "dawn_native/Buffer.h"
@@ -39,11 +40,12 @@ namespace dawn_native {
             ASSERT(index < kMaxBindGroupsTyped);
 
             if (this->mBindGroups[index] != bindGroup) {
-                mBindings[index] = {};
-                mBindingsNeedingBarrier[index] = {};
-
                 const BindGroupLayoutBase* layout = bindGroup->GetLayout();
 
+                mBindings[index].resize(layout->GetBindingCount());
+                mBindingTypes[index].resize(layout->GetBindingCount());
+                mBindingsNeedingBarrier[index] = {};
+
                 for (BindingIndex bindingIndex{0}; bindingIndex < layout->GetBindingCount();
                      ++bindingIndex) {
                     const BindingInfo& bindingInfo = layout->GetBindingInfo(bindingIndex);
@@ -91,15 +93,16 @@ namespace dawn_native {
         }
 
       protected:
-        ityp::
-            array<BindGroupIndex, ityp::bitset<BindingIndex, kMaxBindingsPerGroup>, kMaxBindGroups>
-                mBindingsNeedingBarrier = {};
+        ityp::array<BindGroupIndex,
+                    ityp::bitset<BindingIndex, kMaxBindingsPerPipelineLayout>,
+                    kMaxBindGroups>
+            mBindingsNeedingBarrier = {};
         ityp::array<BindGroupIndex,
-                    ityp::array<BindingIndex, wgpu::BindingType, kMaxBindingsPerGroup>,
+                    ityp::stack_vec<BindingIndex, wgpu::BindingType, kMaxOptimalBindingsPerGroup>,
                     kMaxBindGroups>
             mBindingTypes = {};
         ityp::array<BindGroupIndex,
-                    ityp::array<BindingIndex, ObjectBase*, kMaxBindingsPerGroup>,
+                    ityp::stack_vec<BindingIndex, ObjectBase*, kMaxOptimalBindingsPerGroup>,
                     kMaxBindGroups>
             mBindings = {};
     };
@@ -258,10 +258,6 @@ namespace dawn_native {
             bindingsSet.insert(bindingNumber);
         }
 
-        if (bindingsSet.size() > kMaxBindingsPerGroup) {
-            return DAWN_VALIDATION_ERROR("The number of bindings exceeds kMaxBindingsPerGroup.");
-        }
-
        DAWN_TRY(ValidateBindingCounts(bindingCounts));
 
         return {};
@@ -358,8 +354,6 @@
     // This is a utility function to help ASSERT that the BGL-binding comparator places buffers
     // first.
     bool CheckBufferBindingsFirst(ityp::span<BindingIndex, const BindingInfo> bindings) {
-        ASSERT(bindings.size() <= BindingIndex(kMaxBindingsPerGroup));
-
         BindingIndex lastBufferIndex{0};
         BindingIndex firstNonBufferIndex = std::numeric_limits<BindingIndex>::max();
         for (BindingIndex i{0}; i < bindings.size(); ++i) {
@@ -381,13 +375,12 @@
 
     BindGroupLayoutBase::BindGroupLayoutBase(DeviceBase* device,
                                              const BindGroupLayoutDescriptor* descriptor)
-        : CachedObject(device) {
+        : CachedObject(device), mBindingInfo(BindingIndex(descriptor->entryCount)) {
         std::vector<BindGroupLayoutEntry> sortedBindings(
             descriptor->entries, descriptor->entries + descriptor->entryCount);
 
         std::sort(sortedBindings.begin(), sortedBindings.end(), SortBindingsCompare);
 
-        for (BindingIndex i{0}; i < BindingIndex(descriptor->entryCount); ++i) {
+        for (BindingIndex i{0}; i < mBindingInfo.size(); ++i) {
             const BindGroupLayoutEntry& binding = sortedBindings[static_cast<uint32_t>(i)];
             mBindingInfo[i].binding = BindingNumber(binding.binding);
             mBindingInfo[i].type = binding.type;
@@ -416,6 +409,7 @@
             ASSERT(it.second);
         }
         ASSERT(CheckBufferBindingsFirst({mBindingInfo.data(), GetBindingCount()}));
+        ASSERT(mBindingInfo.size() <= kMaxBindingsPerPipelineLayoutTyped);
     }
 
     BindGroupLayoutBase::BindGroupLayoutBase(DeviceBase* device, ObjectBase::ErrorTag tag)
@@ -471,7 +465,7 @@
     }
 
     BindingIndex BindGroupLayoutBase::GetBindingCount() const {
-        return BindingIndex(mBindingCounts.totalCount);
+        return mBindingInfo.size();
     }
 
     BindingIndex BindGroupLayoutBase::GetBufferCount() const {
@@ -18,8 +18,8 @@
 #include "common/Constants.h"
 #include "common/Math.h"
 #include "common/SlabAllocator.h"
-#include "common/ityp_array.h"
 #include "common/ityp_span.h"
+#include "common/ityp_vector.h"
 #include "dawn_native/BindingInfo.h"
 #include "dawn_native/CachedObject.h"
 #include "dawn_native/Error.h"
@@ -64,7 +64,7 @@
 
         const BindingInfo& GetBindingInfo(BindingIndex bindingIndex) const {
             ASSERT(!IsError());
-            ASSERT(bindingIndex < BindingIndex(kMaxBindingsPerGroup));
+            ASSERT(bindingIndex < mBindingInfo.size());
             return mBindingInfo[bindingIndex];
         }
         const BindingMap& GetBindingMap() const;
@@ -124,7 +124,7 @@
         BindGroupLayoutBase(DeviceBase* device, ObjectBase::ErrorTag tag);
 
         BindingCounts mBindingCounts = {};
-        ityp::array<BindingIndex, BindingInfo, kMaxBindingsPerGroup> mBindingInfo;
+        ityp::vector<BindingIndex, BindingInfo> mBindingInfo;
 
         // Map from BindGroupLayoutEntry.binding to packed indices.
         BindingMap mBindingMap;
@@ -103,7 +103,9 @@
         BindGroupLayoutMask mBindGroupLayoutsMask = 0;
         ityp::array<BindGroupIndex, BindGroupBase*, kMaxBindGroups> mBindGroups = {};
         ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mDynamicOffsetCounts = {};
-        ityp::array<BindGroupIndex, std::array<DynamicOffset, kMaxBindingsPerGroup>, kMaxBindGroups>
+        ityp::array<BindGroupIndex,
+                    std::array<DynamicOffset, kMaxDynamicBuffersPerPipelineLayout>,
+                    kMaxBindGroups>
             mDynamicOffsets = {};
 
         // |mPipelineLayout| is the current pipeline layout set on the command buffer.
@@ -36,9 +36,28 @@
 
     using BindGroupIndex = TypedInteger<struct BindGroupIndexT, uint32_t>;
 
-    static constexpr BindingIndex kMaxBindingsPerGroupTyped = BindingIndex(kMaxBindingsPerGroup);
     static constexpr BindGroupIndex kMaxBindGroupsTyped = BindGroupIndex(kMaxBindGroups);
 
+    // Not a real WebGPU limit, but the sum of the two limits is useful for internal optimizations.
+    static constexpr uint32_t kMaxDynamicBuffersPerPipelineLayout =
+        kMaxDynamicUniformBuffersPerPipelineLayout + kMaxDynamicStorageBuffersPerPipelineLayout;
+
+    static constexpr BindingIndex kMaxDynamicBuffersPerPipelineLayoutTyped =
+        BindingIndex(kMaxDynamicBuffersPerPipelineLayout);
+
+    // Not a real WebGPU limit, but used to optimize parts of Dawn which expect valid usage of the
+    // API. There should never be more bindings than the max per stage, for each stage.
+    static constexpr uint32_t kMaxBindingsPerPipelineLayout =
+        3 * (kMaxSampledTexturesPerShaderStage + kMaxSamplersPerShaderStage +
+             kMaxStorageBuffersPerShaderStage + kMaxStorageTexturesPerShaderStage +
+             kMaxUniformBuffersPerShaderStage);
+
+    static constexpr BindingIndex kMaxBindingsPerPipelineLayoutTyped =
+        BindingIndex(kMaxBindingsPerPipelineLayout);
+
+    // TODO(enga): Figure out a good number for this.
+    static constexpr uint32_t kMaxOptimalBindingsPerGroup = 32;
+
     struct BindingInfo {
         BindingNumber binding;
         wgpu::ShaderStage visibility;
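The new kMaxBindingsPerPipelineLayout bound above is just the per-stage binding limits summed and multiplied by the three shader stages, and kMaxDynamicBuffersPerPipelineLayout is the sum of the two dynamic-buffer limits. A worked illustration follows, using assumed per-stage values rather than Dawn's actual constants (those live in common/Constants.h and may differ):

// Assumed, illustrative per-stage limits: 16 sampled textures + 16 samplers +
// 8 storage buffers + 4 storage textures + 12 uniform buffers per stage.
#include <cstdint>

constexpr uint32_t kAssumedPerStageBindings = 16 + 16 + 8 + 4 + 12;                   // 56
constexpr uint32_t kAssumedBindingsPerPipelineLayout = 3 * kAssumedPerStageBindings;  // 168
static_assert(kAssumedBindingsPerPipelineLayout == 168, "arithmetic check");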
@@ -17,6 +17,7 @@
 #include "common/Assert.h"
 #include "common/BitSetIterator.h"
 #include "common/HashUtils.h"
+#include "common/ityp_stack_vec.h"
 #include "dawn_native/BindGroupLayout.h"
 #include "dawn_native/Device.h"
 #include "dawn_native/ShaderModule.h"
@@ -118,9 +119,10 @@
         ASSERT(count > 0);
 
         // Data which BindGroupLayoutDescriptor will point to for creation
-        ityp::array<BindGroupIndex,
-                    ityp::array<BindingIndex, BindGroupLayoutEntry, kMaxBindingsPerGroup>,
-                    kMaxBindGroups>
+        ityp::array<
+            BindGroupIndex,
+            ityp::stack_vec<BindingIndex, BindGroupLayoutEntry, kMaxOptimalBindingsPerGroup>,
+            kMaxBindGroups>
             entryData = {};
 
         // A map of bindings to the index in |entryData|
@@ -194,6 +196,7 @@
 
                 IncrementBindingCounts(&bindingCounts, bindingSlot);
                 BindingIndex currentBindingCount = entryCounts[group];
+                entryData[group].resize(currentBindingCount + BindingIndex(1));
                 entryData[group][currentBindingCount] = bindingSlot;
 
                 usedBindingsMap[group][bindingNumber] = currentBindingCount;
@@ -46,6 +46,7 @@ namespace dawn_native { namespace d3d12 {
 
     BindGroupLayout::BindGroupLayout(Device* device, const BindGroupLayoutDescriptor* descriptor)
         : BindGroupLayoutBase(device, descriptor),
+          mBindingOffsets(GetBindingCount()),
           mDescriptorCounts{},
           mBindGroupAllocator(MakeFrontendBindGroupAllocator<BindGroup>(4096)) {
         for (BindingIndex bindingIndex = GetDynamicBufferCount(); bindingIndex < GetBindingCount();
@@ -170,9 +171,8 @@ namespace dawn_native { namespace d3d12 {
         mBindGroupAllocator.Deallocate(bindGroup);
     }
 
-    const ityp::array<BindingIndex, uint32_t, kMaxBindingsPerGroup>&
-    BindGroupLayout::GetBindingOffsets() const {
-        return mBindingOffsets;
+    ityp::span<BindingIndex, const uint32_t> BindGroupLayout::GetBindingOffsets() const {
+        return {mBindingOffsets.data(), mBindingOffsets.size()};
     }
 
     uint32_t BindGroupLayout::GetCbvUavSrvDescriptorTableSize() const {
@@ -18,6 +18,7 @@
 #include "dawn_native/BindGroupLayout.h"
 
 #include "common/SlabAllocator.h"
+#include "common/ityp_stack_vec.h"
 #include "dawn_native/d3d12/d3d12_platform.h"
 
 namespace dawn_native { namespace d3d12 {
@@ -44,7 +45,7 @@ namespace dawn_native { namespace d3d12 {
             Count,
         };
 
-        const ityp::array<BindingIndex, uint32_t, kMaxBindingsPerGroup>& GetBindingOffsets() const;
+        ityp::span<BindingIndex, const uint32_t> GetBindingOffsets() const;
         uint32_t GetCbvUavSrvDescriptorTableSize() const;
         uint32_t GetSamplerDescriptorTableSize() const;
         uint32_t GetCbvUavSrvDescriptorCount() const;
@@ -54,7 +55,7 @@ namespace dawn_native { namespace d3d12 {
 
       private:
         ~BindGroupLayout() override = default;
-        ityp::array<BindingIndex, uint32_t, kMaxBindingsPerGroup> mBindingOffsets;
+        ityp::stack_vec<BindingIndex, uint32_t, kMaxOptimalBindingsPerGroup> mBindingOffsets;
         std::array<uint32_t, DescriptorType::Count> mDescriptorCounts;
         D3D12_DESCRIPTOR_RANGE mRanges[DescriptorType::Count];
 
@@ -90,12 +90,14 @@ namespace dawn_native { namespace d3d12 {
         mCommandAllocatorManager = std::make_unique<CommandAllocatorManager>(this);
 
         // Zero sized allocator is never requested and does not need to exist.
-        for (uint32_t countIndex = 1; countIndex < kNumOfStagingDescriptorAllocators;
-             countIndex++) {
+        for (uint32_t countIndex = 1; countIndex <= kMaxViewDescriptorsPerBindGroup; countIndex++) {
             mViewAllocators[countIndex] = std::make_unique<StagingDescriptorAllocator>(
                 this, countIndex, kShaderVisibleDescriptorHeapSize,
                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+        }
 
+        for (uint32_t countIndex = 1; countIndex <= kMaxSamplerDescriptorsPerBindGroup;
+             countIndex++) {
             mSamplerAllocators[countIndex] = std::make_unique<StagingDescriptorAllocator>(
                 this, countIndex, kShaderVisibleDescriptorHeapSize,
                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
@@ -556,13 +558,13 @@ namespace dawn_native { namespace d3d12 {
 
     StagingDescriptorAllocator* Device::GetViewStagingDescriptorAllocator(
         uint32_t descriptorCount) const {
-        ASSERT(descriptorCount < kNumOfStagingDescriptorAllocators);
+        ASSERT(descriptorCount <= kMaxViewDescriptorsPerBindGroup);
         return mViewAllocators[descriptorCount].get();
     }
 
     StagingDescriptorAllocator* Device::GetSamplerStagingDescriptorAllocator(
         uint32_t descriptorCount) const {
-        ASSERT(descriptorCount < kNumOfStagingDescriptorAllocators);
+        ASSERT(descriptorCount <= kMaxSamplerDescriptorsPerBindGroup);
         return mSamplerAllocators[descriptorCount].get();
     }
 
@@ -19,6 +19,7 @@
 
 #include "common/Constants.h"
 #include "common/SerialQueue.h"
+#include "dawn_native/BindingInfo.h"
 #include "dawn_native/Device.h"
 #include "dawn_native/d3d12/CommandRecordingContext.h"
 #include "dawn_native/d3d12/D3D12Info.h"
@@ -192,13 +193,21 @@ namespace dawn_native { namespace d3d12 {
         std::unique_ptr<ResourceAllocatorManager> mResourceAllocatorManager;
         std::unique_ptr<ResidencyManager> mResidencyManager;
 
-        // Index corresponds to the descriptor count in the range [0, kMaxBindingsPerGroup].
-        static constexpr uint32_t kNumOfStagingDescriptorAllocators = kMaxBindingsPerGroup + 1;
+        // TODO(enga): Consider bucketing these if the count is too many.
+        static constexpr uint32_t kMaxSamplerDescriptorsPerBindGroup =
+            3 * kMaxSamplersPerShaderStage;
+        static constexpr uint32_t kMaxViewDescriptorsPerBindGroup =
+            kMaxBindingsPerPipelineLayout - kMaxSamplerDescriptorsPerBindGroup;
 
-        std::array<std::unique_ptr<StagingDescriptorAllocator>, kNumOfStagingDescriptorAllocators>
+        // Index corresponds to the descriptor count in the range [0,
+        // kMaxSamplerDescriptorsPerBindGroup].
+        std::array<std::unique_ptr<StagingDescriptorAllocator>,
+                   kMaxSamplerDescriptorsPerBindGroup + 1>
             mViewAllocators;
 
-        std::array<std::unique_ptr<StagingDescriptorAllocator>, kNumOfStagingDescriptorAllocators>
+        // Index corresponds to the descriptor count in the range [0,
+        // kMaxViewDescriptorsPerBindGroup].
+        std::array<std::unique_ptr<StagingDescriptorAllocator>, kMaxViewDescriptorsPerBindGroup + 1>
             mSamplerAllocators;
 
         std::unique_ptr<StagingDescriptorAllocator> mRenderTargetViewAllocator;
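The D3D12 device keeps one StagingDescriptorAllocator per exact descriptor count, so a lookup is a direct array index; splitting views and samplers only shrinks the two arrays to their own maxima. The TODO above suggests bucketing if the counts grow large. A hypothetical sketch of that idea follows (assumed helper names; this is not what Dawn does here): round a count up to the next power of two and index allocators by the bucket, trading a little descriptor waste for far fewer allocator instances.

#include <cstdint>

uint32_t NextPowerOfTwo(uint32_t v) {
    uint32_t p = 1;
    while (p < v) {
        p <<= 1;
    }
    return p;
}

// Maps a descriptor count >= 1 to a bucket index: 1 -> 0, 2 -> 1, 3..4 -> 2, 5..8 -> 3, ...
uint32_t BucketIndexForCount(uint32_t descriptorCount) {
    uint32_t bucketSize = NextPowerOfTwo(descriptorCount);
    uint32_t index = 0;
    while ((1u << index) < bucketSize) {
        ++index;
    }
    return index;
}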
@@ -194,7 +194,7 @@ namespace dawn_native { namespace d3d12 {
     uint32_t PipelineLayout::GetDynamicRootParameterIndex(BindGroupIndex group,
                                                           BindingIndex bindingIndex) const {
         ASSERT(group < kMaxBindGroupsTyped);
-        ASSERT(bindingIndex < kMaxBindingsPerGroupTyped);
+        ASSERT(bindingIndex < kMaxDynamicBuffersPerPipelineLayoutTyped);
         ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).hasDynamicOffset);
         ASSERT(GetBindGroupLayout(group)->GetBindingInfo(bindingIndex).visibility !=
                wgpu::ShaderStage::None);
@@ -45,7 +45,7 @@ namespace dawn_native { namespace d3d12 {
         ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mCbvUavSrvRootParameterInfo;
         ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups> mSamplerRootParameterInfo;
         ityp::array<BindGroupIndex,
-                    ityp::array<BindingIndex, uint32_t, kMaxBindingsPerGroup>,
+                    ityp::array<BindingIndex, uint32_t, kMaxDynamicBuffersPerPipelineLayout>,
                     kMaxBindGroups>
             mDynamicRootParameterIndices;
         ComPtr<ID3D12RootSignature> mRootSignature;
@@ -15,7 +15,7 @@
 #ifndef DAWNNATIVE_METAL_PIPELINELAYOUTMTL_H_
 #define DAWNNATIVE_METAL_PIPELINELAYOUTMTL_H_
 
-#include "common/ityp_array.h"
+#include "common/ityp_stack_vec.h"
 #include "dawn_native/BindingInfo.h"
 #include "dawn_native/PipelineLayout.h"
 
@@ -44,7 +44,7 @@ namespace dawn_native { namespace metal {
 
         using BindingIndexInfo =
             ityp::array<BindGroupIndex,
-                        ityp::array<BindingIndex, uint32_t, kMaxBindingsPerGroup>,
+                        ityp::stack_vec<BindingIndex, uint32_t, kMaxOptimalBindingsPerGroup>,
                         kMaxBindGroups>;
         const BindingIndexInfo& GetBindingIndexInfo(SingleShaderStage stage) const;
 
@@ -29,6 +29,8 @@ namespace dawn_native { namespace metal {
             uint32_t textureIndex = 0;
 
             for (BindGroupIndex group : IterateBitSet(GetBindGroupLayoutsMask())) {
+                mIndexInfo[stage][group].resize(GetBindGroupLayout(group)->GetBindingCount());
+
                 for (BindingIndex bindingIndex{0};
                      bindingIndex < GetBindGroupLayout(group)->GetBindingCount(); ++bindingIndex) {
                     const BindingInfo& bindingInfo =
@@ -36,10 +36,6 @@ namespace dawn_native { namespace opengl {
                            const PipelineLayout* layout,
                            const PerStage<const ShaderModule*>& modules);
 
-        using BindingLocations = ityp::array<BindGroupIndex,
-                                             ityp::array<BindingIndex, GLint, kMaxBindingsPerGroup>,
-                                             kMaxBindGroups>;
-
         // For each unit a sampler is bound to we need to know if we should use filtering or not
         // because int and uint texture are only complete without filtering.
         struct SamplerUnit {
@@ -30,6 +30,7 @@ namespace dawn_native { namespace opengl {
 
         for (BindGroupIndex group : IterateBitSet(GetBindGroupLayoutsMask())) {
             const BindGroupLayoutBase* bgl = GetBindGroupLayout(group);
+            mIndexInfo[group].resize(bgl->GetBindingCount());
 
             for (BindingIndex bindingIndex{0}; bindingIndex < bgl->GetBindingCount();
                  ++bindingIndex) {
@@ -18,6 +18,7 @@
 #include "dawn_native/PipelineLayout.h"
 
 #include "common/ityp_array.h"
+#include "common/ityp_vector.h"
 #include "dawn_native/BindingInfo.h"
 #include "dawn_native/opengl/opengl_platform.h"
 
@@ -30,9 +31,7 @@ namespace dawn_native { namespace opengl {
         PipelineLayout(Device* device, const PipelineLayoutDescriptor* descriptor);
 
         using BindingIndexInfo =
-            ityp::array<BindGroupIndex,
-                        ityp::array<BindingIndex, GLuint, kMaxBindingsPerGroup>,
-                        kMaxBindGroups>;
+            ityp::array<BindGroupIndex, ityp::vector<BindingIndex, GLuint>, kMaxBindGroups>;
         const BindingIndexInfo& GetBindingIndexInfo() const;
 
         GLuint GetTextureUnitsUsed() const;
@@ -15,6 +15,7 @@
 #include "dawn_native/vulkan/BindGroupLayoutVk.h"
 
 #include "common/BitSetIterator.h"
+#include "common/ityp_vector.h"
 #include "dawn_native/vulkan/BindGroupVk.h"
 #include "dawn_native/vulkan/DescriptorSetAllocator.h"
 #include "dawn_native/vulkan/DeviceVk.h"
@@ -85,29 +86,30 @@ namespace dawn_native { namespace vulkan {
         // Compute the bindings that will be chained in the DescriptorSetLayout create info. We add
         // one entry per binding set. This might be optimized by computing continuous ranges of
         // bindings of the same type.
-        uint32_t numBindings = 0;
-        std::array<VkDescriptorSetLayoutBinding, kMaxBindingsPerGroup> bindings;
+        ityp::vector<BindingIndex, VkDescriptorSetLayoutBinding> bindings;
+        bindings.reserve(GetBindingCount());
 
         for (const auto& it : GetBindingMap()) {
             BindingNumber bindingNumber = it.first;
             BindingIndex bindingIndex = it.second;
             const BindingInfo& bindingInfo = GetBindingInfo(bindingIndex);
 
-            VkDescriptorSetLayoutBinding* vkBinding = &bindings[numBindings];
-            vkBinding->binding = static_cast<uint32_t>(bindingNumber);
-            vkBinding->descriptorType =
+            VkDescriptorSetLayoutBinding vkBinding;
+            vkBinding.binding = static_cast<uint32_t>(bindingNumber);
+            vkBinding.descriptorType =
                 VulkanDescriptorType(bindingInfo.type, bindingInfo.hasDynamicOffset);
-            vkBinding->descriptorCount = 1;
-            vkBinding->stageFlags = VulkanShaderStageFlags(bindingInfo.visibility);
-            vkBinding->pImmutableSamplers = nullptr;
+            vkBinding.descriptorCount = 1;
+            vkBinding.stageFlags = VulkanShaderStageFlags(bindingInfo.visibility);
+            vkBinding.pImmutableSamplers = nullptr;
 
-            numBindings++;
+            bindings.emplace_back(vkBinding);
         }
 
         VkDescriptorSetLayoutCreateInfo createInfo;
         createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
         createInfo.pNext = nullptr;
         createInfo.flags = 0;
-        createInfo.bindingCount = numBindings;
+        createInfo.bindingCount = static_cast<uint32_t>(bindings.size());
         createInfo.pBindings = bindings.data();
 
         Device* device = ToBackend(GetDevice());
@@ -15,6 +15,7 @@
 #include "dawn_native/vulkan/BindGroupVk.h"
 
 #include "common/BitSetIterator.h"
+#include "common/ityp_stack_vec.h"
 #include "dawn_native/vulkan/BindGroupLayoutVk.h"
 #include "dawn_native/vulkan/BufferVk.h"
 #include "dawn_native/vulkan/DeviceVk.h"
@@ -38,11 +39,15 @@ namespace dawn_native { namespace vulkan {
           mDescriptorSetAllocation(descriptorSetAllocation) {
         // Now do a write of a single descriptor set with all possible chained data allocated on the
         // stack.
-        uint32_t numWrites = 0;
-        std::array<VkWriteDescriptorSet, kMaxBindingsPerGroup> writes;
-        std::array<VkDescriptorBufferInfo, kMaxBindingsPerGroup> writeBufferInfo;
-        std::array<VkDescriptorImageInfo, kMaxBindingsPerGroup> writeImageInfo;
+        const uint32_t bindingCount = static_cast<uint32_t>((GetLayout()->GetBindingCount()));
+        ityp::stack_vec<uint32_t, VkWriteDescriptorSet, kMaxOptimalBindingsPerGroup> writes(
+            bindingCount);
+        ityp::stack_vec<uint32_t, VkDescriptorBufferInfo, kMaxOptimalBindingsPerGroup>
+            writeBufferInfo(bindingCount);
+        ityp::stack_vec<uint32_t, VkDescriptorImageInfo, kMaxOptimalBindingsPerGroup>
+            writeImageInfo(bindingCount);
 
+        uint32_t numWrites = 0;
         for (const auto& it : GetLayout()->GetBindingMap()) {
             BindingNumber bindingNumber = it.first;
             BindingIndex bindingIndex = it.second;
@@ -104,7 +104,7 @@ namespace dawn_native { namespace vulkan {
             const ityp::array<BindGroupIndex, BindGroupBase*, kMaxBindGroups>& bindGroups,
             const ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups>& dynamicOffsetCounts,
             const ityp::array<BindGroupIndex,
-                              std::array<uint32_t, kMaxBindingsPerGroup>,
+                              std::array<uint32_t, kMaxDynamicBuffersPerPipelineLayout>,
                               kMaxBindGroups>& dynamicOffsets) {
             for (BindGroupIndex dirtyIndex : IterateBitSet(bindGroupsToApply)) {
                 VkDescriptorSet set = ToBackend(bindGroups[dirtyIndex])->GetHandle();
@@ -38,7 +38,6 @@ namespace dawn_native { namespace vulkan {
             totalDescriptorCount += it.second;
             mPoolSizes.push_back(VkDescriptorPoolSize{it.first, it.second});
         }
-        ASSERT(totalDescriptorCount <= kMaxBindingsPerGroup);
 
         if (totalDescriptorCount == 0) {
             // Vulkan requires that valid usage of vkCreateDescriptorPool must have a non-zero
@@ -49,6 +48,9 @@ namespace dawn_native { namespace vulkan {
             mPoolSizes.push_back(VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1});
             mMaxSets = kMaxDescriptorsPerPool;
         } else {
+            ASSERT(totalDescriptorCount <= kMaxBindingsPerPipelineLayout);
+            static_assert(kMaxBindingsPerPipelineLayout <= kMaxDescriptorsPerPool, "");
+
             // Compute the total number of descriptors sets that fits given the max.
             mMaxSets = kMaxDescriptorsPerPool / totalDescriptorCount;
             ASSERT(mMaxSets > 0);
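In the else branch above, the pool budget is divided by the layout's total descriptor count to get how many descriptor sets one VkDescriptorPool can serve, and the new static_assert plus ASSERT guarantee the division never rounds down to zero. A worked example follows with an assumed pool budget (kMaxDescriptorsPerPool's real value is defined elsewhere in Dawn and may differ):

#include <cstdint>

// Assumed pool budget, for illustration only.
constexpr uint32_t kAssumedMaxDescriptorsPerPool = 512;
constexpr uint32_t kLayoutDescriptorCount = 6;  // e.g. a layout totaling 6 descriptors
constexpr uint32_t kSetsPerPool = kAssumedMaxDescriptorsPerPool / kLayoutDescriptorCount;
static_assert(kSetsPerPool == 85, "512 / 6 == 85 sets per pool (integer division)");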
@@ -1091,6 +1091,149 @@ TEST_P(BindGroupTests, ReadonlyStorage) {
     EXPECT_PIXEL_RGBA8_EQ(RGBA8::kGreen, renderPass.color, 0, 0);
 }
 
+// Test that creating a large bind group, with each binding type at the max count, works and can be
+// used correctly. The test loads a different value from each binding, and writes 1 to a storage
+// buffer if all values are correct.
+TEST_P(BindGroupTests, ReallyLargeBindGroup) {
+    // When we run dawn_end2end_tests with "--use-spvc-parser", extracting the binding type of a
+    // read-only image will always return shaderc_spvc_binding_type_writeonly_storage_texture.
+    // TODO(jiawei.shao@intel.com): enable this test when we specify "--use-spvc-parser" after the
+    // bug in spvc parser is fixed.
+    DAWN_SKIP_TEST_IF(IsD3D12() && IsSpvcParserBeingUsed());
+
+    std::string interface = "#version 450\n";
+    std::string body;
+    uint32_t binding = 0;
+    uint32_t expectedValue = 42;
+
+    wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+    auto CreateTextureWithRedData = [&](uint32_t value, wgpu::TextureUsage usage) {
+        wgpu::TextureDescriptor textureDesc = {};
+        textureDesc.usage = wgpu::TextureUsage::CopyDst | usage;
+        textureDesc.size = {1, 1, 1};
+        textureDesc.format = wgpu::TextureFormat::R32Uint;
+        wgpu::Texture texture = device.CreateTexture(&textureDesc);
+
+        wgpu::Buffer textureData =
+            utils::CreateBufferFromData(device, wgpu::BufferUsage::CopySrc, {expectedValue});
+        wgpu::BufferCopyView bufferCopyView = {};
+        bufferCopyView.buffer = textureData;
+        bufferCopyView.bytesPerRow = 256;
+
+        wgpu::TextureCopyView textureCopyView = {};
+        textureCopyView.texture = texture;
+
+        wgpu::Extent3D copySize = {1, 1, 1};
+
+        commandEncoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copySize);
+        return texture;
+    };
+
+    std::vector<wgpu::BindGroupEntry> bgEntries;
+    static_assert(kMaxSampledTexturesPerShaderStage == kMaxSamplersPerShaderStage,
+                  "Please update this test");
+    body += "result = 0;\n";
+    for (uint32_t i = 0; i < kMaxSampledTexturesPerShaderStage; ++i) {
+        wgpu::Texture texture =
+            CreateTextureWithRedData(expectedValue, wgpu::TextureUsage::Sampled);
+        bgEntries.push_back({binding, nullptr, 0, 0, nullptr, texture.CreateView()});
+
+        interface += "layout(set = 0, binding = " + std::to_string(binding++) +
+                     ") uniform utexture2D tex" + std::to_string(i) + ";\n";
+
+        wgpu::SamplerDescriptor samplerDesc = {};
+        bgEntries.push_back({binding, nullptr, 0, 0, device.CreateSampler(&samplerDesc), nullptr});
+
+        interface += "layout(set = 0, binding = " + std::to_string(binding++) +
+                     ") uniform sampler samp" + std::to_string(i) + ";\n";
+
+        body += "if (texelFetch(usampler2D(tex" + std::to_string(i) + ", samp" + std::to_string(i) +
+                "), ivec2(0, 0), 0).r != " + std::to_string(expectedValue++) + ") {\n";
+        body += "    return;\n";
+        body += "}\n";
+    }
+    for (uint32_t i = 0; i < kMaxStorageTexturesPerShaderStage; ++i) {
+        wgpu::Texture texture =
+            CreateTextureWithRedData(expectedValue, wgpu::TextureUsage::Storage);
+        bgEntries.push_back({binding, nullptr, 0, 0, nullptr, texture.CreateView()});
+
+        interface += "layout(set = 0, binding = " + std::to_string(binding++) +
+                     ", r32ui) uniform readonly uimage2D image" + std::to_string(i) + ";\n";
+
+        body += "if (imageLoad(image" + std::to_string(i) +
+                ", ivec2(0, 0)).r != " + std::to_string(expectedValue++) + ") {\n";
+        body += "    return;\n";
+        body += "}\n";
+    }
+    for (uint32_t i = 0; i < kMaxUniformBuffersPerShaderStage; ++i) {
+        wgpu::Buffer buffer = utils::CreateBufferFromData<uint32_t>(
+            device, wgpu::BufferUsage::Uniform, {expectedValue, 0, 0, 0});
+        bgEntries.push_back({binding, buffer, 0, 4 * sizeof(uint32_t), nullptr, nullptr});
+
+        interface += "layout(std140, set = 0, binding = " + std::to_string(binding++) +
+                     ") uniform UBuf" + std::to_string(i) + " {\n";
+        interface += "    uint ubuf" + std::to_string(i) + ";\n";
+        interface += "};\n";
+
+        body += "if (ubuf" + std::to_string(i) + " != " + std::to_string(expectedValue++) + ") {\n";
+        body += "    return;\n";
+        body += "}\n";
+    }
+    // Save one storage buffer for writing the result
+    for (uint32_t i = 0; i < kMaxStorageBuffersPerShaderStage - 1; ++i) {
+        wgpu::Buffer buffer = utils::CreateBufferFromData<uint32_t>(
+            device, wgpu::BufferUsage::Storage, {expectedValue});
+        bgEntries.push_back({binding, buffer, 0, sizeof(uint32_t), nullptr, nullptr});
+
+        interface += "layout(std430, set = 0, binding = " + std::to_string(binding++) +
+                     ") readonly buffer SBuf" + std::to_string(i) + " {\n";
+        interface += "    uint sbuf" + std::to_string(i) + ";\n";
+        interface += "};\n";
+
+        body += "if (sbuf" + std::to_string(i) + " != " + std::to_string(expectedValue++) + ") {\n";
+        body += "    return;\n";
+        body += "}\n";
+    }
+
+    wgpu::Buffer result = utils::CreateBufferFromData<uint32_t>(
+        device, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc, {0});
+    bgEntries.push_back({binding, result, 0, sizeof(uint32_t), nullptr, nullptr});
+
+    interface += "layout(std430, set = 0, binding = " + std::to_string(binding++) +
+                 ") writeonly buffer Result {\n";
+    interface += "    uint result;\n";
+    interface += "};\n";
+
+    body += "result = 1;\n";
+
+    std::string shader = interface + "void main() {\n" + body + "}\n";
+
+    wgpu::ComputePipelineDescriptor cpDesc;
+    cpDesc.computeStage.module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, shader.c_str());
+    cpDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline cp = device.CreateComputePipeline(&cpDesc);
+
+    wgpu::BindGroupDescriptor bgDesc = {};
+    bgDesc.layout = cp.GetBindGroupLayout(0);
+    bgDesc.entryCount = static_cast<uint32_t>(bgEntries.size());
+    bgDesc.entries = bgEntries.data();
+
+    wgpu::BindGroup bg = device.CreateBindGroup(&bgDesc);
+
+    wgpu::ComputePassEncoder pass = commandEncoder.BeginComputePass();
+    pass.SetPipeline(cp);
+    pass.SetBindGroup(0, bg);
+    pass.Dispatch(1, 1, 1);
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = commandEncoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_EQ(1, result, 0);
+}
+
 DAWN_INSTANTIATE_TEST(BindGroupTests,
                       D3D12Backend(),
                       MetalBackend(),
@@ -537,43 +537,12 @@ TEST_F(BindGroupLayoutValidationTest, BindGroupLayoutStorageBindingsInVertexShad
         device, {{0, wgpu::ShaderStage::Fragment, wgpu::BindingType::ReadonlyStorageBuffer}});
 }
 
-// Tests setting that bind group layout bindings numbers may be >= kMaxBindingsPerGroup.
-TEST_F(BindGroupLayoutValidationTest, BindGroupLayoutEntryUnbounded) {
-    // Checks that kMaxBindingsPerGroup is valid.
-    utils::MakeBindGroupLayout(device, {{kMaxBindingsPerGroup, wgpu::ShaderStage::Vertex,
-                                         wgpu::BindingType::UniformBuffer}});
-
-    // Checks that kMaxBindingsPerGroup + 1 is valid.
-    utils::MakeBindGroupLayout(device, {{kMaxBindingsPerGroup + 1, wgpu::ShaderStage::Vertex,
-                                         wgpu::BindingType::UniformBuffer}});
-}
-
-// Test that there can't be more than kMaxBindingPerGroup bindings per group
-TEST_F(BindGroupLayoutValidationTest, BindGroupLayoutMaxBindings) {
-    wgpu::BindGroupLayoutEntry entries[kMaxBindingsPerGroup + 1];
-
-    wgpu::BindingType bindingsTypes[3] = {wgpu::BindingType::UniformBuffer,
-                                          wgpu::BindingType::SampledTexture,
-                                          wgpu::BindingType::Sampler};
-    for (uint32_t i = 0; i < kMaxBindingsPerGroup + 1; i++) {
-        // Alternate between uniform/sampled tex/sampler to avoid per-stage limits.
-        // Note: This is a temporary test and will be removed once the kMaxBindingsPerGroup
-        // limit is lifted.
-        entries[i].type = bindingsTypes[i % 3];
-        entries[i].binding = i;
-        entries[i].visibility = wgpu::ShaderStage::Compute;
-    }
-
-    wgpu::BindGroupLayoutDescriptor desc;
-    desc.entries = entries;
-
-    // Control case: kMaxBindingsPerGroup bindings is allowed.
-    desc.entryCount = kMaxBindingsPerGroup;
-    device.CreateBindGroupLayout(&desc);
-
-    // Error case: kMaxBindingsPerGroup + 1 bindings is not allowed.
-    desc.entryCount = kMaxBindingsPerGroup + 1;
-    ASSERT_DEVICE_ERROR(device.CreateBindGroupLayout(&desc));
+// Tests setting that bind group layout bindings numbers may be very large.
+TEST_F(BindGroupLayoutValidationTest, BindGroupLayoutEntryNumberLarge) {
+    // Checks that uint32_t max is valid.
+    utils::MakeBindGroupLayout(device,
+                               {{std::numeric_limits<uint32_t>::max(), wgpu::ShaderStage::Vertex,
+                                 wgpu::BindingType::UniformBuffer}});
 }
 
 // This test verifies that the BindGroupLayout bindings are correctly validated, even if the