Update maxFragmentCombinedOutputResources on the different backends.

- Adds an e2e test that exercises writing to outputs when at the limit.

Bug: dawn:1665
Change-Id: I2b2b9c2d700be0e454dc945ed8e3e1fe6b191974
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122801
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
2025-12-16 08:27:05 +00:00 · 2023-03-08 22:38:40 +00:00
parent 2657b923c9
commit af4ca3891f
4 changed files with 163 additions and 52 deletions
--- a/src/dawn/native/d3d12/AdapterD3D12.cpp
+++ b/src/dawn/native/d3d12/AdapterD3D12.cpp
@@ -262,6 +262,9 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
    limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;

    limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
+    limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
+                                                    limits->v1.maxStorageBuffersPerShaderStage +
+                                                    limits->v1.maxStorageTexturesPerShaderStage;

    // https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
    // In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.
--- a/src/dawn/native/metal/BackendMTL.mm
+++ b/src/dawn/native/metal/BackendMTL.mm
@@ -706,6 +706,10 @@ class Adapter : public AdapterBase {
            limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
        }

+        limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
+                                                        limits->v1.maxStorageBuffersPerShaderStage +
+                                                        limits->v1.maxStorageTexturesPerShaderStage;
+
        limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;

        // Metal limits are per-function, so the layout limits are the same as the stage
--- a/src/dawn/native/vulkan/AdapterVk.cpp
+++ b/src/dawn/native/vulkan/AdapterVk.cpp
@@ -326,6 +326,8 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
                               maxUniformBuffersPerShaderStage);
    CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
    CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
+    CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
+                               maxFragmentCombinedOutputResources);

    CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
    CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
@@ -382,58 +384,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
        limits->v1.maxBufferSize = kAssumedMaxBufferSize;
    }

-    // Only check maxFragmentCombinedOutputResources on mobile GPUs. Desktop GPUs drivers seem
-    // to put incorrect values for this limit with things like 8 or 16 when they can do bindless
-    // storage buffers. Mesa llvmpipe driver also puts 8 here.
-    uint32_t vendorId = mDeviceInfo.properties.vendorID;
-    if (!gpu_info::IsAMD(vendorId) && !gpu_info::IsIntel(vendorId) && !gpu_info::IsMesa(vendorId) &&
-        !gpu_info::IsNvidia(vendorId)) {
-        if (vkLimits.maxFragmentCombinedOutputResources <
-            kMaxColorAttachments + baseLimits.v1.maxStorageTexturesPerShaderStage +
-                baseLimits.v1.maxStorageBuffersPerShaderStage) {
-            return DAWN_INTERNAL_ERROR(
-                "Insufficient Vulkan maxFragmentCombinedOutputResources limit");
-        }
-
-        uint32_t maxFragmentCombinedOutputResources = kMaxColorAttachments +
-                                                      limits->v1.maxStorageTexturesPerShaderStage +
-                                                      limits->v1.maxStorageBuffersPerShaderStage;
-
-        if (maxFragmentCombinedOutputResources > vkLimits.maxFragmentCombinedOutputResources) {
-            // WebGPU's maxFragmentCombinedOutputResources exceeds the Vulkan limit.
-            // Decrease |maxStorageTexturesPerShaderStage| and |maxStorageBuffersPerShaderStage|
-            // to fit within the Vulkan limit.
-            uint32_t countOverLimit =
-                maxFragmentCombinedOutputResources - vkLimits.maxFragmentCombinedOutputResources;
-
-            uint32_t maxStorageTexturesOverBase = limits->v1.maxStorageTexturesPerShaderStage -
-                                                  baseLimits.v1.maxStorageTexturesPerShaderStage;
-            uint32_t maxStorageBuffersOverBase = limits->v1.maxStorageBuffersPerShaderStage -
-                                                 baseLimits.v1.maxStorageBuffersPerShaderStage;
-
-            // Reduce the number of resources by half the overage count, but clamp to
-            // to ensure we don't go below the base limits.
-            uint32_t numFewerStorageTextures =
-                std::min(countOverLimit / 2, maxStorageTexturesOverBase);
-            uint32_t numFewerStorageBuffers =
-                std::min((countOverLimit + 1) / 2, maxStorageBuffersOverBase);
-
-            if (numFewerStorageTextures == maxStorageTexturesOverBase) {
-                // If |numFewerStorageTextures| was clamped, subtract the remaining
-                // from the storage buffers.
-                numFewerStorageBuffers = countOverLimit - numFewerStorageTextures;
-                ASSERT(numFewerStorageBuffers <= maxStorageBuffersOverBase);
-            } else if (numFewerStorageBuffers == maxStorageBuffersOverBase) {
-                // If |numFewerStorageBuffers| was clamped, subtract the remaining
-                // from the storage textures.
-                numFewerStorageTextures = countOverLimit - numFewerStorageBuffers;
-                ASSERT(numFewerStorageTextures <= maxStorageTexturesOverBase);
-            }
-            limits->v1.maxStorageTexturesPerShaderStage -= numFewerStorageTextures;
-            limits->v1.maxStorageBuffersPerShaderStage -= numFewerStorageBuffers;
-        }
-    }
-
    // Using base limits for:
    // TODO(crbug.com/dawn/1448):
    // - maxInterStageShaderVariables
--- a/src/dawn/tests/end2end/MaxLimitTests.cpp
+++ b/src/dawn/tests/end2end/MaxLimitTests.cpp
@@ -541,6 +541,160 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) {
    EXPECT_BUFFER_U32_EQ(1, result, 0);
 }

+// Verifies that a fragment shader can write to all of its outputs when using as many color
+// attachments, storage buffers, and storage textures as maxFragmentCombinedOutputResources allows.
+TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
+    // TODO(dawn:1692) Currently does not work on GL and GLES.
+    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    // Compute the number of each resource type (storage buffers and storage textures) such that
+    // there is at least one color attachment and as many buffers/textures as possible. If the
+    // per-stage limits together reach the combined limit, split the remaining budget evenly and
+    // give whatever one type cannot use to the other, so neither per-stage limit is exceeded.
+    wgpu::Limits limits = GetSupportedLimits().limits;
+    uint32_t attachmentCount = 1;
+    uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
+    uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
+    uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;
+    ASSERT_GE(maxCombinedResources, attachmentCount);
+    if (uint64_t(storageBuffers) + uint64_t(storageTextures) >= uint64_t(maxCombinedResources)) {
+        const uint32_t budget = maxCombinedResources - attachmentCount;
+        const uint32_t textureShare = std::min(storageTextures, budget / 2);
+        storageBuffers = std::min(storageBuffers, budget - textureShare);
+        storageTextures = std::min(storageTextures, budget - storageBuffers);
+    }
+    if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
+        // Spend any budget left after maximizing buffers and textures on more attachments.
+        attachmentCount = std::min(limits.maxColorAttachments,
+                                   maxCombinedResources - storageBuffers - storageTextures);
+    }
+    ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);
+
+    // Create a shader to write out to all the resources.
+    auto CreateShader = [&]() -> wgpu::ShaderModule {
+        // Storage buffer |i| is bound at @group(0) @binding(i) and is written with i + 1.
+        std::ostringstream bufferBindings;
+        std::ostringstream bufferOutputs;
+        for (uint32_t i = 0; i < storageBuffers; i++) {
+            bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
+                           << ": u32;\n";
+            bufferOutputs << "    b" << i << " = " << i << "u + 1u;\n";
+        }
+
+        std::ostringstream textureBindings;
+        std::ostringstream textureOutputs;
+        for (uint32_t i = 0; i < storageTextures; i++) {
+            textureBindings << "@group(1) @binding(" << i << ") var t" << i
+                            << ": texture_storage_2d<rgba8uint, write>;\n";
+            textureOutputs << "    textureStore(t" << i << ", vec2u(0, 0), vec4u(" << i
+                           << "u + 1u));\n";
+        }
+
+        std::ostringstream targetBindings;
+        std::ostringstream targetOutputs;
+        for (size_t i = 0; i < attachmentCount; i++) {
+            targetBindings << "@location(" << i << ") o" << i << " : u32, ";
+            targetOutputs << i << "u + 1u, ";
+        }
+
+        std::ostringstream fsShader;
+        fsShader << bufferBindings.str();
+        fsShader << textureBindings.str();
+        fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
+        fsShader << "@fragment fn main() -> Outputs {\n";
+        fsShader << bufferOutputs.str();
+        fsShader << textureOutputs.str();
+        fsShader << "    return Outputs(" << targetOutputs.str() << ");\n";
+        fsShader << "}";
+        return utils::CreateShaderModule(device, fsShader.str().c_str());
+    };
+
+    // Create the render pipeline; every color target uses the same R8Uint state.
+    wgpu::ColorTargetState kColorTargetState = {};
+    kColorTargetState.format = wgpu::TextureFormat::R8Uint;
+    utils::ComboRenderPipelineDescriptor pipelineDesc;
+    pipelineDesc.vertex.module = utils::CreateShaderModule(device, R"(
+        @vertex fn main() -> @builtin(position) vec4f {
+            return vec4f(0.0, 0.0, 0.0, 1.0);
+        })");
+    pipelineDesc.vertex.entryPoint = "main";
+    pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+    pipelineDesc.cFragment.module = CreateShader();
+    pipelineDesc.cFragment.entryPoint = "main";
+    pipelineDesc.cTargets.fill(kColorTargetState);
+    pipelineDesc.cFragment.targetCount = attachmentCount;
+    wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&pipelineDesc);
+
+    // Create all the resources and bindings for them.
+    std::vector<wgpu::Buffer> buffers;
+    std::vector<wgpu::BindGroupEntry> bufferEntries;
+    wgpu::BufferDescriptor bufferDesc = {};
+    bufferDesc.size = 4;
+    bufferDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
+    for (uint32_t i = 0; i < storageBuffers; i++) {
+        buffers.push_back(device.CreateBuffer(&bufferDesc));
+        bufferEntries.push_back(utils::BindingInitializationHelper(i, buffers[i]).GetAsBinding());
+    }
+    wgpu::BindGroupDescriptor bufferBindGroupDesc = {};
+    bufferBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(0);
+    bufferBindGroupDesc.entryCount = storageBuffers;
+    bufferBindGroupDesc.entries = bufferEntries.data();
+    wgpu::BindGroup bufferBindGroup = device.CreateBindGroup(&bufferBindGroupDesc);
+
+    std::vector<wgpu::Texture> textures;
+    std::vector<wgpu::BindGroupEntry> textureEntries;
+    wgpu::TextureDescriptor textureDesc = {};
+    textureDesc.size.width = 1;
+    textureDesc.size.height = 1;
+    textureDesc.format = wgpu::TextureFormat::RGBA8Uint;
+    textureDesc.usage = wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::CopySrc;
+    for (uint32_t i = 0; i < storageTextures; i++) {
+        textures.push_back(device.CreateTexture(&textureDesc));
+        textureEntries.push_back(
+            utils::BindingInitializationHelper(i, textures[i].CreateView()).GetAsBinding());
+    }
+    wgpu::BindGroupDescriptor textureBindGroupDesc = {};
+    textureBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(1);
+    textureBindGroupDesc.entryCount = storageTextures;
+    textureBindGroupDesc.entries = textureEntries.data();
+    wgpu::BindGroup textureBindGroup = device.CreateBindGroup(&textureBindGroupDesc);
+
+    std::vector<wgpu::Texture> attachments;
+    std::vector<wgpu::TextureView> attachmentViews;
+    wgpu::TextureDescriptor attachmentDesc = {};
+    attachmentDesc.size = {1, 1};
+    attachmentDesc.format = wgpu::TextureFormat::R8Uint;
+    attachmentDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
+    for (size_t i = 0; i < attachmentCount; i++) {
+        attachments.push_back(device.CreateTexture(&attachmentDesc));
+        attachmentViews.push_back(attachments[i].CreateView());
+    }
+
+    // Execute the pipeline.
+    utils::ComboRenderPassDescriptor passDesc(attachmentViews);
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
+    pass.SetBindGroup(0, bufferBindGroup);
+    pass.SetBindGroup(1, textureBindGroup);
+    pass.SetPipeline(renderPipeline);
+    pass.Draw(1);
+    pass.End();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    // Verify the results: resource |i| of each kind must hold the value i + 1.
+    for (uint32_t i = 0; i < storageBuffers; i++) {
+        EXPECT_BUFFER_U32_EQ(i + 1, buffers[i], 0);
+    }
+    for (uint32_t i = 0; i < storageTextures; i++) {
+        const uint32_t res = i + 1;
+        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(res, res, res, res), textures[i], 0, 0);
+    }
+    for (uint32_t i = 0; i < attachmentCount; i++) {
+        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(i + 1, 0, 0, 0), attachments[i], 0, 0);
+    }
+}
+
 // Verifies that supported buffer limits do not exceed maxBufferSize.
 TEST_P(MaxLimitTests, MaxBufferSizes) {
    // Base limits without tiering.