Update maxFragmentCombinedOutputResources on the different backends.

- Adds an e2e test to test writing to outputs when at the limit. Bug: dawn:1665 Change-Id: I2b2b9c2d700be0e454dc945ed8e3e1fe6b191974 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122801 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Loko Kung <lokokung@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
2025-12-16 08:27:05 +00:00 · 2023-03-08 22:38:40 +00:00
parent 2657b923c9
commit af4ca3891f
4 changed files with 163 additions and 52 deletions
--- a/src/dawn/native/d3d12/AdapterD3D12.cpp
+++ b/src/dawn/native/d3d12/AdapterD3D12.cpp
@@ -262,6 +262,9 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
    limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
    limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
    limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
                                                    limits->v1.maxStorageBuffersPerShaderStage +
                                                    limits->v1.maxStorageTexturesPerShaderStage;
    // https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
    // In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.
--- a/src/dawn/native/metal/BackendMTL.mm
+++ b/src/dawn/native/metal/BackendMTL.mm
@@ -706,6 +706,10 @@ class Adapter : public AdapterBase {
            limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
        }
        limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
                                                        limits->v1.maxStorageBuffersPerShaderStage +
                                                        limits->v1.maxStorageTexturesPerShaderStage;
        limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
        // Metal limits are per-function, so the layout limits are the same as the stage
--- a/src/dawn/native/vulkan/AdapterVk.cpp
+++ b/src/dawn/native/vulkan/AdapterVk.cpp
@@ -326,6 +326,8 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
                               maxUniformBuffersPerShaderStage);
    CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
    CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
    CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
                               maxFragmentCombinedOutputResources);
    CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
    CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
@@ -382,58 +384,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
        limits->v1.maxBufferSize = kAssumedMaxBufferSize;
    }
    // Only check maxFragmentCombinedOutputResources on mobile GPUs. Desktop GPUs drivers seem
    // to put incorrect values for this limit with things like 8 or 16 when they can do bindless
    // storage buffers. Mesa llvmpipe driver also puts 8 here.
    uint32_t vendorId = mDeviceInfo.properties.vendorID;
    if (!gpu_info::IsAMD(vendorId) && !gpu_info::IsIntel(vendorId) && !gpu_info::IsMesa(vendorId) &&
        !gpu_info::IsNvidia(vendorId)) {
        if (vkLimits.maxFragmentCombinedOutputResources <
            kMaxColorAttachments + baseLimits.v1.maxStorageTexturesPerShaderStage +
                baseLimits.v1.maxStorageBuffersPerShaderStage) {
            return DAWN_INTERNAL_ERROR(
                "Insufficient Vulkan maxFragmentCombinedOutputResources limit");
        }
        uint32_t maxFragmentCombinedOutputResources = kMaxColorAttachments +
                                                      limits->v1.maxStorageTexturesPerShaderStage +
                                                      limits->v1.maxStorageBuffersPerShaderStage;
        if (maxFragmentCombinedOutputResources > vkLimits.maxFragmentCombinedOutputResources) {
            // WebGPU's maxFragmentCombinedOutputResources exceeds the Vulkan limit.
            // Decrease |maxStorageTexturesPerShaderStage| and |maxStorageBuffersPerShaderStage|
            // to fit within the Vulkan limit.
            uint32_t countOverLimit =
                maxFragmentCombinedOutputResources - vkLimits.maxFragmentCombinedOutputResources;
            uint32_t maxStorageTexturesOverBase = limits->v1.maxStorageTexturesPerShaderStage -
                                                  baseLimits.v1.maxStorageTexturesPerShaderStage;
            uint32_t maxStorageBuffersOverBase = limits->v1.maxStorageBuffersPerShaderStage -
                                                 baseLimits.v1.maxStorageBuffersPerShaderStage;
            // Reduce the number of resources by half the overage count, but clamp to
            // to ensure we don't go below the base limits.
            uint32_t numFewerStorageTextures =
                std::min(countOverLimit / 2, maxStorageTexturesOverBase);
            uint32_t numFewerStorageBuffers =
                std::min((countOverLimit + 1) / 2, maxStorageBuffersOverBase);
            if (numFewerStorageTextures == maxStorageTexturesOverBase) {
                // If |numFewerStorageTextures| was clamped, subtract the remaining
                // from the storage buffers.
                numFewerStorageBuffers = countOverLimit - numFewerStorageTextures;
                ASSERT(numFewerStorageBuffers <= maxStorageBuffersOverBase);
            } else if (numFewerStorageBuffers == maxStorageBuffersOverBase) {
                // If |numFewerStorageBuffers| was clamped, subtract the remaining
                // from the storage textures.
                numFewerStorageTextures = countOverLimit - numFewerStorageBuffers;
                ASSERT(numFewerStorageTextures <= maxStorageTexturesOverBase);
            }
            limits->v1.maxStorageTexturesPerShaderStage -= numFewerStorageTextures;
            limits->v1.maxStorageBuffersPerShaderStage -= numFewerStorageBuffers;
        }
    }
    // Using base limits for:
    // TODO(crbug.com/dawn/1448):
    // - maxInterStageShaderVariables
--- a/src/dawn/tests/end2end/MaxLimitTests.cpp
+++ b/src/dawn/tests/end2end/MaxLimitTests.cpp
@@ -541,6 +541,160 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) {
    EXPECT_BUFFER_U32_EQ(1, result, 0);
 }
 // Verifies that devices can write to at least maxFragmentCombinedOutputResources of non color
 // attachment resources.
 TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
    // TODO(dawn:1692) Currently does not work on GL and GLES.
    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
    // Compute the number of each resource type (storage buffers and storage textures) such that
    // there is at least one color attachment, and as many of the buffer/textures as possible,
    // splitting a shared remaining count between the two resources if they are not separately
    // defined, or exceed the combined limit.
    wgpu::Limits limits = GetSupportedLimits().limits;
    uint32_t attachmentCount = 1;
    uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
    uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
    uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;
    if (uint64_t(storageBuffers) + uint64_t(storageTextures) >= uint64_t(maxCombinedResources)) {
        storageTextures = std::min(storageTextures, (maxCombinedResources - attachmentCount) / 2);
        storageBuffers = maxCombinedResources - attachmentCount - storageTextures;
    }
    if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
        // Increase the number of attachments if we still have bandwidth after maximizing the number
        // of buffers and textures.
        attachmentCount = std::min(limits.maxColorAttachments,
                                   maxCombinedResources - storageBuffers - storageTextures);
    }
    ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);
    // Create a shader to write out to all the resources.
    auto CreateShader = [&]() -> wgpu::ShaderModule {
        // Header to declare storage buffer struct.
        std::ostringstream bufferBindings;
        std::ostringstream bufferOutputs;
        for (uint32_t i = 0; i < storageBuffers; i++) {
            bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
                           << ": u32;\n";
            bufferOutputs << "    b" << i << " = " << i << "u + 1u;\n";
        }
        std::ostringstream textureBindings;
        std::ostringstream textureOutputs;
        for (uint32_t i = 0; i < storageTextures; i++) {
            textureBindings << "@group(1) @binding(" << i << ") var t" << i
                            << ": texture_storage_2d<rgba8uint, write>;\n";
            textureOutputs << "    textureStore(t" << i << ", vec2u(0, 0), vec4u(" << i
                           << "u + 1u));\n";
        }
        std::ostringstream targetBindings;
        std::ostringstream targetOutputs;
        for (size_t i = 0; i < attachmentCount; i++) {
            targetBindings << "@location(" << i << ") o" << i << " : u32, ";
            targetOutputs << i << "u + 1u, ";
        }
        std::ostringstream fsShader;
        fsShader << bufferBindings.str();
        fsShader << textureBindings.str();
        fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
        fsShader << "@fragment fn main() -> Outputs {\n";
        fsShader << bufferOutputs.str();
        fsShader << textureOutputs.str();
        fsShader << "    return Outputs(" << targetOutputs.str() << ");\n";
        fsShader << "}";
        return utils::CreateShaderModule(device, fsShader.str().c_str());
    };
    // Constants used for the render pipeline.
    wgpu::ColorTargetState kColorTargetState = {};
    kColorTargetState.format = wgpu::TextureFormat::R8Uint;
    // Create the render pipeline.
    utils::ComboRenderPipelineDescriptor pipelineDesc;
    pipelineDesc.vertex.module = utils::CreateShaderModule(device, R"(
        @vertex fn main() -> @builtin(position) vec4f {
            return vec4f(0.0, 0.0, 0.0, 1.0);
        })");
    pipelineDesc.vertex.entryPoint = "main";
    pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
    pipelineDesc.cFragment.module = CreateShader();
    pipelineDesc.cFragment.entryPoint = "main";
    pipelineDesc.cTargets.fill(kColorTargetState);
    pipelineDesc.cFragment.targetCount = attachmentCount;
    wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&pipelineDesc);
    // Create all the resources and bindings for them.
    std::vector<wgpu::Buffer> buffers;
    std::vector<wgpu::BindGroupEntry> bufferEntries;
    wgpu::BufferDescriptor bufferDesc = {};
    bufferDesc.size = 4;
    bufferDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
    for (uint32_t i = 0; i < storageBuffers; i++) {
        buffers.push_back(device.CreateBuffer(&bufferDesc));
        bufferEntries.push_back(utils::BindingInitializationHelper(i, buffers[i]).GetAsBinding());
    }
    wgpu::BindGroupDescriptor bufferBindGroupDesc = {};
    bufferBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(0);
    bufferBindGroupDesc.entryCount = storageBuffers;
    bufferBindGroupDesc.entries = bufferEntries.data();
    wgpu::BindGroup bufferBindGroup = device.CreateBindGroup(&bufferBindGroupDesc);
    std::vector<wgpu::Texture> textures;
    std::vector<wgpu::BindGroupEntry> textureEntries;
    wgpu::TextureDescriptor textureDesc = {};
    textureDesc.size.width = 1;
    textureDesc.size.height = 1;
    textureDesc.format = wgpu::TextureFormat::RGBA8Uint;
    textureDesc.usage = wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::CopySrc;
    for (uint32_t i = 0; i < storageTextures; i++) {
        textures.push_back(device.CreateTexture(&textureDesc));
        textureEntries.push_back(
            utils::BindingInitializationHelper(i, textures[i].CreateView()).GetAsBinding());
    }
    wgpu::BindGroupDescriptor textureBindGroupDesc = {};
    textureBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(1);
    textureBindGroupDesc.entryCount = storageTextures;
    textureBindGroupDesc.entries = textureEntries.data();
    wgpu::BindGroup textureBindGroup = device.CreateBindGroup(&textureBindGroupDesc);
    std::vector<wgpu::Texture> attachments;
    std::vector<wgpu::TextureView> attachmentViews;
    wgpu::TextureDescriptor attachmentDesc = {};
    attachmentDesc.size = {1, 1};
    attachmentDesc.format = wgpu::TextureFormat::R8Uint;
    attachmentDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
    for (size_t i = 0; i < attachmentCount; i++) {
        attachments.push_back(device.CreateTexture(&attachmentDesc));
        attachmentViews.push_back(attachments[i].CreateView());
    }
    // Execute the pipeline.
    utils::ComboRenderPassDescriptor passDesc(attachmentViews);
    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
    pass.SetBindGroup(0, bufferBindGroup);
    pass.SetBindGroup(1, textureBindGroup);
    pass.SetPipeline(renderPipeline);
    pass.Draw(1);
    pass.End();
    wgpu::CommandBuffer commands = encoder.Finish();
    queue.Submit(1, &commands);
    // Verify the results.
    for (uint32_t i = 0; i < storageBuffers; i++) {
        EXPECT_BUFFER_U32_EQ(i + 1, buffers[i], 0);
    }
    for (uint32_t i = 0; i < storageTextures; i++) {
        const uint32_t res = i + 1;
        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(res, res, res, res), textures[i], 0, 0);
    }
    for (uint32_t i = 0; i < attachmentCount; i++) {
        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(i + 1, 0, 0, 0), attachments[i], 0, 0);
    }
 }
 // Verifies that supported buffer limits do not exceed maxBufferSize.
 TEST_P(MaxLimitTests, MaxBufferSizes) {
    // Base limits without tiering.