From d2aaf76eb95b6305017af32ade34976b524aaeb3 Mon Sep 17 00:00:00 2001 From: Loko Kung Date: Tue, 25 Apr 2023 22:46:23 +0000 Subject: [PATCH] Removes maxFragmentCombinedOutputResources limit and tests. Bug: dawn:1756 Change-Id: I1fe73eb7987cdffdb2653e1072c4946b5bc3ef4c Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127660 Kokoro: Kokoro Reviewed-by: Austin Eng Commit-Queue: Loko Kung --- dawn.json | 3 +- src/dawn/native/Limits.cpp | 3 +- src/dawn/native/RenderPipeline.cpp | 30 ----- src/dawn/native/d3d12/AdapterD3D12.cpp | 3 - src/dawn/native/metal/BackendMTL.mm | 4 - src/dawn/native/vulkan/AdapterVk.cpp | 35 +++++- src/dawn/tests/end2end/MaxLimitTests.cpp | 34 +----- .../RenderPipelineValidationTests.cpp | 112 ------------------ 8 files changed, 38 insertions(+), 186 deletions(-) diff --git a/dawn.json b/dawn.json index f6f8ea234e..9a2d53f6bd 100644 --- a/dawn.json +++ b/dawn.json @@ -1300,8 +1300,7 @@ {"name": "max compute workgroup size x", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}, {"name": "max compute workgroup size y", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}, {"name": "max compute workgroup size z", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}, - {"name": "max compute workgroups per dimension", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}, - {"name": "max fragment combined output resources", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"} + {"name": "max compute workgroups per dimension", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"} ] }, "required limits": { diff --git a/src/dawn/native/Limits.cpp b/src/dawn/native/Limits.cpp index bbdf464c79..3e593aeb18 100644 --- a/src/dawn/native/Limits.cpp +++ b/src/dawn/native/Limits.cpp @@ -73,8 +73,7 @@ X(Maximum, maxInterStageShaderComponents, 60, 60) \ X(Maximum, maxInterStageShaderVariables, 16, 16) \ X(Maximum, maxColorAttachments, 8, 8) \ - X(Maximum, maxColorAttachmentBytesPerSample, 32, 32) \ - X(Maximum, maxFragmentCombinedOutputResources, 8, 8) + X(Maximum, maxColorAttachmentBytesPerSample, 32, 32) // clang-format on #define LIMITS_EACH_GROUP(X) \ diff --git a/src/dawn/native/RenderPipeline.cpp b/src/dawn/native/RenderPipeline.cpp index 48f96fb16b..46723b3693 100644 --- a/src/dawn/native/RenderPipeline.cpp +++ b/src/dawn/native/RenderPipeline.cpp @@ -364,36 +364,6 @@ MaybeError ValidateFragmentState(DeviceBase* device, const EntryPointMetadata& fragmentMetadata = descriptor->module->GetEntryPoint(descriptor->entryPoint); - // Iterates through the bindings on the fragment state to count the number of storage buffer and - // storage textures. - uint32_t maxFragmentCombinedOutputResources = - device->GetLimits().v1.maxFragmentCombinedOutputResources; - uint32_t fragmentCombinedOutputResources = 0; - for (uint32_t i = 0; i < descriptor->targetCount; i++) { - if (descriptor->targets[i].format != wgpu::TextureFormat::Undefined) { - fragmentCombinedOutputResources += 1; - } - } - for (BindGroupIndex group(0); group < fragmentMetadata.bindings.size(); ++group) { - for (const auto& [_, shaderBinding] : fragmentMetadata.bindings[group]) { - switch (shaderBinding.bindingType) { - case BindingInfoType::Buffer: - if (shaderBinding.buffer.type == wgpu::BufferBindingType::Storage) { - fragmentCombinedOutputResources += 1; - } - break; - case BindingInfoType::StorageTexture: - fragmentCombinedOutputResources += 1; - break; - default: - break; - } - } - } - DAWN_INVALID_IF(fragmentCombinedOutputResources > maxFragmentCombinedOutputResources, - "Number of fragment output resources (%u) exceeds the maximum (%u).", - fragmentCombinedOutputResources, maxFragmentCombinedOutputResources); - if (fragmentMetadata.usesFragDepth) { DAWN_INVALID_IF( depthStencil == nullptr, diff --git a/src/dawn/native/d3d12/AdapterD3D12.cpp b/src/dawn/native/d3d12/AdapterD3D12.cpp index 4313f5683f..337b8c8acb 100644 --- a/src/dawn/native/d3d12/AdapterD3D12.cpp +++ b/src/dawn/native/d3d12/AdapterD3D12.cpp @@ -238,9 +238,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) { limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage; limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; - limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments + - limits->v1.maxStorageBuffersPerShaderStage + - limits->v1.maxStorageTexturesPerShaderStage; // https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits // In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2. diff --git a/src/dawn/native/metal/BackendMTL.mm b/src/dawn/native/metal/BackendMTL.mm index 25ac48e04b..241460d2ee 100644 --- a/src/dawn/native/metal/BackendMTL.mm +++ b/src/dawn/native/metal/BackendMTL.mm @@ -711,10 +711,6 @@ class Adapter : public AdapterBase { limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2); } - limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments + - limits->v1.maxStorageBuffersPerShaderStage + - limits->v1.maxStorageTexturesPerShaderStage; - limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc; // Metal limits are per-function, so the layout limits are the same as the stage diff --git a/src/dawn/native/vulkan/AdapterVk.cpp b/src/dawn/native/vulkan/AdapterVk.cpp index df4edd3745..6e71a8f1ba 100644 --- a/src/dawn/native/vulkan/AdapterVk.cpp +++ b/src/dawn/native/vulkan/AdapterVk.cpp @@ -328,8 +328,38 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) { maxUniformBuffersPerShaderStage); CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize); CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize); - CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources, - maxFragmentCombinedOutputResources); + CHECK_AND_SET_V1_MAX_LIMIT(maxColorAttachments, maxColorAttachments); + + // Validate against maxFragmentCombinedOutputResources, tightening the limits when necessary. + const uint32_t minFragmentCombinedOutputResources = + baseLimits.v1.maxStorageBuffersPerShaderStage + + baseLimits.v1.maxStorageTexturesPerShaderStage + baseLimits.v1.maxColorAttachments; + const uint64_t maxFragmentCombinedOutputResources = + limits->v1.maxStorageBuffersPerShaderStage + limits->v1.maxStorageTexturesPerShaderStage + + limits->v1.maxColorAttachments; + // Only re-adjust the limits when the limit makes sense w.r.t to the required WebGPU limits. + // Otherwise, we ignore the maxFragmentCombinedOutputResources since it is known to yield + // incorrect values on desktop drivers. + bool readjustFragmentCombinedOutputResources = + vkLimits.maxFragmentCombinedOutputResources > minFragmentCombinedOutputResources && + uint64_t(vkLimits.maxFragmentCombinedOutputResources) < maxFragmentCombinedOutputResources; + if (readjustFragmentCombinedOutputResources) { + // Split extra resources across the three other limits instead of using the default values + // since it would overflow. + uint32_t extraResources = + vkLimits.maxFragmentCombinedOutputResources - minFragmentCombinedOutputResources; + limits->v1.maxColorAttachments = std::min( + baseLimits.v1.maxColorAttachments + (extraResources / 3), vkLimits.maxColorAttachments); + extraResources -= limits->v1.maxColorAttachments - baseLimits.v1.maxColorAttachments; + limits->v1.maxStorageTexturesPerShaderStage = + std::min(baseLimits.v1.maxStorageTexturesPerShaderStage + (extraResources / 2), + vkLimits.maxPerStageDescriptorStorageImages); + extraResources -= limits->v1.maxStorageTexturesPerShaderStage - + baseLimits.v1.maxStorageTexturesPerShaderStage; + limits->v1.maxStorageBuffersPerShaderStage = + std::min(baseLimits.v1.maxStorageBuffersPerShaderStage + extraResources, + vkLimits.maxPerStageDescriptorStorageBuffers); + } CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment); CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment); @@ -366,7 +396,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) { vkLimits.maxComputeWorkGroupCount[2], }); - CHECK_AND_SET_V1_MAX_LIMIT(maxColorAttachments, maxColorAttachments); if (!IsSubset(VkSampleCountFlags(VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT), vkLimits.framebufferColorSampleCounts)) { return DAWN_INTERNAL_ERROR("Insufficient Vulkan limits for framebufferColorSampleCounts"); diff --git a/src/dawn/tests/end2end/MaxLimitTests.cpp b/src/dawn/tests/end2end/MaxLimitTests.cpp index 0632318ae1..fcffdfe908 100644 --- a/src/dawn/tests/end2end/MaxLimitTests.cpp +++ b/src/dawn/tests/end2end/MaxLimitTests.cpp @@ -541,8 +541,9 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) { EXPECT_BUFFER_U32_EQ(1, result, 0); } -// Verifies that devices can write to at least maxFragmentCombinedOutputResources of non color -// attachment resources. +// Verifies that devices can write to the limits specified for fragment outputs. This test +// exercises an internal Vulkan maxFragmentCombinedOutputResources limit and makes sure that the +// sub parts of the limit work as intended. TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) { // TODO(dawn:1692) Currently does not work on GL and GLES. DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES()); @@ -552,21 +553,9 @@ TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) { // splitting a shared remaining count between the two resources if they are not separately // defined, or exceed the combined limit. wgpu::Limits limits = GetSupportedLimits().limits; - uint32_t attachmentCount = 1; + uint32_t attachmentCount = limits.maxColorAttachments; uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage; uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage; - uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources; - if (uint64_t(storageBuffers) + uint64_t(storageTextures) >= uint64_t(maxCombinedResources)) { - storageTextures = std::min(storageTextures, (maxCombinedResources - attachmentCount) / 2); - storageBuffers = maxCombinedResources - attachmentCount - storageTextures; - } - if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) { - // Increase the number of attachments if we still have bandwidth after maximizing the number - // of buffers and textures. - attachmentCount = std::min(limits.maxColorAttachments, - maxCombinedResources - storageBuffers - storageTextures); - } - ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources); // Create a shader to write out to all the resources. auto CreateShader = [&]() -> wgpu::ShaderModule { @@ -712,21 +701,6 @@ TEST_P(MaxLimitTests, MaxBufferSizes) { GetAdapter().SetUseTieredLimits(false); } -// Verifies that supported fragment combined output resource limits meet base requirements. -TEST_P(MaxLimitTests, MaxFragmentCombinedOutputResources) { - // Base limits without tiering. - wgpu::Limits baseLimits = GetAdapterLimits().limits; - EXPECT_LE(baseLimits.maxColorAttachments, baseLimits.maxFragmentCombinedOutputResources); - - // Base limits with tiering. - GetAdapter().SetUseTieredLimits(true); - wgpu::Limits tieredLimits = GetAdapterLimits().limits; - EXPECT_LE(tieredLimits.maxColorAttachments, tieredLimits.maxFragmentCombinedOutputResources); - - // Unset tiered limit usage to avoid affecting other tests. - GetAdapter().SetUseTieredLimits(false); -} - DAWN_INSTANTIATE_TEST(MaxLimitTests, D3D12Backend(), MetalBackend(), diff --git a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp index eaf2558356..7887717315 100644 --- a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp +++ b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp @@ -1498,118 +1498,6 @@ TEST_F(RenderPipelineValidationTest, BindingsFromCorrectEntryPoint) { ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&descriptor)); } -// Test that total fragment output resource validations must be less than limit. -TEST_F(RenderPipelineValidationTest, MaxFragmentCombinedOutputResources) { - static constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::R8Unorm; - wgpu::ColorTargetState kEmptyColorTargetState = {}; - kEmptyColorTargetState.format = wgpu::TextureFormat::Undefined; - wgpu::ColorTargetState kColorTargetState = {}; - kColorTargetState.format = kFormat; - - // Creates a shader with the given number of output resources. - auto CreateShader = [&](uint32_t numBuffers, uint32_t numTextures) -> wgpu::ShaderModule { - // Header to declare storage buffer struct. - static constexpr std::string_view kHeader = "struct Buf { data : array }\n"; - std::ostringstream bufferBindings; - std::ostringstream bufferOutputs; - for (uint32_t i = 0; i < numBuffers; i++) { - bufferBindings << "@group(0) @binding(" << i << ") var b" << i - << ": Buf;\n"; - bufferOutputs << " b" << i << ".data[i] = i;\n"; - } - - std::ostringstream textureBindings; - std::ostringstream textureOutputs; - for (uint32_t i = 0; i < numTextures; i++) { - textureBindings << "@group(1) @binding(" << i << ") var t" << i - << ": texture_storage_1d;\n"; - textureOutputs << " textureStore(t" << i << ", i, vec4u(i));\n"; - } - - std::ostringstream targetBindings; - std::ostringstream targetOutputs; - for (size_t i = 0; i < kMaxColorAttachments; i++) { - targetBindings << "@location(" << i << ") o" << i << " : vec4f, "; - targetOutputs << "vec4f(1), "; - } - - std::ostringstream fsShader; - fsShader << kHeader; - fsShader << bufferBindings.str(); - fsShader << textureBindings.str(); - fsShader << "struct Outputs { " << targetBindings.str() << "}\n"; - fsShader << "@fragment fn main(@builtin(sample_index) i : u32) -> Outputs {\n"; - fsShader << bufferOutputs.str(); - fsShader << textureOutputs.str(); - fsShader << " return Outputs(" << targetOutputs.str() << ");\n"; - fsShader << "}"; - return utils::CreateShaderModule(device, fsShader.str().c_str()); - }; - - utils::ComboRenderPipelineDescriptor descriptor; - descriptor.vertex.module = utils::CreateShaderModule(device, R"( - @vertex fn main() -> @builtin(position) vec4f { - return vec4f(0.0, 0.0, 0.0, 1.0); - })"); - descriptor.vertex.entryPoint = "main"; - descriptor.cFragment.targetCount = kMaxColorAttachments; - descriptor.cFragment.entryPoint = "main"; - - // Runs test using the given parameters. - auto DoTest = [&](uint32_t numBuffers, uint32_t numTextures, - const std::vector& attachmentIndices, bool useDepthStencil, - bool shouldError) { - descriptor.cFragment.module = CreateShader(numBuffers, numTextures); - descriptor.cTargets.fill(kEmptyColorTargetState); - for (const uint32_t attachmentIndex : attachmentIndices) { - descriptor.cTargets[attachmentIndex] = kColorTargetState; - } - - if (useDepthStencil) { - descriptor.EnableDepthStencil(); - } else { - descriptor.DisableDepthStencil(); - } - - if (shouldError) { - ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&descriptor)); - } else { - device.CreateRenderPipeline(&descriptor); - } - }; - - // All following tests assume we are using the default limits for the following: - // - maxStorageBuffersPerShaderStage = 8 - // - maxStorageTexturesPerShaderStage = 4 - // - maxColorAttachments = 8 - // - maxFragmentCombinedOutputResources = 8 - // Note we use the defaults for the validation tests because otherwise it is hard to verify that - // we are hitting the maxFragmentCombinedOutputResources limit validations versus potentially - // hitting other validation errors from the other limits. - - // One of each resource with and without depth-stencil should pass. (Control) - DoTest(1, 1, {0}, false, false); - DoTest(1, 1, {0}, true, false); - - // Max number of any single resource within limits should pass. Note we turn on depth stencil in - // some of these cases because empty attachments are not allowed. - DoTest(8, 0, {}, true, false); - DoTest(4, 4, {}, true, false); - DoTest(0, 4, {0, 1, 2, 3}, false, false); - DoTest(0, 0, {0, 1, 2, 3, 4, 5, 6, 7}, false, false); - DoTest(0, 0, {0, 1, 2, 3, 4, 5, 6, 7}, true, false); - - // Max number of resources with different combinations should also pass. - DoTest(3, 2, {0, 3, 5}, false, false); - DoTest(2, 3, {2, 4, 6}, true, false); - DoTest(3, 3, {1, 7}, true, false); - - // Max number of resources + 1 should fail. - DoTest(3, 3, {0, 3, 5}, false, true); - DoTest(3, 3, {2, 4, 6}, true, true); - DoTest(3, 3, {1, 5, 7}, true, true); -} - // Tests validation for per-pixel accounting for render targets. The tests currently assume that the // default maxColorAttachmentBytesPerSample limit of 32 is used. TEST_F(RenderPipelineValidationTest, RenderPipelineColorAttachmentBytesPerSample) {