Removes maxFragmentCombinedOutputResources limit and tests.

Bug: dawn:1756
Change-Id: I1fe73eb7987cdffdb2653e1072c4946b5bc3ef4c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127660
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
This commit is contained in:
Loko Kung 2023-04-25 22:46:23 +00:00 committed by Dawn LUCI CQ
parent 339719967e
commit d2aaf76eb9
8 changed files with 38 additions and 186 deletions

View File

@ -1300,8 +1300,7 @@
{"name": "max compute workgroup size x", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"},
{"name": "max compute workgroup size y", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"},
{"name": "max compute workgroup size z", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"},
{"name": "max compute workgroups per dimension", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"},
{"name": "max fragment combined output resources", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}
{"name": "max compute workgroups per dimension", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}
]
},
"required limits": {

View File

@ -73,8 +73,7 @@
X(Maximum, maxInterStageShaderComponents, 60, 60) \
X(Maximum, maxInterStageShaderVariables, 16, 16) \
X(Maximum, maxColorAttachments, 8, 8) \
X(Maximum, maxColorAttachmentBytesPerSample, 32, 32) \
X(Maximum, maxFragmentCombinedOutputResources, 8, 8)
X(Maximum, maxColorAttachmentBytesPerSample, 32, 32)
// clang-format on
#define LIMITS_EACH_GROUP(X) \

View File

@ -364,36 +364,6 @@ MaybeError ValidateFragmentState(DeviceBase* device,
const EntryPointMetadata& fragmentMetadata =
descriptor->module->GetEntryPoint(descriptor->entryPoint);
// Iterates through the bindings on the fragment state to count the number of storage buffer and
// storage textures.
uint32_t maxFragmentCombinedOutputResources =
device->GetLimits().v1.maxFragmentCombinedOutputResources;
uint32_t fragmentCombinedOutputResources = 0;
for (uint32_t i = 0; i < descriptor->targetCount; i++) {
if (descriptor->targets[i].format != wgpu::TextureFormat::Undefined) {
fragmentCombinedOutputResources += 1;
}
}
for (BindGroupIndex group(0); group < fragmentMetadata.bindings.size(); ++group) {
for (const auto& [_, shaderBinding] : fragmentMetadata.bindings[group]) {
switch (shaderBinding.bindingType) {
case BindingInfoType::Buffer:
if (shaderBinding.buffer.type == wgpu::BufferBindingType::Storage) {
fragmentCombinedOutputResources += 1;
}
break;
case BindingInfoType::StorageTexture:
fragmentCombinedOutputResources += 1;
break;
default:
break;
}
}
}
DAWN_INVALID_IF(fragmentCombinedOutputResources > maxFragmentCombinedOutputResources,
"Number of fragment output resources (%u) exceeds the maximum (%u).",
fragmentCombinedOutputResources, maxFragmentCombinedOutputResources);
if (fragmentMetadata.usesFragDepth) {
DAWN_INVALID_IF(
depthStencil == nullptr,

View File

@ -238,9 +238,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
limits->v1.maxStorageBuffersPerShaderStage +
limits->v1.maxStorageTexturesPerShaderStage;
// https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
// In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.

View File

@ -711,10 +711,6 @@ class Adapter : public AdapterBase {
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
}
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
limits->v1.maxStorageBuffersPerShaderStage +
limits->v1.maxStorageTexturesPerShaderStage;
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
// Metal limits are per-function, so the layout limits are the same as the stage

View File

@ -328,8 +328,38 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
maxUniformBuffersPerShaderStage);
CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
maxFragmentCombinedOutputResources);
CHECK_AND_SET_V1_MAX_LIMIT(maxColorAttachments, maxColorAttachments);
// Validate against maxFragmentCombinedOutputResources, tightening the limits when necessary.
const uint32_t minFragmentCombinedOutputResources =
baseLimits.v1.maxStorageBuffersPerShaderStage +
baseLimits.v1.maxStorageTexturesPerShaderStage + baseLimits.v1.maxColorAttachments;
const uint64_t maxFragmentCombinedOutputResources =
limits->v1.maxStorageBuffersPerShaderStage + limits->v1.maxStorageTexturesPerShaderStage +
limits->v1.maxColorAttachments;
// Only re-adjust the limits when the limit makes sense w.r.t to the required WebGPU limits.
// Otherwise, we ignore the maxFragmentCombinedOutputResources since it is known to yield
// incorrect values on desktop drivers.
bool readjustFragmentCombinedOutputResources =
vkLimits.maxFragmentCombinedOutputResources > minFragmentCombinedOutputResources &&
uint64_t(vkLimits.maxFragmentCombinedOutputResources) < maxFragmentCombinedOutputResources;
if (readjustFragmentCombinedOutputResources) {
// Split extra resources across the three other limits instead of using the default values
// since it would overflow.
uint32_t extraResources =
vkLimits.maxFragmentCombinedOutputResources - minFragmentCombinedOutputResources;
limits->v1.maxColorAttachments = std::min(
baseLimits.v1.maxColorAttachments + (extraResources / 3), vkLimits.maxColorAttachments);
extraResources -= limits->v1.maxColorAttachments - baseLimits.v1.maxColorAttachments;
limits->v1.maxStorageTexturesPerShaderStage =
std::min(baseLimits.v1.maxStorageTexturesPerShaderStage + (extraResources / 2),
vkLimits.maxPerStageDescriptorStorageImages);
extraResources -= limits->v1.maxStorageTexturesPerShaderStage -
baseLimits.v1.maxStorageTexturesPerShaderStage;
limits->v1.maxStorageBuffersPerShaderStage =
std::min(baseLimits.v1.maxStorageBuffersPerShaderStage + extraResources,
vkLimits.maxPerStageDescriptorStorageBuffers);
}
CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
@ -366,7 +396,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
vkLimits.maxComputeWorkGroupCount[2],
});
CHECK_AND_SET_V1_MAX_LIMIT(maxColorAttachments, maxColorAttachments);
if (!IsSubset(VkSampleCountFlags(VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT),
vkLimits.framebufferColorSampleCounts)) {
return DAWN_INTERNAL_ERROR("Insufficient Vulkan limits for framebufferColorSampleCounts");

View File

@ -541,8 +541,9 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) {
EXPECT_BUFFER_U32_EQ(1, result, 0);
}
// Verifies that devices can write to at least maxFragmentCombinedOutputResources of non color
// attachment resources.
// Verifies that devices can write to the limits specified for fragment outputs. This test
// exercises an internal Vulkan maxFragmentCombinedOutputResources limit and makes sure that the
// sub parts of the limit work as intended.
TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
// TODO(dawn:1692) Currently does not work on GL and GLES.
DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
@ -552,21 +553,9 @@ TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
// splitting a shared remaining count between the two resources if they are not separately
// defined, or exceed the combined limit.
wgpu::Limits limits = GetSupportedLimits().limits;
uint32_t attachmentCount = 1;
uint32_t attachmentCount = limits.maxColorAttachments;
uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;
if (uint64_t(storageBuffers) + uint64_t(storageTextures) >= uint64_t(maxCombinedResources)) {
storageTextures = std::min(storageTextures, (maxCombinedResources - attachmentCount) / 2);
storageBuffers = maxCombinedResources - attachmentCount - storageTextures;
}
if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
// Increase the number of attachments if we still have bandwidth after maximizing the number
// of buffers and textures.
attachmentCount = std::min(limits.maxColorAttachments,
maxCombinedResources - storageBuffers - storageTextures);
}
ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);
// Create a shader to write out to all the resources.
auto CreateShader = [&]() -> wgpu::ShaderModule {
@ -712,21 +701,6 @@ TEST_P(MaxLimitTests, MaxBufferSizes) {
GetAdapter().SetUseTieredLimits(false);
}
// Verifies that supported fragment combined output resource limits meet base requirements.
TEST_P(MaxLimitTests, MaxFragmentCombinedOutputResources) {
// Base limits without tiering.
wgpu::Limits baseLimits = GetAdapterLimits().limits;
EXPECT_LE(baseLimits.maxColorAttachments, baseLimits.maxFragmentCombinedOutputResources);
// Base limits with tiering.
GetAdapter().SetUseTieredLimits(true);
wgpu::Limits tieredLimits = GetAdapterLimits().limits;
EXPECT_LE(tieredLimits.maxColorAttachments, tieredLimits.maxFragmentCombinedOutputResources);
// Unset tiered limit usage to avoid affecting other tests.
GetAdapter().SetUseTieredLimits(false);
}
DAWN_INSTANTIATE_TEST(MaxLimitTests,
D3D12Backend(),
MetalBackend(),

View File

@ -1498,118 +1498,6 @@ TEST_F(RenderPipelineValidationTest, BindingsFromCorrectEntryPoint) {
ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&descriptor));
}
// Test that total fragment output resource validations must be less than limit.
TEST_F(RenderPipelineValidationTest, MaxFragmentCombinedOutputResources) {
static constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::R8Unorm;
wgpu::ColorTargetState kEmptyColorTargetState = {};
kEmptyColorTargetState.format = wgpu::TextureFormat::Undefined;
wgpu::ColorTargetState kColorTargetState = {};
kColorTargetState.format = kFormat;
// Creates a shader with the given number of output resources.
auto CreateShader = [&](uint32_t numBuffers, uint32_t numTextures) -> wgpu::ShaderModule {
// Header to declare storage buffer struct.
static constexpr std::string_view kHeader = "struct Buf { data : array<u32> }\n";
std::ostringstream bufferBindings;
std::ostringstream bufferOutputs;
for (uint32_t i = 0; i < numBuffers; i++) {
bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
<< ": Buf;\n";
bufferOutputs << " b" << i << ".data[i] = i;\n";
}
std::ostringstream textureBindings;
std::ostringstream textureOutputs;
for (uint32_t i = 0; i < numTextures; i++) {
textureBindings << "@group(1) @binding(" << i << ") var t" << i
<< ": texture_storage_1d<rgba8uint, write>;\n";
textureOutputs << " textureStore(t" << i << ", i, vec4u(i));\n";
}
std::ostringstream targetBindings;
std::ostringstream targetOutputs;
for (size_t i = 0; i < kMaxColorAttachments; i++) {
targetBindings << "@location(" << i << ") o" << i << " : vec4f, ";
targetOutputs << "vec4f(1), ";
}
std::ostringstream fsShader;
fsShader << kHeader;
fsShader << bufferBindings.str();
fsShader << textureBindings.str();
fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
fsShader << "@fragment fn main(@builtin(sample_index) i : u32) -> Outputs {\n";
fsShader << bufferOutputs.str();
fsShader << textureOutputs.str();
fsShader << " return Outputs(" << targetOutputs.str() << ");\n";
fsShader << "}";
return utils::CreateShaderModule(device, fsShader.str().c_str());
};
utils::ComboRenderPipelineDescriptor descriptor;
descriptor.vertex.module = utils::CreateShaderModule(device, R"(
@vertex fn main() -> @builtin(position) vec4f {
return vec4f(0.0, 0.0, 0.0, 1.0);
})");
descriptor.vertex.entryPoint = "main";
descriptor.cFragment.targetCount = kMaxColorAttachments;
descriptor.cFragment.entryPoint = "main";
// Runs test using the given parameters.
auto DoTest = [&](uint32_t numBuffers, uint32_t numTextures,
const std::vector<uint32_t>& attachmentIndices, bool useDepthStencil,
bool shouldError) {
descriptor.cFragment.module = CreateShader(numBuffers, numTextures);
descriptor.cTargets.fill(kEmptyColorTargetState);
for (const uint32_t attachmentIndex : attachmentIndices) {
descriptor.cTargets[attachmentIndex] = kColorTargetState;
}
if (useDepthStencil) {
descriptor.EnableDepthStencil();
} else {
descriptor.DisableDepthStencil();
}
if (shouldError) {
ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&descriptor));
} else {
device.CreateRenderPipeline(&descriptor);
}
};
// All following tests assume we are using the default limits for the following:
// - maxStorageBuffersPerShaderStage = 8
// - maxStorageTexturesPerShaderStage = 4
// - maxColorAttachments = 8
// - maxFragmentCombinedOutputResources = 8
// Note we use the defaults for the validation tests because otherwise it is hard to verify that
// we are hitting the maxFragmentCombinedOutputResources limit validations versus potentially
// hitting other validation errors from the other limits.
// One of each resource with and without depth-stencil should pass. (Control)
DoTest(1, 1, {0}, false, false);
DoTest(1, 1, {0}, true, false);
// Max number of any single resource within limits should pass. Note we turn on depth stencil in
// some of these cases because empty attachments are not allowed.
DoTest(8, 0, {}, true, false);
DoTest(4, 4, {}, true, false);
DoTest(0, 4, {0, 1, 2, 3}, false, false);
DoTest(0, 0, {0, 1, 2, 3, 4, 5, 6, 7}, false, false);
DoTest(0, 0, {0, 1, 2, 3, 4, 5, 6, 7}, true, false);
// Max number of resources with different combinations should also pass.
DoTest(3, 2, {0, 3, 5}, false, false);
DoTest(2, 3, {2, 4, 6}, true, false);
DoTest(3, 3, {1, 7}, true, false);
// Max number of resources + 1 should fail.
DoTest(3, 3, {0, 3, 5}, false, true);
DoTest(3, 3, {2, 4, 6}, true, true);
DoTest(3, 3, {1, 5, 7}, true, true);
}
// Tests validation for per-pixel accounting for render targets. The tests currently assume that the
// default maxColorAttachmentBytesPerSample limit of 32 is used.
TEST_F(RenderPipelineValidationTest, RenderPipelineColorAttachmentBytesPerSample) {