Update maxFragmentCombinedOutputResources on the different backends.

- Adds an e2e test to test writing to outputs when at the limit.

Bug: dawn:1665
Change-Id: I2b2b9c2d700be0e454dc945ed8e3e1fe6b191974
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122801
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
Loko Kung 2023-03-08 22:38:40 +00:00 committed by Dawn LUCI CQ
parent 2657b923c9
commit af4ca3891f
4 changed files with 163 additions and 52 deletions

View File

@ -262,6 +262,9 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage; limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
limits->v1.maxStorageBuffersPerShaderStage +
limits->v1.maxStorageTexturesPerShaderStage;
// https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits // https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
// In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2. // In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.

View File

@ -706,6 +706,10 @@ class Adapter : public AdapterBase {
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2); limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
} }
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
limits->v1.maxStorageBuffersPerShaderStage +
limits->v1.maxStorageTexturesPerShaderStage;
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc; limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
// Metal limits are per-function, so the layout limits are the same as the stage // Metal limits are per-function, so the layout limits are the same as the stage

View File

@ -326,6 +326,8 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
maxUniformBuffersPerShaderStage); maxUniformBuffersPerShaderStage);
CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize); CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize); CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
maxFragmentCombinedOutputResources);
CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment); CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment); CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
@ -382,58 +384,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
limits->v1.maxBufferSize = kAssumedMaxBufferSize; limits->v1.maxBufferSize = kAssumedMaxBufferSize;
} }
// Only check maxFragmentCombinedOutputResources on mobile GPUs. Desktop GPU drivers seem
// to report incorrect values for this limit (e.g. 8 or 16) even though they can do bindless
// storage buffers. The Mesa llvmpipe driver also reports 8 here.
uint32_t vendorId = mDeviceInfo.properties.vendorID;
if (!gpu_info::IsAMD(vendorId) && !gpu_info::IsIntel(vendorId) && !gpu_info::IsMesa(vendorId) &&
!gpu_info::IsNvidia(vendorId)) {
if (vkLimits.maxFragmentCombinedOutputResources <
kMaxColorAttachments + baseLimits.v1.maxStorageTexturesPerShaderStage +
baseLimits.v1.maxStorageBuffersPerShaderStage) {
return DAWN_INTERNAL_ERROR(
"Insufficient Vulkan maxFragmentCombinedOutputResources limit");
}
uint32_t maxFragmentCombinedOutputResources = kMaxColorAttachments +
limits->v1.maxStorageTexturesPerShaderStage +
limits->v1.maxStorageBuffersPerShaderStage;
if (maxFragmentCombinedOutputResources > vkLimits.maxFragmentCombinedOutputResources) {
// WebGPU's maxFragmentCombinedOutputResources exceeds the Vulkan limit.
// Decrease |maxStorageTexturesPerShaderStage| and |maxStorageBuffersPerShaderStage|
// to fit within the Vulkan limit.
uint32_t countOverLimit =
maxFragmentCombinedOutputResources - vkLimits.maxFragmentCombinedOutputResources;
uint32_t maxStorageTexturesOverBase = limits->v1.maxStorageTexturesPerShaderStage -
baseLimits.v1.maxStorageTexturesPerShaderStage;
uint32_t maxStorageBuffersOverBase = limits->v1.maxStorageBuffersPerShaderStage -
baseLimits.v1.maxStorageBuffersPerShaderStage;
// Reduce the number of resources by half the overage count, but clamp to
// ensure we don't go below the base limits.
uint32_t numFewerStorageTextures =
std::min(countOverLimit / 2, maxStorageTexturesOverBase);
uint32_t numFewerStorageBuffers =
std::min((countOverLimit + 1) / 2, maxStorageBuffersOverBase);
if (numFewerStorageTextures == maxStorageTexturesOverBase) {
// If |numFewerStorageTextures| was clamped, subtract the remaining
// from the storage buffers.
numFewerStorageBuffers = countOverLimit - numFewerStorageTextures;
ASSERT(numFewerStorageBuffers <= maxStorageBuffersOverBase);
} else if (numFewerStorageBuffers == maxStorageBuffersOverBase) {
// If |numFewerStorageBuffers| was clamped, subtract the remaining
// from the storage textures.
numFewerStorageTextures = countOverLimit - numFewerStorageBuffers;
ASSERT(numFewerStorageTextures <= maxStorageTexturesOverBase);
}
limits->v1.maxStorageTexturesPerShaderStage -= numFewerStorageTextures;
limits->v1.maxStorageBuffersPerShaderStage -= numFewerStorageBuffers;
}
}
// Using base limits for: // Using base limits for:
// TODO(crbug.com/dawn/1448): // TODO(crbug.com/dawn/1448):
// - maxInterStageShaderVariables // - maxInterStageShaderVariables

View File

@ -541,6 +541,160 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) {
EXPECT_BUFFER_U32_EQ(1, result, 0); EXPECT_BUFFER_U32_EQ(1, result, 0);
} }
// Verifies that a fragment shader can write to as many output resources as
// maxFragmentCombinedOutputResources allows. The test binds at least one color attachment plus
// as many storage buffers and storage textures as the per-stage and combined limits permit,
// writes a distinct value (index + 1) to each of them in a single draw, and reads every value
// back.
TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
    // TODO(dawn:1692) Currently does not work on GL and GLES.
    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());

    // Compute the number of each resource type (storage buffers and storage textures) such that
    // there is at least one color attachment, and as many of the buffers/textures as possible,
    // splitting a shared remaining count between the two resources if together they reach or
    // exceed the combined limit.
    wgpu::Limits limits = GetSupportedLimits().limits;
    uint32_t attachmentCount = 1;
    uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
    uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
    uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;

    // The mandatory color attachment must fit in the combined limit, otherwise the unsigned
    // subtraction below would wrap around.
    ASSERT_LE(attachmentCount, maxCombinedResources);

    if (static_cast<uint64_t>(storageBuffers) + static_cast<uint64_t>(storageTextures) >=
        static_cast<uint64_t>(maxCombinedResources)) {
        // Slots left for storage resources once the color attachment is accounted for.
        const uint32_t remaining = maxCombinedResources - attachmentCount;
        // Give textures at most half of the remaining slots and buffers the rest, but never
        // more than the per-stage limit of either resource type.
        storageTextures = std::min(storageTextures, remaining / 2);
        storageBuffers = std::min(storageBuffers, remaining - storageTextures);
        // If the buffer count was clamped by its per-stage limit, hand the unused slots back
        // to the textures so that the combined limit is still reached.
        storageTextures =
            std::min(limits.maxStorageTexturesPerShaderStage, remaining - storageBuffers);
    }
    if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
        // Increase the number of attachments if there is still room after maximizing the
        // number of buffers and textures.
        attachmentCount = std::min(limits.maxColorAttachments,
                                   maxCombinedResources - storageBuffers - storageTextures);
    }
    ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);

    // Create a shader to write out to all the resources. Storage buffers live in bind group 0,
    // storage textures in bind group 1, and color targets are returned through a struct.
    auto CreateShader = [&]() -> wgpu::ShaderModule {
        // One u32 storage buffer binding per buffer, each assigned (index + 1).
        std::ostringstream bufferBindings;
        std::ostringstream bufferOutputs;
        for (uint32_t i = 0; i < storageBuffers; i++) {
            bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
                           << ": u32;\n";
            bufferOutputs << " b" << i << " = " << i << "u + 1u;\n";
        }

        // One write-only rgba8uint storage texture per texture, texel (0, 0) set to (index + 1).
        std::ostringstream textureBindings;
        std::ostringstream textureOutputs;
        for (uint32_t i = 0; i < storageTextures; i++) {
            textureBindings << "@group(1) @binding(" << i << ") var t" << i
                            << ": texture_storage_2d<rgba8uint, write>;\n";
            textureOutputs << " textureStore(t" << i << ", vec2u(0, 0), vec4u(" << i
                           << "u + 1u));\n";
        }

        // One u32 output location per color attachment, each returning (index + 1).
        std::ostringstream targetBindings;
        std::ostringstream targetOutputs;
        for (uint32_t i = 0; i < attachmentCount; i++) {
            targetBindings << "@location(" << i << ") o" << i << " : u32, ";
            targetOutputs << i << "u + 1u, ";
        }

        std::ostringstream fsShader;
        fsShader << bufferBindings.str();
        fsShader << textureBindings.str();
        fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
        fsShader << "@fragment fn main() -> Outputs {\n";
        fsShader << bufferOutputs.str();
        fsShader << textureOutputs.str();
        fsShader << " return Outputs(" << targetOutputs.str() << ");\n";
        fsShader << "}";
        return utils::CreateShaderModule(device, fsShader.str().c_str());
    };

    // Constants used for the render pipeline.
    wgpu::ColorTargetState kColorTargetState = {};
    kColorTargetState.format = wgpu::TextureFormat::R8Uint;

    // Create the render pipeline. A single point is drawn so the fragment shader is invoked for
    // the 1x1 attachments.
    utils::ComboRenderPipelineDescriptor pipelineDesc;
    pipelineDesc.vertex.module = utils::CreateShaderModule(device, R"(
@vertex fn main() -> @builtin(position) vec4f {
return vec4f(0.0, 0.0, 0.0, 1.0);
})");
    pipelineDesc.vertex.entryPoint = "main";
    pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
    pipelineDesc.cFragment.module = CreateShader();
    pipelineDesc.cFragment.entryPoint = "main";
    pipelineDesc.cTargets.fill(kColorTargetState);
    pipelineDesc.cFragment.targetCount = attachmentCount;
    wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&pipelineDesc);

    // Create all the resources and bindings for them.
    // Storage buffers: 4 bytes each, bound at binding i of group 0.
    std::vector<wgpu::Buffer> buffers;
    std::vector<wgpu::BindGroupEntry> bufferEntries;
    wgpu::BufferDescriptor bufferDesc = {};
    bufferDesc.size = 4;
    bufferDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
    for (uint32_t i = 0; i < storageBuffers; i++) {
        buffers.push_back(device.CreateBuffer(&bufferDesc));
        bufferEntries.push_back(utils::BindingInitializationHelper(i, buffers[i]).GetAsBinding());
    }
    wgpu::BindGroupDescriptor bufferBindGroupDesc = {};
    bufferBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(0);
    bufferBindGroupDesc.entryCount = storageBuffers;
    bufferBindGroupDesc.entries = bufferEntries.data();
    wgpu::BindGroup bufferBindGroup = device.CreateBindGroup(&bufferBindGroupDesc);

    // Storage textures: 1x1 RGBA8Uint, bound at binding i of group 1.
    std::vector<wgpu::Texture> textures;
    std::vector<wgpu::BindGroupEntry> textureEntries;
    wgpu::TextureDescriptor textureDesc = {};
    textureDesc.size.width = 1;
    textureDesc.size.height = 1;
    textureDesc.format = wgpu::TextureFormat::RGBA8Uint;
    textureDesc.usage = wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::CopySrc;
    for (uint32_t i = 0; i < storageTextures; i++) {
        textures.push_back(device.CreateTexture(&textureDesc));
        textureEntries.push_back(
            utils::BindingInitializationHelper(i, textures[i].CreateView()).GetAsBinding());
    }
    wgpu::BindGroupDescriptor textureBindGroupDesc = {};
    textureBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(1);
    textureBindGroupDesc.entryCount = storageTextures;
    textureBindGroupDesc.entries = textureEntries.data();
    wgpu::BindGroup textureBindGroup = device.CreateBindGroup(&textureBindGroupDesc);

    // Color attachments: 1x1 R8Uint, matching the pipeline's color target format.
    std::vector<wgpu::Texture> attachments;
    std::vector<wgpu::TextureView> attachmentViews;
    wgpu::TextureDescriptor attachmentDesc = {};
    attachmentDesc.size = {1, 1};
    attachmentDesc.format = wgpu::TextureFormat::R8Uint;
    attachmentDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
    for (uint32_t i = 0; i < attachmentCount; i++) {
        attachments.push_back(device.CreateTexture(&attachmentDesc));
        attachmentViews.push_back(attachments[i].CreateView());
    }

    // Execute the pipeline.
    utils::ComboRenderPassDescriptor passDesc(attachmentViews);
    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
    pass.SetBindGroup(0, bufferBindGroup);
    pass.SetBindGroup(1, textureBindGroup);
    pass.SetPipeline(renderPipeline);
    pass.Draw(1);
    pass.End();
    wgpu::CommandBuffer commands = encoder.Finish();
    queue.Submit(1, &commands);

    // Verify the results: every resource must hold its own index + 1.
    for (uint32_t i = 0; i < storageBuffers; i++) {
        EXPECT_BUFFER_U32_EQ(i + 1, buffers[i], 0);
    }
    for (uint32_t i = 0; i < storageTextures; i++) {
        const uint32_t res = i + 1;
        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(res, res, res, res), textures[i], 0, 0);
    }
    for (uint32_t i = 0; i < attachmentCount; i++) {
        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(i + 1, 0, 0, 0), attachments[i], 0, 0);
    }
}
// Verifies that supported buffer limits do not exceed maxBufferSize. // Verifies that supported buffer limits do not exceed maxBufferSize.
TEST_P(MaxLimitTests, MaxBufferSizes) { TEST_P(MaxLimitTests, MaxBufferSizes) {
// Base limits without tiering. // Base limits without tiering.