Update maxFragmentCombinedOutputResources on the different backends.
- Adds an e2e test that writes to the fragment outputs when at the limit.

Bug: dawn:1665
Change-Id: I2b2b9c2d700be0e454dc945ed8e3e1fe6b191974
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122801
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
parent
2657b923c9
commit
af4ca3891f
|
@ -262,6 +262,9 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
||||||
limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
|
limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
|
||||||
|
|
||||||
limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
|
limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
|
||||||
|
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
|
||||||
|
limits->v1.maxStorageBuffersPerShaderStage +
|
||||||
|
limits->v1.maxStorageTexturesPerShaderStage;
|
||||||
|
|
||||||
// https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
|
// https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
|
||||||
// In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.
|
// In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.
|
||||||
|
|
|
@ -706,6 +706,10 @@ class Adapter : public AdapterBase {
|
||||||
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
|
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
|
||||||
|
limits->v1.maxStorageBuffersPerShaderStage +
|
||||||
|
limits->v1.maxStorageTexturesPerShaderStage;
|
||||||
|
|
||||||
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
|
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
|
||||||
|
|
||||||
// Metal limits are per-function, so the layout limits are the same as the stage
|
// Metal limits are per-function, so the layout limits are the same as the stage
|
||||||
|
|
|
@ -326,6 +326,8 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
||||||
maxUniformBuffersPerShaderStage);
|
maxUniformBuffersPerShaderStage);
|
||||||
CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
|
CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
|
||||||
CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
|
CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
|
||||||
|
CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
|
||||||
|
maxFragmentCombinedOutputResources);
|
||||||
|
|
||||||
CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
|
CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
|
||||||
CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
|
CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
|
||||||
|
@ -382,58 +384,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
||||||
limits->v1.maxBufferSize = kAssumedMaxBufferSize;
|
limits->v1.maxBufferSize = kAssumedMaxBufferSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only check maxFragmentCombinedOutputResources on mobile GPUs. Desktop GPUs drivers seem
|
|
||||||
// to put incorrect values for this limit with things like 8 or 16 when they can do bindless
|
|
||||||
// storage buffers. Mesa llvmpipe driver also puts 8 here.
|
|
||||||
uint32_t vendorId = mDeviceInfo.properties.vendorID;
|
|
||||||
if (!gpu_info::IsAMD(vendorId) && !gpu_info::IsIntel(vendorId) && !gpu_info::IsMesa(vendorId) &&
|
|
||||||
!gpu_info::IsNvidia(vendorId)) {
|
|
||||||
if (vkLimits.maxFragmentCombinedOutputResources <
|
|
||||||
kMaxColorAttachments + baseLimits.v1.maxStorageTexturesPerShaderStage +
|
|
||||||
baseLimits.v1.maxStorageBuffersPerShaderStage) {
|
|
||||||
return DAWN_INTERNAL_ERROR(
|
|
||||||
"Insufficient Vulkan maxFragmentCombinedOutputResources limit");
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t maxFragmentCombinedOutputResources = kMaxColorAttachments +
|
|
||||||
limits->v1.maxStorageTexturesPerShaderStage +
|
|
||||||
limits->v1.maxStorageBuffersPerShaderStage;
|
|
||||||
|
|
||||||
if (maxFragmentCombinedOutputResources > vkLimits.maxFragmentCombinedOutputResources) {
|
|
||||||
// WebGPU's maxFragmentCombinedOutputResources exceeds the Vulkan limit.
|
|
||||||
// Decrease |maxStorageTexturesPerShaderStage| and |maxStorageBuffersPerShaderStage|
|
|
||||||
// to fit within the Vulkan limit.
|
|
||||||
uint32_t countOverLimit =
|
|
||||||
maxFragmentCombinedOutputResources - vkLimits.maxFragmentCombinedOutputResources;
|
|
||||||
|
|
||||||
uint32_t maxStorageTexturesOverBase = limits->v1.maxStorageTexturesPerShaderStage -
|
|
||||||
baseLimits.v1.maxStorageTexturesPerShaderStage;
|
|
||||||
uint32_t maxStorageBuffersOverBase = limits->v1.maxStorageBuffersPerShaderStage -
|
|
||||||
baseLimits.v1.maxStorageBuffersPerShaderStage;
|
|
||||||
|
|
||||||
// Reduce the number of resources by half the overage count, but clamp to
|
|
||||||
// to ensure we don't go below the base limits.
|
|
||||||
uint32_t numFewerStorageTextures =
|
|
||||||
std::min(countOverLimit / 2, maxStorageTexturesOverBase);
|
|
||||||
uint32_t numFewerStorageBuffers =
|
|
||||||
std::min((countOverLimit + 1) / 2, maxStorageBuffersOverBase);
|
|
||||||
|
|
||||||
if (numFewerStorageTextures == maxStorageTexturesOverBase) {
|
|
||||||
// If |numFewerStorageTextures| was clamped, subtract the remaining
|
|
||||||
// from the storage buffers.
|
|
||||||
numFewerStorageBuffers = countOverLimit - numFewerStorageTextures;
|
|
||||||
ASSERT(numFewerStorageBuffers <= maxStorageBuffersOverBase);
|
|
||||||
} else if (numFewerStorageBuffers == maxStorageBuffersOverBase) {
|
|
||||||
// If |numFewerStorageBuffers| was clamped, subtract the remaining
|
|
||||||
// from the storage textures.
|
|
||||||
numFewerStorageTextures = countOverLimit - numFewerStorageBuffers;
|
|
||||||
ASSERT(numFewerStorageTextures <= maxStorageTexturesOverBase);
|
|
||||||
}
|
|
||||||
limits->v1.maxStorageTexturesPerShaderStage -= numFewerStorageTextures;
|
|
||||||
limits->v1.maxStorageBuffersPerShaderStage -= numFewerStorageBuffers;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Using base limits for:
|
// Using base limits for:
|
||||||
// TODO(crbug.com/dawn/1448):
|
// TODO(crbug.com/dawn/1448):
|
||||||
// - maxInterStageShaderVariables
|
// - maxInterStageShaderVariables
|
||||||
|
|
|
@ -541,6 +541,160 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) {
|
||||||
EXPECT_BUFFER_U32_EQ(1, result, 0);
|
EXPECT_BUFFER_U32_EQ(1, result, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that devices can write to at least maxFragmentCombinedOutputResources of non color
|
||||||
|
// attachment resources.
|
||||||
|
TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
|
||||||
|
// TODO(dawn:1692) Currently does not work on GL and GLES.
|
||||||
|
DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
|
||||||
|
|
||||||
|
// Compute the number of each resource type (storage buffers and storage textures) such that
|
||||||
|
// there is at least one color attachment, and as many of the buffer/textures as possible,
|
||||||
|
// splitting a shared remaining count between the two resources if they are not separately
|
||||||
|
// defined, or exceed the combined limit.
|
||||||
|
wgpu::Limits limits = GetSupportedLimits().limits;
|
||||||
|
uint32_t attachmentCount = 1;
|
||||||
|
uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
|
||||||
|
uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
|
||||||
|
uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;
|
||||||
|
if (uint64_t(storageBuffers) + uint64_t(storageTextures) >= uint64_t(maxCombinedResources)) {
|
||||||
|
storageTextures = std::min(storageTextures, (maxCombinedResources - attachmentCount) / 2);
|
||||||
|
storageBuffers = maxCombinedResources - attachmentCount - storageTextures;
|
||||||
|
}
|
||||||
|
if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
|
||||||
|
// Increase the number of attachments if we still have bandwidth after maximizing the number
|
||||||
|
// of buffers and textures.
|
||||||
|
attachmentCount = std::min(limits.maxColorAttachments,
|
||||||
|
maxCombinedResources - storageBuffers - storageTextures);
|
||||||
|
}
|
||||||
|
ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);
|
||||||
|
|
||||||
|
// Create a shader to write out to all the resources.
|
||||||
|
auto CreateShader = [&]() -> wgpu::ShaderModule {
|
||||||
|
// Header to declare storage buffer struct.
|
||||||
|
std::ostringstream bufferBindings;
|
||||||
|
std::ostringstream bufferOutputs;
|
||||||
|
for (uint32_t i = 0; i < storageBuffers; i++) {
|
||||||
|
bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
|
||||||
|
<< ": u32;\n";
|
||||||
|
bufferOutputs << " b" << i << " = " << i << "u + 1u;\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostringstream textureBindings;
|
||||||
|
std::ostringstream textureOutputs;
|
||||||
|
for (uint32_t i = 0; i < storageTextures; i++) {
|
||||||
|
textureBindings << "@group(1) @binding(" << i << ") var t" << i
|
||||||
|
<< ": texture_storage_2d<rgba8uint, write>;\n";
|
||||||
|
textureOutputs << " textureStore(t" << i << ", vec2u(0, 0), vec4u(" << i
|
||||||
|
<< "u + 1u));\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostringstream targetBindings;
|
||||||
|
std::ostringstream targetOutputs;
|
||||||
|
for (size_t i = 0; i < attachmentCount; i++) {
|
||||||
|
targetBindings << "@location(" << i << ") o" << i << " : u32, ";
|
||||||
|
targetOutputs << i << "u + 1u, ";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostringstream fsShader;
|
||||||
|
fsShader << bufferBindings.str();
|
||||||
|
fsShader << textureBindings.str();
|
||||||
|
fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
|
||||||
|
fsShader << "@fragment fn main() -> Outputs {\n";
|
||||||
|
fsShader << bufferOutputs.str();
|
||||||
|
fsShader << textureOutputs.str();
|
||||||
|
fsShader << " return Outputs(" << targetOutputs.str() << ");\n";
|
||||||
|
fsShader << "}";
|
||||||
|
return utils::CreateShaderModule(device, fsShader.str().c_str());
|
||||||
|
};
|
||||||
|
|
||||||
|
// Constants used for the render pipeline.
|
||||||
|
wgpu::ColorTargetState kColorTargetState = {};
|
||||||
|
kColorTargetState.format = wgpu::TextureFormat::R8Uint;
|
||||||
|
|
||||||
|
// Create the render pipeline.
|
||||||
|
utils::ComboRenderPipelineDescriptor pipelineDesc;
|
||||||
|
pipelineDesc.vertex.module = utils::CreateShaderModule(device, R"(
|
||||||
|
@vertex fn main() -> @builtin(position) vec4f {
|
||||||
|
return vec4f(0.0, 0.0, 0.0, 1.0);
|
||||||
|
})");
|
||||||
|
pipelineDesc.vertex.entryPoint = "main";
|
||||||
|
pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
|
||||||
|
pipelineDesc.cFragment.module = CreateShader();
|
||||||
|
pipelineDesc.cFragment.entryPoint = "main";
|
||||||
|
pipelineDesc.cTargets.fill(kColorTargetState);
|
||||||
|
pipelineDesc.cFragment.targetCount = attachmentCount;
|
||||||
|
wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&pipelineDesc);
|
||||||
|
|
||||||
|
// Create all the resources and bindings for them.
|
||||||
|
std::vector<wgpu::Buffer> buffers;
|
||||||
|
std::vector<wgpu::BindGroupEntry> bufferEntries;
|
||||||
|
wgpu::BufferDescriptor bufferDesc = {};
|
||||||
|
bufferDesc.size = 4;
|
||||||
|
bufferDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
|
||||||
|
for (uint32_t i = 0; i < storageBuffers; i++) {
|
||||||
|
buffers.push_back(device.CreateBuffer(&bufferDesc));
|
||||||
|
bufferEntries.push_back(utils::BindingInitializationHelper(i, buffers[i]).GetAsBinding());
|
||||||
|
}
|
||||||
|
wgpu::BindGroupDescriptor bufferBindGroupDesc = {};
|
||||||
|
bufferBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(0);
|
||||||
|
bufferBindGroupDesc.entryCount = storageBuffers;
|
||||||
|
bufferBindGroupDesc.entries = bufferEntries.data();
|
||||||
|
wgpu::BindGroup bufferBindGroup = device.CreateBindGroup(&bufferBindGroupDesc);
|
||||||
|
|
||||||
|
std::vector<wgpu::Texture> textures;
|
||||||
|
std::vector<wgpu::BindGroupEntry> textureEntries;
|
||||||
|
wgpu::TextureDescriptor textureDesc = {};
|
||||||
|
textureDesc.size.width = 1;
|
||||||
|
textureDesc.size.height = 1;
|
||||||
|
textureDesc.format = wgpu::TextureFormat::RGBA8Uint;
|
||||||
|
textureDesc.usage = wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::CopySrc;
|
||||||
|
for (uint32_t i = 0; i < storageTextures; i++) {
|
||||||
|
textures.push_back(device.CreateTexture(&textureDesc));
|
||||||
|
textureEntries.push_back(
|
||||||
|
utils::BindingInitializationHelper(i, textures[i].CreateView()).GetAsBinding());
|
||||||
|
}
|
||||||
|
wgpu::BindGroupDescriptor textureBindGroupDesc = {};
|
||||||
|
textureBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(1);
|
||||||
|
textureBindGroupDesc.entryCount = storageTextures;
|
||||||
|
textureBindGroupDesc.entries = textureEntries.data();
|
||||||
|
wgpu::BindGroup textureBindGroup = device.CreateBindGroup(&textureBindGroupDesc);
|
||||||
|
|
||||||
|
std::vector<wgpu::Texture> attachments;
|
||||||
|
std::vector<wgpu::TextureView> attachmentViews;
|
||||||
|
wgpu::TextureDescriptor attachmentDesc = {};
|
||||||
|
attachmentDesc.size = {1, 1};
|
||||||
|
attachmentDesc.format = wgpu::TextureFormat::R8Uint;
|
||||||
|
attachmentDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
|
||||||
|
for (size_t i = 0; i < attachmentCount; i++) {
|
||||||
|
attachments.push_back(device.CreateTexture(&attachmentDesc));
|
||||||
|
attachmentViews.push_back(attachments[i].CreateView());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute the pipeline.
|
||||||
|
utils::ComboRenderPassDescriptor passDesc(attachmentViews);
|
||||||
|
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
|
||||||
|
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
|
||||||
|
pass.SetBindGroup(0, bufferBindGroup);
|
||||||
|
pass.SetBindGroup(1, textureBindGroup);
|
||||||
|
pass.SetPipeline(renderPipeline);
|
||||||
|
pass.Draw(1);
|
||||||
|
pass.End();
|
||||||
|
wgpu::CommandBuffer commands = encoder.Finish();
|
||||||
|
queue.Submit(1, &commands);
|
||||||
|
|
||||||
|
// Verify the results.
|
||||||
|
for (uint32_t i = 0; i < storageBuffers; i++) {
|
||||||
|
EXPECT_BUFFER_U32_EQ(i + 1, buffers[i], 0);
|
||||||
|
}
|
||||||
|
for (uint32_t i = 0; i < storageTextures; i++) {
|
||||||
|
const uint32_t res = i + 1;
|
||||||
|
EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(res, res, res, res), textures[i], 0, 0);
|
||||||
|
}
|
||||||
|
for (uint32_t i = 0; i < attachmentCount; i++) {
|
||||||
|
EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(i + 1, 0, 0, 0), attachments[i], 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Verifies that supported buffer limits do not exceed maxBufferSize.
|
// Verifies that supported buffer limits do not exceed maxBufferSize.
|
||||||
TEST_P(MaxLimitTests, MaxBufferSizes) {
|
TEST_P(MaxLimitTests, MaxBufferSizes) {
|
||||||
// Base limits without tiering.
|
// Base limits without tiering.
|
||||||
|
|
Loading…
Reference in New Issue