Update maxFragmentCombinedOutputResources on the different backends.
- Adds an e2e test that writes to every output resource when at the limit. Bug: dawn:1665 Change-Id: I2b2b9c2d700be0e454dc945ed8e3e1fe6b191974 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122801 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Loko Kung <lokokung@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
parent
2657b923c9
commit
af4ca3891f
|
@ -262,6 +262,9 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
|||
limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
|
||||
|
||||
limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
|
||||
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
|
||||
limits->v1.maxStorageBuffersPerShaderStage +
|
||||
limits->v1.maxStorageTexturesPerShaderStage;
|
||||
|
||||
// https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
|
||||
// In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.
|
||||
|
|
|
@ -706,6 +706,10 @@ class Adapter : public AdapterBase {
|
|||
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
|
||||
}
|
||||
|
||||
limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
|
||||
limits->v1.maxStorageBuffersPerShaderStage +
|
||||
limits->v1.maxStorageTexturesPerShaderStage;
|
||||
|
||||
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
|
||||
|
||||
// Metal limits are per-function, so the layout limits are the same as the stage
|
||||
|
|
|
@ -326,6 +326,8 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
|||
maxUniformBuffersPerShaderStage);
|
||||
CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
|
||||
CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
|
||||
CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
|
||||
maxFragmentCombinedOutputResources);
|
||||
|
||||
CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
|
||||
CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
|
||||
|
@ -382,58 +384,6 @@ MaybeError Adapter::InitializeSupportedLimitsImpl(CombinedLimits* limits) {
|
|||
limits->v1.maxBufferSize = kAssumedMaxBufferSize;
|
||||
}
|
||||
|
||||
// Only check maxFragmentCombinedOutputResources on mobile GPUs. Desktop GPU drivers seem
|
||||
// to put incorrect values for this limit with things like 8 or 16 when they can do bindless
|
||||
// storage buffers. Mesa llvmpipe driver also puts 8 here.
|
||||
uint32_t vendorId = mDeviceInfo.properties.vendorID;
|
||||
if (!gpu_info::IsAMD(vendorId) && !gpu_info::IsIntel(vendorId) && !gpu_info::IsMesa(vendorId) &&
|
||||
!gpu_info::IsNvidia(vendorId)) {
|
||||
if (vkLimits.maxFragmentCombinedOutputResources <
|
||||
kMaxColorAttachments + baseLimits.v1.maxStorageTexturesPerShaderStage +
|
||||
baseLimits.v1.maxStorageBuffersPerShaderStage) {
|
||||
return DAWN_INTERNAL_ERROR(
|
||||
"Insufficient Vulkan maxFragmentCombinedOutputResources limit");
|
||||
}
|
||||
|
||||
uint32_t maxFragmentCombinedOutputResources = kMaxColorAttachments +
|
||||
limits->v1.maxStorageTexturesPerShaderStage +
|
||||
limits->v1.maxStorageBuffersPerShaderStage;
|
||||
|
||||
if (maxFragmentCombinedOutputResources > vkLimits.maxFragmentCombinedOutputResources) {
|
||||
// WebGPU's maxFragmentCombinedOutputResources exceeds the Vulkan limit.
|
||||
// Decrease |maxStorageTexturesPerShaderStage| and |maxStorageBuffersPerShaderStage|
|
||||
// to fit within the Vulkan limit.
|
||||
uint32_t countOverLimit =
|
||||
maxFragmentCombinedOutputResources - vkLimits.maxFragmentCombinedOutputResources;
|
||||
|
||||
uint32_t maxStorageTexturesOverBase = limits->v1.maxStorageTexturesPerShaderStage -
|
||||
baseLimits.v1.maxStorageTexturesPerShaderStage;
|
||||
uint32_t maxStorageBuffersOverBase = limits->v1.maxStorageBuffersPerShaderStage -
|
||||
baseLimits.v1.maxStorageBuffersPerShaderStage;
|
||||
|
||||
// Reduce the number of resources by half the overage count, but clamp to
|
||||
// ensure we don't go below the base limits.
|
||||
uint32_t numFewerStorageTextures =
|
||||
std::min(countOverLimit / 2, maxStorageTexturesOverBase);
|
||||
uint32_t numFewerStorageBuffers =
|
||||
std::min((countOverLimit + 1) / 2, maxStorageBuffersOverBase);
|
||||
|
||||
if (numFewerStorageTextures == maxStorageTexturesOverBase) {
|
||||
// If |numFewerStorageTextures| was clamped, subtract the remaining
|
||||
// from the storage buffers.
|
||||
numFewerStorageBuffers = countOverLimit - numFewerStorageTextures;
|
||||
ASSERT(numFewerStorageBuffers <= maxStorageBuffersOverBase);
|
||||
} else if (numFewerStorageBuffers == maxStorageBuffersOverBase) {
|
||||
// If |numFewerStorageBuffers| was clamped, subtract the remaining
|
||||
// from the storage textures.
|
||||
numFewerStorageTextures = countOverLimit - numFewerStorageBuffers;
|
||||
ASSERT(numFewerStorageTextures <= maxStorageTexturesOverBase);
|
||||
}
|
||||
limits->v1.maxStorageTexturesPerShaderStage -= numFewerStorageTextures;
|
||||
limits->v1.maxStorageBuffersPerShaderStage -= numFewerStorageBuffers;
|
||||
}
|
||||
}
|
||||
|
||||
// Using base limits for:
|
||||
// TODO(crbug.com/dawn/1448):
|
||||
// - maxInterStageShaderVariables
|
||||
|
|
|
@ -541,6 +541,160 @@ TEST_P(MaxLimitTests, ReallyLargeBindGroup) {
|
|||
EXPECT_BUFFER_U32_EQ(1, result, 0);
|
||||
}
|
||||
|
||||
// Verifies that devices can write to at least maxFragmentCombinedOutputResources of non color
|
||||
// attachment resources.
|
||||
TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
|
||||
// TODO(dawn:1692) Currently does not work on GL and GLES.
|
||||
DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
|
||||
|
||||
// Compute the number of each resource type (storage buffers and storage textures) such that
|
||||
// there is at least one color attachment, and as many of the buffer/textures as possible,
|
||||
// splitting a shared remaining count between the two resources if they are not separately
|
||||
// defined, or exceed the combined limit.
|
||||
wgpu::Limits limits = GetSupportedLimits().limits;
|
||||
uint32_t attachmentCount = 1;
|
||||
uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
|
||||
uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
|
||||
uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;
|
||||
if (uint64_t(storageBuffers) + uint64_t(storageTextures) >= uint64_t(maxCombinedResources)) {
|
||||
storageTextures = std::min(storageTextures, (maxCombinedResources - attachmentCount) / 2);
|
||||
storageBuffers = maxCombinedResources - attachmentCount - storageTextures;
|
||||
}
|
||||
if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
|
||||
// Increase the number of attachments if we still have bandwidth after maximizing the number
|
||||
// of buffers and textures.
|
||||
attachmentCount = std::min(limits.maxColorAttachments,
|
||||
maxCombinedResources - storageBuffers - storageTextures);
|
||||
}
|
||||
ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);
|
||||
|
||||
// Create a shader to write out to all the resources.
|
||||
auto CreateShader = [&]() -> wgpu::ShaderModule {
|
||||
// Header to declare storage buffer struct.
|
||||
std::ostringstream bufferBindings;
|
||||
std::ostringstream bufferOutputs;
|
||||
for (uint32_t i = 0; i < storageBuffers; i++) {
|
||||
bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
|
||||
<< ": u32;\n";
|
||||
bufferOutputs << " b" << i << " = " << i << "u + 1u;\n";
|
||||
}
|
||||
|
||||
std::ostringstream textureBindings;
|
||||
std::ostringstream textureOutputs;
|
||||
for (uint32_t i = 0; i < storageTextures; i++) {
|
||||
textureBindings << "@group(1) @binding(" << i << ") var t" << i
|
||||
<< ": texture_storage_2d<rgba8uint, write>;\n";
|
||||
textureOutputs << " textureStore(t" << i << ", vec2u(0, 0), vec4u(" << i
|
||||
<< "u + 1u));\n";
|
||||
}
|
||||
|
||||
std::ostringstream targetBindings;
|
||||
std::ostringstream targetOutputs;
|
||||
for (size_t i = 0; i < attachmentCount; i++) {
|
||||
targetBindings << "@location(" << i << ") o" << i << " : u32, ";
|
||||
targetOutputs << i << "u + 1u, ";
|
||||
}
|
||||
|
||||
std::ostringstream fsShader;
|
||||
fsShader << bufferBindings.str();
|
||||
fsShader << textureBindings.str();
|
||||
fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
|
||||
fsShader << "@fragment fn main() -> Outputs {\n";
|
||||
fsShader << bufferOutputs.str();
|
||||
fsShader << textureOutputs.str();
|
||||
fsShader << " return Outputs(" << targetOutputs.str() << ");\n";
|
||||
fsShader << "}";
|
||||
return utils::CreateShaderModule(device, fsShader.str().c_str());
|
||||
};
|
||||
|
||||
// Constants used for the render pipeline.
|
||||
wgpu::ColorTargetState kColorTargetState = {};
|
||||
kColorTargetState.format = wgpu::TextureFormat::R8Uint;
|
||||
|
||||
// Create the render pipeline.
|
||||
utils::ComboRenderPipelineDescriptor pipelineDesc;
|
||||
pipelineDesc.vertex.module = utils::CreateShaderModule(device, R"(
|
||||
@vertex fn main() -> @builtin(position) vec4f {
|
||||
return vec4f(0.0, 0.0, 0.0, 1.0);
|
||||
})");
|
||||
pipelineDesc.vertex.entryPoint = "main";
|
||||
pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
|
||||
pipelineDesc.cFragment.module = CreateShader();
|
||||
pipelineDesc.cFragment.entryPoint = "main";
|
||||
pipelineDesc.cTargets.fill(kColorTargetState);
|
||||
pipelineDesc.cFragment.targetCount = attachmentCount;
|
||||
wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&pipelineDesc);
|
||||
|
||||
// Create all the resources and bindings for them.
|
||||
std::vector<wgpu::Buffer> buffers;
|
||||
std::vector<wgpu::BindGroupEntry> bufferEntries;
|
||||
wgpu::BufferDescriptor bufferDesc = {};
|
||||
bufferDesc.size = 4;
|
||||
bufferDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
|
||||
for (uint32_t i = 0; i < storageBuffers; i++) {
|
||||
buffers.push_back(device.CreateBuffer(&bufferDesc));
|
||||
bufferEntries.push_back(utils::BindingInitializationHelper(i, buffers[i]).GetAsBinding());
|
||||
}
|
||||
wgpu::BindGroupDescriptor bufferBindGroupDesc = {};
|
||||
bufferBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(0);
|
||||
bufferBindGroupDesc.entryCount = storageBuffers;
|
||||
bufferBindGroupDesc.entries = bufferEntries.data();
|
||||
wgpu::BindGroup bufferBindGroup = device.CreateBindGroup(&bufferBindGroupDesc);
|
||||
|
||||
std::vector<wgpu::Texture> textures;
|
||||
std::vector<wgpu::BindGroupEntry> textureEntries;
|
||||
wgpu::TextureDescriptor textureDesc = {};
|
||||
textureDesc.size.width = 1;
|
||||
textureDesc.size.height = 1;
|
||||
textureDesc.format = wgpu::TextureFormat::RGBA8Uint;
|
||||
textureDesc.usage = wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::CopySrc;
|
||||
for (uint32_t i = 0; i < storageTextures; i++) {
|
||||
textures.push_back(device.CreateTexture(&textureDesc));
|
||||
textureEntries.push_back(
|
||||
utils::BindingInitializationHelper(i, textures[i].CreateView()).GetAsBinding());
|
||||
}
|
||||
wgpu::BindGroupDescriptor textureBindGroupDesc = {};
|
||||
textureBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(1);
|
||||
textureBindGroupDesc.entryCount = storageTextures;
|
||||
textureBindGroupDesc.entries = textureEntries.data();
|
||||
wgpu::BindGroup textureBindGroup = device.CreateBindGroup(&textureBindGroupDesc);
|
||||
|
||||
std::vector<wgpu::Texture> attachments;
|
||||
std::vector<wgpu::TextureView> attachmentViews;
|
||||
wgpu::TextureDescriptor attachmentDesc = {};
|
||||
attachmentDesc.size = {1, 1};
|
||||
attachmentDesc.format = wgpu::TextureFormat::R8Uint;
|
||||
attachmentDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
|
||||
for (size_t i = 0; i < attachmentCount; i++) {
|
||||
attachments.push_back(device.CreateTexture(&attachmentDesc));
|
||||
attachmentViews.push_back(attachments[i].CreateView());
|
||||
}
|
||||
|
||||
// Execute the pipeline.
|
||||
utils::ComboRenderPassDescriptor passDesc(attachmentViews);
|
||||
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
|
||||
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
|
||||
pass.SetBindGroup(0, bufferBindGroup);
|
||||
pass.SetBindGroup(1, textureBindGroup);
|
||||
pass.SetPipeline(renderPipeline);
|
||||
pass.Draw(1);
|
||||
pass.End();
|
||||
wgpu::CommandBuffer commands = encoder.Finish();
|
||||
queue.Submit(1, &commands);
|
||||
|
||||
// Verify the results.
|
||||
for (uint32_t i = 0; i < storageBuffers; i++) {
|
||||
EXPECT_BUFFER_U32_EQ(i + 1, buffers[i], 0);
|
||||
}
|
||||
for (uint32_t i = 0; i < storageTextures; i++) {
|
||||
const uint32_t res = i + 1;
|
||||
EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(res, res, res, res), textures[i], 0, 0);
|
||||
}
|
||||
for (uint32_t i = 0; i < attachmentCount; i++) {
|
||||
EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(i + 1, 0, 0, 0), attachments[i], 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that supported buffer limits do not exceed maxBufferSize.
|
||||
TEST_P(MaxLimitTests, MaxBufferSizes) {
|
||||
// Base limits without tiering.
|
||||
|
|
Loading…
Reference in New Issue