From b38b3b0f8c4f2fa9dbcc7c607c0a2c0974316300 Mon Sep 17 00:00:00 2001 From: Brandon Jones Date: Mon, 19 Dec 2022 19:46:12 +0000 Subject: [PATCH] Implement AlwaysResolveIntoZeroLevelAndLayer globally Previously this toggle was implemented only for the Metal backend, but a need for it was identified on Android as well. This change moves the implementation to the backend-agnostic command encoder recording so that it works for all backends. Fundamentally it's still doing the same thing, however: Swapping resolve targets that point at a non-zero mip level or layer with a temporary texture and then performing a copy once the render pass has ended. Bug: dawn:1569 Change-Id: I292860cc74f653b2880e727d2ef3a7dfa3f10b91 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106040 Reviewed-by: Corentin Wallez Commit-Queue: Brandon Jones Kokoro: Kokoro --- src/dawn/native/CommandEncoder.cpp | 119 +++++++++++++++++- src/dawn/native/CommandEncoder.h | 6 + src/dawn/native/RenderPassEncoder.cpp | 15 ++- src/dawn/native/RenderPassEncoder.h | 8 +- src/dawn/native/Texture.cpp | 10 ++ src/dawn/native/Toggles.cpp | 3 +- src/dawn/native/metal/UtilsMetal.mm | 56 --------- src/dawn/native/vulkan/DeviceVk.cpp | 14 ++- .../end2end/MultisampledRenderingTests.cpp | 4 +- 9 files changed, 162 insertions(+), 73 deletions(-) diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp index f08204759a..36603e77b8 100644 --- a/src/dawn/native/CommandEncoder.cpp +++ b/src/dawn/native/CommandEncoder.cpp @@ -656,6 +656,17 @@ bool IsReadOnlyDepthStencilAttachment( return true; } +// Tracks the temporary resolve attachments used when the AlwaysResolveIntoZeroLevelAndLayer toggle +// is active so that the results can be copied from the temporary resolve attachment into the +// intended target after the render pass is complete. +struct TemporaryResolveAttachment { + TemporaryResolveAttachment(Ref src, Ref dst) + : copySrc(std::move(src)), copyDst(std::move(dst)) {} + + Ref copySrc; + Ref copyDst; +}; + } // namespace bool HasDeprecatedColor(const RenderPassColorAttachment& attachment) { @@ -863,6 +874,8 @@ Ref CommandEncoder::BeginRenderPass(const RenderPassDescripto return RenderPassEncoder::MakeError(device, this, &mEncodingContext); } + std::function passEndCallback = nullptr; + bool success = mEncodingContext.TryEncode( this, [&](CommandAllocator* allocator) -> MaybeError { @@ -1004,14 +1017,19 @@ Ref CommandEncoder::BeginRenderPass(const RenderPassDescripto usageTracker.TrackQueryAvailability(querySet, queryIndex); } + DAWN_TRY_ASSIGN(passEndCallback, + ApplyRenderPassWorkarounds(device, &usageTracker, cmd)); + return {}; }, "encoding %s.BeginRenderPass(%s).", this, descriptor); if (success) { - Ref passEncoder = RenderPassEncoder::Create( - device, descriptor, this, &mEncodingContext, std::move(usageTracker), - std::move(attachmentState), width, height, depthReadOnly, stencilReadOnly); + Ref passEncoder = + RenderPassEncoder::Create(device, descriptor, this, &mEncodingContext, + std::move(usageTracker), std::move(attachmentState), width, + height, depthReadOnly, stencilReadOnly, passEndCallback); + mEncodingContext.EnterPass(passEncoder.Get()); if (ShouldApplyClearBigIntegerColorValueWithDraw(device, descriptor)) { @@ -1028,6 +1046,101 @@ Ref CommandEncoder::BeginRenderPass(const RenderPassDescripto return RenderPassEncoder::MakeError(device, this, &mEncodingContext); } +// This function handles render pass workarounds. Because some cases may require +// multiple workarounds, it applies any workarounds one by one and calls itself +// recursively to handle the next workaround if needed. +ResultOrError> CommandEncoder::ApplyRenderPassWorkarounds( + DeviceBase* device, + RenderPassResourceUsageTracker* usageTracker, + BeginRenderPassCmd* cmd, + std::function passEndCallback) { + // dawn:56, dawn:1569 + // Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. This swaps out the given resolve attachment + // for a temporary one that has no layers or mip levels. The results are copied from the + // temporary attachment into the given attachment when the render pass ends. (Handled at the + // bottom of this function) + if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) { + std::vector temporaryResolveAttachments; + + for (ColorAttachmentIndex index : + IterateBitSet(cmd->attachmentState->GetColorAttachmentsMask())) { + TextureViewBase* resolveTarget = cmd->colorAttachments[index].resolveTarget.Get(); + + if (resolveTarget != nullptr && (resolveTarget->GetBaseMipLevel() != 0 || + resolveTarget->GetBaseArrayLayer() != 0)) { + // Create a temporary texture to resolve into + // TODO(dawn:1618): Defer allocation of temporary textures till submit time. + TextureDescriptor descriptor = {}; + descriptor.usage = + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc; + descriptor.format = resolveTarget->GetFormat().format; + descriptor.size = + resolveTarget->GetTexture()->GetMipLevelSingleSubresourceVirtualSize( + resolveTarget->GetBaseMipLevel()); + descriptor.dimension = wgpu::TextureDimension::e2D; + descriptor.mipLevelCount = 1; + + Ref temporaryResolveTexture; + DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor)); + + TextureViewDescriptor viewDescriptor = {}; + Ref temporaryResolveView; + DAWN_TRY_ASSIGN( + temporaryResolveView, + device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor)); + + // Save the temporary and given render targets together for copying after + // the render pass ends. + temporaryResolveAttachments.emplace_back(temporaryResolveView, resolveTarget); + + // Replace the given resolve attachment with the temporary one. + usageTracker->TextureViewUsedAs(temporaryResolveView.Get(), + wgpu::TextureUsage::RenderAttachment); + cmd->colorAttachments[index].resolveTarget = temporaryResolveView; + } + } + + if (temporaryResolveAttachments.size()) { + // Check for other workarounds that need to be applied recursively. + return ApplyRenderPassWorkarounds( + device, usageTracker, cmd, + [this, passEndCallback = std::move(passEndCallback), + temporaryResolveAttachments = std::move(temporaryResolveAttachments)]() -> void { + // Called once the render pass has been ended. + // Handle any copies needed for the AlwaysResolveIntoZeroLevelAndLayer + // workaround immediately after the render pass ends and before any additional + // commands are recorded. + for (auto& copyTarget : temporaryResolveAttachments) { + ImageCopyTexture srcImageCopyTexture = {}; + srcImageCopyTexture.texture = copyTarget.copySrc->GetTexture(); + srcImageCopyTexture.aspect = wgpu::TextureAspect::All; + srcImageCopyTexture.mipLevel = 0; + srcImageCopyTexture.origin = {0, 0, 0}; + + ImageCopyTexture dstImageCopyTexture = {}; + dstImageCopyTexture.texture = copyTarget.copyDst->GetTexture(); + dstImageCopyTexture.aspect = wgpu::TextureAspect::All; + dstImageCopyTexture.mipLevel = copyTarget.copyDst->GetBaseMipLevel(); + dstImageCopyTexture.origin = {0, 0, + copyTarget.copyDst->GetBaseArrayLayer()}; + + Extent3D extent3D = copyTarget.copySrc->GetTexture()->GetSize(); + + this->APICopyTextureToTextureInternal(&srcImageCopyTexture, + &dstImageCopyTexture, &extent3D); + } + + // If there were any other callbacks in the workaround stack, call the next one. + if (passEndCallback) { + passEndCallback(); + } + }); + } + } + + return std::move(passEndCallback); +} + void CommandEncoder::APICopyBufferToBuffer(BufferBase* source, uint64_t sourceOffset, BufferBase* destination, diff --git a/src/dawn/native/CommandEncoder.h b/src/dawn/native/CommandEncoder.h index e43c6e77ce..d1e20f353a 100644 --- a/src/dawn/native/CommandEncoder.h +++ b/src/dawn/native/CommandEncoder.h @@ -102,6 +102,12 @@ class CommandEncoder final : public ApiObjectBase { void DestroyImpl() override; + ResultOrError> ApplyRenderPassWorkarounds( + DeviceBase* device, + RenderPassResourceUsageTracker* usageTracker, + BeginRenderPassCmd* cmd, + std::function passEndCallback = nullptr); + // Helper to be able to implement both APICopyTextureToTexture and // APICopyTextureToTextureInternal. The only difference between both // copies, is that the Internal one will also check internal usage. diff --git a/src/dawn/native/RenderPassEncoder.cpp b/src/dawn/native/RenderPassEncoder.cpp index 018081e109..0ebc0bf4a0 100644 --- a/src/dawn/native/RenderPassEncoder.cpp +++ b/src/dawn/native/RenderPassEncoder.cpp @@ -59,7 +59,8 @@ RenderPassEncoder::RenderPassEncoder(DeviceBase* device, uint32_t renderTargetWidth, uint32_t renderTargetHeight, bool depthReadOnly, - bool stencilReadOnly) + bool stencilReadOnly, + std::function endCallback) : RenderEncoderBase(device, descriptor->label, encodingContext, @@ -69,7 +70,8 @@ RenderPassEncoder::RenderPassEncoder(DeviceBase* device, mCommandEncoder(commandEncoder), mRenderTargetWidth(renderTargetWidth), mRenderTargetHeight(renderTargetHeight), - mOcclusionQuerySet(descriptor->occlusionQuerySet) { + mOcclusionQuerySet(descriptor->occlusionQuerySet), + mEndCallback(std::move(endCallback)) { mUsageTracker = std::move(usageTracker); const RenderPassDescriptorMaxDrawCount* maxDrawCountInfo = nullptr; FindInChain(descriptor->nextInChain, &maxDrawCountInfo); @@ -89,11 +91,12 @@ Ref RenderPassEncoder::Create(DeviceBase* device, uint32_t renderTargetWidth, uint32_t renderTargetHeight, bool depthReadOnly, - bool stencilReadOnly) { + bool stencilReadOnly, + std::function endCallback) { return AcquireRef(new RenderPassEncoder(device, descriptor, commandEncoder, encodingContext, std::move(usageTracker), std::move(attachmentState), renderTargetWidth, renderTargetHeight, depthReadOnly, - stencilReadOnly)); + stencilReadOnly, std::move(endCallback))); } RenderPassEncoder::RenderPassEncoder(DeviceBase* device, @@ -157,6 +160,10 @@ void RenderPassEncoder::APIEnd() { return {}; }, "encoding %s.End().", this); + + if (mEndCallback) { + mEndCallback(); + } } void RenderPassEncoder::APIEndPass() { diff --git a/src/dawn/native/RenderPassEncoder.h b/src/dawn/native/RenderPassEncoder.h index 32199f1654..6328852308 100644 --- a/src/dawn/native/RenderPassEncoder.h +++ b/src/dawn/native/RenderPassEncoder.h @@ -36,7 +36,8 @@ class RenderPassEncoder final : public RenderEncoderBase { uint32_t renderTargetWidth, uint32_t renderTargetHeight, bool depthReadOnly, - bool stencilReadOnly); + bool stencilReadOnly, + std::function endCallback = nullptr); static Ref MakeError(DeviceBase* device, CommandEncoder* commandEncoder, EncodingContext* encodingContext); @@ -72,7 +73,8 @@ class RenderPassEncoder final : public RenderEncoderBase { uint32_t renderTargetWidth, uint32_t renderTargetHeight, bool depthReadOnly, - bool stencilReadOnly); + bool stencilReadOnly, + std::function endCallback = nullptr); RenderPassEncoder(DeviceBase* device, CommandEncoder* commandEncoder, EncodingContext* encodingContext, @@ -97,6 +99,8 @@ class RenderPassEncoder final : public RenderEncoderBase { // This is the hardcoded value in the WebGPU spec. uint64_t mMaxDrawCount = 50000000; + + std::function mEndCallback; }; } // namespace dawn::native diff --git a/src/dawn/native/Texture.cpp b/src/dawn/native/Texture.cpp index 5c525daa54..c462009f60 100644 --- a/src/dawn/native/Texture.cpp +++ b/src/dawn/native/Texture.cpp @@ -553,6 +553,16 @@ TextureBase::TextureBase(DeviceBase* device, mInternalUsage |= internalUsageDesc->internalUsage; } GetObjectTrackingList()->Track(this); + + // dawn:1569: If a texture with multiple array layers or mip levels is specified as a texture + // attachment when this toggle is active, it needs to be given CopyDst usage internally. + bool applyAlwaysResolveIntoZeroLevelAndLayerToggle = + device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer) && + (GetArrayLayers() > 1 || GetNumMipLevels() > 1) && + (GetInternalUsage() & wgpu::TextureUsage::RenderAttachment); + if (applyAlwaysResolveIntoZeroLevelAndLayerToggle) { + AddInternalUsage(wgpu::TextureUsage::CopyDst); + } } TextureBase::~TextureBase() = default; diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp index 1095dec051..7371ee1b52 100644 --- a/src/dawn/native/Toggles.cpp +++ b/src/dawn/native/Toggles.cpp @@ -49,7 +49,8 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{ "a texture, we first resolve into a temporarily 2D texture with only one mipmap level and " "one array layer, and copy the result of MSAA resolve into the true resolve target. This " "workaround is enabled by default on the Metal drivers that have bugs when setting non-zero " - "resolveLevel or resolveSlice.", + "resolveLevel or resolveSlice. It is also enabled by default on Qualcomm Vulkan drivers, " + "which have similar bugs.", "https://crbug.com/dawn/56"}}, {Toggle::LazyClearResourceOnFirstUse, {"lazy_clear_resource_on_first_use", diff --git a/src/dawn/native/metal/UtilsMetal.mm b/src/dawn/native/metal/UtilsMetal.mm index 9a091b88fa..daaa390017 100644 --- a/src/dawn/native/metal/UtilsMetal.mm +++ b/src/dawn/native/metal/UtilsMetal.mm @@ -111,23 +111,6 @@ ResultOrError PatchAttachmentWithTemporary( return result; } -// Same as PatchAttachmentWithTemporary but for the resolve attachment. -ResultOrError PatchResolveAttachmentWithTemporary( - Device* device, - MTLRenderPassAttachmentDescriptor* attachment) { - SavedMetalAttachment result; - DAWN_TRY_ASSIGN( - result, SaveAttachmentCreateTemporary(device, attachment.resolveTexture, - attachment.resolveLevel, attachment.resolveSlice)); - - // Replace the resolve attachment with the tempoary. - attachment.resolveTexture = result.temporary.Get(); - attachment.resolveLevel = 0; - attachment.resolveSlice = 0; - - return result; -} - // Helper function for Toggle EmulateStoreAndMSAAResolve void ResolveInAnotherRenderPass( CommandRecordingContext* commandContext, @@ -334,45 +317,6 @@ MaybeError EncodeMetalRenderPass(Device* device, // workarounds to happen at the same time, it handles workarounds one by one and calls // itself recursively to handle the next workaround if needed. - // Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. We must handle this before applying - // the store + MSAA resolve workaround, otherwise this toggle will never be handled because - // the resolve texture is removed when applying the store + MSAA resolve workaround. - if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) { - std::array trueResolveAttachments = {}; - bool workaroundUsed = false; - for (uint32_t i = 0; i < kMaxColorAttachments; ++i) { - if (mtlRenderPass.colorAttachments[i].resolveTexture == nullptr) { - continue; - } - - if (mtlRenderPass.colorAttachments[i].resolveLevel == 0 && - mtlRenderPass.colorAttachments[i].resolveSlice == 0) { - continue; - } - - DAWN_TRY_ASSIGN( - trueResolveAttachments[i], - PatchResolveAttachmentWithTemporary(device, mtlRenderPass.colorAttachments[i])); - workaroundUsed = true; - } - - // If we need to use a temporary resolve texture we need to copy the result of MSAA - // resolve back to the true resolve targets. - if (workaroundUsed) { - DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, - std::move(encodeInside), renderPassCmd)); - - for (uint32_t i = 0; i < kMaxColorAttachments; ++i) { - if (trueResolveAttachments[i].texture == nullptr) { - continue; - } - - trueResolveAttachments[i].CopyFromTemporaryToAttachment(commandContext); - } - return {}; - } - } - // Handles the workaround for r8unorm rg8unorm mipmap rendering being broken on some // devices. Render to a temporary texture instead and then copy back to the attachment. if (device->IsToggleEnabled(Toggle::MetalRenderR8RG8UnormSmallMipToTempTexture)) { diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp index 63c94a2fc7..7a7b61ec15 100644 --- a/src/dawn/native/vulkan/DeviceVk.cpp +++ b/src/dawn/native/vulkan/DeviceVk.cpp @@ -639,12 +639,16 @@ void Device::InitTogglesFromDriver() { // By default try to use S8 if available. SetToggle(Toggle::VulkanUseS8, true); - // dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a - // compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around that - // bug, split the command buffer any time we can detect that situation. if (ToBackend(GetAdapter())->IsAndroidQualcomm()) { - ForceSetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass, - true); + // dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a + // compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around + // that bug, split the command buffer any time we can detect that situation. + SetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass, true); + + // dawn:1569: Qualcomm devices have a bug resolving into a non-zero level of an array + // texture. Work around it by resolving into a single level texture and then copying into + // the intended layer. + SetToggle(Toggle::AlwaysResolveIntoZeroLevelAndLayer, true); } } diff --git a/src/dawn/tests/end2end/MultisampledRenderingTests.cpp b/src/dawn/tests/end2end/MultisampledRenderingTests.cpp index ea7a28def2..fb6a7a3961 100644 --- a/src/dawn/tests/end2end/MultisampledRenderingTests.cpp +++ b/src/dawn/tests/end2end/MultisampledRenderingTests.cpp @@ -521,9 +521,8 @@ TEST_P(MultisampledRenderingTest, ResolveInto2DArrayTexture) { // TODO(dawn:462): Issue in the D3D12 validation layers. DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsBackendValidationEnabled()); - // TODO(dawn:1549) Fails on Qualcomm-based Android devices. // TODO(dawn:1550) Fails on ARM-based Android devices. - DAWN_SUPPRESS_TEST_IF(IsAndroid()); + DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsARM()); wgpu::TextureView multisampledColorView2 = CreateTextureForRenderAttachment(kColorFormat, kSampleCount).CreateView(); @@ -1138,6 +1137,7 @@ DAWN_INSTANTIATE_TEST(MultisampledRenderingTest, OpenGLBackend(), OpenGLESBackend(), VulkanBackend(), + VulkanBackend({"always_resolve_into_zero_level_and_layer"}), MetalBackend({"emulate_store_and_msaa_resolve"}), MetalBackend({"always_resolve_into_zero_level_and_layer"}), MetalBackend({"always_resolve_into_zero_level_and_layer",