Implement AlwaysResolveIntoZeroLevelAndLayer globally

Previously this toggle was implemented only for the Metal backend, but
a need for it was identified on Android as well. This change moves the
implementation to the backend-agnostic command encoder recording so that
it works for all backends. Fundamentally it's still doing the same
thing, however: Swapping resolve targets that point at a non-zero mip
level or layer with a temporary texture and then performing a copy once
the render pass has ended.

Bug: dawn:1569
Change-Id: I292860cc74f653b2880e727d2ef3a7dfa3f10b91
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106040
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Brandon Jones <bajones@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
Brandon Jones 2022-12-19 19:46:12 +00:00 committed by Dawn LUCI CQ
parent 915e55a0d8
commit b38b3b0f8c
9 changed files with 162 additions and 73 deletions

View File

@ -656,6 +656,17 @@ bool IsReadOnlyDepthStencilAttachment(
return true;
}
// Tracks the temporary resolve attachments used when the AlwaysResolveIntoZeroLevelAndLayer toggle
// is active so that the results can be copied from the temporary resolve attachment into the
// intended target after the render pass is complete.
struct TemporaryResolveAttachment {
TemporaryResolveAttachment(Ref<TextureViewBase> src, Ref<TextureViewBase> dst)
: copySrc(std::move(src)), copyDst(std::move(dst)) {}
Ref<TextureViewBase> copySrc;
Ref<TextureViewBase> copyDst;
};
} // namespace
bool HasDeprecatedColor(const RenderPassColorAttachment& attachment) {
@ -863,6 +874,8 @@ Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescripto
return RenderPassEncoder::MakeError(device, this, &mEncodingContext);
}
std::function<void()> passEndCallback = nullptr;
bool success = mEncodingContext.TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
@ -1004,14 +1017,19 @@ Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescripto
usageTracker.TrackQueryAvailability(querySet, queryIndex);
}
DAWN_TRY_ASSIGN(passEndCallback,
ApplyRenderPassWorkarounds(device, &usageTracker, cmd));
return {};
},
"encoding %s.BeginRenderPass(%s).", this, descriptor);
if (success) {
Ref<RenderPassEncoder> passEncoder = RenderPassEncoder::Create(
device, descriptor, this, &mEncodingContext, std::move(usageTracker),
std::move(attachmentState), width, height, depthReadOnly, stencilReadOnly);
Ref<RenderPassEncoder> passEncoder =
RenderPassEncoder::Create(device, descriptor, this, &mEncodingContext,
std::move(usageTracker), std::move(attachmentState), width,
height, depthReadOnly, stencilReadOnly, passEndCallback);
mEncodingContext.EnterPass(passEncoder.Get());
if (ShouldApplyClearBigIntegerColorValueWithDraw(device, descriptor)) {
@ -1028,6 +1046,101 @@ Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescripto
return RenderPassEncoder::MakeError(device, this, &mEncodingContext);
}
// This function handles render pass workarounds. Because some cases may require
// multiple workarounds, it applies any workarounds one by one and calls itself
// recursively to handle the next workaround if needed.
ResultOrError<std::function<void()>> CommandEncoder::ApplyRenderPassWorkarounds(
DeviceBase* device,
RenderPassResourceUsageTracker* usageTracker,
BeginRenderPassCmd* cmd,
std::function<void()> passEndCallback) {
// dawn:56, dawn:1569
// Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. This swaps out the given resolve attachment
// for a temporary one that has no layers or mip levels. The results are copied from the
// temporary attachment into the given attachment when the render pass ends. (Handled at the
// bottom of this function)
if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) {
std::vector<TemporaryResolveAttachment> temporaryResolveAttachments;
for (ColorAttachmentIndex index :
IterateBitSet(cmd->attachmentState->GetColorAttachmentsMask())) {
TextureViewBase* resolveTarget = cmd->colorAttachments[index].resolveTarget.Get();
if (resolveTarget != nullptr && (resolveTarget->GetBaseMipLevel() != 0 ||
resolveTarget->GetBaseArrayLayer() != 0)) {
// Create a temporary texture to resolve into
// TODO(dawn:1618): Defer allocation of temporary textures till submit time.
TextureDescriptor descriptor = {};
descriptor.usage =
wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
descriptor.format = resolveTarget->GetFormat().format;
descriptor.size =
resolveTarget->GetTexture()->GetMipLevelSingleSubresourceVirtualSize(
resolveTarget->GetBaseMipLevel());
descriptor.dimension = wgpu::TextureDimension::e2D;
descriptor.mipLevelCount = 1;
Ref<TextureBase> temporaryResolveTexture;
DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
TextureViewDescriptor viewDescriptor = {};
Ref<TextureViewBase> temporaryResolveView;
DAWN_TRY_ASSIGN(
temporaryResolveView,
device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor));
// Save the temporary and given render targets together for copying after
// the render pass ends.
temporaryResolveAttachments.emplace_back(temporaryResolveView, resolveTarget);
// Replace the given resolve attachment with the temporary one.
usageTracker->TextureViewUsedAs(temporaryResolveView.Get(),
wgpu::TextureUsage::RenderAttachment);
cmd->colorAttachments[index].resolveTarget = temporaryResolveView;
}
}
if (temporaryResolveAttachments.size()) {
// Check for other workarounds that need to be applied recursively.
return ApplyRenderPassWorkarounds(
device, usageTracker, cmd,
[this, passEndCallback = std::move(passEndCallback),
temporaryResolveAttachments = std::move(temporaryResolveAttachments)]() -> void {
// Called once the render pass has been ended.
// Handle any copies needed for the AlwaysResolveIntoZeroLevelAndLayer
// workaround immediately after the render pass ends and before any additional
// commands are recorded.
for (auto& copyTarget : temporaryResolveAttachments) {
ImageCopyTexture srcImageCopyTexture = {};
srcImageCopyTexture.texture = copyTarget.copySrc->GetTexture();
srcImageCopyTexture.aspect = wgpu::TextureAspect::All;
srcImageCopyTexture.mipLevel = 0;
srcImageCopyTexture.origin = {0, 0, 0};
ImageCopyTexture dstImageCopyTexture = {};
dstImageCopyTexture.texture = copyTarget.copyDst->GetTexture();
dstImageCopyTexture.aspect = wgpu::TextureAspect::All;
dstImageCopyTexture.mipLevel = copyTarget.copyDst->GetBaseMipLevel();
dstImageCopyTexture.origin = {0, 0,
copyTarget.copyDst->GetBaseArrayLayer()};
Extent3D extent3D = copyTarget.copySrc->GetTexture()->GetSize();
this->APICopyTextureToTextureInternal(&srcImageCopyTexture,
&dstImageCopyTexture, &extent3D);
}
// If there were any other callbacks in the workaround stack, call the next one.
if (passEndCallback) {
passEndCallback();
}
});
}
}
return std::move(passEndCallback);
}
void CommandEncoder::APICopyBufferToBuffer(BufferBase* source,
uint64_t sourceOffset,
BufferBase* destination,

View File

@ -102,6 +102,12 @@ class CommandEncoder final : public ApiObjectBase {
void DestroyImpl() override;
ResultOrError<std::function<void()>> ApplyRenderPassWorkarounds(
DeviceBase* device,
RenderPassResourceUsageTracker* usageTracker,
BeginRenderPassCmd* cmd,
std::function<void()> passEndCallback = nullptr);
// Helper to be able to implement both APICopyTextureToTexture and
// APICopyTextureToTextureInternal. The only difference between both
// copies, is that the Internal one will also check internal usage.

View File

@ -59,7 +59,8 @@ RenderPassEncoder::RenderPassEncoder(DeviceBase* device,
uint32_t renderTargetWidth,
uint32_t renderTargetHeight,
bool depthReadOnly,
bool stencilReadOnly)
bool stencilReadOnly,
std::function<void()> endCallback)
: RenderEncoderBase(device,
descriptor->label,
encodingContext,
@ -69,7 +70,8 @@ RenderPassEncoder::RenderPassEncoder(DeviceBase* device,
mCommandEncoder(commandEncoder),
mRenderTargetWidth(renderTargetWidth),
mRenderTargetHeight(renderTargetHeight),
mOcclusionQuerySet(descriptor->occlusionQuerySet) {
mOcclusionQuerySet(descriptor->occlusionQuerySet),
mEndCallback(std::move(endCallback)) {
mUsageTracker = std::move(usageTracker);
const RenderPassDescriptorMaxDrawCount* maxDrawCountInfo = nullptr;
FindInChain(descriptor->nextInChain, &maxDrawCountInfo);
@ -89,11 +91,12 @@ Ref<RenderPassEncoder> RenderPassEncoder::Create(DeviceBase* device,
uint32_t renderTargetWidth,
uint32_t renderTargetHeight,
bool depthReadOnly,
bool stencilReadOnly) {
bool stencilReadOnly,
std::function<void()> endCallback) {
return AcquireRef(new RenderPassEncoder(device, descriptor, commandEncoder, encodingContext,
std::move(usageTracker), std::move(attachmentState),
renderTargetWidth, renderTargetHeight, depthReadOnly,
stencilReadOnly));
stencilReadOnly, std::move(endCallback)));
}
RenderPassEncoder::RenderPassEncoder(DeviceBase* device,
@ -157,6 +160,10 @@ void RenderPassEncoder::APIEnd() {
return {};
},
"encoding %s.End().", this);
if (mEndCallback) {
mEndCallback();
}
}
void RenderPassEncoder::APIEndPass() {

View File

@ -36,7 +36,8 @@ class RenderPassEncoder final : public RenderEncoderBase {
uint32_t renderTargetWidth,
uint32_t renderTargetHeight,
bool depthReadOnly,
bool stencilReadOnly);
bool stencilReadOnly,
std::function<void()> endCallback = nullptr);
static Ref<RenderPassEncoder> MakeError(DeviceBase* device,
CommandEncoder* commandEncoder,
EncodingContext* encodingContext);
@ -72,7 +73,8 @@ class RenderPassEncoder final : public RenderEncoderBase {
uint32_t renderTargetWidth,
uint32_t renderTargetHeight,
bool depthReadOnly,
bool stencilReadOnly);
bool stencilReadOnly,
std::function<void()> endCallback = nullptr);
RenderPassEncoder(DeviceBase* device,
CommandEncoder* commandEncoder,
EncodingContext* encodingContext,
@ -97,6 +99,8 @@ class RenderPassEncoder final : public RenderEncoderBase {
// This is the hardcoded value in the WebGPU spec.
uint64_t mMaxDrawCount = 50000000;
std::function<void()> mEndCallback;
};
} // namespace dawn::native

View File

@ -553,6 +553,16 @@ TextureBase::TextureBase(DeviceBase* device,
mInternalUsage |= internalUsageDesc->internalUsage;
}
GetObjectTrackingList()->Track(this);
// dawn:1569: If a texture with multiple array layers or mip levels is specified as a texture
// attachment when this toggle is active, it needs to be given CopyDst usage internally.
bool applyAlwaysResolveIntoZeroLevelAndLayerToggle =
device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer) &&
(GetArrayLayers() > 1 || GetNumMipLevels() > 1) &&
(GetInternalUsage() & wgpu::TextureUsage::RenderAttachment);
if (applyAlwaysResolveIntoZeroLevelAndLayerToggle) {
AddInternalUsage(wgpu::TextureUsage::CopyDst);
}
}
TextureBase::~TextureBase() = default;

View File

@ -49,7 +49,8 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"a texture, we first resolve into a temporarily 2D texture with only one mipmap level and "
"one array layer, and copy the result of MSAA resolve into the true resolve target. This "
"workaround is enabled by default on the Metal drivers that have bugs when setting non-zero "
"resolveLevel or resolveSlice.",
"resolveLevel or resolveSlice. It is also enabled by default on Qualcomm Vulkan drivers, "
"which have similar bugs.",
"https://crbug.com/dawn/56"}},
{Toggle::LazyClearResourceOnFirstUse,
{"lazy_clear_resource_on_first_use",

View File

@ -111,23 +111,6 @@ ResultOrError<SavedMetalAttachment> PatchAttachmentWithTemporary(
return result;
}
// Same as PatchAttachmentWithTemporary but for the resolve attachment.
ResultOrError<SavedMetalAttachment> PatchResolveAttachmentWithTemporary(
Device* device,
MTLRenderPassAttachmentDescriptor* attachment) {
SavedMetalAttachment result;
DAWN_TRY_ASSIGN(
result, SaveAttachmentCreateTemporary(device, attachment.resolveTexture,
attachment.resolveLevel, attachment.resolveSlice));
// Replace the resolve attachment with the tempoary.
attachment.resolveTexture = result.temporary.Get();
attachment.resolveLevel = 0;
attachment.resolveSlice = 0;
return result;
}
// Helper function for Toggle EmulateStoreAndMSAAResolve
void ResolveInAnotherRenderPass(
CommandRecordingContext* commandContext,
@ -334,45 +317,6 @@ MaybeError EncodeMetalRenderPass(Device* device,
// workarounds to happen at the same time, it handles workarounds one by one and calls
// itself recursively to handle the next workaround if needed.
// Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. We must handle this before applying
// the store + MSAA resolve workaround, otherwise this toggle will never be handled because
// the resolve texture is removed when applying the store + MSAA resolve workaround.
if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) {
std::array<SavedMetalAttachment, kMaxColorAttachments> trueResolveAttachments = {};
bool workaroundUsed = false;
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (mtlRenderPass.colorAttachments[i].resolveTexture == nullptr) {
continue;
}
if (mtlRenderPass.colorAttachments[i].resolveLevel == 0 &&
mtlRenderPass.colorAttachments[i].resolveSlice == 0) {
continue;
}
DAWN_TRY_ASSIGN(
trueResolveAttachments[i],
PatchResolveAttachmentWithTemporary(device, mtlRenderPass.colorAttachments[i]));
workaroundUsed = true;
}
// If we need to use a temporary resolve texture we need to copy the result of MSAA
// resolve back to the true resolve targets.
if (workaroundUsed) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside), renderPassCmd));
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (trueResolveAttachments[i].texture == nullptr) {
continue;
}
trueResolveAttachments[i].CopyFromTemporaryToAttachment(commandContext);
}
return {};
}
}
// Handles the workaround for r8unorm rg8unorm mipmap rendering being broken on some
// devices. Render to a temporary texture instead and then copy back to the attachment.
if (device->IsToggleEnabled(Toggle::MetalRenderR8RG8UnormSmallMipToTempTexture)) {

View File

@ -639,12 +639,16 @@ void Device::InitTogglesFromDriver() {
// By default try to use S8 if available.
SetToggle(Toggle::VulkanUseS8, true);
// dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
// compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around that
// bug, split the command buffer any time we can detect that situation.
if (ToBackend(GetAdapter())->IsAndroidQualcomm()) {
ForceSetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
true);
// dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
// compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around
// that bug, split the command buffer any time we can detect that situation.
SetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass, true);
// dawn:1569: Qualcomm devices have a bug resolving into a non-zero level of an array
// texture. Work around it by resolving into a single level texture and then copying into
// the intended layer.
SetToggle(Toggle::AlwaysResolveIntoZeroLevelAndLayer, true);
}
}

View File

@ -521,9 +521,8 @@ TEST_P(MultisampledRenderingTest, ResolveInto2DArrayTexture) {
// TODO(dawn:462): Issue in the D3D12 validation layers.
DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsBackendValidationEnabled());
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
// TODO(dawn:1550) Fails on ARM-based Android devices.
DAWN_SUPPRESS_TEST_IF(IsAndroid());
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsARM());
wgpu::TextureView multisampledColorView2 =
CreateTextureForRenderAttachment(kColorFormat, kSampleCount).CreateView();
@ -1138,6 +1137,7 @@ DAWN_INSTANTIATE_TEST(MultisampledRenderingTest,
OpenGLBackend(),
OpenGLESBackend(),
VulkanBackend(),
VulkanBackend({"always_resolve_into_zero_level_and_layer"}),
MetalBackend({"emulate_store_and_msaa_resolve"}),
MetalBackend({"always_resolve_into_zero_level_and_layer"}),
MetalBackend({"always_resolve_into_zero_level_and_layer",