Split Vulkan command buffers to work around bug

There's a bug in some Qualcomm devices where using a depth/stencil
texture as a render attachment and then sampling it in a compute pass
causes a crash. This only happens, however, if the two passes occur as
part of the same Vulkan command buffer.

To work around the issue, this change splits the Vulkan command buffer
while recording any time it identifies that the problematic scenario may
occur.

Bug: dawn:1564
Change-Id: Ie137e9118ef9cc41f5908ca32c72c33f3798cd71
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/104860
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Brandon Jones <bajones@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
This commit is contained in:
Brandon Jones 2022-10-07 20:53:17 +00:00 committed by Dawn LUCI CQ
parent 007ddcad32
commit b68cd1a212
8 changed files with 113 additions and 31 deletions

View File

@ -312,6 +312,13 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"This toggle is enabled by default on Metal backend where GPU counters cannot be stored to"
"sampleBufferAttachments on empty blit encoder.",
"https://crbug.com/dawn/1473"}},
{Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
{"vulkan_split_command_buffer_on_depth_stencil_compute_sample_after_render_pass",
"Splits any command buffer that samples a depth/stencil texture in a compute pass after that "
"texture was used as an attachment for a prior render pass. This toggle is enabled by "
"default on Qualcomm GPUs, which have been observed experiencing a driver crash in this "
"situation.",
"https://crbug.com/dawn/1564"}},
// Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
}};
} // anonymous namespace

View File

@ -80,6 +80,7 @@ enum class Toggle {
D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
ApplyClearBigIntegerColorValueWithDraw,
MetalUseDummyBlitEncoderForWriteTimestamp,
VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
EnumCount,
InvalidEnum = EnumCount,

View File

@ -24,6 +24,7 @@
#include "dawn/native/DynamicUploader.h"
#include "dawn/native/EnumMaskIterator.h"
#include "dawn/native/RenderBundle.h"
#include "dawn/native/vulkan/AdapterVk.h"
#include "dawn/native/vulkan/BindGroupVk.h"
#include "dawn/native/vulkan/BufferVk.h"
#include "dawn/native/vulkan/CommandRecordingContext.h"
@ -887,6 +888,24 @@ MaybeError CommandBuffer::RecordComputePass(CommandRecordingContext* recordingCo
const ComputePassResourceUsage& resourceUsages) {
Device* device = ToBackend(GetDevice());
// If required, split the command buffer any time we detect a dpeth/stencil attachment is
// used in a compute pass after being used as a render pass attachment in the same command
// buffer.
if (device->IsToggleEnabled(
Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass) &&
!mRenderPassDepthStencilAttachments.empty()) {
for (auto texture : resourceUsages.referencedTextures) {
if (texture->GetFormat().HasDepthOrStencil() &&
mRenderPassDepthStencilAttachments.find(texture) !=
mRenderPassDepthStencilAttachments.end()) {
// Identified a potential crash case, split the command buffer.
DAWN_TRY(device->SplitRecordingContext(recordingContext));
mRenderPassDepthStencilAttachments.clear();
break;
}
}
}
// Write timestamp at the beginning of compute pass if it's set
if (computePassCmd->beginTimestamp.querySet.Get() != nullptr) {
RecordWriteTimestampCmd(recordingContext, device,
@ -1038,6 +1057,14 @@ MaybeError CommandBuffer::RecordRenderPass(CommandRecordingContext* recordingCon
DAWN_TRY(RecordBeginRenderPass(recordingContext, device, renderPassCmd));
// If required, track depth/stencil textures used as render pass attachments.
if (device->IsToggleEnabled(
Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass) &&
renderPassCmd->attachmentState->HasDepthStencilAttachment()) {
mRenderPassDepthStencilAttachments.insert(
renderPassCmd->depthStencilAttachment.view->GetTexture());
}
// Write timestamp at the beginning of render pass if it's set.
if (renderPassCmd->beginTimestamp.querySet.Get() != nullptr) {
RecordWriteTimestampCmd(recordingContext, device,

View File

@ -15,6 +15,8 @@
#ifndef SRC_DAWN_NATIVE_VULKAN_COMMANDBUFFERVK_H_
#define SRC_DAWN_NATIVE_VULKAN_COMMANDBUFFERVK_H_
#include <set>
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/Error.h"
@ -50,6 +52,10 @@ class CommandBuffer final : public CommandBufferBase {
const TextureCopy& srcCopy,
const TextureCopy& dstCopy,
const Extent3D& copySize);
// Need to track depth/stencil textures used by render passes if the
// VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass toggle is enabled.
std::set<TextureBase*> mRenderPassDepthStencilAttachments;
};
} // namespace dawn::native::vulkan

View File

@ -41,6 +41,13 @@ struct CommandRecordingContext {
// For Device state tracking only.
VkCommandPool commandPool = VK_NULL_HANDLE;
bool used = false;
// In some cases command buffer will need to be split to accomodate driver bug workarounds.
// See the VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass toggle as an
// example. This tracks the list of all command buffers used for this recording context,
// with commandBuffer always being the last element.
std::vector<VkCommandBuffer> commandBufferList;
std::vector<VkCommandPool> commandPoolList;
};
} // namespace dawn::native::vulkan

View File

@ -321,8 +321,8 @@ MaybeError Device::SubmitPendingCommands() {
submitInfo.waitSemaphoreCount = static_cast<uint32_t>(mRecordingContext.waitSemaphores.size());
submitInfo.pWaitSemaphores = AsVkArray(mRecordingContext.waitSemaphores.data());
submitInfo.pWaitDstStageMask = dstStageMasks.data();
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &mRecordingContext.commandBuffer;
submitInfo.commandBufferCount = mRecordingContext.commandBufferList.size();
submitInfo.pCommandBuffers = mRecordingContext.commandBufferList.data();
submitInfo.signalSemaphoreCount = (scopedSignalSemaphore.Get() == VK_NULL_HANDLE ? 0 : 1);
submitInfo.pSignalSemaphores = AsVkArray(scopedSignalSemaphore.InitializeInto());
@ -345,9 +345,11 @@ MaybeError Device::SubmitPendingCommands() {
ExecutionSerial lastSubmittedSerial = GetLastSubmittedCommandSerial();
mFencesInFlight.emplace(fence, lastSubmittedSerial);
CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPool,
mRecordingContext.commandBuffer};
mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
for (size_t i = 0; i < mRecordingContext.commandBufferList.size(); ++i) {
CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPoolList[i],
mRecordingContext.commandBufferList[i]};
mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
}
if (mRecordingContext.externalTexturesForEagerTransition.size() > 0) {
// Export the signal semaphore.
@ -610,6 +612,14 @@ void Device::InitTogglesFromDriver() {
// By default try to use S8 if available.
SetToggle(Toggle::VulkanUseS8, true);
// dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
// compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around that
// bug, split the command buffer any time we can detect that situation.
if (ToBackend(GetAdapter())->IsAndroidQualcomm()) {
ForceSetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
true);
}
}
void Device::ApplyDepthStencilFormatToggles() {
@ -695,9 +705,43 @@ MaybeError Device::PrepareRecordingContext() {
ASSERT(mRecordingContext.commandBuffer == VK_NULL_HANDLE);
ASSERT(mRecordingContext.commandPool == VK_NULL_HANDLE);
CommandPoolAndBuffer commands;
DAWN_TRY_ASSIGN(commands, BeginVkCommandBuffer());
mRecordingContext.commandBuffer = commands.commandBuffer;
mRecordingContext.commandPool = commands.pool;
mRecordingContext.commandBufferList.push_back(commands.commandBuffer);
mRecordingContext.commandPoolList.push_back(commands.pool);
return {};
}
// Splits the recording context, ending the current command buffer and beginning a new one.
// This should not be necessary in most cases, and is provided only to work around driver issues
// on some hardware.
MaybeError Device::SplitRecordingContext(CommandRecordingContext* recordingContext) {
ASSERT(recordingContext->used);
DAWN_TRY(
CheckVkSuccess(fn.EndCommandBuffer(recordingContext->commandBuffer), "vkEndCommandBuffer"));
CommandPoolAndBuffer commands;
DAWN_TRY_ASSIGN(commands, BeginVkCommandBuffer());
recordingContext->commandBuffer = commands.commandBuffer;
recordingContext->commandPool = commands.pool;
recordingContext->commandBufferList.push_back(commands.commandBuffer);
recordingContext->commandPoolList.push_back(commands.pool);
return {};
}
ResultOrError<Device::CommandPoolAndBuffer> Device::BeginVkCommandBuffer() {
CommandPoolAndBuffer commands;
// First try to recycle unused command pools.
if (!mUnusedCommands.empty()) {
CommandPoolAndBuffer commands = mUnusedCommands.back();
commands = mUnusedCommands.back();
mUnusedCommands.pop_back();
DAWN_TRY_WITH_CLEANUP(
CheckVkSuccess(fn.ResetCommandPool(mVkDevice, commands.pool, 0), "vkResetCommandPool"),
@ -714,9 +758,6 @@ MaybeError Device::PrepareRecordingContext() {
fn.FreeCommandBuffers(mVkDevice, commands.pool, 1, &commands.commandBuffer);
fn.DestroyCommandPool(mVkDevice, commands.pool, nullptr);
});
mRecordingContext.commandBuffer = commands.commandBuffer;
mRecordingContext.commandPool = commands.pool;
} else {
// Create a new command pool for our commands and allocate the command buffer.
VkCommandPoolCreateInfo createInfo;
@ -725,19 +766,19 @@ MaybeError Device::PrepareRecordingContext() {
createInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
createInfo.queueFamilyIndex = mQueueFamily;
DAWN_TRY(CheckVkSuccess(
fn.CreateCommandPool(mVkDevice, &createInfo, nullptr, &*mRecordingContext.commandPool),
"vkCreateCommandPool"));
DAWN_TRY(
CheckVkSuccess(fn.CreateCommandPool(mVkDevice, &createInfo, nullptr, &*commands.pool),
"vkCreateCommandPool"));
VkCommandBufferAllocateInfo allocateInfo;
allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocateInfo.pNext = nullptr;
allocateInfo.commandPool = mRecordingContext.commandPool;
allocateInfo.commandPool = commands.pool;
allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocateInfo.commandBufferCount = 1;
DAWN_TRY(CheckVkSuccess(
fn.AllocateCommandBuffers(mVkDevice, &allocateInfo, &mRecordingContext.commandBuffer),
fn.AllocateCommandBuffers(mVkDevice, &allocateInfo, &commands.commandBuffer),
"vkAllocateCommandBuffers"));
}
@ -748,8 +789,10 @@ MaybeError Device::PrepareRecordingContext() {
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
return CheckVkSuccess(fn.BeginCommandBuffer(mRecordingContext.commandBuffer, &beginInfo),
"vkBeginCommandBuffer");
DAWN_TRY(CheckVkSuccess(fn.BeginCommandBuffer(commands.commandBuffer, &beginInfo),
"vkBeginCommandBuffer"));
return commands;
}
void Device::RecycleCompletedCommands() {

View File

@ -66,6 +66,7 @@ class Device final : public DeviceBase {
external_semaphore::Service* GetExternalSemaphoreService() const;
CommandRecordingContext* GetPendingRecordingContext();
MaybeError SplitRecordingContext(CommandRecordingContext* recordingContext);
MaybeError SubmitPendingCommands();
void EnqueueDeferredDeallocation(DescriptorSetAllocator* allocator);
@ -205,13 +206,15 @@ class Device final : public DeviceBase {
const std::string mDebugPrefix;
std::vector<std::string> mDebugMessages;
MaybeError PrepareRecordingContext();
void RecycleCompletedCommands();
struct CommandPoolAndBuffer {
VkCommandPool pool = VK_NULL_HANDLE;
VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
};
MaybeError PrepareRecordingContext();
ResultOrError<CommandPoolAndBuffer> BeginVkCommandBuffer();
void RecycleCompletedCommands();
SerialQueue<ExecutionSerial, CommandPoolAndBuffer> mCommandsInFlight;
// Command pools in the unused list haven't been reset yet.
std::vector<CommandPoolAndBuffer> mUnusedCommands;

View File

@ -625,9 +625,6 @@ TEST_P(DepthStencilSamplingTest, SampleExtraComponents) {
// This test fails on ANGLE (both SwiftShader and D3D11).
DAWN_SUPPRESS_TEST_IF(IsANGLE());
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
wgpu::TextureFormat format = GetParam().mTextureFormat;
DoSamplingExtraStencilComponentsRenderTest(TestAspectAndSamplerType::StencilAsUint, format,
@ -639,9 +636,6 @@ TEST_P(DepthStencilSamplingTest, SampleExtraComponents) {
// Test sampling both depth and stencil with a render/compute pipeline works.
TEST_P(DepthStencilSamplingTest, SampleDepthAndStencilRender) {
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
wgpu::TextureFormat format = GetParam().mTextureFormat;
wgpu::SamplerDescriptor samplerDesc;
@ -760,9 +754,6 @@ class DepthSamplingTest : public DepthStencilSamplingTest {};
// Test that sampling a depth texture with a render/compute pipeline works
TEST_P(DepthSamplingTest, SampleDepthOnly) {
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
wgpu::TextureFormat format = GetParam().mTextureFormat;
float tolerance = format == wgpu::TextureFormat::Depth16Unorm ? 0.001f : 0.0f;
@ -812,9 +803,6 @@ TEST_P(StencilSamplingTest, SampleStencilOnly) {
// This test fails on SwANGLE (although it passes on other ANGLE backends).
DAWN_TEST_UNSUPPORTED_IF(IsANGLE());
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
wgpu::TextureFormat format = GetParam().mTextureFormat;
DoSamplingTest(TestAspectAndSamplerType::StencilAsUint,