Split Vulkan command buffers to work around bug
There's a bug in some Qualcomm devices where using a depth/stencil texture as a render attachment and then sampling it in a compute pass causes a crash. This only happens, however, if the two passes occur as part of the same Vulkan command buffer. To work around the issue, this change splits the Vulkan command buffer while recording any time it identifies that the problematic scenario may occur. Bug: dawn:1564 Change-Id: Ie137e9118ef9cc41f5908ca32c72c33f3798cd71 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/104860 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Brandon Jones <bajones@chromium.org> Reviewed-by: Kai Ninomiya <kainino@chromium.org>
This commit is contained in:
parent
007ddcad32
commit
b68cd1a212
|
@ -312,6 +312,13 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
|
|||
"This toggle is enabled by default on Metal backend where GPU counters cannot be stored to"
|
||||
"sampleBufferAttachments on empty blit encoder.",
|
||||
"https://crbug.com/dawn/1473"}},
|
||||
{Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
|
||||
{"vulkan_split_command_buffer_on_depth_stencil_compute_sample_after_render_pass",
|
||||
"Splits any command buffer that samples a depth/stencil texture in a compute pass after that "
|
||||
"texture was used as an attachment for a prior render pass. This toggle is enabled by "
|
||||
"default on Qualcomm GPUs, which have been observed experiencing a driver crash in this "
|
||||
"situation.",
|
||||
"https://crbug.com/dawn/1564"}},
|
||||
// Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
|
||||
}};
|
||||
} // anonymous namespace
|
||||
|
|
|
@ -80,6 +80,7 @@ enum class Toggle {
|
|||
D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
|
||||
ApplyClearBigIntegerColorValueWithDraw,
|
||||
MetalUseDummyBlitEncoderForWriteTimestamp,
|
||||
VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
|
||||
|
||||
EnumCount,
|
||||
InvalidEnum = EnumCount,
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "dawn/native/DynamicUploader.h"
|
||||
#include "dawn/native/EnumMaskIterator.h"
|
||||
#include "dawn/native/RenderBundle.h"
|
||||
#include "dawn/native/vulkan/AdapterVk.h"
|
||||
#include "dawn/native/vulkan/BindGroupVk.h"
|
||||
#include "dawn/native/vulkan/BufferVk.h"
|
||||
#include "dawn/native/vulkan/CommandRecordingContext.h"
|
||||
|
@ -887,6 +888,24 @@ MaybeError CommandBuffer::RecordComputePass(CommandRecordingContext* recordingCo
|
|||
const ComputePassResourceUsage& resourceUsages) {
|
||||
Device* device = ToBackend(GetDevice());
|
||||
|
||||
// If required, split the command buffer any time we detect a dpeth/stencil attachment is
|
||||
// used in a compute pass after being used as a render pass attachment in the same command
|
||||
// buffer.
|
||||
if (device->IsToggleEnabled(
|
||||
Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass) &&
|
||||
!mRenderPassDepthStencilAttachments.empty()) {
|
||||
for (auto texture : resourceUsages.referencedTextures) {
|
||||
if (texture->GetFormat().HasDepthOrStencil() &&
|
||||
mRenderPassDepthStencilAttachments.find(texture) !=
|
||||
mRenderPassDepthStencilAttachments.end()) {
|
||||
// Identified a potential crash case, split the command buffer.
|
||||
DAWN_TRY(device->SplitRecordingContext(recordingContext));
|
||||
mRenderPassDepthStencilAttachments.clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write timestamp at the beginning of compute pass if it's set
|
||||
if (computePassCmd->beginTimestamp.querySet.Get() != nullptr) {
|
||||
RecordWriteTimestampCmd(recordingContext, device,
|
||||
|
@ -1038,6 +1057,14 @@ MaybeError CommandBuffer::RecordRenderPass(CommandRecordingContext* recordingCon
|
|||
|
||||
DAWN_TRY(RecordBeginRenderPass(recordingContext, device, renderPassCmd));
|
||||
|
||||
// If required, track depth/stencil textures used as render pass attachments.
|
||||
if (device->IsToggleEnabled(
|
||||
Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass) &&
|
||||
renderPassCmd->attachmentState->HasDepthStencilAttachment()) {
|
||||
mRenderPassDepthStencilAttachments.insert(
|
||||
renderPassCmd->depthStencilAttachment.view->GetTexture());
|
||||
}
|
||||
|
||||
// Write timestamp at the beginning of render pass if it's set.
|
||||
if (renderPassCmd->beginTimestamp.querySet.Get() != nullptr) {
|
||||
RecordWriteTimestampCmd(recordingContext, device,
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#ifndef SRC_DAWN_NATIVE_VULKAN_COMMANDBUFFERVK_H_
|
||||
#define SRC_DAWN_NATIVE_VULKAN_COMMANDBUFFERVK_H_
|
||||
|
||||
#include <set>
|
||||
|
||||
#include "dawn/native/CommandBuffer.h"
|
||||
#include "dawn/native/Error.h"
|
||||
|
||||
|
@ -50,6 +52,10 @@ class CommandBuffer final : public CommandBufferBase {
|
|||
const TextureCopy& srcCopy,
|
||||
const TextureCopy& dstCopy,
|
||||
const Extent3D& copySize);
|
||||
|
||||
// Need to track depth/stencil textures used by render passes if the
|
||||
// VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass toggle is enabled.
|
||||
std::set<TextureBase*> mRenderPassDepthStencilAttachments;
|
||||
};
|
||||
|
||||
} // namespace dawn::native::vulkan
|
||||
|
|
|
@ -41,6 +41,13 @@ struct CommandRecordingContext {
|
|||
// For Device state tracking only.
|
||||
VkCommandPool commandPool = VK_NULL_HANDLE;
|
||||
bool used = false;
|
||||
|
||||
// In some cases command buffer will need to be split to accomodate driver bug workarounds.
|
||||
// See the VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass toggle as an
|
||||
// example. This tracks the list of all command buffers used for this recording context,
|
||||
// with commandBuffer always being the last element.
|
||||
std::vector<VkCommandBuffer> commandBufferList;
|
||||
std::vector<VkCommandPool> commandPoolList;
|
||||
};
|
||||
|
||||
} // namespace dawn::native::vulkan
|
||||
|
|
|
@ -321,8 +321,8 @@ MaybeError Device::SubmitPendingCommands() {
|
|||
submitInfo.waitSemaphoreCount = static_cast<uint32_t>(mRecordingContext.waitSemaphores.size());
|
||||
submitInfo.pWaitSemaphores = AsVkArray(mRecordingContext.waitSemaphores.data());
|
||||
submitInfo.pWaitDstStageMask = dstStageMasks.data();
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &mRecordingContext.commandBuffer;
|
||||
submitInfo.commandBufferCount = mRecordingContext.commandBufferList.size();
|
||||
submitInfo.pCommandBuffers = mRecordingContext.commandBufferList.data();
|
||||
submitInfo.signalSemaphoreCount = (scopedSignalSemaphore.Get() == VK_NULL_HANDLE ? 0 : 1);
|
||||
submitInfo.pSignalSemaphores = AsVkArray(scopedSignalSemaphore.InitializeInto());
|
||||
|
||||
|
@ -345,9 +345,11 @@ MaybeError Device::SubmitPendingCommands() {
|
|||
ExecutionSerial lastSubmittedSerial = GetLastSubmittedCommandSerial();
|
||||
mFencesInFlight.emplace(fence, lastSubmittedSerial);
|
||||
|
||||
CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPool,
|
||||
mRecordingContext.commandBuffer};
|
||||
mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
|
||||
for (size_t i = 0; i < mRecordingContext.commandBufferList.size(); ++i) {
|
||||
CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPoolList[i],
|
||||
mRecordingContext.commandBufferList[i]};
|
||||
mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
|
||||
}
|
||||
|
||||
if (mRecordingContext.externalTexturesForEagerTransition.size() > 0) {
|
||||
// Export the signal semaphore.
|
||||
|
@ -610,6 +612,14 @@ void Device::InitTogglesFromDriver() {
|
|||
|
||||
// By default try to use S8 if available.
|
||||
SetToggle(Toggle::VulkanUseS8, true);
|
||||
|
||||
// dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
|
||||
// compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around that
|
||||
// bug, split the command buffer any time we can detect that situation.
|
||||
if (ToBackend(GetAdapter())->IsAndroidQualcomm()) {
|
||||
ForceSetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::ApplyDepthStencilFormatToggles() {
|
||||
|
@ -695,9 +705,43 @@ MaybeError Device::PrepareRecordingContext() {
|
|||
ASSERT(mRecordingContext.commandBuffer == VK_NULL_HANDLE);
|
||||
ASSERT(mRecordingContext.commandPool == VK_NULL_HANDLE);
|
||||
|
||||
CommandPoolAndBuffer commands;
|
||||
DAWN_TRY_ASSIGN(commands, BeginVkCommandBuffer());
|
||||
|
||||
mRecordingContext.commandBuffer = commands.commandBuffer;
|
||||
mRecordingContext.commandPool = commands.pool;
|
||||
mRecordingContext.commandBufferList.push_back(commands.commandBuffer);
|
||||
mRecordingContext.commandPoolList.push_back(commands.pool);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Splits the recording context, ending the current command buffer and beginning a new one.
|
||||
// This should not be necessary in most cases, and is provided only to work around driver issues
|
||||
// on some hardware.
|
||||
MaybeError Device::SplitRecordingContext(CommandRecordingContext* recordingContext) {
|
||||
ASSERT(recordingContext->used);
|
||||
|
||||
DAWN_TRY(
|
||||
CheckVkSuccess(fn.EndCommandBuffer(recordingContext->commandBuffer), "vkEndCommandBuffer"));
|
||||
|
||||
CommandPoolAndBuffer commands;
|
||||
DAWN_TRY_ASSIGN(commands, BeginVkCommandBuffer());
|
||||
|
||||
recordingContext->commandBuffer = commands.commandBuffer;
|
||||
recordingContext->commandPool = commands.pool;
|
||||
recordingContext->commandBufferList.push_back(commands.commandBuffer);
|
||||
recordingContext->commandPoolList.push_back(commands.pool);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
ResultOrError<Device::CommandPoolAndBuffer> Device::BeginVkCommandBuffer() {
|
||||
CommandPoolAndBuffer commands;
|
||||
|
||||
// First try to recycle unused command pools.
|
||||
if (!mUnusedCommands.empty()) {
|
||||
CommandPoolAndBuffer commands = mUnusedCommands.back();
|
||||
commands = mUnusedCommands.back();
|
||||
mUnusedCommands.pop_back();
|
||||
DAWN_TRY_WITH_CLEANUP(
|
||||
CheckVkSuccess(fn.ResetCommandPool(mVkDevice, commands.pool, 0), "vkResetCommandPool"),
|
||||
|
@ -714,9 +758,6 @@ MaybeError Device::PrepareRecordingContext() {
|
|||
fn.FreeCommandBuffers(mVkDevice, commands.pool, 1, &commands.commandBuffer);
|
||||
fn.DestroyCommandPool(mVkDevice, commands.pool, nullptr);
|
||||
});
|
||||
|
||||
mRecordingContext.commandBuffer = commands.commandBuffer;
|
||||
mRecordingContext.commandPool = commands.pool;
|
||||
} else {
|
||||
// Create a new command pool for our commands and allocate the command buffer.
|
||||
VkCommandPoolCreateInfo createInfo;
|
||||
|
@ -725,19 +766,19 @@ MaybeError Device::PrepareRecordingContext() {
|
|||
createInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
|
||||
createInfo.queueFamilyIndex = mQueueFamily;
|
||||
|
||||
DAWN_TRY(CheckVkSuccess(
|
||||
fn.CreateCommandPool(mVkDevice, &createInfo, nullptr, &*mRecordingContext.commandPool),
|
||||
"vkCreateCommandPool"));
|
||||
DAWN_TRY(
|
||||
CheckVkSuccess(fn.CreateCommandPool(mVkDevice, &createInfo, nullptr, &*commands.pool),
|
||||
"vkCreateCommandPool"));
|
||||
|
||||
VkCommandBufferAllocateInfo allocateInfo;
|
||||
allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
allocateInfo.pNext = nullptr;
|
||||
allocateInfo.commandPool = mRecordingContext.commandPool;
|
||||
allocateInfo.commandPool = commands.pool;
|
||||
allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
allocateInfo.commandBufferCount = 1;
|
||||
|
||||
DAWN_TRY(CheckVkSuccess(
|
||||
fn.AllocateCommandBuffers(mVkDevice, &allocateInfo, &mRecordingContext.commandBuffer),
|
||||
fn.AllocateCommandBuffers(mVkDevice, &allocateInfo, &commands.commandBuffer),
|
||||
"vkAllocateCommandBuffers"));
|
||||
}
|
||||
|
||||
|
@ -748,8 +789,10 @@ MaybeError Device::PrepareRecordingContext() {
|
|||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
beginInfo.pInheritanceInfo = nullptr;
|
||||
|
||||
return CheckVkSuccess(fn.BeginCommandBuffer(mRecordingContext.commandBuffer, &beginInfo),
|
||||
"vkBeginCommandBuffer");
|
||||
DAWN_TRY(CheckVkSuccess(fn.BeginCommandBuffer(commands.commandBuffer, &beginInfo),
|
||||
"vkBeginCommandBuffer"));
|
||||
|
||||
return commands;
|
||||
}
|
||||
|
||||
void Device::RecycleCompletedCommands() {
|
||||
|
|
|
@ -66,6 +66,7 @@ class Device final : public DeviceBase {
|
|||
external_semaphore::Service* GetExternalSemaphoreService() const;
|
||||
|
||||
CommandRecordingContext* GetPendingRecordingContext();
|
||||
MaybeError SplitRecordingContext(CommandRecordingContext* recordingContext);
|
||||
MaybeError SubmitPendingCommands();
|
||||
|
||||
void EnqueueDeferredDeallocation(DescriptorSetAllocator* allocator);
|
||||
|
@ -205,13 +206,15 @@ class Device final : public DeviceBase {
|
|||
const std::string mDebugPrefix;
|
||||
std::vector<std::string> mDebugMessages;
|
||||
|
||||
MaybeError PrepareRecordingContext();
|
||||
void RecycleCompletedCommands();
|
||||
|
||||
struct CommandPoolAndBuffer {
|
||||
VkCommandPool pool = VK_NULL_HANDLE;
|
||||
VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
|
||||
};
|
||||
|
||||
MaybeError PrepareRecordingContext();
|
||||
ResultOrError<CommandPoolAndBuffer> BeginVkCommandBuffer();
|
||||
void RecycleCompletedCommands();
|
||||
|
||||
SerialQueue<ExecutionSerial, CommandPoolAndBuffer> mCommandsInFlight;
|
||||
// Command pools in the unused list haven't been reset yet.
|
||||
std::vector<CommandPoolAndBuffer> mUnusedCommands;
|
||||
|
|
|
@ -625,9 +625,6 @@ TEST_P(DepthStencilSamplingTest, SampleExtraComponents) {
|
|||
// This test fails on ANGLE (both SwiftShader and D3D11).
|
||||
DAWN_SUPPRESS_TEST_IF(IsANGLE());
|
||||
|
||||
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
|
||||
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
|
||||
|
||||
wgpu::TextureFormat format = GetParam().mTextureFormat;
|
||||
|
||||
DoSamplingExtraStencilComponentsRenderTest(TestAspectAndSamplerType::StencilAsUint, format,
|
||||
|
@ -639,9 +636,6 @@ TEST_P(DepthStencilSamplingTest, SampleExtraComponents) {
|
|||
|
||||
// Test sampling both depth and stencil with a render/compute pipeline works.
|
||||
TEST_P(DepthStencilSamplingTest, SampleDepthAndStencilRender) {
|
||||
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
|
||||
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
|
||||
|
||||
wgpu::TextureFormat format = GetParam().mTextureFormat;
|
||||
|
||||
wgpu::SamplerDescriptor samplerDesc;
|
||||
|
@ -760,9 +754,6 @@ class DepthSamplingTest : public DepthStencilSamplingTest {};
|
|||
|
||||
// Test that sampling a depth texture with a render/compute pipeline works
|
||||
TEST_P(DepthSamplingTest, SampleDepthOnly) {
|
||||
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
|
||||
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
|
||||
|
||||
wgpu::TextureFormat format = GetParam().mTextureFormat;
|
||||
float tolerance = format == wgpu::TextureFormat::Depth16Unorm ? 0.001f : 0.0f;
|
||||
|
||||
|
@ -812,9 +803,6 @@ TEST_P(StencilSamplingTest, SampleStencilOnly) {
|
|||
// This test fails on SwANGLE (although it passes on other ANGLE backends).
|
||||
DAWN_TEST_UNSUPPORTED_IF(IsANGLE());
|
||||
|
||||
// TODO(dawn:1549) Fails on Qualcomm-based Android devices.
|
||||
DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
|
||||
|
||||
wgpu::TextureFormat format = GetParam().mTextureFormat;
|
||||
|
||||
DoSamplingTest(TestAspectAndSamplerType::StencilAsUint,
|
||||
|
|
Loading…
Reference in New Issue