Fixes bad mem-read in Vulkan's ~DescriptorSetAllocator.

Bug was a result of an external BGL reference that lingered after device was destroyed leading to a bad read on the device's FencedDeleter when the BGL reference was finally released. Fix just makes sure that the previous code path runs during the device destruction instead of afterwards.

- Removes passthrough call in BGL to the allocator and instead has the device keep track of the allocator directly so that the list can be used to both deallocate bind groups and bind group layouts at the end.
- Makes the allocator an ObjectBase so that we can have an explicit copy of the device since getting it from the layout can be dangerous now that the allocator may outlive the layout.

Bug: chromium:1276928
Change-Id: Ibca5e3c313fc0c0980ecaaa9ad2c871e204ac153
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/71860
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
This commit is contained in:
Loko Kung 2021-12-08 20:27:14 +00:00 committed by Dawn LUCI CQ
parent bb62d62d31
commit 79f62081c0
7 changed files with 54 additions and 28 deletions

View File

@ -138,7 +138,7 @@ namespace dawn_native { namespace vulkan {
// TODO(enga): Consider deduping allocators for layouts with the same descriptor type
// counts.
mDescriptorSetAllocator =
std::make_unique<DescriptorSetAllocator>(this, std::move(descriptorCountPerType));
DescriptorSetAllocator::Create(this, std::move(descriptorCountPerType));
SetLabelImpl();
@ -169,6 +169,7 @@ namespace dawn_native { namespace vulkan {
device->fn.DestroyDescriptorSetLayout(device->GetVkDevice(), mHandle, nullptr);
mHandle = VK_NULL_HANDLE;
}
mDescriptorSetAllocator = nullptr;
}
VkDescriptorSetLayout BindGroupLayout::GetHandle() const {
@ -191,10 +192,6 @@ namespace dawn_native { namespace vulkan {
mBindGroupAllocator.Deallocate(bindGroup);
}
void BindGroupLayout::FinishDeallocation(ExecutionSerial completedSerial) {
mDescriptorSetAllocator->FinishDeallocation(completedSerial);
}
void BindGroupLayout::SetLabelImpl() {
SetDebugName(ToBackend(GetDevice()), VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT,
reinterpret_cast<uint64_t&>(mHandle), "Dawn_BindGroupLayout", GetLabel());

View File

@ -60,7 +60,6 @@ namespace dawn_native { namespace vulkan {
const BindGroupDescriptor* descriptor);
void DeallocateBindGroup(BindGroup* bindGroup,
DescriptorSetAllocation* descriptorSetAllocation);
void FinishDeallocation(ExecutionSerial completedSerial);
private:
~BindGroupLayout() override;
@ -73,7 +72,7 @@ namespace dawn_native { namespace vulkan {
VkDescriptorSetLayout mHandle = VK_NULL_HANDLE;
SlabAllocator<BindGroup> mBindGroupAllocator;
std::unique_ptr<DescriptorSetAllocator> mDescriptorSetAllocator;
Ref<DescriptorSetAllocator> mDescriptorSetAllocator;
};
}} // namespace dawn_native::vulkan

View File

@ -24,10 +24,17 @@ namespace dawn_native { namespace vulkan {
// TODO(enga): Figure out this value.
static constexpr uint32_t kMaxDescriptorsPerPool = 512;
// static
Ref<DescriptorSetAllocator> DescriptorSetAllocator::Create(
BindGroupLayout* layout,
std::map<VkDescriptorType, uint32_t> descriptorCountPerType) {
return AcquireRef(new DescriptorSetAllocator(layout, descriptorCountPerType));
}
DescriptorSetAllocator::DescriptorSetAllocator(
BindGroupLayout* layout,
std::map<VkDescriptorType, uint32_t> descriptorCountPerType)
: mLayout(layout) {
: ObjectBase(layout->GetDevice()), mLayout(layout) {
ASSERT(layout != nullptr);
// Compute the total number of descriptors for this layout.
@ -66,7 +73,7 @@ namespace dawn_native { namespace vulkan {
for (auto& pool : mDescriptorPools) {
ASSERT(pool.freeSetIndices.size() == mMaxSets);
if (pool.vkPool != VK_NULL_HANDLE) {
Device* device = ToBackend(mLayout->GetDevice());
Device* device = ToBackend(GetDevice());
device->GetFencedDeleter()->DeleteWhenUnused(pool.vkPool);
}
}
@ -101,13 +108,13 @@ namespace dawn_native { namespace vulkan {
// We can't reuse the descriptor set right away because the Vulkan spec says in the
// documentation for vkCmdBindDescriptorSets that the set may be consumed any time between
// host execution of the command and the end of the draw/dispatch.
Device* device = ToBackend(mLayout->GetDevice());
Device* device = ToBackend(GetDevice());
const ExecutionSerial serial = device->GetPendingCommandSerial();
mPendingDeallocations.Enqueue({allocationInfo->poolIndex, allocationInfo->setIndex},
serial);
if (mLastDeallocationSerial != serial) {
device->EnqueueDeferredDeallocation(mLayout);
device->EnqueueDeferredDeallocation(this);
mLastDeallocationSerial = serial;
}
@ -137,7 +144,7 @@ namespace dawn_native { namespace vulkan {
createInfo.poolSizeCount = static_cast<PoolIndex>(mPoolSizes.size());
createInfo.pPoolSizes = mPoolSizes.data();
Device* device = ToBackend(mLayout->GetDevice());
Device* device = ToBackend(GetDevice());
VkDescriptorPool descriptorPool;
DAWN_TRY(CheckVkSuccess(device->fn.CreateDescriptorPool(device->GetVkDevice(), &createInfo,

View File

@ -19,6 +19,7 @@
#include "common/vulkan_platform.h"
#include "dawn_native/Error.h"
#include "dawn_native/IntegerTypes.h"
#include "dawn_native/ObjectBase.h"
#include "dawn_native/vulkan/DescriptorSetAllocation.h"
#include <map>
@ -28,20 +29,24 @@ namespace dawn_native { namespace vulkan {
class BindGroupLayout;
class DescriptorSetAllocator {
class DescriptorSetAllocator : public ObjectBase {
using PoolIndex = uint32_t;
using SetIndex = uint16_t;
public:
DescriptorSetAllocator(BindGroupLayout* layout,
std::map<VkDescriptorType, uint32_t> descriptorCountPerType);
~DescriptorSetAllocator();
static Ref<DescriptorSetAllocator> Create(
BindGroupLayout* layout,
std::map<VkDescriptorType, uint32_t> descriptorCountPerType);
ResultOrError<DescriptorSetAllocation> Allocate();
void Deallocate(DescriptorSetAllocation* allocationInfo);
void FinishDeallocation(ExecutionSerial completedSerial);
private:
DescriptorSetAllocator(BindGroupLayout* layout,
std::map<VkDescriptorType, uint32_t> descriptorCountPerType);
~DescriptorSetAllocator();
MaybeError AllocateDescriptorPool();
BindGroupLayout* mLayout;

View File

@ -179,14 +179,14 @@ namespace dawn_native { namespace vulkan {
ExecutionSerial completedSerial = GetCompletedCommandSerial();
for (Ref<BindGroupLayout>& bgl :
mBindGroupLayoutsPendingDeallocation.IterateUpTo(completedSerial)) {
bgl->FinishDeallocation(completedSerial);
for (Ref<DescriptorSetAllocator>& allocator :
mDescriptorAllocatorsPendingDeallocation.IterateUpTo(completedSerial)) {
allocator->FinishDeallocation(completedSerial);
}
mBindGroupLayoutsPendingDeallocation.ClearUpTo(completedSerial);
mResourceMemoryAllocator->Tick(completedSerial);
mDeleter->Tick(completedSerial);
mDescriptorAllocatorsPendingDeallocation.ClearUpTo(completedSerial);
if (mRecordingContext.used) {
DAWN_TRY(SubmitPendingCommands());
@ -230,8 +230,8 @@ namespace dawn_native { namespace vulkan {
return mResourceMemoryAllocator.get();
}
void Device::EnqueueDeferredDeallocation(BindGroupLayout* bindGroupLayout) {
mBindGroupLayoutsPendingDeallocation.Enqueue(bindGroupLayout, GetPendingCommandSerial());
void Device::EnqueueDeferredDeallocation(DescriptorSetAllocator* allocator) {
mDescriptorAllocatorsPendingDeallocation.Enqueue(allocator, GetPendingCommandSerial());
}
CommandRecordingContext* Device::GetPendingRecordingContext() {
@ -969,16 +969,16 @@ namespace dawn_native { namespace vulkan {
mUnusedFences.clear();
ExecutionSerial completedSerial = GetCompletedCommandSerial();
for (Ref<BindGroupLayout>& bgl :
mBindGroupLayoutsPendingDeallocation.IterateUpTo(completedSerial)) {
bgl->FinishDeallocation(completedSerial);
for (Ref<DescriptorSetAllocator>& allocator :
mDescriptorAllocatorsPendingDeallocation.IterateUpTo(completedSerial)) {
allocator->FinishDeallocation(completedSerial);
}
mBindGroupLayoutsPendingDeallocation.ClearUpTo(completedSerial);
// Releasing the uploader enqueues buffers to be released.
// Call Tick() again to clear them before releasing the deleter.
mResourceMemoryAllocator->Tick(completedSerial);
mDeleter->Tick(completedSerial);
mDescriptorAllocatorsPendingDeallocation.ClearUpTo(completedSerial);
// Allow recycled memory to be deleted.
mResourceMemoryAllocator->DestroyPool();

View File

@ -21,6 +21,7 @@
#include "dawn_native/Commands.h"
#include "dawn_native/Device.h"
#include "dawn_native/vulkan/CommandRecordingContext.h"
#include "dawn_native/vulkan/DescriptorSetAllocator.h"
#include "dawn_native/vulkan/Forward.h"
#include "dawn_native/vulkan/VulkanFunctions.h"
#include "dawn_native/vulkan/VulkanInfo.h"
@ -65,7 +66,7 @@ namespace dawn_native { namespace vulkan {
CommandRecordingContext* GetPendingRecordingContext();
MaybeError SubmitPendingCommands();
void EnqueueDeferredDeallocation(BindGroupLayout* bindGroupLayout);
void EnqueueDeferredDeallocation(DescriptorSetAllocator* allocator);
// Dawn Native API
@ -165,7 +166,8 @@ namespace dawn_native { namespace vulkan {
VkQueue mQueue = VK_NULL_HANDLE;
uint32_t mComputeSubgroupSize = 0;
SerialQueue<ExecutionSerial, Ref<BindGroupLayout>> mBindGroupLayoutsPendingDeallocation;
SerialQueue<ExecutionSerial, Ref<DescriptorSetAllocator>>
mDescriptorAllocatorsPendingDeallocation;
std::unique_ptr<FencedDeleter> mDeleter;
std::unique_ptr<ResourceMemoryAllocator> mResourceMemoryAllocator;
std::unique_ptr<RenderPassCache> mRenderPassCache;

View File

@ -179,6 +179,22 @@ TEST_P(DestroyTest, DestroyDeviceBeforeSubmit) {
ASSERT_DEVICE_ERROR_MSG(queue.Submit(1, &commands), HasSubstr("[Device] is lost."));
}
// Regression test for crbug.com/1276928 where a lingering BGL reference in Vulkan with at least one
// BG instance could cause bad memory reads because members in the BGL whose destuctors expected a
// live device were not released until after the device was destroyed.
TEST_P(DestroyTest, DestroyDeviceLingeringBGL) {
// Create and hold the layout reference so that its destructor gets called after the device has
// been destroyed via device.Destroy().
wgpu::BindGroupLayout layout = utils::MakeBindGroupLayout(
device, {{0, wgpu::ShaderStage::Fragment, wgpu::SamplerBindingType::Filtering}});
utils::MakeBindGroup(device, layout, {{0, device.CreateSampler()}});
// Tests normally don't expect a device lost error, but since we are destroying the device, we
// actually do, so we need to override the default device lost callback.
ExpectDeviceDestruction();
device.Destroy();
}
DAWN_INSTANTIATE_TEST(DestroyTest,
D3D12Backend(),
MetalBackend(),