From 52d0627d5607cf191485165fe515062c2a61ce81 Mon Sep 17 00:00:00 2001
From: Bryan Bernhart <bryan.bernhart@intel.com>
Date: Mon, 9 Mar 2020 22:56:59 +0000
Subject: [PATCH] D3D12: Pool-allocate shader-visible descriptor heaps.

Rather than destroy GPU descriptor heaps when they are switched out,
heaps are stored in a list where they can be re-used once the GPU is
no longer using them.

BUG=dawn:155

Change-Id: I2074573e354f114c45afe9895e8515980d325852
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/16282
Reviewed-by: Austin Eng
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
---
 .../ShaderVisibleDescriptorAllocatorD3D12.cpp |  86 ++++++++-----
 .../ShaderVisibleDescriptorAllocatorD3D12.h   |  15 ++-
 .../white_box/D3D12DescriptorHeapTests.cpp    | 115 +++++++++++++++++-
 3 files changed, 184 insertions(+), 32 deletions(-)

diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
index 6843d212fb..76e2db2109 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
@@ -55,22 +55,21 @@ namespace dawn_native { namespace d3d12 {
 
     MaybeError ShaderVisibleDescriptorAllocator::Initialize() {
         ASSERT(mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap.Get() ==
                nullptr);
+        mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heapType =
+            D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+
         ASSERT(mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heap.Get() == nullptr);
+        mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heapType =
+            D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+
         DAWN_TRY(AllocateAndSwitchShaderVisibleHeaps());
+
         return {};
     }
 
     MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeaps() {
-        // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
-        // should the developer not allocate any bindings for the heap type.
-        // Consider dynamically re-sizing GPU heaps.
-        DAWN_TRY(
-            AllocateGPUHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
-                            GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV),
-                            GetD3D12HeapFlags(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)));
-        DAWN_TRY(AllocateGPUHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-                                 GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER),
-                                 GetD3D12HeapFlags(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)));
+        DAWN_TRY(AllocateGPUHeap(&mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]));
+        DAWN_TRY(AllocateGPUHeap(&mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]));
 
         // Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
         // serial. When a bindgroup attempts to re-populate, it will compare with its recorded
@@ -122,28 +121,45 @@ namespace dawn_native { namespace d3d12 {
 
     // Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
     MaybeError ShaderVisibleDescriptorAllocator::AllocateGPUHeap(
-        D3D12_DESCRIPTOR_HEAP_TYPE heapType,
-        uint32_t descriptorCount,
-        D3D12_DESCRIPTOR_HEAP_FLAGS heapFlags) {
-        ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
-               heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
-        if (mShaderVisibleBuffers[heapType].heap != nullptr) {
-            mDevice->ReferenceUntilUnused(std::move(mShaderVisibleBuffers[heapType].heap));
+        ShaderVisibleBuffer* shaderVisibleBuffer) {
+        ComPtr<ID3D12DescriptorHeap> heap;
+        // Return the switched-out heap to the pool and retrieve the oldest heap that is no
+        // longer used by the GPU.
+        // This maintains a heap buffer to avoid frequently re-creating heaps for heavy users.
+        // TODO(dawn:256): Consider periodically trimming to avoid OOM.
+        if (shaderVisibleBuffer->heap != nullptr) {
+            shaderVisibleBuffer->pool.push_back(
+                {mDevice->GetPendingCommandSerial(), std::move(shaderVisibleBuffer->heap)});
         }
 
-        D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
-        heapDescriptor.Type = heapType;
-        heapDescriptor.NumDescriptors = descriptorCount;
-        heapDescriptor.Flags = heapFlags;
-        heapDescriptor.NodeMask = 0;
-        ComPtr<ID3D12DescriptorHeap> heap;
-        DAWN_TRY(CheckOutOfMemoryHRESULT(
-            mDevice->GetD3D12Device()->CreateDescriptorHeap(&heapDescriptor, IID_PPV_ARGS(&heap)),
-            "ID3D12Device::CreateDescriptorHeap"));
+        // Recycle an existing heap if possible.
+        if (!shaderVisibleBuffer->pool.empty() &&
+            shaderVisibleBuffer->pool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
+            heap = std::move(shaderVisibleBuffer->pool.front().heap);
+            shaderVisibleBuffer->pool.pop_front();
+        }
+
+        const D3D12_DESCRIPTOR_HEAP_TYPE heapType = shaderVisibleBuffer->heapType;
+
+        // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
+        // should the developer not allocate any bindings for the heap type.
+        // Consider dynamically re-sizing GPU heaps.
+        const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(heapType);
+
+        if (heap == nullptr) {
+            D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
+            heapDescriptor.Type = heapType;
+            heapDescriptor.NumDescriptors = descriptorCount;
+            heapDescriptor.Flags = GetD3D12HeapFlags(heapType);
+            heapDescriptor.NodeMask = 0;
+            DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
+                                                 &heapDescriptor, IID_PPV_ARGS(&heap)),
+                                             "ID3D12Device::CreateDescriptorHeap"));
+        }
 
         // Create a FIFO buffer from the recently created heap.
-        mShaderVisibleBuffers[heapType].heap = std::move(heap);
-        mShaderVisibleBuffers[heapType].allocator = RingBufferAllocator(descriptorCount);
+        shaderVisibleBuffer->heap = std::move(heap);
+        shaderVisibleBuffer->allocator = RingBufferAllocator(descriptorCount);
 
         return {};
     }
@@ -158,6 +174,20 @@ namespace dawn_native { namespace d3d12 {
         return mShaderVisibleBuffers[heapType].allocator.GetSize();
     }
 
+    ComPtr<ID3D12DescriptorHeap> ShaderVisibleDescriptorAllocator::GetShaderVisibleHeapForTesting(
+        D3D12_DESCRIPTOR_HEAP_TYPE heapType) const {
+        ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
+               heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+        return mShaderVisibleBuffers[heapType].heap;
+    }
+
+    uint64_t ShaderVisibleDescriptorAllocator::GetShaderVisiblePoolSizeForTesting(
+        D3D12_DESCRIPTOR_HEAP_TYPE heapType) const {
+        ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
+               heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+        return mShaderVisibleBuffers[heapType].pool.size();
+    }
+
     bool ShaderVisibleDescriptorAllocator::IsAllocationStillValid(Serial lastUsageSerial,
                                                                   Serial heapSerial) const {
         // Consider valid if allocated for the pending submit and the shader visible heaps
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
index 5c6241ec25..66f63f55eb 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
@@ -20,6 +20,7 @@
 #include "dawn_native/d3d12/DescriptorHeapAllocationD3D12.h"
 
 #include <array>
+#include <list>
 
 namespace dawn_native { namespace d3d12 {
 
@@ -44,19 +45,27 @@ namespace dawn_native { namespace d3d12 {
         MaybeError AllocateAndSwitchShaderVisibleHeaps();
 
         uint64_t GetShaderVisibleHeapSizeForTesting(D3D12_DESCRIPTOR_HEAP_TYPE heapType) const;
+        ComPtr<ID3D12DescriptorHeap> GetShaderVisibleHeapForTesting(
+            D3D12_DESCRIPTOR_HEAP_TYPE heapType) const;
+        uint64_t GetShaderVisiblePoolSizeForTesting(D3D12_DESCRIPTOR_HEAP_TYPE heapType) const;
 
         bool IsAllocationStillValid(Serial lastUsageSerial, Serial heapSerial) const;
 
       private:
-        MaybeError AllocateGPUHeap(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
-                                   uint32_t descriptorCount,
-                                   D3D12_DESCRIPTOR_HEAP_FLAGS heapFlags);
+        struct SerialDescriptorHeap {
+            Serial heapSerial;
+            ComPtr<ID3D12DescriptorHeap> heap;
+        };
 
         struct ShaderVisibleBuffer {
             ComPtr<ID3D12DescriptorHeap> heap;
             RingBufferAllocator allocator;
+            std::list<SerialDescriptorHeap> pool;
+            D3D12_DESCRIPTOR_HEAP_TYPE heapType;
         };
 
+        MaybeError AllocateGPUHeap(ShaderVisibleBuffer* shaderVisibleBuffer);
+
         Device* mDevice;
 
         // The serial value of 0 means the shader-visible heaps have not been allocated.
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 283f4a3282..7dc0a918bd 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -21,13 +21,21 @@
 
 constexpr uint32_t kRTSize = 4;
 
+// Pooling tests must advance the GPU completed serial so heaps can be reused.
+// This requires Tick() to be called at least |kFrameDepth| times. This constant
+// should be updated if the internals of Tick() change.
+constexpr uint32_t kFrameDepth = 2;
+
 using namespace dawn_native::d3d12;
 
 class D3D12DescriptorHeapTests : public DawnTest {
-  private:
+  protected:
     void TestSetUp() override {
        DAWN_SKIP_TEST_IF(UsesWire());
+        mD3DDevice = reinterpret_cast<Device*>(device.Get());
     }
+
+    Device* mD3DDevice = nullptr;
 };
 
 // Verify the shader visible heaps switch over within a single submit.
@@ -85,4 +93,109 @@ TEST_P(D3D12DescriptorHeapTests, SwitchOverHeaps) {
     EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + 1);
 }
 
+// Verify shader-visible heaps can be recycled over multiple submits.
+TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
+    constexpr D3D12_DESCRIPTOR_HEAP_TYPE heapType = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+
+    ShaderVisibleDescriptorAllocator* allocator = mD3DDevice->GetShaderVisibleDescriptorAllocator();
+
+    std::list<ComPtr<ID3D12DescriptorHeap>> heaps = {
+        allocator->GetShaderVisibleHeapForTesting(heapType)};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), 0u);
+
+    // Allocate + Tick() up to |kFrameDepth| times and ensure heaps are always unique.
+    for (uint32_t i = 0; i < kFrameDepth; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.push_back(heap);
+        mD3DDevice->Tick();
+    }
+
+    // Repeat up to |kFrameDepth| again but ensure the heaps come back in the expected order
+    // (oldest heaps are recycled first). The "+ 1" is so we also include the very first heap
+    // in the check.
+    for (uint32_t i = 0; i < kFrameDepth + 1; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(heaps.front() == heap);
+        heaps.pop_front();
+        mD3DDevice->Tick();
+    }
+
+    EXPECT_TRUE(heaps.empty());
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kFrameDepth);
+}
+
+// Verify shader-visible heaps are not recycled within a pending submit.
+TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
+    constexpr D3D12_DESCRIPTOR_HEAP_TYPE heapType = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+    constexpr uint32_t kNumOfSwitches = 5;
+
+    ShaderVisibleDescriptorAllocator* allocator = mD3DDevice->GetShaderVisibleDescriptorAllocator();
+
+    const Serial heapSerial = allocator->GetShaderVisibleHeapsSerial();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {
+        allocator->GetShaderVisibleHeapForTesting(heapType)};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), 0u);
+
+    // Switch over |kNumOfSwitches| times and ensure heaps are always unique.
+    for (uint32_t i = 0; i < kNumOfSwitches; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    // After |kNumOfSwitches|, no heaps are recycled.
+    EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + kNumOfSwitches);
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kNumOfSwitches);
+}
+
+// Verify that switched-over shader-visible heaps are not recycled while the submit is pending
+// but are recycled once it is no longer pending.
+TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
+    constexpr D3D12_DESCRIPTOR_HEAP_TYPE heapType = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+    constexpr uint32_t kNumOfSwitches = 5;
+
+    ShaderVisibleDescriptorAllocator* allocator = mD3DDevice->GetShaderVisibleDescriptorAllocator();
+    const Serial heapSerial = allocator->GetShaderVisibleHeapsSerial();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {
+        allocator->GetShaderVisibleHeapForTesting(heapType)};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), 0u);
+
+    // Switch over |kNumOfSwitches| times to create a pool of unique heaps.
+    for (uint32_t i = 0; i < kNumOfSwitches; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + kNumOfSwitches);
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kNumOfSwitches);
+
+    // Ensure switched-over heaps can be recycled by advancing the GPU by at least |kFrameDepth|.
+    for (uint32_t i = 0; i < kFrameDepth; i++) {
+        mD3DDevice->Tick();
+    }
+
+    // Switch over |kNumOfSwitches| times again, reusing the same heaps.
+    for (uint32_t i = 0; i < kNumOfSwitches; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) != heaps.end());
+        heaps.erase(heap);
+    }
+
+    // After switching over |kNumOfSwitches| x 2 times, ensure no additional heaps were created.
+    EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + kNumOfSwitches * 2);
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kNumOfSwitches);
+}
+
 DAWN_INSTANTIATE_TEST(D3D12DescriptorHeapTests, D3D12Backend());
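
Note on the recycling scheme: AllocateGPUHeap above pushes a switched-out heap to the back of a FIFO list tagged with the device's pending command serial, and only pops a heap from the front once the completed command serial has caught up to that tag. Because serials increase monotonically, checking just the front of the list is enough: if the oldest parked heap is still in flight, every newer one is too. The sketch below is a minimal, self-contained illustration of this pattern; Serial, FakeHeap, and HeapPool are illustrative stand-ins rather than Dawn's real types, which store ComPtr<ID3D12DescriptorHeap> and query the device's pending/completed command serials.

// A minimal sketch of the serial-gated pooling pattern used by the patch above.
// Serial, FakeHeap, and HeapPool are illustrative stand-ins, not Dawn types.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <list>
#include <memory>

using Serial = uint64_t;

struct FakeHeap {
    int id;  // Stand-in for an ID3D12DescriptorHeap.
};

class HeapPool {
  public:
    // Called when a heap is switched out: park it until the GPU passes |pendingSerial|.
    void Return(std::unique_ptr<FakeHeap> heap, Serial pendingSerial) {
        mPool.push_back({pendingSerial, std::move(heap)});
    }

    // Called when a heap is needed: recycle the oldest parked heap whose serial has
    // completed, otherwise create a fresh one.
    std::unique_ptr<FakeHeap> Acquire(Serial completedSerial) {
        if (!mPool.empty() && mPool.front().heapSerial <= completedSerial) {
            std::unique_ptr<FakeHeap> heap = std::move(mPool.front().heap);
            mPool.pop_front();
            return heap;
        }
        return std::make_unique<FakeHeap>(FakeHeap{mNextId++});
    }

    std::size_t Size() const { return mPool.size(); }

  private:
    struct SerialHeap {
        Serial heapSerial;
        std::unique_ptr<FakeHeap> heap;
    };
    std::list<SerialHeap> mPool;  // FIFO: the oldest (smallest serial) entry is at the front.
    int mNextId = 0;
};

int main() {
    HeapPool pool;
    Serial pendingSerial = 1;    // Serial the next submit will signal when it finishes.
    Serial completedSerial = 0;  // Highest serial the GPU has finished so far.

    // Submit 1: create a heap, then switch it out.
    std::unique_ptr<FakeHeap> heap = pool.Acquire(completedSerial);  // Creates heap 0.
    pool.Return(std::move(heap), pendingSerial++);

    // Submit 2: submit 1 has not completed yet, so a new heap must be created.
    heap = pool.Acquire(completedSerial);  // Creates heap 1.
    pool.Return(std::move(heap), pendingSerial++);

    // The GPU finishes submit 1; its heap becomes eligible for reuse.
    completedSerial = 1;
    heap = pool.Acquire(completedSerial);  // Recycles heap 0.
    std::cout << "recycled heap " << heap->id << ", pool size " << pool.Size() << std::endl;
    return 0;
}

Running the sketch prints "recycled heap 0, pool size 1": the heap from the first submit is reused as soon as its serial completes, while the heap from the still-pending second submit stays parked, mirroring what the PoolHeapsInPendingAndMultipleSubmits test verifies against the real allocator.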