D3D12: Dynamic shader-visible heap allocation.

Allocates shader-visible descriptor heaps at a much
smaller size, then pool-allocates them upon reaching
the max size. This strategy avoids always wasting memory
for lighter users while still maximizing performance for
heavy users.

BUG=dawn:155

Change-Id: I0519235c901d0283b98ee824eeb0cda6de70b210
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25620
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
This commit is contained in:
Bryan Bernhart 2020-07-30 21:50:32 +00:00 committed by Commit Bot service account
parent 05863e62f3
commit f03590a754
3 changed files with 185 additions and 42 deletions

View File

@ -20,10 +20,32 @@
namespace dawn_native { namespace d3d12 {
// Heap sizes, in number of descriptors, returned when the small-heap testing mode is requested.
// Limits the min/max heap size to always be some known value for testing.
// Thresholds should be adjusted (lower == faster) to avoid tests taking too long to complete.
// The table is indexed directly by the D3D12_DESCRIPTOR_HEAP_TYPE value.
// NOTE(review): presumably ordered {CBV_SRV_UAV, SAMPLER} - confirm against the D3D12 enum.
static constexpr const uint32_t kShaderVisibleSmallHeapSizes[] = {1024, 512};
uint32_t GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
// Returns the smallest number of descriptors a shader-visible heap of |heapType| starts with.
// When |useSmallSize| is set, the fixed testing size for that heap type is returned instead.
uint32_t GetD3D12ShaderVisibleHeapMinSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
                                          bool useSmallSize) {
    // Testing override: use the known, small per-type size.
    if (useSmallSize) {
        return kShaderVisibleSmallHeapSizes[heapType];
    }

    // The minimum must be big enough to serve the largest single descriptor allocation and
    // to amortize the cost of sub-allocation, yet small enough that little memory is wasted
    // when only a tiny fraction of the heap is ever used.
    // TODO(dawn:155): Figure out these values.
    if (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
        return 4096;
    }
    if (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) {
        return 256;
    }

    // Only the two shader-visible heap types above are supported.
    UNREACHABLE();
}
uint32_t GetD3D12ShaderVisibleHeapMaxSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
bool useSmallSize) {
if (useSmallSize) {
return kShaderVisibleSmallHeapSizes[heapType];
}
@ -62,7 +84,10 @@ namespace dawn_native { namespace d3d12 {
D3D12_DESCRIPTOR_HEAP_TYPE heapType)
: mHeapType(heapType),
mDevice(device),
mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)) {
mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
mDescriptorCount(GetD3D12ShaderVisibleHeapMinSize(
heapType,
mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting))) {
ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
}
@ -107,60 +132,74 @@ namespace dawn_native { namespace d3d12 {
mAllocator.Deallocate(completedSerial);
}
// Creates a new shader-visible descriptor heap of |mHeapType| that holds |descriptorCount|
// descriptors and starts tracking it in the residency manager.
// Propagates any error from the residency manager's EnsureCanAllocate() check or from
// ID3D12Device::CreateDescriptorHeap.
ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>>
ShaderVisibleDescriptorAllocator::AllocateHeap(uint32_t descriptorCount) const {
    // The size in bytes of a descriptor heap is best calculated by the increment size
    // multiplied by the number of descriptors. In practice, this is only an estimate and
    // the actual size may vary depending on the driver.
    // Widen before multiplying: both operands are 32-bit, so the product would otherwise be
    // computed (and could wrap) in 32 bits before being stored in the 64-bit size.
    const uint64_t heapSizeInBytes = static_cast<uint64_t>(mSizeIncrement) * descriptorCount;

    DAWN_TRY(mDevice->GetResidencyManager()->EnsureCanAllocate(heapSizeInBytes,
                                                               MemorySegment::Local));

    ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
    D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
    heapDescriptor.Type = mHeapType;
    heapDescriptor.NumDescriptors = descriptorCount;
    heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
    heapDescriptor.NodeMask = 0;
    DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
                                         &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
                                     "ID3D12Device::CreateDescriptorHeap"));

    std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap =
        std::make_unique<ShaderVisibleDescriptorHeap>(std::move(d3d12DescriptorHeap),
                                                      heapSizeInBytes);

    // We must track the allocation in the LRU when it is created, otherwise the residency
    // manager will see the allocation as non-resident in the later call to LockAllocation.
    mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());

    return std::move(descriptorHeap);
}
// Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeap() {
std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap;
// Return the switched out heap to the pool and retrieve the oldest heap that is no longer
// used by GPU. This maintains a heap buffer to avoid frequently re-creating heaps for heavy
// users.
// TODO(dawn:256): Consider periodically trimming to avoid OOM.
// Dynamically allocate using a two-phase allocation strategy.
// The first phase increasingly grows a small heap in binary sizes for light users while the
// second phase pool-allocates largest sized heaps for heavy users.
if (mHeap != nullptr) {
mDevice->GetResidencyManager()->UnlockAllocation(mHeap.get());
mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
}
// Recycle existing heap if possible.
if (!mPool.empty() && mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
descriptorHeap = std::move(mPool.front().heap);
mPool.pop_front();
const uint32_t maxDescriptorCount = GetD3D12ShaderVisibleHeapMaxSize(
mHeapType,
mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
if (mDescriptorCount < maxDescriptorCount) {
// Phase #1. Grow the heaps in powers-of-two.
mDevice->ReferenceUntilUnused(mHeap->GetD3D12DescriptorHeap());
mDescriptorCount = std::min(mDescriptorCount * 2, maxDescriptorCount);
} else {
// Phase #2. Pool-allocate heaps.
// Return the switched out heap to the pool and retrieve the oldest heap that is no
// longer used by GPU. This maintains a heap buffer to avoid frequently re-creating
// heaps for heavy users.
// TODO(dawn:256): Consider periodically trimming to avoid OOM.
mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
if (mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
descriptorHeap = std::move(mPool.front().heap);
mPool.pop_front();
}
}
}
// TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
// should the developer not allocate any bindings for the heap type.
// Consider dynamically re-sizing GPU heaps.
const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(
mHeapType, mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
if (descriptorHeap == nullptr) {
// The size in bytes of a descriptor heap is best calculated by the increment size
// multiplied by the number of descriptors. In practice, this is only an estimate and
// the actual size may vary depending on the driver.
const uint64_t kSize = mSizeIncrement * descriptorCount;
DAWN_TRY(
mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
heapDescriptor.Type = mHeapType;
heapDescriptor.NumDescriptors = descriptorCount;
heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
heapDescriptor.NodeMask = 0;
DAWN_TRY(
CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
&heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
"ID3D12Device::CreateDescriptorHeap"));
descriptorHeap = std::make_unique<ShaderVisibleDescriptorHeap>(
std::move(d3d12DescriptorHeap), kSize);
// We must track the allocation in the LRU when it is created, otherwise the residency
// manager will see the allocation as non-resident in the later call to LockAllocation.
mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
DAWN_TRY_ASSIGN(descriptorHeap, AllocateHeap(mDescriptorCount));
}
DAWN_TRY(mDevice->GetResidencyManager()->LockAllocation(descriptorHeap.get()));
// Create a FIFO buffer from the recently created heap.
mHeap = std::move(descriptorHeap);
mAllocator = RingBufferAllocator(descriptorCount);
mAllocator = RingBufferAllocator(mDescriptorCount);
// Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
// serial. When a bindgroup attempts to re-populate, it will compare with its recorded

View File

@ -78,6 +78,9 @@ namespace dawn_native { namespace d3d12 {
std::unique_ptr<ShaderVisibleDescriptorHeap> heap;
};
ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>> AllocateHeap(
uint32_t descriptorCount) const;
std::unique_ptr<ShaderVisibleDescriptorHeap> mHeap;
RingBufferAllocator mAllocator;
std::list<SerialDescriptorHeap> mPool;
@ -91,6 +94,10 @@ namespace dawn_native { namespace d3d12 {
Serial mHeapSerial = 0;
uint32_t mSizeIncrement;
// The descriptor count is the current size of the heap in number of descriptors.
// This is stored on the allocator to avoid extra conversions.
uint32_t mDescriptorCount = 0;
};
}} // namespace dawn_native::d3d12

View File

@ -220,6 +220,10 @@ TEST_P(D3D12DescriptorHeapTests, NoSwitchOverSamplerHeap) {
// Verify shader-visible heaps can be recycled for multiple submits.
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
// Use small heaps to count only pool-allocated switches.
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
ShaderVisibleDescriptorAllocator* allocator =
mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
@ -253,6 +257,10 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
// Verify shader-visible heaps do not recycle in a pending submit.
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
// Use small heaps to count only pool-allocated switches.
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
constexpr uint32_t kNumOfSwitches = 5;
ShaderVisibleDescriptorAllocator* allocator =
@ -280,6 +288,10 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
// Verify switching shader-visible heaps do not recycle in a pending submit but do so
// once no longer pending.
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
// Use small heaps to count only pool-allocated switches.
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
constexpr uint32_t kNumOfSwitches = 5;
ShaderVisibleDescriptorAllocator* allocator =
@ -319,6 +331,91 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfSwitches);
}
// Verify shader-visible heaps do not recycle in multiple submits.
TEST_P(D3D12DescriptorHeapTests, GrowHeapsInMultipleSubmits) {
    ShaderVisibleDescriptorAllocator* allocator =
        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();

    // Serial of the heap the allocator starts with; each switch is expected to bump it by one.
    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();

    // Every heap seen so far, seeded with the allocator's current heap.
    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);

    // Growth: Allocate + Tick() and ensure heaps are always unique.
    // Keep switching until the pool receives its first heap.
    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // insert() reports whether the heap was new, so one ordered lookup both verifies
        // uniqueness and records the heap (instead of a linear std::find plus insert).
        EXPECT_TRUE(heaps.insert(heap).second);
        mD3DDevice->Tick();
    }

    // Verify the number of switches equals the size of heaps allocated (minus the initial).
    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
}
// Verify shader-visible heaps do not recycle in a pending submit.
TEST_P(D3D12DescriptorHeapTests, GrowHeapsInPendingSubmit) {
    ShaderVisibleDescriptorAllocator* allocator =
        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();

    // Serial of the heap the allocator starts with; each switch is expected to bump it by one.
    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();

    // Every heap seen so far, seeded with the allocator's current heap.
    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);

    // Growth: Allocate new heaps.
    // Unlike the multiple-submit variant, the device is never ticked between switches.
    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // insert() reports whether the heap was new, so one ordered lookup both verifies
        // uniqueness and records the heap (instead of a linear std::find plus insert).
        EXPECT_TRUE(heaps.insert(heap).second);
    }

    // Verify the number of switches equals the size of heaps allocated (minus the initial).
    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
}
// Verify switching shader-visible heaps do not recycle in a pending submit but do so
// once no longer pending.
// Switches over many times until |kNumOfPooledHeaps| heaps are pool-allocated.
TEST_P(D3D12DescriptorHeapTests, GrowAndPoolHeapsInPendingAndMultipleSubmits) {
    ShaderVisibleDescriptorAllocator* allocator =
        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();

    // Every heap seen so far, seeded with the allocator's current heap.
    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);

    // Number of heaps that must end up in the pool before recycling is exercised.
    constexpr uint32_t kNumOfPooledHeaps = 5;

    // Switch without ticking until the pool holds |kNumOfPooledHeaps| heaps; every heap
    // returned along the way must be one that has not been seen before.
    while (allocator->GetShaderVisiblePoolSizeForTesting() < kNumOfPooledHeaps) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // insert() reports whether the heap was new, so one ordered lookup both verifies
        // uniqueness and records the heap (instead of a linear std::find plus insert).
        EXPECT_TRUE(heaps.insert(heap).second);
    }

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);

    // Ensure switched-over heaps can be recycled by advancing the GPU by at least |kFrameDepth|.
    for (uint32_t i = 0; i < kFrameDepth; i++) {
        mD3DDevice->Tick();
    }

    // Switch-over the pool-allocated heaps.
    // Each heap returned now must be one that was already recorded above.
    for (uint32_t i = 0; i < kNumOfPooledHeaps; i++) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        EXPECT_FALSE(heaps.find(heap) == heaps.end());
    }

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
}
// Verify encoding multiple heaps worth of bindgroups.
// Shader-visible heaps will switch out |kNumOfHeaps| times.
TEST_P(D3D12DescriptorHeapTests, EncodeManyUBO) {