D3D12: Dynamic shader-visible heap allocation.
Allocates shader-visible descriptor heaps at a much smaller initial size, grows them in powers of two, and then pool-allocates them once the maximum size is reached. This strategy avoids wasting memory for light users while still maximizing performance for heavy users. BUG=dawn:155 Change-Id: I0519235c901d0283b98ee824eeb0cda6de70b210 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25620 Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com> Reviewed-by: Austin Eng <enga@chromium.org>
This commit is contained in:
parent
05863e62f3
commit
f03590a754
|
@ -20,10 +20,32 @@
|
|||
|
||||
namespace dawn_native { namespace d3d12 {
|
||||
|
||||
// Limits the min/max heap size to always be some known value for testing.
|
||||
// Thresholds should be adjusted (lower == faster) to avoid tests taking too long to complete.
|
||||
static constexpr const uint32_t kShaderVisibleSmallHeapSizes[] = {1024, 512};
|
||||
|
||||
uint32_t GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
|
||||
// Returns the minimum number of descriptors a shader-visible heap of |heapType| is sized to.
// When |useSmallSize| is set, the fixed per-type testing size from
// kShaderVisibleSmallHeapSizes is returned instead of the production value.
uint32_t GetD3D12ShaderVisibleHeapMinSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
                                          bool useSmallSize) {
    // Testing override: the table is indexed directly by the heap type value.
    if (useSmallSize) {
        return kShaderVisibleSmallHeapSizes[heapType];
    }

    // The minimum must be big enough to satisfy the largest single descriptor allocation
    // request and to amortize the cost of sub-allocation, yet small enough that little
    // memory is wasted when only a tiny fraction of the heap is ever used.
    // TODO(dawn:155): Figure out these values.
    if (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
        return 4096;
    }
    if (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) {
        return 256;
    }

    // Only the two shader-visible heap types above are expected here.
    UNREACHABLE();
}
|
||||
|
||||
uint32_t GetD3D12ShaderVisibleHeapMaxSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
|
||||
bool useSmallSize) {
|
||||
if (useSmallSize) {
|
||||
return kShaderVisibleSmallHeapSizes[heapType];
|
||||
}
|
||||
|
@ -62,7 +84,10 @@ namespace dawn_native { namespace d3d12 {
|
|||
D3D12_DESCRIPTOR_HEAP_TYPE heapType)
|
||||
: mHeapType(heapType),
|
||||
mDevice(device),
|
||||
mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)) {
|
||||
mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
|
||||
mDescriptorCount(GetD3D12ShaderVisibleHeapMinSize(
|
||||
heapType,
|
||||
mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting))) {
|
||||
ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
|
||||
heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
|
||||
}
|
||||
|
@ -107,60 +132,74 @@ namespace dawn_native { namespace d3d12 {
|
|||
mAllocator.Deallocate(completedSerial);
|
||||
}
|
||||
|
||||
// Creates a new D3D12 descriptor heap of |mHeapType| holding |descriptorCount| descriptors,
// wraps it in a ShaderVisibleDescriptorHeap and registers it with the residency manager.
//
// Returns the newly created heap. Errors raised by ResidencyManager::EnsureCanAllocate or by
// ID3D12Device::CreateDescriptorHeap are propagated to the caller through DAWN_TRY.
ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>>
ShaderVisibleDescriptorAllocator::AllocateHeap(uint32_t descriptorCount) const {
    // The size in bytes of a descriptor heap is best calculated by the increment size
    // multiplied by the number of descriptors. In practice, this is only an estimate and
    // the actual size may vary depending on the driver.
    // Both operands are 32-bit, so widen one of them first: otherwise the product is
    // computed (and may wrap) in 32 bits before being stored into the 64-bit size.
    const uint64_t kSize = static_cast<uint64_t>(mSizeIncrement) * descriptorCount;

    DAWN_TRY(mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));

    ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
    D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
    heapDescriptor.Type = mHeapType;
    heapDescriptor.NumDescriptors = descriptorCount;
    heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
    heapDescriptor.NodeMask = 0;
    DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
                                         &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
                                     "ID3D12Device::CreateDescriptorHeap"));

    std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap =
        std::make_unique<ShaderVisibleDescriptorHeap>(std::move(d3d12DescriptorHeap), kSize);

    // We must track the allocation in the LRU when it is created, otherwise the residency
    // manager will see the allocation as non-resident in the later call to LockAllocation.
    mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());

    // The explicit move is kept from the original: the local is a unique_ptr while the
    // declared return type is ResultOrError<...>, so this is a converting return.
    return std::move(descriptorHeap);
}
|
||||
|
||||
// Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
|
||||
MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeap() {
|
||||
std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap;
|
||||
// Return the switched out heap to the pool and retrieve the oldest heap that is no longer
|
||||
// used by GPU. This maintains a heap buffer to avoid frequently re-creating heaps for heavy
|
||||
// users.
|
||||
// TODO(dawn:256): Consider periodically triming to avoid OOM.
|
||||
// Dynamically allocate using a two-phase allocation strategy.
|
||||
// The first phase increasingly grows a small heap in binary sizes for light users while the
|
||||
// second phase pool-allocates largest sized heaps for heavy users.
|
||||
if (mHeap != nullptr) {
|
||||
mDevice->GetResidencyManager()->UnlockAllocation(mHeap.get());
|
||||
mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
|
||||
}
|
||||
|
||||
// Recycle existing heap if possible.
|
||||
if (!mPool.empty() && mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
|
||||
descriptorHeap = std::move(mPool.front().heap);
|
||||
mPool.pop_front();
|
||||
const uint32_t maxDescriptorCount = GetD3D12ShaderVisibleHeapMaxSize(
|
||||
mHeapType,
|
||||
mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
|
||||
if (mDescriptorCount < maxDescriptorCount) {
|
||||
// Phase #1. Grow the heaps in powers-of-two.
|
||||
mDevice->ReferenceUntilUnused(mHeap->GetD3D12DescriptorHeap());
|
||||
mDescriptorCount = std::min(mDescriptorCount * 2, maxDescriptorCount);
|
||||
} else {
|
||||
// Phase #2. Pool-allocate heaps.
|
||||
// Return the switched out heap to the pool and retrieve the oldest heap that is no
|
||||
// longer used by GPU. This maintains a heap buffer to avoid frequently re-creating
|
||||
// heaps for heavy users.
|
||||
// TODO(dawn:256): Consider periodically trimming to avoid OOM.
|
||||
mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
|
||||
if (mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
|
||||
descriptorHeap = std::move(mPool.front().heap);
|
||||
mPool.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
|
||||
// should the developer not allocate any bindings for the heap type.
|
||||
// Consider dynamically re-sizing GPU heaps.
|
||||
const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(
|
||||
mHeapType, mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
|
||||
|
||||
if (descriptorHeap == nullptr) {
|
||||
// The size in bytes of a descriptor heap is best calculated by the increment size
|
||||
// multiplied by the number of descriptors. In practice, this is only an estimate and
|
||||
// the actual size may vary depending on the driver.
|
||||
const uint64_t kSize = mSizeIncrement * descriptorCount;
|
||||
|
||||
DAWN_TRY(
|
||||
mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
|
||||
|
||||
ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
|
||||
D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
|
||||
heapDescriptor.Type = mHeapType;
|
||||
heapDescriptor.NumDescriptors = descriptorCount;
|
||||
heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
|
||||
heapDescriptor.NodeMask = 0;
|
||||
DAWN_TRY(
|
||||
CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
|
||||
&heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
|
||||
"ID3D12Device::CreateDescriptorHeap"));
|
||||
descriptorHeap = std::make_unique<ShaderVisibleDescriptorHeap>(
|
||||
std::move(d3d12DescriptorHeap), kSize);
|
||||
// We must track the allocation in the LRU when it is created, otherwise the residency
|
||||
// manager will see the allocation as non-resident in the later call to LockAllocation.
|
||||
mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
|
||||
DAWN_TRY_ASSIGN(descriptorHeap, AllocateHeap(mDescriptorCount));
|
||||
}
|
||||
|
||||
DAWN_TRY(mDevice->GetResidencyManager()->LockAllocation(descriptorHeap.get()));
|
||||
|
||||
// Create a FIFO buffer from the recently created heap.
|
||||
mHeap = std::move(descriptorHeap);
|
||||
mAllocator = RingBufferAllocator(descriptorCount);
|
||||
mAllocator = RingBufferAllocator(mDescriptorCount);
|
||||
|
||||
// Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
|
||||
// serial. When a bindgroup attempts to re-populate, it will compare with its recorded
|
||||
|
|
|
@ -78,6 +78,9 @@ namespace dawn_native { namespace d3d12 {
|
|||
std::unique_ptr<ShaderVisibleDescriptorHeap> heap;
|
||||
};
|
||||
|
||||
ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>> AllocateHeap(
|
||||
uint32_t descriptorCount) const;
|
||||
|
||||
std::unique_ptr<ShaderVisibleDescriptorHeap> mHeap;
|
||||
RingBufferAllocator mAllocator;
|
||||
std::list<SerialDescriptorHeap> mPool;
|
||||
|
@ -91,6 +94,10 @@ namespace dawn_native { namespace d3d12 {
|
|||
Serial mHeapSerial = 0;
|
||||
|
||||
uint32_t mSizeIncrement;
|
||||
|
||||
// The descriptor count is the current size of the heap in number of descriptors.
|
||||
// This is stored on the allocator to avoid extra conversions.
|
||||
uint32_t mDescriptorCount = 0;
|
||||
};
|
||||
}} // namespace dawn_native::d3d12
|
||||
|
||||
|
|
|
@ -220,6 +220,10 @@ TEST_P(D3D12DescriptorHeapTests, NoSwitchOverSamplerHeap) {
|
|||
|
||||
// Verify shader-visible heaps can be recycled for multiple submits.
|
||||
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
|
||||
// Use small heaps to count only pool-allocated switches.
|
||||
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
|
||||
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
|
||||
|
||||
ShaderVisibleDescriptorAllocator* allocator =
|
||||
mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
|
||||
|
||||
|
@ -253,6 +257,10 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
|
|||
|
||||
// Verify shader-visible heaps do not recycle in a pending submit.
|
||||
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
|
||||
// Use small heaps to count only pool-allocated switches.
|
||||
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
|
||||
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
|
||||
|
||||
constexpr uint32_t kNumOfSwitches = 5;
|
||||
|
||||
ShaderVisibleDescriptorAllocator* allocator =
|
||||
|
@ -280,6 +288,10 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
|
|||
// Verify switching shader-visible heaps do not recycle in a pending submit but do so
|
||||
// once no longer pending.
|
||||
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
|
||||
// Use small heaps to count only pool-allocated switches.
|
||||
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
|
||||
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
|
||||
|
||||
constexpr uint32_t kNumOfSwitches = 5;
|
||||
|
||||
ShaderVisibleDescriptorAllocator* allocator =
|
||||
|
@ -319,6 +331,91 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
|
|||
EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfSwitches);
|
||||
}
|
||||
|
||||
// Verify shader-visible heaps do not recycle in multiple submits.
|
||||
// Growth phase with a Tick() after every switch: each call to
// AllocateAndSwitchShaderVisibleHeap() must hand out a heap that has not been seen before,
// until the first heap is placed in the pool.
TEST_P(D3D12DescriptorHeapTests, GrowHeapsInMultipleSubmits) {
    ShaderVisibleDescriptorAllocator* allocator =
        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();

    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();

    // Every heap observed so far, seeded with the heap that is bound initially.
    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);

    // Growth: Allocate + Tick() and ensure heaps are always unique.
    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // insert() reports through .second whether the heap was new, which replaces the
        // linear std::find scan over the set followed by a separate insert.
        EXPECT_TRUE(heaps.insert(heap).second);
        mD3DDevice->Tick();
    }

    // Verify the number of switches equals the size of heaps allocated (minus the initial).
    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
}
|
||||
|
||||
// Verify shader-visible heaps do not recycle in a pending submit.
|
||||
// Growth phase without any Tick() between switches: each call to
// AllocateAndSwitchShaderVisibleHeap() must hand out a heap that has not been seen before,
// until the first heap is placed in the pool.
TEST_P(D3D12DescriptorHeapTests, GrowHeapsInPendingSubmit) {
    ShaderVisibleDescriptorAllocator* allocator =
        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();

    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();

    // Every heap observed so far, seeded with the heap that is bound initially.
    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);

    // Growth: Allocate new heaps.
    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // insert() reports through .second whether the heap was new, which replaces the
        // linear std::find scan over the set followed by a separate insert.
        EXPECT_TRUE(heaps.insert(heap).second);
    }

    // Verify the number of switches equals the size of heaps allocated (minus the initial).
    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
}
|
||||
|
||||
// Verify switching shader-visible heaps do not recycle in a pending submit but do so
|
||||
// once no longer pending.
|
||||
// Switches over many times until |kNumOfPooledHeaps| heaps are pool-allocated.
|
||||
// Switches heaps without ticking until |kNumOfPooledHeaps| heaps sit in the pool, expecting a
// never-before-seen heap on every switch. After advancing the device by |kFrameDepth| ticks,
// the next |kNumOfPooledHeaps| switches are expected to hand back previously seen heaps and
// to leave the pool size unchanged.
TEST_P(D3D12DescriptorHeapTests, GrowAndPoolHeapsInPendingAndMultipleSubmits) {
    ShaderVisibleDescriptorAllocator* allocator =
        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();

    // Every heap observed so far, seeded with the heap that is bound initially.
    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);

    constexpr uint32_t kNumOfPooledHeaps = 5;
    while (allocator->GetShaderVisiblePoolSizeForTesting() < kNumOfPooledHeaps) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // insert() reports through .second whether the heap was new, which replaces the
        // linear std::find scan over the set followed by a separate insert.
        EXPECT_TRUE(heaps.insert(heap).second);
    }

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);

    // Ensure switched-over heaps can be recycled by advancing the GPU by at least |kFrameDepth|.
    for (uint32_t i = 0; i < kFrameDepth; i++) {
        mD3DDevice->Tick();
    }

    // Switch-over the pool-allocated heaps.
    for (uint32_t i = 0; i < kNumOfPooledHeaps; i++) {
        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
        // A recycled heap must be one that was already recorded above.
        EXPECT_TRUE(heaps.find(heap) != heaps.end());
    }

    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
}
|
||||
|
||||
// Verify encoding multiple heaps worth of bindgroups.
|
||||
// Shader-visible heaps will switch out |kNumOfHeaps| times.
|
||||
TEST_P(D3D12DescriptorHeapTests, EncodeManyUBO) {
|
||||
|
|
Loading…
Reference in New Issue