diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
index ca30889e2a..ba1b4939a2 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
@@ -20,10 +20,32 @@ namespace dawn_native { namespace d3d12 {
 
+    // Limits the min/max heap size to a known value for testing.
     // Thresholds should be adjusted (lower == faster) to avoid tests taking too long to complete.
     static constexpr const uint32_t kShaderVisibleSmallHeapSizes[] = {1024, 512};
 
-    uint32_t GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
+    uint32_t GetD3D12ShaderVisibleHeapMinSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
+                                              bool useSmallSize) {
+        if (useSmallSize) {
+            return kShaderVisibleSmallHeapSizes[heapType];
+        }
+
+        // The minimum heap size must be large enough to satisfy the largest descriptor allocation
+        // request and to amortize the cost of sub-allocation, but small enough to avoid wasting
+        // memory should only a tiny fraction ever be used.
+        // TODO(dawn:155): Figure out these values.
+        switch (heapType) {
+            case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
+                return 4096;
+            case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
+                return 256;
+            default:
+                UNREACHABLE();
+        }
+    }
+
+    uint32_t GetD3D12ShaderVisibleHeapMaxSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
+                                              bool useSmallSize) {
         if (useSmallSize) {
             return kShaderVisibleSmallHeapSizes[heapType];
         }
@@ -62,7 +84,10 @@ namespace dawn_native { namespace d3d12 {
         D3D12_DESCRIPTOR_HEAP_TYPE heapType)
         : mHeapType(heapType),
           mDevice(device),
-          mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)) {
+          mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
+          mDescriptorCount(GetD3D12ShaderVisibleHeapMinSize(
+              heapType,
+              mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting))) {
         ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
                heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
     }
@@ -107,60 +132,74 @@ namespace dawn_native { namespace d3d12 {
         mAllocator.Deallocate(completedSerial);
     }
 
+    ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>>
+    ShaderVisibleDescriptorAllocator::AllocateHeap(uint32_t descriptorCount) const {
+        // The size in bytes of a descriptor heap is best calculated by the increment size
+        // multiplied by the number of descriptors. In practice, this is only an estimate and
+        // the actual size may vary depending on the driver.
+        const uint64_t kSize = mSizeIncrement * descriptorCount;
+
+        DAWN_TRY(mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
+
+        ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
+        D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
+        heapDescriptor.Type = mHeapType;
+        heapDescriptor.NumDescriptors = descriptorCount;
+        heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
+        heapDescriptor.NodeMask = 0;
+        DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
+                                             &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
+                                         "ID3D12Device::CreateDescriptorHeap"));
+
+        std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap =
+            std::make_unique<ShaderVisibleDescriptorHeap>(std::move(d3d12DescriptorHeap), kSize);
+
+        // We must track the allocation in the LRU when it is created, otherwise the residency
+        // manager will see the allocation as non-resident in the later call to LockAllocation.
+        mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
+
+        return std::move(descriptorHeap);
+    }
+
     // Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
     MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeap() {
         std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap;
 
-        // Return the switched out heap to the pool and retrieve the oldest heap that is no longer
-        // used by GPU. This maintains a heap buffer to avoid frequently re-creating heaps for heavy
-        // users.
-        // TODO(dawn:256): Consider periodically triming to avoid OOM.
+        // Dynamically allocate using a two-phase strategy: the first phase grows a small heap
+        // in powers of two for light users, while the second phase pool-allocates max-sized
+        // heaps for heavy users.
         if (mHeap != nullptr) {
             mDevice->GetResidencyManager()->UnlockAllocation(mHeap.get());
-            mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
-        }
 
-        // Recycle existing heap if possible.
-        if (!mPool.empty() && mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
-            descriptorHeap = std::move(mPool.front().heap);
-            mPool.pop_front();
+            const uint32_t maxDescriptorCount = GetD3D12ShaderVisibleHeapMaxSize(
+                mHeapType,
+                mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+            if (mDescriptorCount < maxDescriptorCount) {
+                // Phase #1. Grow the heaps in powers-of-two.
+                mDevice->ReferenceUntilUnused(mHeap->GetD3D12DescriptorHeap());
+                mDescriptorCount = std::min(mDescriptorCount * 2, maxDescriptorCount);
+            } else {
+                // Phase #2. Pool-allocate heaps.
+                // Return the switched out heap to the pool and retrieve the oldest heap that is no
+                // longer used by GPU. This maintains a heap buffer to avoid frequently re-creating
+                // heaps for heavy users.
+                // TODO(dawn:256): Consider periodically trimming to avoid OOM.
+                mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
+                if (mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
+                    descriptorHeap = std::move(mPool.front().heap);
+                    mPool.pop_front();
+                }
+            }
         }
 
-        // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
-        // should the developer not allocate any bindings for the heap type.
-        // Consider dynamically re-sizing GPU heaps.
-        const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(
-            mHeapType, mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
-
         if (descriptorHeap == nullptr) {
-            // The size in bytes of a descriptor heap is best calculated by the increment size
-            // multiplied by the number of descriptors. In practice, this is only an estimate and
-            // the actual size may vary depending on the driver.
-            const uint64_t kSize = mSizeIncrement * descriptorCount;
-
-            DAWN_TRY(
-                mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
-
-            ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
-            D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
-            heapDescriptor.Type = mHeapType;
-            heapDescriptor.NumDescriptors = descriptorCount;
-            heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
-            heapDescriptor.NodeMask = 0;
-            DAWN_TRY(
-                CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
-                                            &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
-                                        "ID3D12Device::CreateDescriptorHeap"));
-            descriptorHeap = std::make_unique<ShaderVisibleDescriptorHeap>(
-                std::move(d3d12DescriptorHeap), kSize);
-            // We must track the allocation in the LRU when it is created, otherwise the residency
-            // manager will see the allocation as non-resident in the later call to LockAllocation.
-            mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
+            DAWN_TRY_ASSIGN(descriptorHeap, AllocateHeap(mDescriptorCount));
         }
 
         DAWN_TRY(mDevice->GetResidencyManager()->LockAllocation(descriptorHeap.get()));
 
+        // Create a FIFO buffer from the recently created heap.
         mHeap = std::move(descriptorHeap);
-        mAllocator = RingBufferAllocator(descriptorCount);
+        mAllocator = RingBufferAllocator(mDescriptorCount);
 
         // Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
         // serial. When a bindgroup attempts to re-populate, it will compare with its recorded
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
index 564eb95e79..d93e57a073 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
@@ -78,6 +78,9 @@ namespace dawn_native { namespace d3d12 {
             std::unique_ptr<ShaderVisibleDescriptorHeap> heap;
         };
 
+        ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>> AllocateHeap(
+            uint32_t descriptorCount) const;
+
         std::unique_ptr<ShaderVisibleDescriptorHeap> mHeap;
         RingBufferAllocator mAllocator;
         std::list<SerialDescriptorHeap> mPool;
@@ -91,6 +94,10 @@ namespace dawn_native { namespace d3d12 {
         Serial mHeapSerial = 0;
 
         uint32_t mSizeIncrement;
+
+        // The descriptor count is the current size of the heap in number of descriptors.
+        // It is stored on the allocator to avoid converting the heap's byte size back into
+        // descriptors.
+        uint32_t mDescriptorCount = 0;
     };
 
 }}  // namespace dawn_native::d3d12
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 6750586c76..43a1179a40 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -220,6 +220,10 @@ TEST_P(D3D12DescriptorHeapTests, NoSwitchOverSamplerHeap) {
 
 // Verify shader-visible heaps can be recycled for multiple submits.
 TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
+    // Use small heaps to count only pool-allocated switches.
+    DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+        dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
     ShaderVisibleDescriptorAllocator* allocator =
         mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
 
@@ -253,6 +257,10 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
 
 // Verify shader-visible heaps do not recycle in a pending submit.
 TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
+    // Use small heaps to count only pool-allocated switches.
+    DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+        dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
     constexpr uint32_t kNumOfSwitches = 5;
 
     ShaderVisibleDescriptorAllocator* allocator =
@@ -280,6 +288,10 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
 
 // Verify switching shader-visible heaps do not recycle in a pending submit but do so
 // once no longer pending.
 TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
+    // Use small heaps to count only pool-allocated switches.
+    DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+        dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
     constexpr uint32_t kNumOfSwitches = 5;
 
     ShaderVisibleDescriptorAllocator* allocator =
@@ -319,6 +331,91 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
     EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfSwitches);
 }
 
+// Verify shader-visible heaps grow (are not recycled) over multiple submits.
+TEST_P(D3D12DescriptorHeapTests, GrowHeapsInMultipleSubmits) {
+    ShaderVisibleDescriptorAllocator* allocator =
+        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+    // Growth: Allocate + Tick() and ensure heaps are always unique.
+    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+        mD3DDevice->Tick();
+    }
+
+    // Verify the number of switches equals the number of heaps allocated (minus the initial).
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
+    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
+}
+
+// Verify shader-visible heaps grow (are not recycled) within a pending submit.
+TEST_P(D3D12DescriptorHeapTests, GrowHeapsInPendingSubmit) {
+    ShaderVisibleDescriptorAllocator* allocator =
+        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+    // Growth: Allocate new heaps.
+    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    // Verify the number of switches equals the number of heaps allocated (minus the initial).
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
+    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
+}
+
+// Verify switched-out shader-visible heaps are not recycled while a submit is pending but are
+// once the submit is no longer pending.
+// Switches over many times until |kNumOfPooledHeaps| heaps are pool-allocated.
+TEST_P(D3D12DescriptorHeapTests, GrowAndPoolHeapsInPendingAndMultipleSubmits) {
+    ShaderVisibleDescriptorAllocator* allocator =
+        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+    constexpr uint32_t kNumOfPooledHeaps = 5;
+    while (allocator->GetShaderVisiblePoolSizeForTesting() < kNumOfPooledHeaps) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
+
+    // Ensure switched-over heaps can be recycled by advancing the GPU by at least |kFrameDepth|.
+    for (uint32_t i = 0; i < kFrameDepth; i++) {
+        mD3DDevice->Tick();
+    }
+
+    // Switch over the pool-allocated heaps.
+    for (uint32_t i = 0; i < kNumOfPooledHeaps; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_FALSE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+    }
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
+}
+
 // Verify encoding multiple heaps worth of bindgroups.
 // Shader-visible heaps will switch out |kNumOfHeaps| times.
 TEST_P(D3D12DescriptorHeapTests, EncodeManyUBO) {
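A note on the sizing policy introduced by this patch: on every heap switch the descriptor count doubles until it reaches the per-type maximum, after which switched-out heaps are recycled through the serial-tracked pool. The standalone sketch below only illustrates that growth curve; kMinDescriptorCount, kMaxDescriptorCount, and NextHeapSize are hypothetical stand-ins (loosely modeled on the sampler heap values) and are not part of the change.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Hypothetical stand-ins for GetD3D12ShaderVisibleHeapMinSize/MaxSize (sampler-like values).
constexpr uint32_t kMinDescriptorCount = 256;
constexpr uint32_t kMaxDescriptorCount = 2048;

// Phase #1: double the descriptor count on each heap switch, clamped to the maximum.
// Phase #2 (not modeled here): once at the maximum, switched-out heaps are pooled and recycled.
uint32_t NextHeapSize(uint32_t currentCount) {
    return std::min(currentCount * 2, kMaxDescriptorCount);
}

int main() {
    uint32_t count = kMinDescriptorCount;
    for (int heapSwitch = 0; heapSwitch < 6; ++heapSwitch) {
        std::cout << "switch " << heapSwitch << ": " << count << " descriptors"
                  << (count == kMaxDescriptorCount ? " (max reached, heap would be pooled)" : "")
                  << "\n";
        count = NextHeapSize(count);
    }
    return 0;
}

Running the sketch prints 256, 512, 1024, 2048, 2048, 2048: light users never pay for more than a small heap, while heavy users converge on max-sized heaps that the pool then recycles.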