From 2479860e4bb0ef5a12d269557a088bace53f0f30 Mon Sep 17 00:00:00 2001 From: Bryan Bernhart Date: Fri, 3 Apr 2020 16:52:28 +0000 Subject: [PATCH] D3D12: Stage BindGroups on CPU descriptor heaps. Instead of directly populating GPU heaps, pre-encoded BindGroups are staged on CPU heaps then copied over to the GPU. Non-shader visible allocators are stored on the BGL, which hands out fixed-size chunks to simplify memory management. To enable memory re-use, CPU allocations are tied to the lifetime of BindGroup objects. BUG=dawn:155 Change-Id: I402e6686c96f7450a077c627c8499600979e426c Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/18100 Commit-Queue: Bryan Bernhart Reviewed-by: Corentin Wallez --- BUILD.gn | 4 + src/common/Math.cpp | 8 + src/common/Math.h | 1 + src/dawn_native/CMakeLists.txt | 4 + src/dawn_native/d3d12/BindGroupD3D12.cpp | 161 +++++++++------- src/dawn_native/d3d12/BindGroupD3D12.h | 17 +- .../d3d12/BindGroupLayoutD3D12.cpp | 52 ++++- src/dawn_native/d3d12/BindGroupLayoutD3D12.h | 13 +- .../CPUDescriptorHeapAllocationD3D12.cpp | 48 +++++ .../d3d12/CPUDescriptorHeapAllocationD3D12.h | 45 +++++ ...nShaderVisibleDescriptorAllocatorD3D12.cpp | 137 ++++++++++++++ ...NonShaderVisibleDescriptorAllocatorD3D12.h | 78 ++++++++ src/tests/unittests/MathTests.cpp | 20 +- .../white_box/D3D12DescriptorHeapTests.cpp | 178 ++++++++++++++++++ 14 files changed, 686 insertions(+), 80 deletions(-) create mode 100644 src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.cpp create mode 100644 src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h create mode 100644 src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp create mode 100644 src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h diff --git a/BUILD.gn b/BUILD.gn index ee3e71e5cf..542680d618 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -291,6 +291,8 @@ source_set("libdawn_native_sources") { "src/dawn_native/d3d12/BindGroupLayoutD3D12.h", "src/dawn_native/d3d12/BufferD3D12.cpp", 
"src/dawn_native/d3d12/BufferD3D12.h", + "src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.cpp", + "src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h", "src/dawn_native/d3d12/CommandAllocatorManager.cpp", "src/dawn_native/d3d12/CommandAllocatorManager.h", "src/dawn_native/d3d12/CommandBufferD3D12.cpp", @@ -316,6 +318,8 @@ source_set("libdawn_native_sources") { "src/dawn_native/d3d12/HeapD3D12.h", "src/dawn_native/d3d12/NativeSwapChainImplD3D12.cpp", "src/dawn_native/d3d12/NativeSwapChainImplD3D12.h", + "src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp", + "src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h", "src/dawn_native/d3d12/PipelineLayoutD3D12.cpp", "src/dawn_native/d3d12/PipelineLayoutD3D12.h", "src/dawn_native/d3d12/PlatformFunctions.cpp", diff --git a/src/common/Math.cpp b/src/common/Math.cpp index 4471eb77aa..62807556f5 100644 --- a/src/common/Math.cpp +++ b/src/common/Math.cpp @@ -19,6 +19,7 @@ #include #include +#include #if defined(DAWN_COMPILER_MSVC) # include @@ -152,3 +153,10 @@ float SRGBToLinear(float srgb) { return std::pow((srgb + 0.055f) / 1.055f, 2.4f); } } + +uint64_t RoundUp(uint64_t n, uint64_t m) { + ASSERT(m > 0); + ASSERT(n > 0); + ASSERT(m <= std::numeric_limits::max() - n); + return ((n + m - 1) / m) * m; +} \ No newline at end of file diff --git a/src/common/Math.h b/src/common/Math.h index 5ee915ef73..db941f279a 100644 --- a/src/common/Math.h +++ b/src/common/Math.h @@ -29,6 +29,7 @@ uint32_t ScanForward(uint32_t bits); uint32_t Log2(uint32_t value); uint32_t Log2(uint64_t value); bool IsPowerOfTwo(uint64_t n); +uint64_t RoundUp(uint64_t n, uint64_t m); uint64_t NextPowerOfTwo(uint64_t n); bool IsPtrAligned(const void* ptr, size_t alignment); diff --git a/src/dawn_native/CMakeLists.txt b/src/dawn_native/CMakeLists.txt index 0e9bbf69dd..5d87c2596f 100644 --- a/src/dawn_native/CMakeLists.txt +++ b/src/dawn_native/CMakeLists.txt @@ -164,6 +164,8 @@ if (DAWN_ENABLE_D3D12) 
"d3d12/BindGroupLayoutD3D12.h" "d3d12/BufferD3D12.cpp" "d3d12/BufferD3D12.h" + "d3d12/CPUDescriptorHeapAllocationD3D12.cpp" + "d3d12/CPUDescriptorHeapAllocationD3D12.h" "d3d12/CommandAllocatorManager.cpp" "d3d12/CommandAllocatorManager.h" "d3d12/CommandBufferD3D12.cpp" @@ -189,6 +191,8 @@ if (DAWN_ENABLE_D3D12) "d3d12/HeapD3D12.h" "d3d12/NativeSwapChainImplD3D12.cpp" "d3d12/NativeSwapChainImplD3D12.h" + "d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp" + "d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h" "d3d12/PipelineLayoutD3D12.cpp" "d3d12/PipelineLayoutD3D12.h" "d3d12/PlatformFunctions.cpp" diff --git a/src/dawn_native/d3d12/BindGroupD3D12.cpp b/src/dawn_native/d3d12/BindGroupD3D12.cpp index 3abae0ea62..950279eb5b 100644 --- a/src/dawn_native/d3d12/BindGroupD3D12.cpp +++ b/src/dawn_native/d3d12/BindGroupD3D12.cpp @@ -25,76 +25,37 @@ namespace dawn_native { namespace d3d12 { // static - BindGroup* BindGroup::Create(Device* device, const BindGroupDescriptor* descriptor) { + ResultOrError BindGroup::Create(Device* device, + const BindGroupDescriptor* descriptor) { return ToBackend(descriptor->layout)->AllocateBindGroup(device, descriptor); } - BindGroup::BindGroup(Device* device, const BindGroupDescriptor* descriptor) + BindGroup::BindGroup(Device* device, + const BindGroupDescriptor* descriptor, + uint32_t viewSizeIncrement, + const CPUDescriptorHeapAllocation& viewAllocation, + uint32_t samplerSizeIncrement, + const CPUDescriptorHeapAllocation& samplerAllocation) : BindGroupBase(this, device, descriptor) { - } + BindGroupLayout* bgl = ToBackend(GetLayout()); - BindGroup::~BindGroup() { - ToBackend(GetLayout())->DeallocateBindGroup(this); - } - - ResultOrError BindGroup::Populate(ShaderVisibleDescriptorAllocator* allocator) { - Device* device = ToBackend(GetDevice()); - - if (allocator->IsAllocationStillValid(mLastUsageSerial, mHeapSerial)) { - return true; - } - - // Attempt to allocate descriptors for the currently bound shader-visible heaps. 
- // If either failed, return early to re-allocate and switch the heaps. - const BindGroupLayout* bgl = ToBackend(GetLayout()); - const Serial pendingSerial = device->GetPendingCommandSerial(); - - const uint32_t cbvUavSrvDescriptorCount = bgl->GetCbvUavSrvDescriptorCount(); - DescriptorHeapAllocation cbvSrvUavDescriptorHeapAllocation; - if (cbvUavSrvDescriptorCount > 0) { - DAWN_TRY_ASSIGN( - cbvSrvUavDescriptorHeapAllocation, - allocator->AllocateGPUDescriptors(cbvUavSrvDescriptorCount, pendingSerial, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); - if (cbvSrvUavDescriptorHeapAllocation.IsInvalid()) { - return false; - } - - mBaseCbvSrvUavDescriptor = cbvSrvUavDescriptorHeapAllocation.GetGPUHandle(0); - } - - const uint32_t samplerDescriptorCount = bgl->GetSamplerDescriptorCount(); - DescriptorHeapAllocation samplerDescriptorHeapAllocation; - if (samplerDescriptorCount > 0) { - DAWN_TRY_ASSIGN(samplerDescriptorHeapAllocation, - allocator->AllocateGPUDescriptors(samplerDescriptorCount, pendingSerial, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); - if (samplerDescriptorHeapAllocation.IsInvalid()) { - return false; - } - - mBaseSamplerDescriptor = samplerDescriptorHeapAllocation.GetGPUHandle(0); - } - - // Record both the device and heap serials to determine later if the allocations are still - // valid. - mLastUsageSerial = pendingSerial; - mHeapSerial = allocator->GetShaderVisibleHeapsSerial(); + mCPUViewAllocation = viewAllocation; + mCPUSamplerAllocation = samplerAllocation; const auto& bindingOffsets = bgl->GetBindingOffsets(); ID3D12Device* d3d12Device = device->GetD3D12Device().Get(); - for (BindingIndex bindingIndex = 0; bindingIndex < bgl->GetBindingCount(); ++bindingIndex) { + // It's not necessary to create descriptors in the descriptor heap for dynamic resources. + // This is because they are created as root descriptors which are never heap allocated. 
+ // Since dynamic buffers are packed in the front, we can skip over these bindings by + // starting from the dynamic buffer count. + for (BindingIndex bindingIndex = bgl->GetDynamicBufferCount(); + bindingIndex < bgl->GetBindingCount(); ++bindingIndex) { const BindingInfo& bindingInfo = bgl->GetBindingInfo(bindingIndex); - // It's not necessary to create descriptors in descriptor heap for dynamic - // resources. So skip allocating descriptors in descriptor heaps for dynamic - // buffers. - if (bindingInfo.hasDynamicOffset) { - continue; - } - + // Increment size does not need to be stored and is only used to get a handle + // local to the allocation with OffsetFrom(). switch (bindingInfo.type) { case wgpu::BindingType::UniformBuffer: { BufferBinding binding = GetBindingAsBufferBinding(bindingIndex); @@ -106,8 +67,8 @@ namespace dawn_native { namespace d3d12 { desc.BufferLocation = ToBackend(binding.buffer)->GetVA() + binding.offset; d3d12Device->CreateConstantBufferView( - &desc, cbvSrvUavDescriptorHeapAllocation.GetCPUHandle( - bindingOffsets[bindingIndex])); + &desc, + viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex])); break; } case wgpu::BindingType::StorageBuffer: { @@ -131,8 +92,7 @@ namespace dawn_native { namespace d3d12 { d3d12Device->CreateUnorderedAccessView( ToBackend(binding.buffer)->GetD3D12Resource().Get(), nullptr, &desc, - cbvSrvUavDescriptorHeapAllocation.GetCPUHandle( - bindingOffsets[bindingIndex])); + viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex])); break; } case wgpu::BindingType::ReadonlyStorageBuffer: { @@ -152,8 +112,7 @@ namespace dawn_native { namespace d3d12 { desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; d3d12Device->CreateShaderResourceView( ToBackend(binding.buffer)->GetD3D12Resource().Get(), &desc, - cbvSrvUavDescriptorHeapAllocation.GetCPUHandle( - bindingOffsets[bindingIndex])); + viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex])); break; } case 
wgpu::BindingType::SampledTexture: { @@ -161,16 +120,15 @@ namespace dawn_native { namespace d3d12 { auto& srv = view->GetSRVDescriptor(); d3d12Device->CreateShaderResourceView( ToBackend(view->GetTexture())->GetD3D12Resource(), &srv, - cbvSrvUavDescriptorHeapAllocation.GetCPUHandle( - bindingOffsets[bindingIndex])); + viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex])); break; } case wgpu::BindingType::Sampler: { auto* sampler = ToBackend(GetBindingAsSampler(bindingIndex)); auto& samplerDesc = sampler->GetSamplerDescriptor(); d3d12Device->CreateSampler( - &samplerDesc, - samplerDescriptorHeapAllocation.GetCPUHandle(bindingOffsets[bindingIndex])); + &samplerDesc, samplerAllocation.OffsetFrom(samplerSizeIncrement, + bindingOffsets[bindingIndex])); break; } @@ -183,12 +141,77 @@ namespace dawn_native { namespace d3d12 { // TODO(shaobo.yan@intel.com): Implement dynamic buffer offset. } } + } + + BindGroup::~BindGroup() { + ToBackend(GetLayout()) + ->DeallocateBindGroup(this, &mCPUViewAllocation, &mCPUSamplerAllocation); + ASSERT(!mCPUViewAllocation.IsValid()); + ASSERT(!mCPUSamplerAllocation.IsValid()); + } + + ResultOrError BindGroup::Populate(ShaderVisibleDescriptorAllocator* allocator) { + Device* device = ToBackend(GetDevice()); + + if (allocator->IsAllocationStillValid(mLastUsageSerial, mHeapSerial)) { + return true; + } + + // Attempt to allocate descriptors for the currently bound shader-visible heaps. + // If either failed, return early to re-allocate and switch the heaps. + const BindGroupLayout* bgl = ToBackend(GetLayout()); + const Serial pendingSerial = device->GetPendingCommandSerial(); + + ID3D12Device* d3d12Device = device->GetD3D12Device().Get(); + + // CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing + // simple copies per bindgroup, a single non-simple copy could be issued. + // TODO(dawn:155): Consider doing this optimization. 
+ const uint32_t viewDescriptorCount = bgl->GetCbvUavSrvDescriptorCount(); + if (viewDescriptorCount > 0) { + DescriptorHeapAllocation viewDescriptorHeapAllocation; + DAWN_TRY_ASSIGN( + viewDescriptorHeapAllocation, + allocator->AllocateGPUDescriptors(viewDescriptorCount, pendingSerial, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); + if (viewDescriptorHeapAllocation.IsInvalid()) { + return false; + } + + d3d12Device->CopyDescriptorsSimple( + viewDescriptorCount, viewDescriptorHeapAllocation.GetCPUHandle(0), + mCPUViewAllocation.OffsetFrom(0, 0), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + mBaseViewDescriptor = viewDescriptorHeapAllocation.GetGPUHandle(0); + } + + const uint32_t samplerDescriptorCount = bgl->GetSamplerDescriptorCount(); + if (samplerDescriptorCount > 0) { + DescriptorHeapAllocation samplerDescriptorHeapAllocation; + DAWN_TRY_ASSIGN(samplerDescriptorHeapAllocation, + allocator->AllocateGPUDescriptors(samplerDescriptorCount, pendingSerial, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); + if (samplerDescriptorHeapAllocation.IsInvalid()) { + return false; + } + + d3d12Device->CopyDescriptorsSimple( + samplerDescriptorCount, samplerDescriptorHeapAllocation.GetCPUHandle(0), + mCPUSamplerAllocation.OffsetFrom(0, 0), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + mBaseSamplerDescriptor = samplerDescriptorHeapAllocation.GetGPUHandle(0); + } + + // Record both the device and heap serials to determine later if the allocations are still + // valid. 
+ mLastUsageSerial = pendingSerial; + mHeapSerial = allocator->GetShaderVisibleHeapsSerial(); return true; } D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseCbvUavSrvDescriptor() const { - return mBaseCbvSrvUavDescriptor; + return mBaseViewDescriptor; } D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseSamplerDescriptor() const { diff --git a/src/dawn_native/d3d12/BindGroupD3D12.h b/src/dawn_native/d3d12/BindGroupD3D12.h index dc5fb75665..7ec0c8f0bf 100644 --- a/src/dawn_native/d3d12/BindGroupD3D12.h +++ b/src/dawn_native/d3d12/BindGroupD3D12.h @@ -18,7 +18,7 @@ #include "common/PlacementAllocated.h" #include "common/Serial.h" #include "dawn_native/BindGroup.h" -#include "dawn_native/d3d12/d3d12_platform.h" +#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h" namespace dawn_native { namespace d3d12 { @@ -27,9 +27,15 @@ namespace dawn_native { namespace d3d12 { class BindGroup : public BindGroupBase, public PlacementAllocated { public: - static BindGroup* Create(Device* device, const BindGroupDescriptor* descriptor); + static ResultOrError Create(Device* device, + const BindGroupDescriptor* descriptor); - BindGroup(Device* device, const BindGroupDescriptor* descriptor); + BindGroup(Device* device, + const BindGroupDescriptor* descriptor, + uint32_t viewSizeIncrement, + const CPUDescriptorHeapAllocation& viewAllocation, + uint32_t samplerSizeIncrement, + const CPUDescriptorHeapAllocation& samplerAllocation); ~BindGroup() override; // Returns true if the BindGroup was successfully populated. 
@@ -42,8 +48,11 @@ namespace dawn_native { namespace d3d12 { Serial mLastUsageSerial = 0; Serial mHeapSerial = 0; - D3D12_GPU_DESCRIPTOR_HANDLE mBaseCbvSrvUavDescriptor = {0}; + D3D12_GPU_DESCRIPTOR_HANDLE mBaseViewDescriptor = {0}; D3D12_GPU_DESCRIPTOR_HANDLE mBaseSamplerDescriptor = {0}; + + CPUDescriptorHeapAllocation mCPUSamplerAllocation; + CPUDescriptorHeapAllocation mCPUViewAllocation; }; }} // namespace dawn_native::d3d12 diff --git a/src/dawn_native/d3d12/BindGroupLayoutD3D12.cpp b/src/dawn_native/d3d12/BindGroupLayoutD3D12.cpp index 0def96c8ad..0ee36d1071 100644 --- a/src/dawn_native/d3d12/BindGroupLayoutD3D12.cpp +++ b/src/dawn_native/d3d12/BindGroupLayoutD3D12.cpp @@ -17,6 +17,7 @@ #include "common/BitSetIterator.h" #include "dawn_native/d3d12/BindGroupD3D12.h" #include "dawn_native/d3d12/DeviceD3D12.h" +#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h" namespace dawn_native { namespace d3d12 { namespace { @@ -41,6 +42,9 @@ namespace dawn_native { namespace d3d12 { } } // anonymous namespace + // TODO(dawn:155): Figure out this value. 
+ static constexpr uint16_t kDescriptorHeapSize = 1024; + BindGroupLayout::BindGroupLayout(Device* device, const BindGroupLayoutDescriptor* descriptor) : BindGroupLayoutBase(device, descriptor), mDescriptorCounts{}, @@ -128,14 +132,54 @@ namespace dawn_native { namespace d3d12 { DescriptorType descriptorType = WGPUBindingTypeToDescriptorType(bindingInfo.type); mBindingOffsets[bindingIndex] += descriptorOffsets[descriptorType]; } + + const uint32_t viewDescriptorCount = GetCbvUavSrvDescriptorCount(); + if (viewDescriptorCount > 0) { + mViewAllocator = std::make_unique( + device, viewDescriptorCount, kDescriptorHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + const uint32_t samplerDescriptorCount = GetSamplerDescriptorCount(); + if (samplerDescriptorCount > 0) { + mSamplerAllocator = std::make_unique( + device, samplerDescriptorCount, kDescriptorHeapSize, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + } } - BindGroup* BindGroupLayout::AllocateBindGroup(Device* device, - const BindGroupDescriptor* descriptor) { - return mBindGroupAllocator.Allocate(device, descriptor); + ResultOrError BindGroupLayout::AllocateBindGroup( + Device* device, + const BindGroupDescriptor* descriptor) { + uint32_t viewSizeIncrement = 0; + CPUDescriptorHeapAllocation viewAllocation; + if (GetCbvUavSrvDescriptorCount() > 0) { + DAWN_TRY_ASSIGN(viewAllocation, mViewAllocator->AllocateCPUDescriptors()); + viewSizeIncrement = mViewAllocator->GetSizeIncrement(); + } + + uint32_t samplerSizeIncrement = 0; + CPUDescriptorHeapAllocation samplerAllocation; + if (GetSamplerDescriptorCount() > 0) { + DAWN_TRY_ASSIGN(samplerAllocation, mSamplerAllocator->AllocateCPUDescriptors()); + samplerSizeIncrement = mSamplerAllocator->GetSizeIncrement(); + } + + return mBindGroupAllocator.Allocate(device, descriptor, viewSizeIncrement, viewAllocation, + samplerSizeIncrement, samplerAllocation); } - void BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup) { + void 
BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup, + CPUDescriptorHeapAllocation* viewAllocation, + CPUDescriptorHeapAllocation* samplerAllocation) { + if (viewAllocation->IsValid()) { + mViewAllocator->Deallocate(viewAllocation); + } + + if (samplerAllocation->IsValid()) { + mSamplerAllocator->Deallocate(samplerAllocation); + } + mBindGroupAllocator.Deallocate(bindGroup); } diff --git a/src/dawn_native/d3d12/BindGroupLayoutD3D12.h b/src/dawn_native/d3d12/BindGroupLayoutD3D12.h index 7d393ec65d..f91b71238d 100644 --- a/src/dawn_native/d3d12/BindGroupLayoutD3D12.h +++ b/src/dawn_native/d3d12/BindGroupLayoutD3D12.h @@ -24,13 +24,18 @@ namespace dawn_native { namespace d3d12 { class BindGroup; class Device; + class NonShaderVisibleDescriptorAllocator; + class CPUDescriptorHeapAllocation; class BindGroupLayout : public BindGroupLayoutBase { public: BindGroupLayout(Device* device, const BindGroupLayoutDescriptor* descriptor); - BindGroup* AllocateBindGroup(Device* device, const BindGroupDescriptor* descriptor); - void DeallocateBindGroup(BindGroup* bindGroup); + ResultOrError AllocateBindGroup(Device* device, + const BindGroupDescriptor* descriptor); + void DeallocateBindGroup(BindGroup* bindGroup, + CPUDescriptorHeapAllocation* viewAllocation, + CPUDescriptorHeapAllocation* samplerAllocation); enum DescriptorType { CBV, @@ -54,6 +59,10 @@ namespace dawn_native { namespace d3d12 { D3D12_DESCRIPTOR_RANGE mRanges[DescriptorType::Count]; SlabAllocator mBindGroupAllocator; + + // TODO(dawn:155): Store and bucket allocators by size on the device. 
+ std::unique_ptr mSamplerAllocator; + std::unique_ptr mViewAllocator; }; }} // namespace dawn_native::d3d12 diff --git a/src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.cpp b/src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.cpp new file mode 100644 index 0000000000..635b25ebc7 --- /dev/null +++ b/src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.cpp @@ -0,0 +1,48 @@ +// Copyright 2020 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h" +#include "dawn_native/Error.h" + +namespace dawn_native { namespace d3d12 { + + CPUDescriptorHeapAllocation::CPUDescriptorHeapAllocation( + D3D12_CPU_DESCRIPTOR_HANDLE baseDescriptor, + uint32_t heapIndex) + : mBaseDescriptor(baseDescriptor), mHeapIndex(heapIndex) { + } + + D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptorHeapAllocation::OffsetFrom( + uint32_t sizeIncrementInBytes, + uint32_t offsetInDescriptorCount) const { + ASSERT(IsValid()); + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = mBaseDescriptor; + cpuHandle.ptr += sizeIncrementInBytes * offsetInDescriptorCount; + return cpuHandle; + } + + uint32_t CPUDescriptorHeapAllocation::GetHeapIndex() const { + ASSERT(mHeapIndex >= 0); + return mHeapIndex; + } + + bool CPUDescriptorHeapAllocation::IsValid() const { + return mBaseDescriptor.ptr != 0; + } + + void CPUDescriptorHeapAllocation::Invalidate() { + mBaseDescriptor = {0}; + } + +}} // namespace dawn_native::d3d12 \ No newline at 
end of file diff --git a/src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h b/src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h new file mode 100644 index 0000000000..560c99871e --- /dev/null +++ b/src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h @@ -0,0 +1,45 @@ +// Copyright 2020 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DAWNNATIVE_D3D12_CPUDESCRIPTORHEAPALLOCATION_H_ +#define DAWNNATIVE_D3D12_CPUDESCRIPTORHEAPALLOCATION_H_ + +#include + +#include "dawn_native/d3d12/d3d12_platform.h" + +namespace dawn_native { namespace d3d12 { + + // Wrapper for a handle into a CPU-only descriptor heap. 
+ class CPUDescriptorHeapAllocation { + public: + CPUDescriptorHeapAllocation() = default; + CPUDescriptorHeapAllocation(D3D12_CPU_DESCRIPTOR_HANDLE baseDescriptor, uint32_t heapIndex); + + D3D12_CPU_DESCRIPTOR_HANDLE OffsetFrom(uint32_t sizeIncrementInBytes, + uint32_t offsetInDescriptorCount) const; + uint32_t GetHeapIndex() const; + + bool IsValid() const; + + void Invalidate(); + + private: + D3D12_CPU_DESCRIPTOR_HANDLE mBaseDescriptor = {0}; + uint32_t mHeapIndex = -1; + }; + +}} // namespace dawn_native::d3d12 + +#endif // DAWNNATIVE_D3D12_CPUDESCRIPTORHEAPALLOCATION_H_ \ No newline at end of file diff --git a/src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp b/src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp new file mode 100644 index 0000000000..0bb60f7b3d --- /dev/null +++ b/src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp @@ -0,0 +1,137 @@ +// Copyright 2020 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "common/Math.h" + +#include "dawn_native/d3d12/D3D12Error.h" +#include "dawn_native/d3d12/DeviceD3D12.h" +#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h" + +namespace dawn_native { namespace d3d12 { + + NonShaderVisibleDescriptorAllocator::NonShaderVisibleDescriptorAllocator( + Device* device, + uint32_t descriptorCount, + uint32_t heapSize, + D3D12_DESCRIPTOR_HEAP_TYPE heapType) + : mDevice(device), + mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)), + mBlockSize(descriptorCount * mSizeIncrement), + mHeapSize(RoundUp(heapSize, descriptorCount)), + mHeapType(heapType) { + ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || + heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + ASSERT(descriptorCount <= heapSize); + } + + NonShaderVisibleDescriptorAllocator::~NonShaderVisibleDescriptorAllocator() { + const Index freeBlockIndicesSize = GetFreeBlockIndicesSize(); + for (auto& buffer : mPool) { + ASSERT(buffer.freeBlockIndices.size() == freeBlockIndicesSize); + } + ASSERT(mAvailableHeaps.size() == mPool.size()); + } + + ResultOrError + NonShaderVisibleDescriptorAllocator::AllocateCPUDescriptors() { + if (mAvailableHeaps.empty()) { + DAWN_TRY(AllocateCPUHeap()); + } + + ASSERT(!mAvailableHeaps.empty()); + + const uint32_t heapIndex = mAvailableHeaps.back(); + NonShaderVisibleBuffer& buffer = mPool[heapIndex]; + + ASSERT(!buffer.freeBlockIndices.empty()); + + const Index blockIndex = buffer.freeBlockIndices.back(); + + buffer.freeBlockIndices.pop_back(); + + if (buffer.freeBlockIndices.empty()) { + mAvailableHeaps.pop_back(); + } + + const D3D12_CPU_DESCRIPTOR_HANDLE baseCPUDescriptor = { + buffer.heap->GetCPUDescriptorHandleForHeapStart().ptr + (blockIndex * mBlockSize)}; + + return CPUDescriptorHeapAllocation{baseCPUDescriptor, heapIndex}; + } + + MaybeError NonShaderVisibleDescriptorAllocator::AllocateCPUHeap() { + D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor; + heapDescriptor.Type = 
mHeapType; + heapDescriptor.NumDescriptors = mHeapSize; + heapDescriptor.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + heapDescriptor.NodeMask = 0; + + ComPtr heap; + DAWN_TRY(CheckHRESULT( + mDevice->GetD3D12Device()->CreateDescriptorHeap(&heapDescriptor, IID_PPV_ARGS(&heap)), + "ID3D12Device::CreateDescriptorHeap")); + + NonShaderVisibleBuffer newBuffer; + newBuffer.heap = std::move(heap); + + const Index freeBlockIndicesSize = GetFreeBlockIndicesSize(); + newBuffer.freeBlockIndices.reserve(freeBlockIndicesSize); + + for (Index blockIndex = 0; blockIndex < freeBlockIndicesSize; blockIndex++) { + newBuffer.freeBlockIndices.push_back(blockIndex); + } + + mAvailableHeaps.push_back(mPool.size()); + mPool.emplace_back(std::move(newBuffer)); + + return {}; + } + + void NonShaderVisibleDescriptorAllocator::Deallocate(CPUDescriptorHeapAllocation* allocation) { + const uint32_t heapIndex = allocation->GetHeapIndex(); + + ASSERT(heapIndex < mPool.size()); + + // Insert the deallocated block back into the free-list. Order does not matter. However, + // having blocks be non-contiguous could slow down future allocations due to poor cache + // locality. + // TODO(dawn:155): Consider more optimization. + std::vector& freeBlockIndices = mPool[heapIndex].freeBlockIndices; + if (freeBlockIndices.empty()) { + mAvailableHeaps.emplace_back(heapIndex); + } + + const D3D12_CPU_DESCRIPTOR_HANDLE heapStart = + mPool[heapIndex].heap->GetCPUDescriptorHandleForHeapStart(); + + const D3D12_CPU_DESCRIPTOR_HANDLE baseDescriptor = allocation->OffsetFrom(0, 0); + + const Index blockIndex = (baseDescriptor.ptr - heapStart.ptr) / mBlockSize; + + freeBlockIndices.emplace_back(blockIndex); + + // Invalidate the handle in case the developer accidentally uses it again. 
+ allocation->Invalidate(); + } + + uint32_t NonShaderVisibleDescriptorAllocator::GetSizeIncrement() const { + return mSizeIncrement; + } + + NonShaderVisibleDescriptorAllocator::Index + NonShaderVisibleDescriptorAllocator::GetFreeBlockIndicesSize() const { + return ((mHeapSize * mSizeIncrement) / mBlockSize); + } + +}} // namespace dawn_native::d3d12 \ No newline at end of file diff --git a/src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h b/src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h new file mode 100644 index 0000000000..8cbb7e213c --- /dev/null +++ b/src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h @@ -0,0 +1,78 @@ +// Copyright 2020 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DAWNNATIVE_D3D12_NONSHADERVISIBLEDESCRIPTORALLOCATOR_H_ +#define DAWNNATIVE_D3D12_NONSHADERVISIBLEDESCRIPTORALLOCATOR_H_ + +#include "dawn_native/Error.h" + +#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h" + +#include + +// |NonShaderVisibleDescriptorAllocator| allocates a fixed-size block of descriptors from a CPU +// descriptor heap pool. +// Internally, it manages a list of heaps using a fixed-size block allocator. The fixed-size +// block allocator is backed by a list of free blocks (free-list). The heap is in one of two +// states: AVAILABLE or not. To allocate, the next free block is removed from the free-list +// and the corresponding heap offset is returned. 
The AVAILABLE heap always has room for +// at least one free block. If no AVAILABLE heap exists, a new heap is created and inserted +// back into the pool to be immediately used. To deallocate, the block corresponding to the +// offset is inserted back into the free-list. +namespace dawn_native { namespace d3d12 { + + class Device; + + class NonShaderVisibleDescriptorAllocator { + public: + NonShaderVisibleDescriptorAllocator() = default; + NonShaderVisibleDescriptorAllocator(Device* device, + uint32_t descriptorCount, + uint32_t heapSize, + D3D12_DESCRIPTOR_HEAP_TYPE heapType); + ~NonShaderVisibleDescriptorAllocator(); + + ResultOrError AllocateCPUDescriptors(); + + void Deallocate(CPUDescriptorHeapAllocation* allocation); + + uint32_t GetSizeIncrement() const; + + private: + using Index = uint16_t; + + struct NonShaderVisibleBuffer { + ComPtr heap; + std::vector freeBlockIndices; + }; + + MaybeError AllocateCPUHeap(); + + Index GetFreeBlockIndicesSize() const; + + std::vector mAvailableHeaps; // Indices into the pool. + std::vector mPool; + + Device* mDevice; + + uint32_t mSizeIncrement; // Size of the descriptor (in bytes). + uint32_t mBlockSize; // Size of the block of descriptors (in bytes). + uint32_t mHeapSize; // Size of the heap (in number of descriptors). 
+ + D3D12_DESCRIPTOR_HEAP_TYPE mHeapType; + }; + +}} // namespace dawn_native::d3d12 + +#endif // DAWNNATIVE_D3D12_NONSHADERVISIBLEDESCRIPTORALLOCATOR_H_ \ No newline at end of file diff --git a/src/tests/unittests/MathTests.cpp b/src/tests/unittests/MathTests.cpp index 7bcaf60b18..a553f7b417 100644 --- a/src/tests/unittests/MathTests.cpp +++ b/src/tests/unittests/MathTests.cpp @@ -82,7 +82,7 @@ TEST(Math, AlignPtr) { ASSERT_GE(aligned - unaligned, 0); ASSERT_LT(static_cast(aligned - unaligned), kTestAlignment); - ASSERT_EQ(reinterpret_cast(aligned) & (kTestAlignment -1), 0u); + ASSERT_EQ(reinterpret_cast(aligned) & (kTestAlignment - 1), 0u); } } @@ -191,3 +191,21 @@ TEST(Math, SRGBToLinear) { ASSERT_FLOAT_EQ(SRGBToLinear(0.5f), 0.21404114f); } + +// Tests for RoundUp +TEST(Math, RoundUp) { + ASSERT_EQ(RoundUp(2, 2), 2u); + ASSERT_EQ(RoundUp(2, 4), 4u); + ASSERT_EQ(RoundUp(6, 2), 6u); + ASSERT_EQ(RoundUp(8, 4), 8u); + ASSERT_EQ(RoundUp(12, 6), 12u); + + ASSERT_EQ(RoundUp(3, 3), 3u); + ASSERT_EQ(RoundUp(3, 5), 5u); + ASSERT_EQ(RoundUp(5, 3), 6u); + ASSERT_EQ(RoundUp(9, 5), 10u); + + // Test extrema + ASSERT_EQ(RoundUp(0x7FFFFFFFFFFFFFFFull, 0x8000000000000000ull), 0x8000000000000000ull); + ASSERT_EQ(RoundUp(1, 1), 1u); +} diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp index 4431f01485..eeb4412326 100644 --- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp +++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp @@ -15,7 +15,9 @@ #include "tests/DawnTest.h" #include "dawn_native/Toggles.h" +#include "dawn_native/d3d12/BindGroupLayoutD3D12.h" #include "dawn_native/d3d12/DeviceD3D12.h" +#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h" #include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h" #include "utils/ComboRenderPipelineDescriptor.h" #include "utils/WGPUHelpers.h" @@ -93,6 +95,31 @@ class D3D12DescriptorHeapTests : public DawnTest { wgpu::ShaderModule 
mSimpleFSModule;
 };
 
+class DummyNonShaderVisibleDescriptorAllocator {
+  public:
+    DummyNonShaderVisibleDescriptorAllocator(Device* device,
+                                             uint32_t descriptorCount,
+                                             uint32_t allocationsPerHeap)
+        : mAllocator(device,
+                     descriptorCount,
+                     allocationsPerHeap * descriptorCount,
+                     D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) {
+    }
+
+    CPUDescriptorHeapAllocation AllocateCPUDescriptors() {
+        dawn_native::ResultOrError result =
+            mAllocator.AllocateCPUDescriptors();
+        return (result.IsSuccess()) ? result.AcquireSuccess() : CPUDescriptorHeapAllocation{};
+    }
+
+    void Deallocate(CPUDescriptorHeapAllocation& allocation) {
+        mAllocator.Deallocate(&allocation);
+    }
+
+  private:
+    NonShaderVisibleDescriptorAllocator mAllocator;
+};
+
 // Verify the shader visible heaps switch over within a single submit.
 TEST_P(D3D12DescriptorHeapTests, SwitchOverHeaps) {
     utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);
@@ -688,6 +715,157 @@ TEST_P(D3D12DescriptorHeapTests, EncodeManyUBOAndSamplers) {
     }
 }
 
+// Verify a single allocate/deallocate.
+// One non-shader visible heap will be created.
+TEST_P(D3D12DescriptorHeapTests, Single) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 3;
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+    EXPECT_EQ(allocation.GetHeapIndex(), 0u);
+    EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+
+    allocator.Deallocate(allocation);
+    EXPECT_FALSE(allocation.IsValid());
+}
+
+// Verify allocating many times causes the pool to increase in size.
+// Creates |kNumOfHeaps| non-shader visible heaps.
+TEST_P(D3D12DescriptorHeapTests, Sequential) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 3;
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    // Allocate |kNumOfHeaps| worth.
+    constexpr uint32_t kNumOfHeaps = 2;
+
+    std::set allocatedHeaps;
+
+    std::vector allocations;
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_EQ(allocation.GetHeapIndex(), i / kAllocationsPerHeap);
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        allocations.push_back(allocation);
+        allocatedHeaps.insert(allocation.GetHeapIndex());
+    }
+
+    EXPECT_EQ(allocatedHeaps.size(), kNumOfHeaps);
+
+    // Deallocate all.
+    for (CPUDescriptorHeapAllocation& allocation : allocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+}
+
+// Verify that re-allocating as many allocations as the pool holds reuses the freed blocks.
+// Creates and reuses |kNumOfHeaps| non-shader visible heaps.
+TEST_P(D3D12DescriptorHeapTests, ReuseFreedHeaps) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 25;
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    constexpr uint32_t kNumOfHeaps = 10;
+
+    std::vector allocations;
+    std::set allocationPtrs;
+
+    // Allocate |kNumOfHeaps| heaps worth.
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        allocations.push_back(allocation);
+        EXPECT_TRUE(allocationPtrs.insert(allocation.OffsetFrom(0, 0).ptr).second);
+    }
+
+    // Deallocate all.
+    for (CPUDescriptorHeapAllocation& allocation : allocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+
+    allocations.clear();
+
+    // Re-allocate all again.
+    std::set reallocatedPtrs;
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        allocations.push_back(allocation);
+        EXPECT_TRUE(reallocatedPtrs.insert(allocation.OffsetFrom(0, 0).ptr).second);
+        EXPECT_TRUE(allocationPtrs.find(allocation.OffsetFrom(0, 0).ptr) !=
+                    allocationPtrs.end());  // Ordered member lookup, not a linear scan.
+    }
+
+    // Deallocate all again.
+    for (CPUDescriptorHeapAllocation& allocation : allocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+}
+
+// Verify allocating then deallocating many times.
+TEST_P(D3D12DescriptorHeapTests, AllocateDeallocateMany) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 25;
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    std::list list3;
+    std::list list5;
+    std::vector allocations;
+
+    constexpr uint32_t kNumOfHeaps = 2;
+
+    // Allocate |kNumOfHeaps| heaps worth.
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        if (i % 3 == 0) {
+            list3.push_back(allocation);
+        } else {
+            allocations.push_back(allocation);
+        }
+    }
+
+    // Deallocate every 3rd allocation.
+    for (auto it = list3.begin(); it != list3.end(); it = list3.erase(it)) {
+        allocator.Deallocate(*it);
+    }
+
+    // Allocate again.
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        if (i % 5 == 0) {
+            list5.push_back(allocation);
+        } else {
+            allocations.push_back(allocation);
+        }
+    }
+
+    // Deallocate every 5th allocation.
+    for (auto it = list5.begin(); it != list5.end(); it = list5.erase(it)) {
+        allocator.Deallocate(*it);
+    }
+
+    // Allocate again.
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        allocations.push_back(allocation);
+    }
+
+    // Deallocate remaining.
+    for (CPUDescriptorHeapAllocation& allocation : allocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+}
+
 DAWN_INSTANTIATE_TEST(D3D12DescriptorHeapTests,
                       D3D12Backend(),
                       D3D12Backend({"use_d3d12_small_shader_visible_heap"}));