D3D12: De-duplicate sampler heap allocations.

Allows bindgroups that use the same samplers to share
a descriptor heap allocation. This is particularly important
for sampler heaps which incur expensive pipeline flushes
due to the smaller size requiring more frequent switches.

The device doles out entries to a sampler heap allocation cache.
When the BindGroup is created, it does a lookup and refs the
allocation. This ensures the cache does not grow unbounded
or needlessly store unused entries.

This change is a follow-up of de-coupling heaps.

BUG=dawn:155

Change-Id: I3ab6f1bdb13a40905cb990cd7a2139e73da30303
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/20783
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Bryan Bernhart 2020-05-18 23:25:31 +00:00 committed by Commit Bot service account
parent 275a95c1dc
commit e25ee25e77
14 changed files with 408 additions and 78 deletions

View File

@ -328,6 +328,8 @@ source_set("dawn_native_sources") {
"d3d12/ResourceHeapAllocationD3D12.h", "d3d12/ResourceHeapAllocationD3D12.h",
"d3d12/SamplerD3D12.cpp", "d3d12/SamplerD3D12.cpp",
"d3d12/SamplerD3D12.h", "d3d12/SamplerD3D12.h",
"d3d12/SamplerHeapCacheD3D12.cpp",
"d3d12/SamplerHeapCacheD3D12.h",
"d3d12/ShaderModuleD3D12.cpp", "d3d12/ShaderModuleD3D12.cpp",
"d3d12/ShaderModuleD3D12.h", "d3d12/ShaderModuleD3D12.h",
"d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp", "d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp",

View File

@ -297,6 +297,11 @@ namespace dawn_native {
return mLayout.Get(); return mLayout.Get();
} }
// Const overload of GetLayout(): returns the layout this bind group was created with.
// Must not be called on an error bind group.
const BindGroupLayoutBase* BindGroupBase::GetLayout() const {
ASSERT(!IsError());
return mLayout.Get();
}
BufferBinding BindGroupBase::GetBindingAsBufferBinding(BindingIndex bindingIndex) { BufferBinding BindGroupBase::GetBindingAsBufferBinding(BindingIndex bindingIndex) {
ASSERT(!IsError()); ASSERT(!IsError());
ASSERT(bindingIndex < mLayout->GetBindingCount()); ASSERT(bindingIndex < mLayout->GetBindingCount());
@ -309,7 +314,7 @@ namespace dawn_native {
mBindingData.bufferData[bindingIndex].size}; mBindingData.bufferData[bindingIndex].size};
} }
SamplerBase* BindGroupBase::GetBindingAsSampler(BindingIndex bindingIndex) { SamplerBase* BindGroupBase::GetBindingAsSampler(BindingIndex bindingIndex) const {
ASSERT(!IsError()); ASSERT(!IsError());
ASSERT(bindingIndex < mLayout->GetBindingCount()); ASSERT(bindingIndex < mLayout->GetBindingCount());
ASSERT(mLayout->GetBindingInfo(bindingIndex).type == wgpu::BindingType::Sampler || ASSERT(mLayout->GetBindingInfo(bindingIndex).type == wgpu::BindingType::Sampler ||

View File

@ -44,8 +44,9 @@ namespace dawn_native {
static BindGroupBase* MakeError(DeviceBase* device); static BindGroupBase* MakeError(DeviceBase* device);
BindGroupLayoutBase* GetLayout(); BindGroupLayoutBase* GetLayout();
const BindGroupLayoutBase* GetLayout() const;
BufferBinding GetBindingAsBufferBinding(BindingIndex bindingIndex); BufferBinding GetBindingAsBufferBinding(BindingIndex bindingIndex);
SamplerBase* GetBindingAsSampler(BindingIndex bindingIndex); SamplerBase* GetBindingAsSampler(BindingIndex bindingIndex) const;
TextureViewBase* GetBindingAsTextureView(BindingIndex bindingIndex); TextureViewBase* GetBindingAsTextureView(BindingIndex bindingIndex);
protected: protected:

View File

@ -209,6 +209,8 @@ if (DAWN_ENABLE_D3D12)
"d3d12/ResourceHeapAllocationD3D12.h" "d3d12/ResourceHeapAllocationD3D12.h"
"d3d12/SamplerD3D12.cpp" "d3d12/SamplerD3D12.cpp"
"d3d12/SamplerD3D12.h" "d3d12/SamplerD3D12.h"
"d3d12/SamplerHeapCacheD3D12.cpp"
"d3d12/SamplerHeapCacheD3D12.h"
"d3d12/ShaderModuleD3D12.cpp" "d3d12/ShaderModuleD3D12.cpp"
"d3d12/ShaderModuleD3D12.h" "d3d12/ShaderModuleD3D12.h"
"d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp" "d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp"

View File

@ -18,7 +18,7 @@
#include "dawn_native/d3d12/BindGroupLayoutD3D12.h" #include "dawn_native/d3d12/BindGroupLayoutD3D12.h"
#include "dawn_native/d3d12/BufferD3D12.h" #include "dawn_native/d3d12/BufferD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h" #include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/SamplerD3D12.h" #include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h" #include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/TextureD3D12.h" #include "dawn_native/d3d12/TextureD3D12.h"
@ -33,14 +33,11 @@ namespace dawn_native { namespace d3d12 {
BindGroup::BindGroup(Device* device, BindGroup::BindGroup(Device* device,
const BindGroupDescriptor* descriptor, const BindGroupDescriptor* descriptor,
uint32_t viewSizeIncrement, uint32_t viewSizeIncrement,
const CPUDescriptorHeapAllocation& viewAllocation, const CPUDescriptorHeapAllocation& viewAllocation)
uint32_t samplerSizeIncrement,
const CPUDescriptorHeapAllocation& samplerAllocation)
: BindGroupBase(this, device, descriptor) { : BindGroupBase(this, device, descriptor) {
BindGroupLayout* bgl = ToBackend(GetLayout()); BindGroupLayout* bgl = ToBackend(GetLayout());
mCPUViewAllocation = viewAllocation; mCPUViewAllocation = viewAllocation;
mCPUSamplerAllocation = samplerAllocation;
const auto& bindingOffsets = bgl->GetBindingOffsets(); const auto& bindingOffsets = bgl->GetBindingOffsets();
@ -129,11 +126,7 @@ namespace dawn_native { namespace d3d12 {
} }
case wgpu::BindingType::Sampler: case wgpu::BindingType::Sampler:
case wgpu::BindingType::ComparisonSampler: { case wgpu::BindingType::ComparisonSampler: {
auto* sampler = ToBackend(GetBindingAsSampler(bindingIndex)); // No-op as samplers will be later initialized by CreateSamplers().
auto& samplerDesc = sampler->GetSamplerDescriptor();
d3d12Device->CreateSampler(
&samplerDesc, samplerAllocation.OffsetFrom(samplerSizeIncrement,
bindingOffsets[bindingIndex]));
break; break;
} }
@ -156,32 +149,15 @@ namespace dawn_native { namespace d3d12 {
} }
BindGroup::~BindGroup() { BindGroup::~BindGroup() {
ToBackend(GetLayout()) ToBackend(GetLayout())->DeallocateBindGroup(this, &mCPUViewAllocation);
->DeallocateBindGroup(this, &mCPUViewAllocation, &mCPUSamplerAllocation);
ASSERT(!mCPUViewAllocation.IsValid()); ASSERT(!mCPUViewAllocation.IsValid());
ASSERT(!mCPUSamplerAllocation.IsValid());
} }
bool BindGroup::PopulateViews(ShaderVisibleDescriptorAllocator* viewAllocator) { bool BindGroup::PopulateViews(ShaderVisibleDescriptorAllocator* viewAllocator) {
const BindGroupLayout* bgl = ToBackend(GetLayout()); const BindGroupLayout* bgl = ToBackend(GetLayout());
return Populate(viewAllocator, bgl->GetCbvUavSrvDescriptorCount(),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, mCPUViewAllocation,
&mGPUViewAllocation);
}
bool BindGroup::PopulateSamplers(ShaderVisibleDescriptorAllocator* samplerAllocator) { const uint32_t descriptorCount = bgl->GetCbvUavSrvDescriptorCount();
const BindGroupLayout* bgl = ToBackend(GetLayout()); if (descriptorCount == 0 || viewAllocator->IsAllocationStillValid(mGPUViewAllocation)) {
return Populate(samplerAllocator, bgl->GetSamplerDescriptorCount(),
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, mCPUSamplerAllocation,
&mGPUSamplerAllocation);
}
bool BindGroup::Populate(ShaderVisibleDescriptorAllocator* allocator,
uint32_t descriptorCount,
D3D12_DESCRIPTOR_HEAP_TYPE heapType,
const CPUDescriptorHeapAllocation& stagingAllocation,
GPUDescriptorHeapAllocation* allocation) {
if (descriptorCount == 0 || allocator->IsAllocationStillValid(*allocation)) {
return true; return true;
} }
@ -190,16 +166,18 @@ namespace dawn_native { namespace d3d12 {
Device* device = ToBackend(GetDevice()); Device* device = ToBackend(GetDevice());
D3D12_CPU_DESCRIPTOR_HANDLE baseCPUDescriptor; D3D12_CPU_DESCRIPTOR_HANDLE baseCPUDescriptor;
if (!allocator->AllocateGPUDescriptors(descriptorCount, device->GetPendingCommandSerial(), if (!viewAllocator->AllocateGPUDescriptors(descriptorCount,
&baseCPUDescriptor, allocation)) { device->GetPendingCommandSerial(),
&baseCPUDescriptor, &mGPUViewAllocation)) {
return false; return false;
} }
// CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing // CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing
// simple copies per bindgroup, a single non-simple copy could be issued. // simple copies per bindgroup, a single non-simple copy could be issued.
// TODO(dawn:155): Consider doing this optimization. // TODO(dawn:155): Consider doing this optimization.
device->GetD3D12Device()->CopyDescriptorsSimple( device->GetD3D12Device()->CopyDescriptorsSimple(descriptorCount, baseCPUDescriptor,
descriptorCount, baseCPUDescriptor, stagingAllocation.GetBaseDescriptor(), heapType); mCPUViewAllocation.GetBaseDescriptor(),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
return true; return true;
} }
@ -209,6 +187,19 @@ namespace dawn_native { namespace d3d12 {
} }
D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseSamplerDescriptor() const { D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseSamplerDescriptor() const {
return mGPUSamplerAllocation.GetBaseDescriptor(); ASSERT(mSamplerAllocationEntry.Get() != nullptr);
return mSamplerAllocationEntry->GetBaseDescriptor();
}
// Makes sure the samplers of this bind group have descriptors in the shader-visible sampler
// heap managed by |samplerAllocator|.
// Returns true on success, including the case where the bind group has no sampler allocation
// entry at all; returns whatever the entry's Populate() reports otherwise.
bool BindGroup::PopulateSamplers(Device* device,
                                 ShaderVisibleDescriptorAllocator* samplerAllocator) {
    // An entry is only assigned when the layout has at least one sampler descriptor.
    if (mSamplerAllocationEntry.Get() != nullptr) {
        return mSamplerAllocationEntry->Populate(device, samplerAllocator);
    }

    // No entry: there is nothing to populate.
    return true;
}
void BindGroup::SetSamplerAllocationEntry(Ref<SamplerHeapCacheEntry> entry) {
mSamplerAllocationEntry = std::move(entry);
} }
}} // namespace dawn_native::d3d12 }} // namespace dawn_native::d3d12

View File

@ -24,7 +24,9 @@
namespace dawn_native { namespace d3d12 { namespace dawn_native { namespace d3d12 {
class Device; class Device;
class SamplerHeapCacheEntry;
class ShaderVisibleDescriptorAllocator; class ShaderVisibleDescriptorAllocator;
class StagingDescriptorAllocator;
class BindGroup final : public BindGroupBase, public PlacementAllocated { class BindGroup final : public BindGroupBase, public PlacementAllocated {
public: public:
@ -34,30 +36,23 @@ namespace dawn_native { namespace d3d12 {
BindGroup(Device* device, BindGroup(Device* device,
const BindGroupDescriptor* descriptor, const BindGroupDescriptor* descriptor,
uint32_t viewSizeIncrement, uint32_t viewSizeIncrement,
const CPUDescriptorHeapAllocation& viewAllocation, const CPUDescriptorHeapAllocation& viewAllocation);
uint32_t samplerSizeIncrement,
const CPUDescriptorHeapAllocation& samplerAllocation);
// Returns true if the BindGroup was successfully populated. // Returns true if the BindGroup was successfully populated.
bool PopulateViews(ShaderVisibleDescriptorAllocator* viewAllocator); bool PopulateViews(ShaderVisibleDescriptorAllocator* viewAllocator);
bool PopulateSamplers(ShaderVisibleDescriptorAllocator* samplerAllocator); bool PopulateSamplers(Device* device, ShaderVisibleDescriptorAllocator* samplerAllocator);
D3D12_GPU_DESCRIPTOR_HANDLE GetBaseViewDescriptor() const; D3D12_GPU_DESCRIPTOR_HANDLE GetBaseViewDescriptor() const;
D3D12_GPU_DESCRIPTOR_HANDLE GetBaseSamplerDescriptor() const; D3D12_GPU_DESCRIPTOR_HANDLE GetBaseSamplerDescriptor() const;
private: void SetSamplerAllocationEntry(Ref<SamplerHeapCacheEntry> entry);
bool Populate(ShaderVisibleDescriptorAllocator* allocator,
uint32_t descriptorCount,
D3D12_DESCRIPTOR_HEAP_TYPE heapType,
const CPUDescriptorHeapAllocation& stagingAllocation,
GPUDescriptorHeapAllocation* allocation);
private:
~BindGroup() override; ~BindGroup() override;
GPUDescriptorHeapAllocation mGPUSamplerAllocation; Ref<SamplerHeapCacheEntry> mSamplerAllocationEntry;
GPUDescriptorHeapAllocation mGPUViewAllocation;
CPUDescriptorHeapAllocation mCPUSamplerAllocation; GPUDescriptorHeapAllocation mGPUViewAllocation;
CPUDescriptorHeapAllocation mCPUViewAllocation; CPUDescriptorHeapAllocation mCPUViewAllocation;
}; };
}} // namespace dawn_native::d3d12 }} // namespace dawn_native::d3d12

View File

@ -17,6 +17,7 @@
#include "common/BitSetIterator.h" #include "common/BitSetIterator.h"
#include "dawn_native/d3d12/BindGroupD3D12.h" #include "dawn_native/d3d12/BindGroupD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h" #include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h" #include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h"
namespace dawn_native { namespace d3d12 { namespace dawn_native { namespace d3d12 {
@ -147,28 +148,25 @@ namespace dawn_native { namespace d3d12 {
viewSizeIncrement = mViewAllocator->GetSizeIncrement(); viewSizeIncrement = mViewAllocator->GetSizeIncrement();
} }
uint32_t samplerSizeIncrement = 0; Ref<BindGroup> bindGroup = AcquireRef<BindGroup>(
CPUDescriptorHeapAllocation samplerAllocation; mBindGroupAllocator.Allocate(device, descriptor, viewSizeIncrement, viewAllocation));
if (GetSamplerDescriptorCount() > 0) { if (GetSamplerDescriptorCount() > 0) {
DAWN_TRY_ASSIGN(samplerAllocation, mSamplerAllocator->AllocateCPUDescriptors()); Ref<SamplerHeapCacheEntry> samplerHeapCacheEntry;
samplerSizeIncrement = mSamplerAllocator->GetSizeIncrement(); DAWN_TRY_ASSIGN(samplerHeapCacheEntry, device->GetSamplerHeapCache()->GetOrCreate(
bindGroup.Get(), mSamplerAllocator));
bindGroup->SetSamplerAllocationEntry(std::move(samplerHeapCacheEntry));
} }
return mBindGroupAllocator.Allocate(device, descriptor, viewSizeIncrement, viewAllocation, return bindGroup.Detach();
samplerSizeIncrement, samplerAllocation);
} }
void BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup, void BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup,
CPUDescriptorHeapAllocation* viewAllocation, CPUDescriptorHeapAllocation* viewAllocation) {
CPUDescriptorHeapAllocation* samplerAllocation) {
if (viewAllocation->IsValid()) { if (viewAllocation->IsValid()) {
mViewAllocator->Deallocate(viewAllocation); mViewAllocator->Deallocate(viewAllocation);
} }
if (samplerAllocation->IsValid()) {
mSamplerAllocator->Deallocate(samplerAllocation);
}
mBindGroupAllocator.Deallocate(bindGroup); mBindGroupAllocator.Deallocate(bindGroup);
} }

View File

@ -25,6 +25,7 @@ namespace dawn_native { namespace d3d12 {
class BindGroup; class BindGroup;
class CPUDescriptorHeapAllocation; class CPUDescriptorHeapAllocation;
class Device; class Device;
class SamplerHeapCacheEntry;
class StagingDescriptorAllocator; class StagingDescriptorAllocator;
class BindGroupLayout final : public BindGroupLayoutBase { class BindGroupLayout final : public BindGroupLayoutBase {
@ -33,9 +34,7 @@ namespace dawn_native { namespace d3d12 {
ResultOrError<BindGroup*> AllocateBindGroup(Device* device, ResultOrError<BindGroup*> AllocateBindGroup(Device* device,
const BindGroupDescriptor* descriptor); const BindGroupDescriptor* descriptor);
void DeallocateBindGroup(BindGroup* bindGroup, void DeallocateBindGroup(BindGroup* bindGroup, CPUDescriptorHeapAllocation* viewAllocation);
CPUDescriptorHeapAllocation* viewAllocation,
CPUDescriptorHeapAllocation* samplerAllocation);
enum DescriptorType { enum DescriptorType {
CBV, CBV,

View File

@ -30,6 +30,7 @@
#include "dawn_native/d3d12/RenderPassBuilderD3D12.h" #include "dawn_native/d3d12/RenderPassBuilderD3D12.h"
#include "dawn_native/d3d12/RenderPipelineD3D12.h" #include "dawn_native/d3d12/RenderPipelineD3D12.h"
#include "dawn_native/d3d12/SamplerD3D12.h" #include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h" #include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h" #include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/TextureCopySplitter.h" #include "dawn_native/d3d12/TextureCopySplitter.h"
@ -95,6 +96,7 @@ namespace dawn_native { namespace d3d12 {
public: public:
BindGroupStateTracker(Device* device) BindGroupStateTracker(Device* device)
: BindGroupAndStorageBarrierTrackerBase(), : BindGroupAndStorageBarrierTrackerBase(),
mDevice(device),
mViewAllocator(device->GetViewShaderVisibleDescriptorAllocator()), mViewAllocator(device->GetViewShaderVisibleDescriptorAllocator()),
mSamplerAllocator(device->GetSamplerShaderVisibleDescriptorAllocator()) { mSamplerAllocator(device->GetSamplerShaderVisibleDescriptorAllocator()) {
} }
@ -117,7 +119,7 @@ namespace dawn_native { namespace d3d12 {
for (uint32_t index : IterateBitSet(mDirtyBindGroups)) { for (uint32_t index : IterateBitSet(mDirtyBindGroups)) {
BindGroup* group = ToBackend(mBindGroups[index]); BindGroup* group = ToBackend(mBindGroups[index]);
didCreateBindGroupViews = group->PopulateViews(mViewAllocator); didCreateBindGroupViews = group->PopulateViews(mViewAllocator);
didCreateBindGroupSamplers = group->PopulateSamplers(mSamplerAllocator); didCreateBindGroupSamplers = group->PopulateSamplers(mDevice, mSamplerAllocator);
if (!didCreateBindGroupViews && !didCreateBindGroupSamplers) { if (!didCreateBindGroupViews && !didCreateBindGroupSamplers) {
break; break;
} }
@ -143,7 +145,8 @@ namespace dawn_native { namespace d3d12 {
for (uint32_t index : IterateBitSet(mBindGroupLayoutsMask)) { for (uint32_t index : IterateBitSet(mBindGroupLayoutsMask)) {
BindGroup* group = ToBackend(mBindGroups[index]); BindGroup* group = ToBackend(mBindGroups[index]);
didCreateBindGroupViews = group->PopulateViews(mViewAllocator); didCreateBindGroupViews = group->PopulateViews(mViewAllocator);
didCreateBindGroupSamplers = group->PopulateSamplers(mSamplerAllocator); didCreateBindGroupSamplers =
group->PopulateSamplers(mDevice, mSamplerAllocator);
ASSERT(didCreateBindGroupViews); ASSERT(didCreateBindGroupViews);
ASSERT(didCreateBindGroupSamplers); ASSERT(didCreateBindGroupSamplers);
} }
@ -310,6 +313,8 @@ namespace dawn_native { namespace d3d12 {
} }
} }
Device* mDevice;
bool mInCompute = false; bool mInCompute = false;
ShaderVisibleDescriptorAllocator* mViewAllocator; ShaderVisibleDescriptorAllocator* mViewAllocator;

View File

@ -33,6 +33,7 @@
#include "dawn_native/d3d12/ResidencyManagerD3D12.h" #include "dawn_native/d3d12/ResidencyManagerD3D12.h"
#include "dawn_native/d3d12/ResourceAllocatorManagerD3D12.h" #include "dawn_native/d3d12/ResourceAllocatorManagerD3D12.h"
#include "dawn_native/d3d12/SamplerD3D12.h" #include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/ShaderModuleD3D12.h" #include "dawn_native/d3d12/ShaderModuleD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h" #include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/StagingBufferD3D12.h" #include "dawn_native/d3d12/StagingBufferD3D12.h"
@ -109,6 +110,8 @@ namespace dawn_native { namespace d3d12 {
mDepthStencilViewAllocator = std::make_unique<StagingDescriptorAllocator>( mDepthStencilViewAllocator = std::make_unique<StagingDescriptorAllocator>(
this, 1, kAttachmentDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); this, 1, kAttachmentDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
mSamplerHeapCache = std::make_unique<SamplerHeapCache>(this);
mMapRequestTracker = std::make_unique<MapRequestTracker>(this); mMapRequestTracker = std::make_unique<MapRequestTracker>(this);
mResidencyManager = std::make_unique<ResidencyManager>(this); mResidencyManager = std::make_unique<ResidencyManager>(this);
mResourceAllocatorManager = std::make_unique<ResourceAllocatorManager>(this); mResourceAllocatorManager = std::make_unique<ResourceAllocatorManager>(this);
@ -503,4 +506,8 @@ namespace dawn_native { namespace d3d12 {
return mDepthStencilViewAllocator.get(); return mDepthStencilViewAllocator.get();
} }
// Returns the device-owned sampler heap cache (non-owning pointer; the device keeps ownership
// through mSamplerHeapCache).
SamplerHeapCache* Device::GetSamplerHeapCache() {
return mSamplerHeapCache.get();
}
}} // namespace dawn_native::d3d12 }} // namespace dawn_native::d3d12

View File

@ -35,6 +35,7 @@ namespace dawn_native { namespace d3d12 {
class PlatformFunctions; class PlatformFunctions;
class ResidencyManager; class ResidencyManager;
class ResourceAllocatorManager; class ResourceAllocatorManager;
class SamplerHeapCache;
class ShaderVisibleDescriptorAllocator; class ShaderVisibleDescriptorAllocator;
class StagingDescriptorAllocator; class StagingDescriptorAllocator;
@ -107,6 +108,8 @@ namespace dawn_native { namespace d3d12 {
StagingDescriptorAllocator* GetSamplerStagingDescriptorAllocator( StagingDescriptorAllocator* GetSamplerStagingDescriptorAllocator(
uint32_t descriptorCount) const; uint32_t descriptorCount) const;
SamplerHeapCache* GetSamplerHeapCache();
StagingDescriptorAllocator* GetRenderTargetViewAllocator() const; StagingDescriptorAllocator* GetRenderTargetViewAllocator() const;
StagingDescriptorAllocator* GetDepthStencilViewAllocator() const; StagingDescriptorAllocator* GetDepthStencilViewAllocator() const;
@ -194,6 +197,10 @@ namespace dawn_native { namespace d3d12 {
std::unique_ptr<ShaderVisibleDescriptorAllocator> mViewShaderVisibleDescriptorAllocator; std::unique_ptr<ShaderVisibleDescriptorAllocator> mViewShaderVisibleDescriptorAllocator;
std::unique_ptr<ShaderVisibleDescriptorAllocator> mSamplerShaderVisibleDescriptorAllocator; std::unique_ptr<ShaderVisibleDescriptorAllocator> mSamplerShaderVisibleDescriptorAllocator;
// Sampler cache needs to be destroyed before the CPU sampler allocator to ensure the final
// release is called.
std::unique_ptr<SamplerHeapCache> mSamplerHeapCache;
}; };
}} // namespace dawn_native::d3d12 }} // namespace dawn_native::d3d12

View File

@ -0,0 +1,167 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "common/Assert.h"
#include "common/HashUtils.h"
#include "dawn_native/d3d12/BindGroupD3D12.h"
#include "dawn_native/d3d12/BindGroupLayoutD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/Forward.h"
#include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h"
namespace dawn_native { namespace d3d12 {
// Builds a "blueprint" entry that only carries the sampler list. It has no CPU descriptor
// allocation and no owning cache; SamplerHeapCache::GetOrCreate uses it as a lookup key.
SamplerHeapCacheEntry::SamplerHeapCacheEntry(std::vector<Sampler*> samplers)
: mSamplers(std::move(samplers)) {
}
// Builds a real cache entry.
//   cache      - cache this entry is removed from when it is destroyed; must be non-null.
//   allocator  - staging allocator that |allocation| is returned to on destruction.
//   samplers   - packed list of samplers backing the allocation; must be non-empty.
//   allocation - CPU descriptor allocation holding the sampler descriptors; must be valid.
SamplerHeapCacheEntry::SamplerHeapCacheEntry(SamplerHeapCache* cache,
StagingDescriptorAllocator* allocator,
std::vector<Sampler*> samplers,
CPUDescriptorHeapAllocation allocation)
: mCPUAllocation(std::move(allocation)),
mSamplers(std::move(samplers)),
mAllocator(allocator),
mCache(cache) {
// A real (non-blueprint) entry always has a cache, a valid allocation and samplers.
ASSERT(mCache != nullptr);
ASSERT(mCPUAllocation.IsValid());
ASSERT(!mSamplers.empty());
}
// Hands the sampler list back to the caller as an rvalue so it can be moved out of this entry.
// GetOrCreate uses this to reuse the blueprint's vector instead of copying it.
std::vector<Sampler*>&& SamplerHeapCacheEntry::AcquireSamplers() {
return std::move(mSamplers);
}
// Unregisters the entry from its cache and returns the CPU descriptors to the staging
// allocator. Blueprint entries skip both steps.
SamplerHeapCacheEntry::~SamplerHeapCacheEntry() {
    // Only a real entry holds a valid CPU allocation; a blueprint was never inserted into the
    // cache and owns nothing that needs to be released.
    const bool ownsCPUAllocation = mCPUAllocation.IsValid();
    if (ownsCPUAllocation) {
        mCache->RemoveCacheEntry(this);
        mAllocator->Deallocate(&mCPUAllocation);
    }

    // Either there never was an allocation, or it must be invalid after Deallocate().
    ASSERT(!mCPUAllocation.IsValid());
}
// Ensures the samplers of this entry have descriptors in the shader-visible heap managed by
// |allocator|, copying them from the CPU (staging) allocation when needed.
// Returns true when the GPU allocation is usable, false when the shader-visible allocation
// failed (the original comment notes the caller then re-allocates and switches heaps).
bool SamplerHeapCacheEntry::Populate(Device* device,
                                     ShaderVisibleDescriptorAllocator* allocator) {
    // Reuse the existing GPU allocation if the allocator still considers it valid.
    if (allocator->IsAllocationStillValid(mGPUAllocation)) {
        return true;
    }

    ASSERT(!mSamplers.empty());

    // One descriptor is needed per sampler stored in this entry.
    const uint32_t samplerCount = static_cast<uint32_t>(mSamplers.size());

    // Try to reserve space for the descriptors; on failure nothing is copied.
    D3D12_CPU_DESCRIPTOR_HANDLE shaderVisibleBase;
    if (!allocator->AllocateGPUDescriptors(samplerCount, device->GetPendingCommandSerial(),
                                           &shaderVisibleBase, &mGPUAllocation)) {
        return false;
    }

    // CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing
    // simple copies per bindgroup, a single non-simple copy could be issued.
    // TODO(dawn:155): Consider doing this optimization.
    const D3D12_CPU_DESCRIPTOR_HANDLE stagingBase = mCPUAllocation.GetBaseDescriptor();
    device->GetD3D12Device()->CopyDescriptorsSimple(samplerCount, shaderVisibleBase, stagingBase,
                                                    D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

    return true;
}
// Returns the base descriptor handle of this entry's GPU allocation.
// NOTE(review): presumably only meaningful after a successful Populate() - confirm.
D3D12_GPU_DESCRIPTOR_HANDLE SamplerHeapCacheEntry::GetBaseDescriptor() const {
return mGPUAllocation.GetBaseDescriptor();
}
// Returns the cache entry for the samplers bound in |group|, creating it on a miss.
//
// The packed list of the group's samplers is the cache key, so bind groups that use the same
// samplers share one sampler heap allocation. On a miss, CPU descriptors are allocated from
// |samplerAllocator|, one sampler descriptor is created per sampler, and the new entry is
// inserted into the cache. An allocation failure is propagated as an error.
ResultOrError<Ref<SamplerHeapCacheEntry>> SamplerHeapCache::GetOrCreate(
    const BindGroup* group,
    StagingDescriptorAllocator* samplerAllocator) {
    const BindGroupLayout* layout = ToBackend(group->GetLayout());

    // Gather the samplers of the bind group in binding order to form the lookup key.
    // TODO(dawn:155): Avoid re-allocating the vector each lookup.
    std::vector<Sampler*> samplers;
    samplers.reserve(layout->GetSamplerDescriptorCount());

    for (BindingIndex bindingIndex = layout->GetDynamicBufferCount();
         bindingIndex < layout->GetBindingCount(); ++bindingIndex) {
        const BindingInfo& bindingInfo = layout->GetBindingInfo(bindingIndex);
        if (bindingInfo.type != wgpu::BindingType::Sampler &&
            bindingInfo.type != wgpu::BindingType::ComparisonSampler) {
            continue;
        }
        samplers.push_back(ToBackend(group->GetBindingAsSampler(bindingIndex)));
    }

    // Look the key up through a temporary blueprint entry that only carries the samplers.
    SamplerHeapCacheEntry blueprint(std::move(samplers));
    auto cached = mCache.find(&blueprint);
    if (cached != mCache.end()) {
        return Ref<SamplerHeapCacheEntry>(*cached);
    }

    // Cache miss: take the sampler vector back from the blueprint so the real entry below
    // does not need a fresh copy.
    samplers = blueprint.AcquireSamplers();

    CPUDescriptorHeapAllocation allocation;
    DAWN_TRY_ASSIGN(allocation, samplerAllocator->AllocateCPUDescriptors());

    // Write one sampler descriptor per sampler, packed contiguously in the CPU allocation.
    const uint32_t samplerSizeIncrement = samplerAllocator->GetSizeIncrement();
    ID3D12Device* d3d12Device = mDevice->GetD3D12Device();

    uint32_t descriptorIndex = 0;
    for (Sampler* sampler : samplers) {
        const auto& samplerDesc = sampler->GetSamplerDescriptor();
        d3d12Device->CreateSampler(
            &samplerDesc, allocation.OffsetFrom(samplerSizeIncrement, descriptorIndex));
        ++descriptorIndex;
    }

    // The cache stores a raw pointer; the entry removes itself from the cache on destruction.
    Ref<SamplerHeapCacheEntry> entry = AcquireRef(new SamplerHeapCacheEntry(
        this, samplerAllocator, std::move(samplers), std::move(allocation)));
    mCache.insert(entry.Get());
    return std::move(entry);
}
// Creates an empty cache that uses |device| to create sampler descriptors.
SamplerHeapCache::SamplerHeapCache(Device* device) : mDevice(device) {
}
// Entries remove themselves from the cache when destroyed, so every entry must already be
// gone by the time the cache itself is destroyed.
SamplerHeapCache::~SamplerHeapCache() {
ASSERT(mCache.empty());
}
// Drops |entry| from the cache. Called from the entry's destructor, so the entry must have
// no outstanding references and must currently be present in the cache.
void SamplerHeapCache::RemoveCacheEntry(SamplerHeapCacheEntry* entry) {
    ASSERT(entry->GetRefCountForTesting() == 0);

    // Exactly one element is expected to match the entry's sampler list.
    const size_t erasedCount = mCache.erase(entry);
    ASSERT(erasedCount == 1);
}
// Hashes an entry by folding the pointer of each of its samplers, in order, into one value.
// Entries with the same sampler list therefore hash identically.
size_t SamplerHeapCacheEntry::HashFunc::operator()(const SamplerHeapCacheEntry* entry) const {
    const std::vector<Sampler*>& samplerList = entry->mSamplers;

    size_t combinedHash = 0;
    for (const Sampler* sampler : samplerList) {
        HashCombine(&combinedHash, sampler);
    }
    return combinedHash;
}
// Two entries are equal when their sampler lists hold the same sampler pointers in the same
// order.
bool SamplerHeapCacheEntry::EqualityFunc::operator()(const SamplerHeapCacheEntry* a,
                                                     const SamplerHeapCacheEntry* b) const {
    const std::vector<Sampler*>& lhsSamplers = a->mSamplers;
    const std::vector<Sampler*>& rhsSamplers = b->mSamplers;
    return lhsSamplers == rhsSamplers;
}
}} // namespace dawn_native::d3d12

View File

@ -0,0 +1,108 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DAWNNATIVE_D3D12_SAMPLERHEAPCACHE_H_
#define DAWNNATIVE_D3D12_SAMPLERHEAPCACHE_H_
#include "common/RefCounted.h"
#include "dawn_native/BindingInfo.h"
#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h"
#include "dawn_native/d3d12/GPUDescriptorHeapAllocationD3D12.h"
#include <unordered_set>
#include <vector>
// |SamplerHeapCacheEntry| maintains a cache of sampler descriptor heap allocations.
// Each entry represents one or more sampler descriptors that co-exist in a CPU and
// GPU descriptor heap. The CPU-side allocation is deallocated once the final reference
// has been released while the GPU-side allocation is deallocated when the GPU is finished.
//
// The BindGroupLayout hands out these entries upon constructing the bindgroup. If no matching
// entry exists yet, one is allocated and initialized so it may be reused by another bindgroup.
//
// The cache is primarily needed for the GPU sampler heap, which is much smaller than the view heap
// and switches incur expensive pipeline flushes.
namespace dawn_native { namespace d3d12 {
class BindGroup;
class Device;
class Sampler;
class SamplerHeapCache;
class StagingDescriptorAllocator;
class ShaderVisibleDescriptorAllocator;
// Wraps sampler descriptor heap allocations in a cache.
// A ref-counted entry pairs a packed list of samplers with the CPU and GPU sampler descriptor
// allocations created for them. An entry built from samplers alone acts as a "blueprint" that
// is only used as a key for cache lookups.
class SamplerHeapCacheEntry : public RefCounted {
public:
SamplerHeapCacheEntry() = default;
// Blueprint constructor: stores |samplers| only, with no allocation and no owning cache.
SamplerHeapCacheEntry(std::vector<Sampler*> samplers);
// Real entry constructor: takes ownership of |allocation|, which is handed back to
// |allocator| when the entry is destroyed, and remembers |cache| so the entry can remove
// itself from it.
SamplerHeapCacheEntry(SamplerHeapCache* cache,
StagingDescriptorAllocator* allocator,
std::vector<Sampler*> samplers,
CPUDescriptorHeapAllocation allocation);
~SamplerHeapCacheEntry() override;
// Base descriptor of the GPU allocation.
D3D12_GPU_DESCRIPTOR_HANDLE GetBaseDescriptor() const;
// Moves the sampler list out of this entry.
std::vector<Sampler*>&& AcquireSamplers();
// Ensures the samplers have descriptors in the shader-visible heap of |allocator|.
// Returns false if the shader-visible allocation failed.
bool Populate(Device* device, ShaderVisibleDescriptorAllocator* allocator);
// Functors necessary for the unordered_set<SamplerHeapCacheEntry*>-based cache.
// Both operate on the entry's sampler list only.
struct HashFunc {
size_t operator()(const SamplerHeapCacheEntry* entry) const;
};
struct EqualityFunc {
bool operator()(const SamplerHeapCacheEntry* a, const SamplerHeapCacheEntry* b) const;
};
private:
// CPU-side descriptors; valid only for real (non-blueprint) entries.
CPUDescriptorHeapAllocation mCPUAllocation;
// GPU-side descriptors; (re)allocated by Populate().
GPUDescriptorHeapAllocation mGPUAllocation;
// Storing raw pointer because the sampler object will be already hashed
// by the device and will already be unique.
std::vector<Sampler*> mSamplers;
// Allocator that mCPUAllocation is returned to; null for blueprints.
StagingDescriptorAllocator* mAllocator = nullptr;
// Cache this entry removes itself from on destruction; null for blueprints.
SamplerHeapCache* mCache = nullptr;
};
// Cache descriptor heap allocations so that we don't create duplicate ones for every
// BindGroup.
// The cache holds raw (non-owning) pointers to its entries; each real entry removes itself
// through RemoveCacheEntry() when it is destroyed.
class SamplerHeapCache {
public:
SamplerHeapCache(Device* device);
~SamplerHeapCache();
// Returns the entry matching the samplers bound in |group|, creating one with descriptors
// allocated from |samplerAllocator| if none exists yet. Allocation failure is returned as
// an error.
ResultOrError<Ref<SamplerHeapCacheEntry>> GetOrCreate(
const BindGroup* group,
StagingDescriptorAllocator* samplerAllocator);
// Removes |entry| from the cache; called from the entry's destructor.
void RemoveCacheEntry(SamplerHeapCacheEntry* entry);
private:
Device* mDevice;
// Entries are hashed and compared by their sampler lists.
using Cache = std::unordered_set<SamplerHeapCacheEntry*,
SamplerHeapCacheEntry::HashFunc,
SamplerHeapCacheEntry::EqualityFunc>;
Cache mCache;
};
}} // namespace dawn_native::d3d12
#endif // DAWNNATIVE_D3D12_SAMPLERHEAPCACHE_H_

View File

@ -115,12 +115,60 @@ class DummyStagingDescriptorAllocator {
StagingDescriptorAllocator mAllocator; StagingDescriptorAllocator mAllocator;
}; };
// Verify the shader visible sampler heap switch within a single submit. // Verify the shader visible view heaps switch over within a single submit.
TEST_P(D3D12DescriptorHeapTests, SwitchOverSamplerHeap) { TEST_P(D3D12DescriptorHeapTests, SwitchOverViewHeap) {
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);
// Fill in a view heap with "view only" bindgroups (1x view per group) by creating a
// view bindgroup each draw. After HEAP_SIZE + 1 draws, the heaps must switch over.
renderPipelineDescriptor.vertexStage.module = mSimpleVSModule;
renderPipelineDescriptor.cFragmentStage.module = mSimpleFSModule;
wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&renderPipelineDescriptor);
utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize);
Device* d3dDevice = reinterpret_cast<Device*>(device.Get());
ShaderVisibleDescriptorAllocator* allocator =
d3dDevice->GetViewShaderVisibleDescriptorAllocator();
const uint64_t heapSize = allocator->GetShaderVisibleHeapSizeForTesting();
const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
{
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
pass.SetPipeline(renderPipeline);
std::array<float, 4> redColor = {1, 0, 0, 1};
wgpu::Buffer uniformBuffer = utils::CreateBufferFromData(
device, &redColor, sizeof(redColor), wgpu::BufferUsage::Uniform);
for (uint32_t i = 0; i < heapSize + 1; ++i) {
pass.SetBindGroup(0, utils::MakeBindGroup(device, renderPipeline.GetBindGroupLayout(0),
{{0, uniformBuffer, 0, sizeof(redColor)}}));
pass.Draw(3);
}
pass.EndPass();
}
wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands);
EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + 1);
}
// Verify the shader visible sampler heap does not switch over within a single submit.
TEST_P(D3D12DescriptorHeapTests, NoSwitchOverSamplerHeap) {
utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device); utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);
// Fill in a sampler heap with "sampler only" bindgroups (1x sampler per group) by creating a // Fill in a sampler heap with "sampler only" bindgroups (1x sampler per group) by creating a
// sampler bindgroup each draw. After HEAP_SIZE + 1 draws, the heaps must switch over. // sampler bindgroup each draw. After HEAP_SIZE + 1 draws, the heaps WILL NOT switch over
// because the sampler heap allocations are de-duplicated.
renderPipelineDescriptor.vertexStage.module = renderPipelineDescriptor.vertexStage.module =
utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"( utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
#version 450 #version 450
@ -167,7 +215,7 @@ TEST_P(D3D12DescriptorHeapTests, SwitchOverSamplerHeap) {
wgpu::CommandBuffer commands = encoder.Finish(); wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands); queue.Submit(1, &commands);
EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + 1); EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial);
} }
// Verify shader-visible heaps can be recycled for multiple submits. // Verify shader-visible heaps can be recycled for multiple submits.
@ -727,13 +775,8 @@ TEST_P(D3D12DescriptorHeapTests, EncodeManyUBOAndSamplers) {
EXPECT_EQ(viewAllocator->GetShaderVisibleHeapSerialForTesting(), EXPECT_EQ(viewAllocator->GetShaderVisibleHeapSerialForTesting(),
viewHeapSerial + kNumOfViewHeaps); viewHeapSerial + kNumOfViewHeaps);
const uint32_t numOfSamplerHeaps = EXPECT_EQ(samplerAllocator->GetShaderVisiblePoolSizeForTesting(), 0u);
numOfEncodedBindGroups / EXPECT_EQ(samplerAllocator->GetShaderVisibleHeapSerialForTesting(), samplerHeapSerial);
samplerAllocator->GetShaderVisibleHeapSizeForTesting(); // 1 sampler per group.
EXPECT_EQ(samplerAllocator->GetShaderVisiblePoolSizeForTesting(), numOfSamplerHeaps);
EXPECT_EQ(samplerAllocator->GetShaderVisibleHeapSerialForTesting(),
samplerHeapSerial + numOfSamplerHeaps);
} }
} }