D3D12: De-duplicate sampler heap allocations.
Allows bindgroups that use the same samplers to share a descriptor heap allocation. This is particularly important for sampler heaps, whose smaller size forces more frequent heap switches, and each switch incurs an expensive pipeline flush. The device doles out entries from a sampler heap allocation cache; when a BindGroup is created, it looks up the matching entry and takes a ref on the allocation. This ensures the cache does not grow unbounded or needlessly store unused entries.

This change is a follow-up to de-coupling heaps.

BUG=dawn:155
Change-Id: I3ab6f1bdb13a40905cb990cd7a2139e73da30303
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/20783
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Parent: 275a95c1dc
Commit: e25ee25e77
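The core idea of the change can be illustrated with a small standalone C++ sketch (illustration only: the names SamplerSetCache and Entry are hypothetical, not Dawn's API). A cache keyed by the packed list of sampler pointers hands out ref-counted entries, so bind groups that use the same samplers share one heap allocation, and an entry removes itself from the cache when its last reference is released.

```cpp
// Standalone sketch of the de-duplication scheme described above; not Dawn code.
// Assumes the cache outlives its entries, as the device guarantees in the real backend.
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <vector>

struct Sampler {};  // Stand-in for the backend sampler object.

class SamplerSetCache {
  public:
    // One shared entry per distinct set of samplers. A real entry would also own the
    // CPU/GPU sampler descriptor heap allocations created for that set.
    struct Entry {
        Entry(SamplerSetCache* cache, std::vector<Sampler*> samplers)
            : cache(cache), samplers(std::move(samplers)) {}
        // The final release removes the cache slot, so the cache never grows unbounded
        // or keeps unused entries.
        ~Entry() { cache->mEntries.erase(samplers); }

        SamplerSetCache* cache;
        std::vector<Sampler*> samplers;
    };

    // Returns the existing entry for this sampler set, or creates and inserts a new one.
    std::shared_ptr<Entry> GetOrCreate(std::vector<Sampler*> samplers) {
        auto it = mEntries.find(samplers);
        if (it != mEntries.end()) {
            if (auto existing = it->second.lock()) {
                return existing;  // Re-use: identical sampler sets share one allocation.
            }
        }
        auto entry = std::make_shared<Entry>(this, samplers);
        mEntries[std::move(samplers)] = entry;  // Weak ref: the cache never keeps entries alive.
        return entry;
    }

  private:
    // Total order over the pointer lists so they can key a std::map.
    struct KeyLess {
        bool operator()(const std::vector<Sampler*>& a, const std::vector<Sampler*>& b) const {
            return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(),
                                                std::less<Sampler*>());
        }
    };

    std::map<std::vector<Sampler*>, std::weak_ptr<Entry>, KeyLess> mEntries;
};
```

A bind group would call GetOrCreate with its packed sampler list at creation time and hold the returned reference for its lifetime; the diff below implements the same idea with an intrusive unordered_set of ref-counted SamplerHeapCacheEntry objects.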
@@ -328,6 +328,8 @@ source_set("dawn_native_sources") {
"d3d12/ResourceHeapAllocationD3D12.h",
"d3d12/SamplerD3D12.cpp",
"d3d12/SamplerD3D12.h",
"d3d12/SamplerHeapCacheD3D12.cpp",
"d3d12/SamplerHeapCacheD3D12.h",
"d3d12/ShaderModuleD3D12.cpp",
"d3d12/ShaderModuleD3D12.h",
"d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp",

@@ -297,6 +297,11 @@ namespace dawn_native {
return mLayout.Get();
}

const BindGroupLayoutBase* BindGroupBase::GetLayout() const {
ASSERT(!IsError());
return mLayout.Get();
}

BufferBinding BindGroupBase::GetBindingAsBufferBinding(BindingIndex bindingIndex) {
ASSERT(!IsError());
ASSERT(bindingIndex < mLayout->GetBindingCount());

@@ -309,7 +314,7 @@ namespace dawn_native {
mBindingData.bufferData[bindingIndex].size};
}

SamplerBase* BindGroupBase::GetBindingAsSampler(BindingIndex bindingIndex) {
SamplerBase* BindGroupBase::GetBindingAsSampler(BindingIndex bindingIndex) const {
ASSERT(!IsError());
ASSERT(bindingIndex < mLayout->GetBindingCount());
ASSERT(mLayout->GetBindingInfo(bindingIndex).type == wgpu::BindingType::Sampler ||

@@ -44,8 +44,9 @@ namespace dawn_native {
static BindGroupBase* MakeError(DeviceBase* device);

BindGroupLayoutBase* GetLayout();
const BindGroupLayoutBase* GetLayout() const;
BufferBinding GetBindingAsBufferBinding(BindingIndex bindingIndex);
SamplerBase* GetBindingAsSampler(BindingIndex bindingIndex);
SamplerBase* GetBindingAsSampler(BindingIndex bindingIndex) const;
TextureViewBase* GetBindingAsTextureView(BindingIndex bindingIndex);

protected:

@@ -209,6 +209,8 @@ if (DAWN_ENABLE_D3D12)
"d3d12/ResourceHeapAllocationD3D12.h"
"d3d12/SamplerD3D12.cpp"
"d3d12/SamplerD3D12.h"
"d3d12/SamplerHeapCacheD3D12.cpp"
"d3d12/SamplerHeapCacheD3D12.h"
"d3d12/ShaderModuleD3D12.cpp"
"d3d12/ShaderModuleD3D12.h"
"d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp"

@@ -18,7 +18,7 @@
#include "dawn_native/d3d12/BindGroupLayoutD3D12.h"
#include "dawn_native/d3d12/BufferD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/TextureD3D12.h"

@@ -33,14 +33,11 @@ namespace dawn_native { namespace d3d12 {
BindGroup::BindGroup(Device* device,
const BindGroupDescriptor* descriptor,
uint32_t viewSizeIncrement,
const CPUDescriptorHeapAllocation& viewAllocation,
uint32_t samplerSizeIncrement,
const CPUDescriptorHeapAllocation& samplerAllocation)
const CPUDescriptorHeapAllocation& viewAllocation)
: BindGroupBase(this, device, descriptor) {
BindGroupLayout* bgl = ToBackend(GetLayout());

mCPUViewAllocation = viewAllocation;
mCPUSamplerAllocation = samplerAllocation;

const auto& bindingOffsets = bgl->GetBindingOffsets();

@@ -129,11 +126,7 @@ namespace dawn_native { namespace d3d12 {
}
case wgpu::BindingType::Sampler:
case wgpu::BindingType::ComparisonSampler: {
auto* sampler = ToBackend(GetBindingAsSampler(bindingIndex));
auto& samplerDesc = sampler->GetSamplerDescriptor();
d3d12Device->CreateSampler(
&samplerDesc, samplerAllocation.OffsetFrom(samplerSizeIncrement,
bindingOffsets[bindingIndex]));
// No-op as samplers will be later initialized by CreateSamplers().
break;
}

@@ -156,32 +149,15 @@ namespace dawn_native { namespace d3d12 {
}

BindGroup::~BindGroup() {
ToBackend(GetLayout())
->DeallocateBindGroup(this, &mCPUViewAllocation, &mCPUSamplerAllocation);
ToBackend(GetLayout())->DeallocateBindGroup(this, &mCPUViewAllocation);
ASSERT(!mCPUViewAllocation.IsValid());
ASSERT(!mCPUSamplerAllocation.IsValid());
}

bool BindGroup::PopulateViews(ShaderVisibleDescriptorAllocator* viewAllocator) {
const BindGroupLayout* bgl = ToBackend(GetLayout());
return Populate(viewAllocator, bgl->GetCbvUavSrvDescriptorCount(),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, mCPUViewAllocation,
&mGPUViewAllocation);
}

bool BindGroup::PopulateSamplers(ShaderVisibleDescriptorAllocator* samplerAllocator) {
const BindGroupLayout* bgl = ToBackend(GetLayout());
return Populate(samplerAllocator, bgl->GetSamplerDescriptorCount(),
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, mCPUSamplerAllocation,
&mGPUSamplerAllocation);
}

bool BindGroup::Populate(ShaderVisibleDescriptorAllocator* allocator,
uint32_t descriptorCount,
D3D12_DESCRIPTOR_HEAP_TYPE heapType,
const CPUDescriptorHeapAllocation& stagingAllocation,
GPUDescriptorHeapAllocation* allocation) {
if (descriptorCount == 0 || allocator->IsAllocationStillValid(*allocation)) {
const uint32_t descriptorCount = bgl->GetCbvUavSrvDescriptorCount();
if (descriptorCount == 0 || viewAllocator->IsAllocationStillValid(mGPUViewAllocation)) {
return true;
}

@@ -190,16 +166,18 @@ namespace dawn_native { namespace d3d12 {
Device* device = ToBackend(GetDevice());

D3D12_CPU_DESCRIPTOR_HANDLE baseCPUDescriptor;
if (!allocator->AllocateGPUDescriptors(descriptorCount, device->GetPendingCommandSerial(),
&baseCPUDescriptor, allocation)) {
if (!viewAllocator->AllocateGPUDescriptors(descriptorCount,
device->GetPendingCommandSerial(),
&baseCPUDescriptor, &mGPUViewAllocation)) {
return false;
}

// CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing
// simple copies per bindgroup, a single non-simple copy could be issued.
// TODO(dawn:155): Consider doing this optimization.
device->GetD3D12Device()->CopyDescriptorsSimple(
descriptorCount, baseCPUDescriptor, stagingAllocation.GetBaseDescriptor(), heapType);
device->GetD3D12Device()->CopyDescriptorsSimple(descriptorCount, baseCPUDescriptor,
mCPUViewAllocation.GetBaseDescriptor(),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

return true;
}

@@ -209,6 +187,19 @@ namespace dawn_native { namespace d3d12 {
}

D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseSamplerDescriptor() const {
return mGPUSamplerAllocation.GetBaseDescriptor();
ASSERT(mSamplerAllocationEntry.Get() != nullptr);
return mSamplerAllocationEntry->GetBaseDescriptor();
}

bool BindGroup::PopulateSamplers(Device* device,
ShaderVisibleDescriptorAllocator* samplerAllocator) {
if (mSamplerAllocationEntry.Get() == nullptr) {
return true;
}
return mSamplerAllocationEntry->Populate(device, samplerAllocator);
}

void BindGroup::SetSamplerAllocationEntry(Ref<SamplerHeapCacheEntry> entry) {
mSamplerAllocationEntry = std::move(entry);
}
}} // namespace dawn_native::d3d12

@@ -24,7 +24,9 @@
namespace dawn_native { namespace d3d12 {

class Device;
class SamplerHeapCacheEntry;
class ShaderVisibleDescriptorAllocator;
class StagingDescriptorAllocator;

class BindGroup final : public BindGroupBase, public PlacementAllocated {
public:

@@ -34,30 +36,23 @@ namespace dawn_native { namespace d3d12 {
BindGroup(Device* device,
const BindGroupDescriptor* descriptor,
uint32_t viewSizeIncrement,
const CPUDescriptorHeapAllocation& viewAllocation,
uint32_t samplerSizeIncrement,
const CPUDescriptorHeapAllocation& samplerAllocation);
const CPUDescriptorHeapAllocation& viewAllocation);

// Returns true if the BindGroup was successfully populated.
bool PopulateViews(ShaderVisibleDescriptorAllocator* viewAllocator);
bool PopulateSamplers(ShaderVisibleDescriptorAllocator* samplerAllocator);
bool PopulateSamplers(Device* device, ShaderVisibleDescriptorAllocator* samplerAllocator);

D3D12_GPU_DESCRIPTOR_HANDLE GetBaseViewDescriptor() const;
D3D12_GPU_DESCRIPTOR_HANDLE GetBaseSamplerDescriptor() const;

private:
bool Populate(ShaderVisibleDescriptorAllocator* allocator,
uint32_t descriptorCount,
D3D12_DESCRIPTOR_HEAP_TYPE heapType,
const CPUDescriptorHeapAllocation& stagingAllocation,
GPUDescriptorHeapAllocation* allocation);
void SetSamplerAllocationEntry(Ref<SamplerHeapCacheEntry> entry);

private:
~BindGroup() override;

GPUDescriptorHeapAllocation mGPUSamplerAllocation;
GPUDescriptorHeapAllocation mGPUViewAllocation;
Ref<SamplerHeapCacheEntry> mSamplerAllocationEntry;

CPUDescriptorHeapAllocation mCPUSamplerAllocation;
GPUDescriptorHeapAllocation mGPUViewAllocation;
CPUDescriptorHeapAllocation mCPUViewAllocation;
};
}} // namespace dawn_native::d3d12

@@ -17,6 +17,7 @@
#include "common/BitSetIterator.h"
#include "dawn_native/d3d12/BindGroupD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h"

namespace dawn_native { namespace d3d12 {

@@ -147,28 +148,25 @@ namespace dawn_native { namespace d3d12 {
viewSizeIncrement = mViewAllocator->GetSizeIncrement();
}

uint32_t samplerSizeIncrement = 0;
CPUDescriptorHeapAllocation samplerAllocation;
Ref<BindGroup> bindGroup = AcquireRef<BindGroup>(
mBindGroupAllocator.Allocate(device, descriptor, viewSizeIncrement, viewAllocation));

if (GetSamplerDescriptorCount() > 0) {
DAWN_TRY_ASSIGN(samplerAllocation, mSamplerAllocator->AllocateCPUDescriptors());
samplerSizeIncrement = mSamplerAllocator->GetSizeIncrement();
Ref<SamplerHeapCacheEntry> samplerHeapCacheEntry;
DAWN_TRY_ASSIGN(samplerHeapCacheEntry, device->GetSamplerHeapCache()->GetOrCreate(
bindGroup.Get(), mSamplerAllocator));
bindGroup->SetSamplerAllocationEntry(std::move(samplerHeapCacheEntry));
}

return mBindGroupAllocator.Allocate(device, descriptor, viewSizeIncrement, viewAllocation,
samplerSizeIncrement, samplerAllocation);
return bindGroup.Detach();
}

void BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup,
CPUDescriptorHeapAllocation* viewAllocation,
CPUDescriptorHeapAllocation* samplerAllocation) {
CPUDescriptorHeapAllocation* viewAllocation) {
if (viewAllocation->IsValid()) {
mViewAllocator->Deallocate(viewAllocation);
}

if (samplerAllocation->IsValid()) {
mSamplerAllocator->Deallocate(samplerAllocation);
}

mBindGroupAllocator.Deallocate(bindGroup);
}

@@ -25,6 +25,7 @@ namespace dawn_native { namespace d3d12 {
class BindGroup;
class CPUDescriptorHeapAllocation;
class Device;
class SamplerHeapCacheEntry;
class StagingDescriptorAllocator;

class BindGroupLayout final : public BindGroupLayoutBase {

@@ -33,9 +34,7 @@ namespace dawn_native { namespace d3d12 {

ResultOrError<BindGroup*> AllocateBindGroup(Device* device,
const BindGroupDescriptor* descriptor);
void DeallocateBindGroup(BindGroup* bindGroup,
CPUDescriptorHeapAllocation* viewAllocation,
CPUDescriptorHeapAllocation* samplerAllocation);
void DeallocateBindGroup(BindGroup* bindGroup, CPUDescriptorHeapAllocation* viewAllocation);

enum DescriptorType {
CBV,

@@ -30,6 +30,7 @@
#include "dawn_native/d3d12/RenderPassBuilderD3D12.h"
#include "dawn_native/d3d12/RenderPipelineD3D12.h"
#include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/TextureCopySplitter.h"

@@ -95,6 +96,7 @@ namespace dawn_native { namespace d3d12 {
public:
BindGroupStateTracker(Device* device)
: BindGroupAndStorageBarrierTrackerBase(),
mDevice(device),
mViewAllocator(device->GetViewShaderVisibleDescriptorAllocator()),
mSamplerAllocator(device->GetSamplerShaderVisibleDescriptorAllocator()) {
}

@@ -117,7 +119,7 @@ namespace dawn_native { namespace d3d12 {
for (uint32_t index : IterateBitSet(mDirtyBindGroups)) {
BindGroup* group = ToBackend(mBindGroups[index]);
didCreateBindGroupViews = group->PopulateViews(mViewAllocator);
didCreateBindGroupSamplers = group->PopulateSamplers(mSamplerAllocator);
didCreateBindGroupSamplers = group->PopulateSamplers(mDevice, mSamplerAllocator);
if (!didCreateBindGroupViews && !didCreateBindGroupSamplers) {
break;
}

@@ -143,7 +145,8 @@ namespace dawn_native { namespace d3d12 {
for (uint32_t index : IterateBitSet(mBindGroupLayoutsMask)) {
BindGroup* group = ToBackend(mBindGroups[index]);
didCreateBindGroupViews = group->PopulateViews(mViewAllocator);
didCreateBindGroupSamplers = group->PopulateSamplers(mSamplerAllocator);
didCreateBindGroupSamplers =
group->PopulateSamplers(mDevice, mSamplerAllocator);
ASSERT(didCreateBindGroupViews);
ASSERT(didCreateBindGroupSamplers);
}

@@ -310,6 +313,8 @@ namespace dawn_native { namespace d3d12 {
}
}

Device* mDevice;

bool mInCompute = false;

ShaderVisibleDescriptorAllocator* mViewAllocator;

@@ -33,6 +33,7 @@
#include "dawn_native/d3d12/ResidencyManagerD3D12.h"
#include "dawn_native/d3d12/ResourceAllocatorManagerD3D12.h"
#include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"
#include "dawn_native/d3d12/ShaderModuleD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/StagingBufferD3D12.h"

@@ -109,6 +110,8 @@ namespace dawn_native { namespace d3d12 {
mDepthStencilViewAllocator = std::make_unique<StagingDescriptorAllocator>(
this, 1, kAttachmentDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);

mSamplerHeapCache = std::make_unique<SamplerHeapCache>(this);

mMapRequestTracker = std::make_unique<MapRequestTracker>(this);
mResidencyManager = std::make_unique<ResidencyManager>(this);
mResourceAllocatorManager = std::make_unique<ResourceAllocatorManager>(this);

@@ -503,4 +506,8 @@ namespace dawn_native { namespace d3d12 {
return mDepthStencilViewAllocator.get();
}

SamplerHeapCache* Device::GetSamplerHeapCache() {
return mSamplerHeapCache.get();
}

}} // namespace dawn_native::d3d12

@@ -35,6 +35,7 @@ namespace dawn_native { namespace d3d12 {
class PlatformFunctions;
class ResidencyManager;
class ResourceAllocatorManager;
class SamplerHeapCache;
class ShaderVisibleDescriptorAllocator;
class StagingDescriptorAllocator;

@@ -107,6 +108,8 @@ namespace dawn_native { namespace d3d12 {
StagingDescriptorAllocator* GetSamplerStagingDescriptorAllocator(
uint32_t descriptorCount) const;

SamplerHeapCache* GetSamplerHeapCache();

StagingDescriptorAllocator* GetRenderTargetViewAllocator() const;

StagingDescriptorAllocator* GetDepthStencilViewAllocator() const;

@@ -194,6 +197,10 @@ namespace dawn_native { namespace d3d12 {
std::unique_ptr<ShaderVisibleDescriptorAllocator> mViewShaderVisibleDescriptorAllocator;

std::unique_ptr<ShaderVisibleDescriptorAllocator> mSamplerShaderVisibleDescriptorAllocator;

// Sampler cache needs to be destroyed before the CPU sampler allocator to ensure the final
// release is called.
std::unique_ptr<SamplerHeapCache> mSamplerHeapCache;
};

}} // namespace dawn_native::d3d12

@@ -0,0 +1,167 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dawn_native/d3d12/SamplerHeapCacheD3D12.h"

#include "common/Assert.h"
#include "common/HashUtils.h"
#include "dawn_native/d3d12/BindGroupD3D12.h"
#include "dawn_native/d3d12/BindGroupLayoutD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/Forward.h"
#include "dawn_native/d3d12/SamplerD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h"

namespace dawn_native { namespace d3d12 {

SamplerHeapCacheEntry::SamplerHeapCacheEntry(std::vector<Sampler*> samplers)
: mSamplers(std::move(samplers)) {
}

SamplerHeapCacheEntry::SamplerHeapCacheEntry(SamplerHeapCache* cache,
StagingDescriptorAllocator* allocator,
std::vector<Sampler*> samplers,
CPUDescriptorHeapAllocation allocation)
: mCPUAllocation(std::move(allocation)),
mSamplers(std::move(samplers)),
mAllocator(allocator),
mCache(cache) {
ASSERT(mCache != nullptr);
ASSERT(mCPUAllocation.IsValid());
ASSERT(!mSamplers.empty());
}

std::vector<Sampler*>&& SamplerHeapCacheEntry::AcquireSamplers() {
return std::move(mSamplers);
}

SamplerHeapCacheEntry::~SamplerHeapCacheEntry() {
// If this is a blueprint then the CPU allocation cannot exist and has no entry to remove.
if (mCPUAllocation.IsValid()) {
mCache->RemoveCacheEntry(this);
mAllocator->Deallocate(&mCPUAllocation);
}

ASSERT(!mCPUAllocation.IsValid());
}

bool SamplerHeapCacheEntry::Populate(Device* device,
ShaderVisibleDescriptorAllocator* allocator) {
if (allocator->IsAllocationStillValid(mGPUAllocation)) {
return true;
}

ASSERT(!mSamplers.empty());

// Attempt to allocate descriptors for the currently bound shader-visible heaps.
// If either failed, return early to re-allocate and switch the heaps.
const uint32_t descriptorCount = mSamplers.size();
D3D12_CPU_DESCRIPTOR_HANDLE baseCPUDescriptor;
if (!allocator->AllocateGPUDescriptors(descriptorCount, device->GetPendingCommandSerial(),
&baseCPUDescriptor, &mGPUAllocation)) {
return false;
}

// CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing
// simple copies per bindgroup, a single non-simple copy could be issued.
// TODO(dawn:155): Consider doing this optimization.
device->GetD3D12Device()->CopyDescriptorsSimple(descriptorCount, baseCPUDescriptor,
mCPUAllocation.GetBaseDescriptor(),
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

return true;
}

D3D12_GPU_DESCRIPTOR_HANDLE SamplerHeapCacheEntry::GetBaseDescriptor() const {
return mGPUAllocation.GetBaseDescriptor();
}

ResultOrError<Ref<SamplerHeapCacheEntry>> SamplerHeapCache::GetOrCreate(
const BindGroup* group,
StagingDescriptorAllocator* samplerAllocator) {
const BindGroupLayout* bgl = ToBackend(group->GetLayout());

// If a previously created bindgroup used the same samplers, the backing sampler heap
// allocation can be reused. The packed list of samplers acts as the key to lookup the
// allocation in a cache.
// TODO(dawn:155): Avoid re-allocating the vector each lookup.
std::vector<Sampler*> samplers;
samplers.reserve(bgl->GetSamplerDescriptorCount());

for (BindingIndex bindingIndex = bgl->GetDynamicBufferCount();
bindingIndex < bgl->GetBindingCount(); ++bindingIndex) {
const BindingInfo& bindingInfo = bgl->GetBindingInfo(bindingIndex);
if (bindingInfo.type == wgpu::BindingType::Sampler ||
bindingInfo.type == wgpu::BindingType::ComparisonSampler) {
samplers.push_back(ToBackend(group->GetBindingAsSampler(bindingIndex)));
}
}

// Check the cache if there exists a sampler heap allocation that corresponds to the
// samplers.
SamplerHeapCacheEntry blueprint(std::move(samplers));
auto iter = mCache.find(&blueprint);
if (iter != mCache.end()) {
return Ref<SamplerHeapCacheEntry>(*iter);
}

// Steal the sampler vector back from the blueprint to avoid creating a new copy for the
// real entry below.
samplers = std::move(blueprint.AcquireSamplers());

CPUDescriptorHeapAllocation allocation;
DAWN_TRY_ASSIGN(allocation, samplerAllocator->AllocateCPUDescriptors());

const uint32_t samplerSizeIncrement = samplerAllocator->GetSizeIncrement();
ID3D12Device* d3d12Device = mDevice->GetD3D12Device();

for (uint32_t i = 0; i < samplers.size(); ++i) {
const auto& samplerDesc = samplers[i]->GetSamplerDescriptor();
d3d12Device->CreateSampler(&samplerDesc,
allocation.OffsetFrom(samplerSizeIncrement, i));
}

Ref<SamplerHeapCacheEntry> entry = AcquireRef(new SamplerHeapCacheEntry(
this, samplerAllocator, std::move(samplers), std::move(allocation)));
mCache.insert(entry.Get());
return std::move(entry);
}

SamplerHeapCache::SamplerHeapCache(Device* device) : mDevice(device) {
}

SamplerHeapCache::~SamplerHeapCache() {
ASSERT(mCache.empty());
}

void SamplerHeapCache::RemoveCacheEntry(SamplerHeapCacheEntry* entry) {
ASSERT(entry->GetRefCountForTesting() == 0);
size_t removedCount = mCache.erase(entry);
ASSERT(removedCount == 1);
}

size_t SamplerHeapCacheEntry::HashFunc::operator()(const SamplerHeapCacheEntry* entry) const {
size_t hash = 0;
for (const Sampler* sampler : entry->mSamplers) {
HashCombine(&hash, sampler);
}
return hash;
}

bool SamplerHeapCacheEntry::EqualityFunc::operator()(const SamplerHeapCacheEntry* a,
const SamplerHeapCacheEntry* b) const {
return a->mSamplers == b->mSamplers;
}
}} // namespace dawn_native::d3d12

@@ -0,0 +1,108 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DAWNNATIVE_D3D12_SAMPLERHEAPCACHE_H_
#define DAWNNATIVE_D3D12_SAMPLERHEAPCACHE_H_

#include "common/RefCounted.h"
#include "dawn_native/BindingInfo.h"
#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h"
#include "dawn_native/d3d12/GPUDescriptorHeapAllocationD3D12.h"

#include <unordered_set>
#include <vector>

// |SamplerHeapCacheEntry| maintains a cache of sampler descriptor heap allocations.
// Each entry represents one or more sampler descriptors that co-exist in a CPU and
// GPU descriptor heap. The CPU-side allocation is deallocated once the final reference
// has been released while the GPU-side allocation is deallocated when the GPU is finished.
//
// The BindGroupLayout hands out these entries upon constructing the bindgroup. If the entry is not
// invalid, it will allocate and initialize so it may be reused by another bindgroup.
//
// The cache is primary needed for the GPU sampler heap, which is much smaller than the view heap
// and switches incur expensive pipeline flushes.
namespace dawn_native { namespace d3d12 {

class BindGroup;
class Device;
class Sampler;
class SamplerHeapCache;
class StagingDescriptorAllocator;
class ShaderVisibleDescriptorAllocator;

// Wraps sampler descriptor heap allocations in a cache.
class SamplerHeapCacheEntry : public RefCounted {
public:
SamplerHeapCacheEntry() = default;
SamplerHeapCacheEntry(std::vector<Sampler*> samplers);
SamplerHeapCacheEntry(SamplerHeapCache* cache,
StagingDescriptorAllocator* allocator,
std::vector<Sampler*> samplers,
CPUDescriptorHeapAllocation allocation);
~SamplerHeapCacheEntry() override;

D3D12_GPU_DESCRIPTOR_HANDLE GetBaseDescriptor() const;

std::vector<Sampler*>&& AcquireSamplers();

bool Populate(Device* device, ShaderVisibleDescriptorAllocator* allocator);

// Functors necessary for the unordered_map<SamplerHeapCacheEntry*>-based cache.
struct HashFunc {
size_t operator()(const SamplerHeapCacheEntry* entry) const;
};

struct EqualityFunc {
bool operator()(const SamplerHeapCacheEntry* a, const SamplerHeapCacheEntry* b) const;
};

private:
CPUDescriptorHeapAllocation mCPUAllocation;
GPUDescriptorHeapAllocation mGPUAllocation;

// Storing raw pointer because the sampler object will be already hashed
// by the device and will already be unique.
std::vector<Sampler*> mSamplers;

StagingDescriptorAllocator* mAllocator = nullptr;
SamplerHeapCache* mCache = nullptr;
};

// Cache descriptor heap allocations so that we don't create duplicate ones for every
// BindGroup.
class SamplerHeapCache {
public:
SamplerHeapCache(Device* device);
~SamplerHeapCache();

ResultOrError<Ref<SamplerHeapCacheEntry>> GetOrCreate(
const BindGroup* group,
StagingDescriptorAllocator* samplerAllocator);

void RemoveCacheEntry(SamplerHeapCacheEntry* entry);

private:
Device* mDevice;

using Cache = std::unordered_set<SamplerHeapCacheEntry*,
SamplerHeapCacheEntry::HashFunc,
SamplerHeapCacheEntry::EqualityFunc>;

Cache mCache;
};

}} // namespace dawn_native::d3d12

#endif // DAWNNATIVE_D3D12_SAMPLERHEAPCACHE_H_

@@ -115,12 +115,60 @@ class DummyStagingDescriptorAllocator {
StagingDescriptorAllocator mAllocator;
};

// Verify the shader visible sampler heap switch within a single submit.
TEST_P(D3D12DescriptorHeapTests, SwitchOverSamplerHeap) {
// Verify the shader visible view heaps switch over within a single submit.
TEST_P(D3D12DescriptorHeapTests, SwitchOverViewHeap) {
DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));

utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);

// Fill in a view heap with "view only" bindgroups (1x view per group) by creating a
// view bindgroup each draw. After HEAP_SIZE + 1 draws, the heaps must switch over.
renderPipelineDescriptor.vertexStage.module = mSimpleVSModule;
renderPipelineDescriptor.cFragmentStage.module = mSimpleFSModule;

wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&renderPipelineDescriptor);
utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize);

Device* d3dDevice = reinterpret_cast<Device*>(device.Get());
ShaderVisibleDescriptorAllocator* allocator =
d3dDevice->GetViewShaderVisibleDescriptorAllocator();
const uint64_t heapSize = allocator->GetShaderVisibleHeapSizeForTesting();

const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();

wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
{
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);

pass.SetPipeline(renderPipeline);

std::array<float, 4> redColor = {1, 0, 0, 1};
wgpu::Buffer uniformBuffer = utils::CreateBufferFromData(
device, &redColor, sizeof(redColor), wgpu::BufferUsage::Uniform);

for (uint32_t i = 0; i < heapSize + 1; ++i) {
pass.SetBindGroup(0, utils::MakeBindGroup(device, renderPipeline.GetBindGroupLayout(0),
{{0, uniformBuffer, 0, sizeof(redColor)}}));
pass.Draw(3);
}

pass.EndPass();
}

wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands);

EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + 1);
}

// Verify the shader visible sampler heaps does not switch over within a single submit.
TEST_P(D3D12DescriptorHeapTests, NoSwitchOverSamplerHeap) {
utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);

// Fill in a sampler heap with "sampler only" bindgroups (1x sampler per group) by creating a
// sampler bindgroup each draw. After HEAP_SIZE + 1 draws, the heaps must switch over.
// sampler bindgroup each draw. After HEAP_SIZE + 1 draws, the heaps WILL NOT switch over
// because the sampler heap allocations are de-duplicated.
renderPipelineDescriptor.vertexStage.module =
utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
#version 450

@@ -167,7 +215,7 @@ TEST_P(D3D12DescriptorHeapTests, SwitchOverSamplerHeap) {
wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands);

EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + 1);
EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial);
}

// Verify shader-visible heaps can be recycled for multiple submits.

@@ -727,13 +775,8 @@ TEST_P(D3D12DescriptorHeapTests, EncodeManyUBOAndSamplers) {
EXPECT_EQ(viewAllocator->GetShaderVisibleHeapSerialForTesting(),
viewHeapSerial + kNumOfViewHeaps);

const uint32_t numOfSamplerHeaps =
numOfEncodedBindGroups /
samplerAllocator->GetShaderVisibleHeapSizeForTesting(); // 1 sampler per group.

EXPECT_EQ(samplerAllocator->GetShaderVisiblePoolSizeForTesting(), numOfSamplerHeaps);
EXPECT_EQ(samplerAllocator->GetShaderVisibleHeapSerialForTesting(),
samplerHeapSerial + numOfSamplerHeaps);
EXPECT_EQ(samplerAllocator->GetShaderVisiblePoolSizeForTesting(), 0u);
EXPECT_EQ(samplerAllocator->GetShaderVisibleHeapSerialForTesting(), samplerHeapSerial);
}
}