Reland "D3D12: Stage BindGroups on CPU descriptor heaps."

This reverts commit c7f454c241
and relands commit 2479860e4b.

> D3D12: Stage BindGroups on CPU descriptor heaps.
>
> Instead of directly populating GPU heaps, pre-encoded
> BindGroups are staged on CPU heaps then copied over
> to the GPU. Non-shader visible allocators are stored
> on the BGL, which hands out fixed-size chunks to
> simplify memory management. To enable memory re-use,
> CPU allocations are tied to the lifetime of BindGroup
> objects.

Reason for revert: We can reland this CL now that the CTS suppression merged.

Note: Adds validation to ensure binding size > 0.

Bug: dawn:155
Bug: dawn:375
Change-Id: I75b9773bbb7c70bcea803a7ad8b6480d21ea90f7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/18904
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
This commit is contained in:
Bryan Bernhart 2020-04-06 22:07:42 +00:00 committed by Commit Bot service account
parent c64242d4c2
commit cb859a2037
16 changed files with 694 additions and 82 deletions

View File

@ -291,6 +291,8 @@ source_set("libdawn_native_sources") {
"src/dawn_native/d3d12/BindGroupLayoutD3D12.h",
"src/dawn_native/d3d12/BufferD3D12.cpp",
"src/dawn_native/d3d12/BufferD3D12.h",
"src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.cpp",
"src/dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h",
"src/dawn_native/d3d12/CommandAllocatorManager.cpp",
"src/dawn_native/d3d12/CommandAllocatorManager.h",
"src/dawn_native/d3d12/CommandBufferD3D12.cpp",
@ -316,6 +318,8 @@ source_set("libdawn_native_sources") {
"src/dawn_native/d3d12/HeapD3D12.h",
"src/dawn_native/d3d12/NativeSwapChainImplD3D12.cpp",
"src/dawn_native/d3d12/NativeSwapChainImplD3D12.h",
"src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp",
"src/dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h",
"src/dawn_native/d3d12/PipelineLayoutD3D12.cpp",
"src/dawn_native/d3d12/PipelineLayoutD3D12.h",
"src/dawn_native/d3d12/PlatformFunctions.cpp",

View File

@ -19,6 +19,7 @@
#include <algorithm>
#include <cmath>
#include <limits>
#if defined(DAWN_COMPILER_MSVC)
# include <intrin.h>
@ -152,3 +153,10 @@ float SRGBToLinear(float srgb) {
return std::pow((srgb + 0.055f) / 1.055f, 2.4f);
}
}
// Rounds |n| up to the nearest multiple of |m|.
// Both arguments must be non-zero, and n + m must be representable in a uint64_t.
uint64_t RoundUp(uint64_t n, uint64_t m) {
    ASSERT(m > 0);
    ASSERT(n > 0);
    // Guarantees that the "n + (m - 1)" bias below cannot wrap around.
    ASSERT(m <= std::numeric_limits<uint64_t>::max() - n);

    // Number of whole multiples of |m| needed to cover |n|.
    const uint64_t multipleCount = (n + (m - 1)) / m;
    return multipleCount * m;
}

View File

@ -29,6 +29,7 @@ uint32_t ScanForward(uint32_t bits);
uint32_t Log2(uint32_t value);
uint32_t Log2(uint64_t value);
bool IsPowerOfTwo(uint64_t n);
uint64_t RoundUp(uint64_t n, uint64_t m);
uint64_t NextPowerOfTwo(uint64_t n);
bool IsPtrAligned(const void* ptr, size_t alignment);

View File

@ -43,6 +43,10 @@ namespace dawn_native {
return DAWN_VALIDATION_ERROR("Buffer binding size larger than the buffer");
}
if (bindingSize == 0) {
return DAWN_VALIDATION_ERROR("Buffer binding size cannot be zero.");
}
// Note that no overflow can happen because we already checked that
// bufferSize >= bindingSize
if (binding.offset > bufferSize - bindingSize) {

View File

@ -164,6 +164,8 @@ if (DAWN_ENABLE_D3D12)
"d3d12/BindGroupLayoutD3D12.h"
"d3d12/BufferD3D12.cpp"
"d3d12/BufferD3D12.h"
"d3d12/CPUDescriptorHeapAllocationD3D12.cpp"
"d3d12/CPUDescriptorHeapAllocationD3D12.h"
"d3d12/CommandAllocatorManager.cpp"
"d3d12/CommandAllocatorManager.h"
"d3d12/CommandBufferD3D12.cpp"
@ -189,6 +191,8 @@ if (DAWN_ENABLE_D3D12)
"d3d12/HeapD3D12.h"
"d3d12/NativeSwapChainImplD3D12.cpp"
"d3d12/NativeSwapChainImplD3D12.h"
"d3d12/NonShaderVisibleDescriptorAllocatorD3D12.cpp"
"d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h"
"d3d12/PipelineLayoutD3D12.cpp"
"d3d12/PipelineLayoutD3D12.h"
"d3d12/PlatformFunctions.cpp"

View File

@ -25,76 +25,37 @@
namespace dawn_native { namespace d3d12 {
// static
BindGroup* BindGroup::Create(Device* device, const BindGroupDescriptor* descriptor) {
ResultOrError<BindGroup*> BindGroup::Create(Device* device,
const BindGroupDescriptor* descriptor) {
return ToBackend(descriptor->layout)->AllocateBindGroup(device, descriptor);
}
BindGroup::BindGroup(Device* device, const BindGroupDescriptor* descriptor)
BindGroup::BindGroup(Device* device,
const BindGroupDescriptor* descriptor,
uint32_t viewSizeIncrement,
const CPUDescriptorHeapAllocation& viewAllocation,
uint32_t samplerSizeIncrement,
const CPUDescriptorHeapAllocation& samplerAllocation)
: BindGroupBase(this, device, descriptor) {
}
BindGroupLayout* bgl = ToBackend(GetLayout());
BindGroup::~BindGroup() {
ToBackend(GetLayout())->DeallocateBindGroup(this);
}
ResultOrError<bool> BindGroup::Populate(ShaderVisibleDescriptorAllocator* allocator) {
Device* device = ToBackend(GetDevice());
if (allocator->IsAllocationStillValid(mLastUsageSerial, mHeapSerial)) {
return true;
}
// Attempt to allocate descriptors for the currently bound shader-visible heaps.
// If either failed, return early to re-allocate and switch the heaps.
const BindGroupLayout* bgl = ToBackend(GetLayout());
const Serial pendingSerial = device->GetPendingCommandSerial();
const uint32_t cbvUavSrvDescriptorCount = bgl->GetCbvUavSrvDescriptorCount();
DescriptorHeapAllocation cbvSrvUavDescriptorHeapAllocation;
if (cbvUavSrvDescriptorCount > 0) {
DAWN_TRY_ASSIGN(
cbvSrvUavDescriptorHeapAllocation,
allocator->AllocateGPUDescriptors(cbvUavSrvDescriptorCount, pendingSerial,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));
if (cbvSrvUavDescriptorHeapAllocation.IsInvalid()) {
return false;
}
mBaseCbvSrvUavDescriptor = cbvSrvUavDescriptorHeapAllocation.GetGPUHandle(0);
}
const uint32_t samplerDescriptorCount = bgl->GetSamplerDescriptorCount();
DescriptorHeapAllocation samplerDescriptorHeapAllocation;
if (samplerDescriptorCount > 0) {
DAWN_TRY_ASSIGN(samplerDescriptorHeapAllocation,
allocator->AllocateGPUDescriptors(samplerDescriptorCount, pendingSerial,
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));
if (samplerDescriptorHeapAllocation.IsInvalid()) {
return false;
}
mBaseSamplerDescriptor = samplerDescriptorHeapAllocation.GetGPUHandle(0);
}
// Record both the device and heap serials to determine later if the allocations are still
// valid.
mLastUsageSerial = pendingSerial;
mHeapSerial = allocator->GetShaderVisibleHeapsSerial();
mCPUViewAllocation = viewAllocation;
mCPUSamplerAllocation = samplerAllocation;
const auto& bindingOffsets = bgl->GetBindingOffsets();
ID3D12Device* d3d12Device = device->GetD3D12Device().Get();
for (BindingIndex bindingIndex = 0; bindingIndex < bgl->GetBindingCount(); ++bindingIndex) {
// It's not necessary to create descriptors in the descriptor heap for dynamic resources.
// This is because they are created as root descriptors which are never heap allocated.
// Since dynamic buffers are packed in the front, we can skip over these bindings by
// starting from the dynamic buffer count.
for (BindingIndex bindingIndex = bgl->GetDynamicBufferCount();
bindingIndex < bgl->GetBindingCount(); ++bindingIndex) {
const BindingInfo& bindingInfo = bgl->GetBindingInfo(bindingIndex);
// It's not necessary to create descriptors in descriptor heap for dynamic
// resources. So skip allocating descriptors in descriptor heaps for dynamic
// buffers.
if (bindingInfo.hasDynamicOffset) {
continue;
}
// Increment size does not need to be stored and is only used to get a handle
// local to the allocation with OffsetFrom().
switch (bindingInfo.type) {
case wgpu::BindingType::UniformBuffer: {
BufferBinding binding = GetBindingAsBufferBinding(bindingIndex);
@ -106,8 +67,8 @@ namespace dawn_native { namespace d3d12 {
desc.BufferLocation = ToBackend(binding.buffer)->GetVA() + binding.offset;
d3d12Device->CreateConstantBufferView(
&desc, cbvSrvUavDescriptorHeapAllocation.GetCPUHandle(
bindingOffsets[bindingIndex]));
&desc,
viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex]));
break;
}
case wgpu::BindingType::StorageBuffer: {
@ -131,8 +92,7 @@ namespace dawn_native { namespace d3d12 {
d3d12Device->CreateUnorderedAccessView(
ToBackend(binding.buffer)->GetD3D12Resource().Get(), nullptr, &desc,
cbvSrvUavDescriptorHeapAllocation.GetCPUHandle(
bindingOffsets[bindingIndex]));
viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex]));
break;
}
case wgpu::BindingType::ReadonlyStorageBuffer: {
@ -152,8 +112,7 @@ namespace dawn_native { namespace d3d12 {
desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
d3d12Device->CreateShaderResourceView(
ToBackend(binding.buffer)->GetD3D12Resource().Get(), &desc,
cbvSrvUavDescriptorHeapAllocation.GetCPUHandle(
bindingOffsets[bindingIndex]));
viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex]));
break;
}
case wgpu::BindingType::SampledTexture: {
@ -161,16 +120,15 @@ namespace dawn_native { namespace d3d12 {
auto& srv = view->GetSRVDescriptor();
d3d12Device->CreateShaderResourceView(
ToBackend(view->GetTexture())->GetD3D12Resource(), &srv,
cbvSrvUavDescriptorHeapAllocation.GetCPUHandle(
bindingOffsets[bindingIndex]));
viewAllocation.OffsetFrom(viewSizeIncrement, bindingOffsets[bindingIndex]));
break;
}
case wgpu::BindingType::Sampler: {
auto* sampler = ToBackend(GetBindingAsSampler(bindingIndex));
auto& samplerDesc = sampler->GetSamplerDescriptor();
d3d12Device->CreateSampler(
&samplerDesc,
samplerDescriptorHeapAllocation.GetCPUHandle(bindingOffsets[bindingIndex]));
&samplerDesc, samplerAllocation.OffsetFrom(samplerSizeIncrement,
bindingOffsets[bindingIndex]));
break;
}
@ -183,12 +141,77 @@ namespace dawn_native { namespace d3d12 {
// TODO(shaobo.yan@intel.com): Implement dynamic buffer offset.
}
}
}
// Returns this bind group, together with its CPU descriptor allocations, to the layout
// that allocated it.
BindGroup::~BindGroup() {
    BindGroupLayout* layout = ToBackend(GetLayout());
    layout->DeallocateBindGroup(this, &mCPUViewAllocation, &mCPUSamplerAllocation);

    // Both staging allocations must have been released (and invalidated) by the layout.
    ASSERT(!mCPUViewAllocation.IsValid());
    ASSERT(!mCPUSamplerAllocation.IsValid());
}
// Copies the descriptors staged on the CPU heaps into the shader-visible heaps owned by
// |allocator|.
// Returns true when the bind group's descriptors are usable on the current shader-visible
// heaps, false when a shader-visible allocation could not be made, or an error if the
// allocator itself failed.
ResultOrError<bool> BindGroup::Populate(ShaderVisibleDescriptorAllocator* allocator) {
    Device* device = ToBackend(GetDevice());

    // Descriptors copied by an earlier call can be reused while the allocator still
    // reports them as valid.
    if (allocator->IsAllocationStillValid(mLastUsageSerial, mHeapSerial)) {
        return true;
    }

    // Attempt to allocate descriptors for the currently bound shader-visible heaps.
    // If either failed, return early to re-allocate and switch the heaps.
    const BindGroupLayout* layout = ToBackend(GetLayout());
    const Serial serial = device->GetPendingCommandSerial();
    ID3D12Device* d3d12Device = device->GetD3D12Device().Get();

    // CPU bindgroups are sparsely allocated across CPU heaps. Instead of doing
    // simple copies per bindgroup, a single non-simple copy could be issued.
    // TODO(dawn:155): Consider doing this optimization.
    const uint32_t viewCount = layout->GetCbvUavSrvDescriptorCount();
    if (viewCount > 0) {
        DescriptorHeapAllocation gpuViews;
        DAWN_TRY_ASSIGN(gpuViews,
                        allocator->AllocateGPUDescriptors(
                            viewCount, serial, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));
        if (gpuViews.IsInvalid()) {
            return false;
        }

        // Source is the start of the staged CPU allocation; destination is the start of
        // the freshly allocated shader-visible range.
        const auto stagedViews = mCPUViewAllocation.OffsetFrom(0, 0);
        d3d12Device->CopyDescriptorsSimple(viewCount, gpuViews.GetCPUHandle(0), stagedViews,
                                           D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

        mBaseViewDescriptor = gpuViews.GetGPUHandle(0);
    }

    const uint32_t samplerCount = layout->GetSamplerDescriptorCount();
    if (samplerCount > 0) {
        DescriptorHeapAllocation gpuSamplers;
        DAWN_TRY_ASSIGN(gpuSamplers,
                        allocator->AllocateGPUDescriptors(samplerCount, serial,
                                                          D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));
        if (gpuSamplers.IsInvalid()) {
            return false;
        }

        const auto stagedSamplers = mCPUSamplerAllocation.OffsetFrom(0, 0);
        d3d12Device->CopyDescriptorsSimple(samplerCount, gpuSamplers.GetCPUHandle(0),
                                           stagedSamplers, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

        mBaseSamplerDescriptor = gpuSamplers.GetGPUHandle(0);
    }

    // Record both the device and heap serials to determine later if the allocations are
    // still valid.
    mLastUsageSerial = serial;
    mHeapSerial = allocator->GetShaderVisibleHeapsSerial();

    return true;
}
D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseCbvUavSrvDescriptor() const {
return mBaseCbvSrvUavDescriptor;
return mBaseViewDescriptor;
}
D3D12_GPU_DESCRIPTOR_HANDLE BindGroup::GetBaseSamplerDescriptor() const {

View File

@ -18,7 +18,7 @@
#include "common/PlacementAllocated.h"
#include "common/Serial.h"
#include "dawn_native/BindGroup.h"
#include "dawn_native/d3d12/d3d12_platform.h"
#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h"
namespace dawn_native { namespace d3d12 {
@ -27,9 +27,15 @@ namespace dawn_native { namespace d3d12 {
class BindGroup final : public BindGroupBase, public PlacementAllocated {
public:
static BindGroup* Create(Device* device, const BindGroupDescriptor* descriptor);
static ResultOrError<BindGroup*> Create(Device* device,
const BindGroupDescriptor* descriptor);
BindGroup(Device* device, const BindGroupDescriptor* descriptor);
BindGroup(Device* device,
const BindGroupDescriptor* descriptor,
uint32_t viewSizeIncrement,
const CPUDescriptorHeapAllocation& viewAllocation,
uint32_t samplerSizeIncrement,
const CPUDescriptorHeapAllocation& samplerAllocation);
// Returns true if the BindGroup was successfully populated.
ResultOrError<bool> Populate(ShaderVisibleDescriptorAllocator* allocator);
@ -43,8 +49,11 @@ namespace dawn_native { namespace d3d12 {
Serial mLastUsageSerial = 0;
Serial mHeapSerial = 0;
D3D12_GPU_DESCRIPTOR_HANDLE mBaseCbvSrvUavDescriptor = {0};
D3D12_GPU_DESCRIPTOR_HANDLE mBaseViewDescriptor = {0};
D3D12_GPU_DESCRIPTOR_HANDLE mBaseSamplerDescriptor = {0};
CPUDescriptorHeapAllocation mCPUSamplerAllocation;
CPUDescriptorHeapAllocation mCPUViewAllocation;
};
}} // namespace dawn_native::d3d12

View File

@ -17,6 +17,7 @@
#include "common/BitSetIterator.h"
#include "dawn_native/d3d12/BindGroupD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h"
namespace dawn_native { namespace d3d12 {
namespace {
@ -41,6 +42,9 @@ namespace dawn_native { namespace d3d12 {
}
} // anonymous namespace
// TODO(dawn:155): Figure out this value.
static constexpr uint16_t kDescriptorHeapSize = 1024;
BindGroupLayout::BindGroupLayout(Device* device, const BindGroupLayoutDescriptor* descriptor)
: BindGroupLayoutBase(device, descriptor),
mDescriptorCounts{},
@ -128,14 +132,54 @@ namespace dawn_native { namespace d3d12 {
DescriptorType descriptorType = WGPUBindingTypeToDescriptorType(bindingInfo.type);
mBindingOffsets[bindingIndex] += descriptorOffsets[descriptorType];
}
const uint32_t viewDescriptorCount = GetCbvUavSrvDescriptorCount();
if (viewDescriptorCount > 0) {
mViewAllocator = std::make_unique<NonShaderVisibleDescriptorAllocator>(
device, viewDescriptorCount, kDescriptorHeapSize,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
BindGroup* BindGroupLayout::AllocateBindGroup(Device* device,
const uint32_t samplerDescriptorCount = GetSamplerDescriptorCount();
if (samplerDescriptorCount > 0) {
mSamplerAllocator = std::make_unique<NonShaderVisibleDescriptorAllocator>(
device, samplerDescriptorCount, kDescriptorHeapSize,
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
}
}
ResultOrError<BindGroup*> BindGroupLayout::AllocateBindGroup(
Device* device,
const BindGroupDescriptor* descriptor) {
return mBindGroupAllocator.Allocate(device, descriptor);
uint32_t viewSizeIncrement = 0;
CPUDescriptorHeapAllocation viewAllocation;
if (GetCbvUavSrvDescriptorCount() > 0) {
DAWN_TRY_ASSIGN(viewAllocation, mViewAllocator->AllocateCPUDescriptors());
viewSizeIncrement = mViewAllocator->GetSizeIncrement();
}
uint32_t samplerSizeIncrement = 0;
CPUDescriptorHeapAllocation samplerAllocation;
if (GetSamplerDescriptorCount() > 0) {
DAWN_TRY_ASSIGN(samplerAllocation, mSamplerAllocator->AllocateCPUDescriptors());
samplerSizeIncrement = mSamplerAllocator->GetSizeIncrement();
}
return mBindGroupAllocator.Allocate(device, descriptor, viewSizeIncrement, viewAllocation,
samplerSizeIncrement, samplerAllocation);
}
void BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup,
CPUDescriptorHeapAllocation* viewAllocation,
CPUDescriptorHeapAllocation* samplerAllocation) {
if (viewAllocation->IsValid()) {
mViewAllocator->Deallocate(viewAllocation);
}
if (samplerAllocation->IsValid()) {
mSamplerAllocator->Deallocate(samplerAllocation);
}
void BindGroupLayout::DeallocateBindGroup(BindGroup* bindGroup) {
mBindGroupAllocator.Deallocate(bindGroup);
}

View File

@ -24,13 +24,18 @@ namespace dawn_native { namespace d3d12 {
class BindGroup;
class Device;
class NonShaderVisibleDescriptorAllocator;
class CPUDescriptorHeapAllocation;
class BindGroupLayout final : public BindGroupLayoutBase {
public:
BindGroupLayout(Device* device, const BindGroupLayoutDescriptor* descriptor);
BindGroup* AllocateBindGroup(Device* device, const BindGroupDescriptor* descriptor);
void DeallocateBindGroup(BindGroup* bindGroup);
ResultOrError<BindGroup*> AllocateBindGroup(Device* device,
const BindGroupDescriptor* descriptor);
void DeallocateBindGroup(BindGroup* bindGroup,
CPUDescriptorHeapAllocation* viewAllocation,
CPUDescriptorHeapAllocation* samplerAllocation);
enum DescriptorType {
CBV,
@ -55,6 +60,10 @@ namespace dawn_native { namespace d3d12 {
D3D12_DESCRIPTOR_RANGE mRanges[DescriptorType::Count];
SlabAllocator<BindGroup> mBindGroupAllocator;
// TODO(dawn:155): Store and bucket allocators by size on the device.
std::unique_ptr<NonShaderVisibleDescriptorAllocator> mSamplerAllocator;
std::unique_ptr<NonShaderVisibleDescriptorAllocator> mViewAllocator;
};
}} // namespace dawn_native::d3d12

View File

@ -0,0 +1,48 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h"
#include "dawn_native/Error.h"
namespace dawn_native { namespace d3d12 {
CPUDescriptorHeapAllocation::CPUDescriptorHeapAllocation(
D3D12_CPU_DESCRIPTOR_HANDLE baseDescriptor,
uint32_t heapIndex)
: mBaseDescriptor(baseDescriptor), mHeapIndex(heapIndex) {
}
// Returns the handle located |offsetInDescriptorCount| descriptors past the base of this
// allocation, where each descriptor occupies |sizeIncrementInBytes| bytes.
// The allocation must be valid.
D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptorHeapAllocation::OffsetFrom(
    uint32_t sizeIncrementInBytes,
    uint32_t offsetInDescriptorCount) const {
    ASSERT(IsValid());
    const uint32_t offsetInBytes = sizeIncrementInBytes * offsetInDescriptorCount;
    return D3D12_CPU_DESCRIPTOR_HANDLE{mBaseDescriptor.ptr + offsetInBytes};
}
// Returns the index of the heap this allocation was made from, as supplied at
// construction.
uint32_t CPUDescriptorHeapAllocation::GetHeapIndex() const {
    // |mHeapIndex| is unsigned, so a ">= 0" check would be vacuous. A default-constructed
    // allocation holds -1 converted to uint32_t; compare against that sentinel so that
    // querying an allocation that was never assigned a heap is caught in debug builds.
    ASSERT(mHeapIndex != static_cast<uint32_t>(-1));
    return mHeapIndex;
}
// An allocation is valid while its base descriptor handle is non-null.
bool CPUDescriptorHeapAllocation::IsValid() const {
    const bool hasNullBase = (mBaseDescriptor.ptr == 0);
    return !hasNullBase;
}
// Clears the base descriptor handle so that IsValid() reports false afterwards.
// The stored heap index is left as-is.
void CPUDescriptorHeapAllocation::Invalidate() {
    mBaseDescriptor.ptr = 0;
}
}} // namespace dawn_native::d3d12

View File

@ -0,0 +1,45 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DAWNNATIVE_D3D12_CPUDESCRIPTORHEAPALLOCATION_H_
#define DAWNNATIVE_D3D12_CPUDESCRIPTORHEAPALLOCATION_H_
#include <cstdint>
#include "dawn_native/d3d12/d3d12_platform.h"
namespace dawn_native { namespace d3d12 {
// Wrapper for a handle into a CPU-only descriptor heap.
// Stores the handle of the first descriptor of the allocation plus the index of the heap
// the allocation came from. The wrapper does not own the heap.
class CPUDescriptorHeapAllocation {
public:
// Creates an invalid allocation (null base descriptor).
CPUDescriptorHeapAllocation() = default;
// Creates an allocation starting at |baseDescriptor| that belongs to heap |heapIndex|.
CPUDescriptorHeapAllocation(D3D12_CPU_DESCRIPTOR_HANDLE baseDescriptor, uint32_t heapIndex);
// Returns the base handle advanced by sizeIncrementInBytes * offsetInDescriptorCount
// bytes. Asserts that the allocation is valid.
D3D12_CPU_DESCRIPTOR_HANDLE OffsetFrom(uint32_t sizeIncrementInBytes,
uint32_t offsetInDescriptorCount) const;
// Returns the heap index supplied at construction.
uint32_t GetHeapIndex() const;
// True while the base descriptor handle is non-null.
bool IsValid() const;
// Resets the base descriptor handle to null; the heap index is not modified.
void Invalidate();
private:
// Handle of the first descriptor in the allocation; a zero |ptr| marks it invalid.
D3D12_CPU_DESCRIPTOR_HANDLE mBaseDescriptor = {0};
// The member is unsigned, so the -1 default wraps to the largest uint32_t value.
uint32_t mHeapIndex = -1;
};
}} // namespace dawn_native::d3d12
#endif // DAWNNATIVE_D3D12_CPUDESCRIPTORHEAPALLOCATION_H_

View File

@ -0,0 +1,137 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/Math.h"
#include "dawn_native/d3d12/D3D12Error.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h"
namespace dawn_native { namespace d3d12 {
// Creates an allocator that hands out fixed-size blocks of |descriptorCount| descriptors
// from CPU descriptor heaps of type |heapType|. |heapSize| is the requested number of
// descriptors per heap; it is rounded up to a multiple of |descriptorCount| so that a heap
// always holds a whole number of blocks. No heap is created here.
NonShaderVisibleDescriptorAllocator::NonShaderVisibleDescriptorAllocator(
Device* device,
uint32_t descriptorCount,
uint32_t heapSize,
D3D12_DESCRIPTOR_HEAP_TYPE heapType)
: mDevice(device),
// Size in bytes of a single descriptor of |heapType|, queried from the D3D12 device.
mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
// Reads |mSizeIncrement|, so it relies on that member being declared (and therefore
// initialized) before |mBlockSize| in the class definition.
mBlockSize(descriptorCount * mSizeIncrement),
// RoundUp() returns uint64_t; the result is narrowed to the uint32_t member.
mHeapSize(RoundUp(heapSize, descriptorCount)),
mHeapType(heapType) {
// Only the two heap types used for bind group staging are accepted.
ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
// A heap must be able to hold at least one block.
ASSERT(descriptorCount <= heapSize);
}
// Checks (in assert-enabled builds) that every block has been returned: each pooled heap
// must have a full free-list and every heap must be listed as available.
NonShaderVisibleDescriptorAllocator::~NonShaderVisibleDescriptorAllocator() {
    const Index blocksPerHeap = GetFreeBlockIndicesSize();
    for (auto& pooledHeap : mPool) {
        ASSERT(pooledHeap.freeBlockIndices.size() == blocksPerHeap);
    }
    ASSERT(mAvailableHeaps.size() == mPool.size());
}
// Takes one free block from the most recently listed available heap, creating a new heap
// first when none is available. Returns the allocation for that block, or an error if a
// new heap could not be created.
ResultOrError<CPUDescriptorHeapAllocation>
NonShaderVisibleDescriptorAllocator::AllocateCPUDescriptors() {
    if (mAvailableHeaps.empty()) {
        DAWN_TRY(AllocateCPUHeap());
    }

    ASSERT(!mAvailableHeaps.empty());

    const uint32_t heapIndex = mAvailableHeaps.back();
    std::vector<Index>& freeBlocks = mPool[heapIndex].freeBlockIndices;

    ASSERT(!freeBlocks.empty());

    // Pop the next free block off the heap's free-list.
    const Index blockIndex = freeBlocks.back();
    freeBlocks.pop_back();

    // A heap with no free blocks left is no longer available.
    if (freeBlocks.empty()) {
        mAvailableHeaps.pop_back();
    }

    // The block starts |blockIndex| blocks past the beginning of the heap.
    D3D12_CPU_DESCRIPTOR_HANDLE baseCPUDescriptor =
        mPool[heapIndex].heap->GetCPUDescriptorHandleForHeapStart();
    baseCPUDescriptor.ptr += blockIndex * mBlockSize;

    return CPUDescriptorHeapAllocation{baseCPUDescriptor, heapIndex};
}
// Creates a new non-shader-visible descriptor heap, appends it to the pool with every
// block marked free, and lists it as available. Returns an error if the D3D12 heap
// creation fails.
MaybeError NonShaderVisibleDescriptorAllocator::AllocateCPUHeap() {
    D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
    heapDescriptor.Type = mHeapType;
    heapDescriptor.NumDescriptors = mHeapSize;
    // FLAG_NONE: the heap is not shader visible.
    heapDescriptor.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    heapDescriptor.NodeMask = 0;

    ComPtr<ID3D12DescriptorHeap> d3d12Heap;
    DAWN_TRY(CheckHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
                              &heapDescriptor, IID_PPV_ARGS(&d3d12Heap)),
                          "ID3D12Device::CreateDescriptorHeap"));

    NonShaderVisibleBuffer pooledHeap;
    pooledHeap.heap = std::move(d3d12Heap);

    // Every block of a brand new heap starts out free.
    const Index blocksPerHeap = GetFreeBlockIndicesSize();
    pooledHeap.freeBlockIndices.reserve(blocksPerHeap);
    for (Index freeBlock = 0; freeBlock < blocksPerHeap; freeBlock++) {
        pooledHeap.freeBlockIndices.push_back(freeBlock);
    }

    // The new heap's index is the pool size before it is appended.
    mAvailableHeaps.push_back(mPool.size());
    mPool.emplace_back(std::move(pooledHeap));

    return {};
}
// Returns the block referenced by |allocation| to the free-list of its heap and
// invalidates |allocation|.
void NonShaderVisibleDescriptorAllocator::Deallocate(CPUDescriptorHeapAllocation* allocation) {
    const uint32_t heapIndex = allocation->GetHeapIndex();

    ASSERT(heapIndex < mPool.size());

    NonShaderVisibleBuffer& owningHeap = mPool[heapIndex];

    // Insert the deallocated block back into the free-list. Order does not matter. However,
    // having blocks be non-contiguous could slow down future allocations due to poor cache
    // locality.
    // TODO(dawn:155): Consider more optimization.

    // A heap whose free-list was empty is about to regain a block, so list it as
    // available again.
    if (owningHeap.freeBlockIndices.empty()) {
        mAvailableHeaps.emplace_back(heapIndex);
    }

    // Recover the block index from the distance between the allocation's base descriptor
    // and the start of the heap.
    const D3D12_CPU_DESCRIPTOR_HANDLE heapStart =
        owningHeap.heap->GetCPUDescriptorHandleForHeapStart();
    const D3D12_CPU_DESCRIPTOR_HANDLE blockStart = allocation->OffsetFrom(0, 0);
    const Index blockIndex = (blockStart.ptr - heapStart.ptr) / mBlockSize;

    owningHeap.freeBlockIndices.emplace_back(blockIndex);

    // Invalidate the handle in case the developer accidentally uses it again.
    allocation->Invalidate();
}
// Returns the size in bytes of one descriptor for this allocator's heap type, as queried
// from the D3D12 device at construction.
uint32_t NonShaderVisibleDescriptorAllocator::GetSizeIncrement() const {
return mSizeIncrement;
}
// Returns the number of blocks a single heap holds, i.e. the length of a completely free
// free-list: the heap size in bytes divided by the block size in bytes.
NonShaderVisibleDescriptorAllocator::Index
NonShaderVisibleDescriptorAllocator::GetFreeBlockIndicesSize() const {
    const uint32_t heapSizeInBytes = mHeapSize * mSizeIncrement;
    return heapSizeInBytes / mBlockSize;
}
}} // namespace dawn_native::d3d12

View File

@ -0,0 +1,78 @@
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DAWNNATIVE_D3D12_NONSHADERVISIBLEDESCRIPTORALLOCATOR_H_
#define DAWNNATIVE_D3D12_NONSHADERVISIBLEDESCRIPTORALLOCATOR_H_
#include "dawn_native/Error.h"
#include "dawn_native/d3d12/CPUDescriptorHeapAllocationD3D12.h"
#include <vector>
// |NonShaderVisibleDescriptorAllocator| allocates a fixed-size block of descriptors from a CPU
// descriptor heap pool.
// Internally, it manages a list of heaps using a fixed-size block allocator. The fixed-size
// block allocator is backed by a list of free blocks (free-list). The heap is in one of two
// states: AVAILABLE or not. To allocate, the next free block is removed from the free-list
// and the corresponding heap offset is returned. The AVAILABLE heap always has room for
// at least one free block. If no AVAILABLE heap exists, a new heap is created and inserted
// back into the pool to be immediately used. To deallocate, the block corresponding to the
// offset is inserted back into the free-list.
namespace dawn_native { namespace d3d12 {
class Device;
// Fixed-size block allocator over a growing pool of CPU (non-shader-visible) descriptor
// heaps. Every allocation is one block of |descriptorCount| descriptors.
class NonShaderVisibleDescriptorAllocator {
public:
// NOTE(review): the defaulted constructor leaves |mDevice| and the scalar members below
// uninitialized (they have no default member initializers), and the destructor reads
// them. Confirm no caller default-constructs an allocator.
NonShaderVisibleDescriptorAllocator() = default;
// |descriptorCount| is the number of descriptors per block, |heapSize| the requested
// number of descriptors per heap, and |heapType| the D3D12 heap type to create.
NonShaderVisibleDescriptorAllocator(Device* device,
uint32_t descriptorCount,
uint32_t heapSize,
D3D12_DESCRIPTOR_HEAP_TYPE heapType);
// Asserts that every block has been deallocated.
~NonShaderVisibleDescriptorAllocator();
// Returns one free block, creating a new heap when no heap has a free block left.
ResultOrError<CPUDescriptorHeapAllocation> AllocateCPUDescriptors();
// Puts the block of |allocation| back on its heap's free-list and invalidates it.
void Deallocate(CPUDescriptorHeapAllocation* allocation);
// Size of a single descriptor, in bytes.
uint32_t GetSizeIncrement() const;
private:
// Type of a block index within a heap; 16 bits bounds the number of blocks per heap.
using Index = uint16_t;
// A pooled heap together with the indices of its currently free blocks.
struct NonShaderVisibleBuffer {
ComPtr<ID3D12DescriptorHeap> heap;
std::vector<Index> freeBlockIndices;
};
// Creates a heap, appends it to |mPool| and marks it available.
MaybeError AllocateCPUHeap();
// Number of blocks per heap (length of a completely free free-list).
Index GetFreeBlockIndicesSize() const;
std::vector<uint32_t> mAvailableHeaps; // Indices into the pool.
std::vector<NonShaderVisibleBuffer> mPool;
Device* mDevice;
uint32_t mSizeIncrement; // Size of the descriptor (in bytes).
uint32_t mBlockSize; // Size of the block of descriptors (in bytes).
uint32_t mHeapSize; // Size of the heap (in number of descriptors).
D3D12_DESCRIPTOR_HEAP_TYPE mHeapType;
};
}} // namespace dawn_native::d3d12
#endif // DAWNNATIVE_D3D12_NONSHADERVISIBLEDESCRIPTORALLOCATOR_H_

View File

@ -82,7 +82,7 @@ TEST(Math, AlignPtr) {
ASSERT_GE(aligned - unaligned, 0);
ASSERT_LT(static_cast<size_t>(aligned - unaligned), kTestAlignment);
ASSERT_EQ(reinterpret_cast<uintptr_t>(aligned) & (kTestAlignment -1), 0u);
ASSERT_EQ(reinterpret_cast<uintptr_t>(aligned) & (kTestAlignment - 1), 0u);
}
}
@ -191,3 +191,21 @@ TEST(Math, SRGBToLinear) {
ASSERT_FLOAT_EQ(SRGBToLinear(0.5f), 0.21404114f);
}
// Tests for RoundUp
// RoundUp(n, m) is expected to return the smallest multiple of m that is >= n.
TEST(Math, RoundUp) {
// n is already a multiple of m (or smaller than m): even values.
ASSERT_EQ(RoundUp(2, 2), 2u);
ASSERT_EQ(RoundUp(2, 4), 4u);
ASSERT_EQ(RoundUp(6, 2), 6u);
ASSERT_EQ(RoundUp(8, 4), 8u);
ASSERT_EQ(RoundUp(12, 6), 12u);
// Odd values, including cases where n is not a multiple of m.
ASSERT_EQ(RoundUp(3, 3), 3u);
ASSERT_EQ(RoundUp(3, 5), 5u);
ASSERT_EQ(RoundUp(5, 3), 6u);
ASSERT_EQ(RoundUp(9, 5), 10u);
// Test extrema
// n + m equals the maximum uint64_t here, the largest sum RoundUp's overflow assert
// accepts.
ASSERT_EQ(RoundUp(0x7FFFFFFFFFFFFFFFull, 0x8000000000000000ull), 0x8000000000000000ull);
// Smallest accepted arguments.
ASSERT_EQ(RoundUp(1, 1), 1u);
}

View File

@ -240,7 +240,7 @@ TEST_F(BindGroupValidationTest, BufferBindingType) {
binding.textureView = nullptr;
binding.buffer = nullptr;
binding.offset = 0;
binding.size = 0;
binding.size = 1024;
wgpu::BindGroupDescriptor descriptor;
descriptor.layout = layout;
@ -421,7 +421,9 @@ TEST_F(BindGroupValidationTest, BufferBindingOOB) {
// Success case, touching the end of the buffer works
utils::MakeBindGroup(device, layout, {{0, buffer, 3*256, 256}});
utils::MakeBindGroup(device, layout, {{0, buffer, 1024, 0}});
// Error case, zero size is invalid.
ASSERT_DEVICE_ERROR(utils::MakeBindGroup(device, layout, {{0, buffer, 1024, 0}}));
// Success case, touching the full buffer works
utils::MakeBindGroup(device, layout, {{0, buffer, 0, 1024}});

View File

@ -15,7 +15,9 @@
#include "tests/DawnTest.h"
#include "dawn_native/Toggles.h"
#include "dawn_native/d3d12/BindGroupLayoutD3D12.h"
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h"
#include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include "utils/ComboRenderPipelineDescriptor.h"
#include "utils/WGPUHelpers.h"
@ -93,6 +95,31 @@ class D3D12DescriptorHeapTests : public DawnTest {
wgpu::ShaderModule mSimpleFSModule;
};
class DummyNonShaderVisibleDescriptorAllocator {
public:
DummyNonShaderVisibleDescriptorAllocator(Device* device,
uint32_t descriptorCount,
uint32_t allocationsPerHeap)
: mAllocator(device,
descriptorCount,
allocationsPerHeap * descriptorCount,
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) {
}
CPUDescriptorHeapAllocation AllocateCPUDescriptors() {
dawn_native::ResultOrError<CPUDescriptorHeapAllocation> result =
mAllocator.AllocateCPUDescriptors();
return (result.IsSuccess()) ? result.AcquireSuccess() : CPUDescriptorHeapAllocation{};
}
void Deallocate(CPUDescriptorHeapAllocation& allocation) {
mAllocator.Deallocate(&allocation);
}
private:
NonShaderVisibleDescriptorAllocator mAllocator;
};
// Verify the shader visible heaps switch over within a single submit.
TEST_P(D3D12DescriptorHeapTests, SwitchOverHeaps) {
utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);
@ -688,6 +715,157 @@ TEST_P(D3D12DescriptorHeapTests, EncodeManyUBOAndSamplers) {
}
}
// Verify a single allocate/deallocate.
// One non-shader visible heap will be created.
TEST_P(D3D12DescriptorHeapTests, Single) {
// Each allocation is a block of 4 descriptors; a heap holds 3 such blocks.
constexpr uint32_t kDescriptorCount = 4;
constexpr uint32_t kAllocationsPerHeap = 3;
DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
kAllocationsPerHeap);
CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
// The first allocation comes from the first heap of the pool and has a non-null base.
EXPECT_EQ(allocation.GetHeapIndex(), 0u);
EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
// Deallocating invalidates the caller's allocation object.
allocator.Deallocate(allocation);
EXPECT_FALSE(allocation.IsValid());
}
// Verify allocating many times causes the pool to increase in size.
// Creates |kNumOfHeaps| non-shader visible heaps.
TEST_P(D3D12DescriptorHeapTests, Sequential) {
// Each allocation is a block of 4 descriptors; a heap holds 3 such blocks.
constexpr uint32_t kDescriptorCount = 4;
constexpr uint32_t kAllocationsPerHeap = 3;
DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
kAllocationsPerHeap);
// Allocate |kNumOfHeaps| worth.
constexpr uint32_t kNumOfHeaps = 2;
// Distinct heap indices seen across all allocations.
std::set<uint32_t> allocatedHeaps;
std::vector<CPUDescriptorHeapAllocation> allocations;
for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
// A heap is filled completely before the next one is created, so the heap index
// advances once every |kAllocationsPerHeap| allocations.
EXPECT_EQ(allocation.GetHeapIndex(), i / kAllocationsPerHeap);
EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
allocations.push_back(allocation);
allocatedHeaps.insert(allocation.GetHeapIndex());
}
EXPECT_EQ(allocatedHeaps.size(), kNumOfHeaps);
// Deallocate all.
for (CPUDescriptorHeapAllocation& allocation : allocations) {
allocator.Deallocate(allocation);
EXPECT_FALSE(allocation.IsValid());
}
}
// Verify that after freeing every allocation, re-allocating the same number of
// allocations reuses the previously handed-out descriptor addresses.
// Creates and reuses |kNumofHeaps| non-shader visible heaps.
TEST_P(D3D12DescriptorHeapTests, ReuseFreedHeaps) {
    constexpr uint32_t kDescriptorCount = 4;
    constexpr uint32_t kAllocationsPerHeap = 25;
    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
                                                       kAllocationsPerHeap);

    constexpr uint32_t kNumofHeaps = 10;

    std::list<CPUDescriptorHeapAllocation> allocations;
    std::set<size_t> allocationPtrs;

    // Allocate |kNumofHeaps| heaps worth. Every base address must be unique.
    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumofHeaps; i++) {
        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
        allocations.push_back(allocation);
        EXPECT_TRUE(allocationPtrs.insert(allocation.OffsetFrom(0, 0).ptr).second);
    }

    // Deallocate all.
    for (CPUDescriptorHeapAllocation& allocation : allocations) {
        allocator.Deallocate(allocation);
        EXPECT_FALSE(allocation.IsValid());
    }

    allocations.clear();

    // Re-allocate all again. Each base address must be unique within this round and
    // must be one of the addresses handed out in the first round.
    std::set<size_t> reallocatedPtrs;
    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumofHeaps; i++) {
        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
        allocations.push_back(allocation);

        const size_t basePtr = allocation.OffsetFrom(0, 0).ptr;
        EXPECT_TRUE(reallocatedPtrs.insert(basePtr).second);
        // Use the set's own ordered lookup instead of a linear std::find scan.
        EXPECT_TRUE(allocationPtrs.count(basePtr) != 0);
    }

    // Deallocate all again.
    for (CPUDescriptorHeapAllocation& allocation : allocations) {
        allocator.Deallocate(allocation);
        EXPECT_FALSE(allocation.IsValid());
    }
}
// Verify interleaved rounds of allocation and partial deallocation.
TEST_P(D3D12DescriptorHeapTests, AllocateDeallocateMany) {
    constexpr uint32_t kDescriptorCount = 4;
    constexpr uint32_t kAllocationsPerHeap = 25;
    constexpr uint32_t kNumofHeaps = 2;
    // Each round requests |kNumofHeaps| heaps worth of allocations.
    constexpr uint32_t kAllocationsPerRound = kAllocationsPerHeap * kNumofHeaps;

    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
                                                       kAllocationsPerHeap);

    std::list<CPUDescriptorHeapAllocation> everyThird;
    std::list<CPUDescriptorHeapAllocation> everyFifth;
    std::list<CPUDescriptorHeapAllocation> retained;

    // Round 1: set aside every 3rd allocation, keep the rest.
    for (uint32_t n = 0; n < kAllocationsPerRound; ++n) {
        CPUDescriptorHeapAllocation fresh = allocator.AllocateCPUDescriptors();
        EXPECT_NE(fresh.OffsetFrom(0, 0).ptr, 0u);
        std::list<CPUDescriptorHeapAllocation>& bucket = (n % 3 == 0) ? everyThird : retained;
        bucket.push_back(fresh);
    }

    // Free the allocations that were set aside, draining the list as we go.
    while (!everyThird.empty()) {
        allocator.Deallocate(everyThird.front());
        everyThird.pop_front();
    }

    // Round 2: set aside every 5th allocation, keep the rest.
    for (uint32_t n = 0; n < kAllocationsPerRound; ++n) {
        CPUDescriptorHeapAllocation fresh = allocator.AllocateCPUDescriptors();
        EXPECT_NE(fresh.OffsetFrom(0, 0).ptr, 0u);
        std::list<CPUDescriptorHeapAllocation>& bucket = (n % 5 == 0) ? everyFifth : retained;
        bucket.push_back(fresh);
    }

    // Free the allocations that were set aside, draining the list as we go.
    while (!everyFifth.empty()) {
        allocator.Deallocate(everyFifth.front());
        everyFifth.pop_front();
    }

    // Round 3: keep everything.
    for (uint32_t n = 0; n < kAllocationsPerRound; ++n) {
        CPUDescriptorHeapAllocation fresh = allocator.AllocateCPUDescriptors();
        EXPECT_NE(fresh.OffsetFrom(0, 0).ptr, 0u);
        retained.push_back(fresh);
    }

    // Free whatever is still held; each allocation must be invalid afterwards.
    for (CPUDescriptorHeapAllocation& held : retained) {
        allocator.Deallocate(held);
        EXPECT_FALSE(held.IsValid());
    }
}
// Instantiate the D3D12DescriptorHeapTests suite for two D3D12 backend configurations:
// one with no extra arguments and one constructed with
// "use_d3d12_small_shader_visible_heap" (presumably a device toggle -- confirm).
DAWN_INSTANTIATE_TEST(D3D12DescriptorHeapTests,
D3D12Backend(),
D3D12Backend({"use_d3d12_small_shader_visible_heap"}));