D3D12: Bucket descriptor allocation by powers of two

WebGPU currently allows as many as 108 view descriptors per bind
group. This is too many to have one descriptor allocator per size,
so we need to bucket them by size.

Bug: dawn:443, dawn:488
Change-Id: I4fc8cf7cd0dc8292bb6a8488fd2ceb7575e1e5f7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24787
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Bryan Bernhart <bryan.bernhart@intel.com>
Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
Austin Eng 2020-07-17 01:11:16 +00:00 committed by Commit Bot service account
parent e8e089ad28
commit cb2938a1af
4 changed files with 86 additions and 17 deletions

View File

@ -31,6 +31,22 @@ uint32_t Log2(uint64_t value);
bool IsPowerOfTwo(uint64_t n); bool IsPowerOfTwo(uint64_t n);
uint64_t RoundUp(uint64_t n, uint64_t m); uint64_t RoundUp(uint64_t n, uint64_t m);
// Compile-time floor(log2(v)). Inputs 0 and 1 both produce 0.
constexpr uint32_t ConstexprLog2(uint64_t v) {
    uint32_t exponent = 0;
    // Every halving performed while the value is still above 1 adds one to the result.
    for (uint64_t remaining = v; remaining > 1; remaining /= 2) {
        ++exponent;
    }
    return exponent;
}
// Compile-time ceil(log2(v)). Inputs 0 and 1 both produce 0.
constexpr uint32_t ConstexprLog2Ceil(uint64_t v) {
    if (v <= 1) {
        return 0;
    }
    // For v >= 2, floor(log2(v - 1)) + 1 is the smallest exponent whose power of two
    // is at least v.
    return 1 + ConstexprLog2(v - 1);
}
// Runtime ceiling of log2 for 32-bit inputs. Inputs 0 and 1 both produce 0.
// Relies on Log2 (defined outside this excerpt) for the log of v - 1.
inline uint32_t Log2Ceil(uint32_t v) {
    if (v <= 1) {
        return 0;
    }
    return 1 + Log2(v - 1);
}
// Runtime ceiling of log2 for 64-bit inputs. Inputs 0 and 1 both produce 0.
// Relies on Log2 (defined outside this excerpt) for the log of v - 1.
inline uint32_t Log2Ceil(uint64_t v) {
    if (v <= 1) {
        return 0;
    }
    return 1 + Log2(v - 1);
}
uint64_t NextPowerOfTwo(uint64_t n); uint64_t NextPowerOfTwo(uint64_t n);
bool IsPtrAligned(const void* ptr, size_t alignment); bool IsPtrAligned(const void* ptr, size_t alignment);
void* AlignVoidPtr(void* ptr, size_t alignment); void* AlignVoidPtr(void* ptr, size_t alignment);

View File

@ -90,16 +90,15 @@ namespace dawn_native { namespace d3d12 {
mCommandAllocatorManager = std::make_unique<CommandAllocatorManager>(this); mCommandAllocatorManager = std::make_unique<CommandAllocatorManager>(this);
// Zero sized allocator is never requested and does not need to exist. // Zero sized allocator is never requested and does not need to exist.
for (uint32_t countIndex = 1; countIndex <= kMaxViewDescriptorsPerBindGroup; countIndex++) { for (uint32_t countIndex = 0; countIndex < kNumViewDescriptorAllocators; countIndex++) {
mViewAllocators[countIndex] = std::make_unique<StagingDescriptorAllocator>( mViewAllocators[countIndex + 1] = std::make_unique<StagingDescriptorAllocator>(
this, countIndex, kShaderVisibleDescriptorHeapSize, this, 1u << countIndex, kShaderVisibleDescriptorHeapSize,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
} }
for (uint32_t countIndex = 1; countIndex <= kMaxSamplerDescriptorsPerBindGroup; for (uint32_t countIndex = 0; countIndex < kNumSamplerDescriptorAllocators; countIndex++) {
countIndex++) { mSamplerAllocators[countIndex + 1] = std::make_unique<StagingDescriptorAllocator>(
mSamplerAllocators[countIndex] = std::make_unique<StagingDescriptorAllocator>( this, 1u << countIndex, kShaderVisibleDescriptorHeapSize,
this, countIndex, kShaderVisibleDescriptorHeapSize,
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
} }
@ -559,13 +558,17 @@ namespace dawn_native { namespace d3d12 {
StagingDescriptorAllocator* Device::GetViewStagingDescriptorAllocator( StagingDescriptorAllocator* Device::GetViewStagingDescriptorAllocator(
uint32_t descriptorCount) const { uint32_t descriptorCount) const {
ASSERT(descriptorCount <= kMaxViewDescriptorsPerBindGroup); ASSERT(descriptorCount <= kMaxViewDescriptorsPerBindGroup);
return mViewAllocators[descriptorCount].get(); // This is Log2 of the next power of two, plus 1.
uint32_t allocatorIndex = descriptorCount == 0 ? 0 : Log2Ceil(descriptorCount) + 1;
return mViewAllocators[allocatorIndex].get();
} }
StagingDescriptorAllocator* Device::GetSamplerStagingDescriptorAllocator( StagingDescriptorAllocator* Device::GetSamplerStagingDescriptorAllocator(
uint32_t descriptorCount) const { uint32_t descriptorCount) const {
ASSERT(descriptorCount <= kMaxSamplerDescriptorsPerBindGroup); ASSERT(descriptorCount <= kMaxSamplerDescriptorsPerBindGroup);
return mSamplerAllocators[descriptorCount].get(); // This is Log2 of the next power of two, plus 1.
uint32_t allocatorIndex = descriptorCount == 0 ? 0 : Log2Ceil(descriptorCount) + 1;
return mSamplerAllocators[allocatorIndex].get();
} }
StagingDescriptorAllocator* Device::GetRenderTargetViewAllocator() const { StagingDescriptorAllocator* Device::GetRenderTargetViewAllocator() const {

View File

@ -193,21 +193,24 @@ namespace dawn_native { namespace d3d12 {
std::unique_ptr<ResourceAllocatorManager> mResourceAllocatorManager; std::unique_ptr<ResourceAllocatorManager> mResourceAllocatorManager;
std::unique_ptr<ResidencyManager> mResidencyManager; std::unique_ptr<ResidencyManager> mResidencyManager;
// TODO(enga): Consider bucketing these if the count is too many.
static constexpr uint32_t kMaxSamplerDescriptorsPerBindGroup = static constexpr uint32_t kMaxSamplerDescriptorsPerBindGroup =
3 * kMaxSamplersPerShaderStage; 3 * kMaxSamplersPerShaderStage;
static constexpr uint32_t kMaxViewDescriptorsPerBindGroup = static constexpr uint32_t kMaxViewDescriptorsPerBindGroup =
kMaxBindingsPerPipelineLayout - kMaxSamplerDescriptorsPerBindGroup; kMaxBindingsPerPipelineLayout - kMaxSamplerDescriptorsPerBindGroup;
// Index corresponds to the descriptor count in the range [0, static constexpr uint32_t kNumSamplerDescriptorAllocators =
// kMaxSamplerDescriptorsPerBindGroup]. ConstexprLog2Ceil(kMaxSamplerDescriptorsPerBindGroup) + 1;
std::array<std::unique_ptr<StagingDescriptorAllocator>, static constexpr uint32_t kNumViewDescriptorAllocators =
kMaxSamplerDescriptorsPerBindGroup + 1> ConstexprLog2Ceil(kMaxViewDescriptorsPerBindGroup) + 1;
// Index is 0 for a descriptor count of 0, otherwise Log2Ceil(descriptorCount) + 1,
// where descriptorCount is in the range [0, kMaxViewDescriptorsPerBindGroup].
std::array<std::unique_ptr<StagingDescriptorAllocator>, kNumViewDescriptorAllocators + 1>
mViewAllocators; mViewAllocators;
// Index corresponds to the descriptor count in the range [0, // Index is 0 for a descriptor count of 0, otherwise Log2Ceil(descriptorCount) + 1, where descriptorCount is in
// kMaxViewDescriptorsPerBindGroup]. // the range [0, kMaxSamplerDescriptorsPerBindGroup].
std::array<std::unique_ptr<StagingDescriptorAllocator>, kMaxViewDescriptorsPerBindGroup + 1> std::array<std::unique_ptr<StagingDescriptorAllocator>, kNumSamplerDescriptorAllocators + 1>
mSamplerAllocators; mSamplerAllocators;
std::unique_ptr<StagingDescriptorAllocator> mRenderTargetViewAllocator; std::unique_ptr<StagingDescriptorAllocator> mRenderTargetViewAllocator;

View File

@ -37,14 +37,61 @@ TEST(Math, Log2) {
ASSERT_EQ(Log2(0xFFFFFFFFu), 31u); ASSERT_EQ(Log2(0xFFFFFFFFu), 31u);
ASSERT_EQ(Log2(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)), 63u); ASSERT_EQ(Log2(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)), 63u);
static_assert(ConstexprLog2(1u) == 0u, "");
static_assert(ConstexprLog2(0xFFFFFFFFu) == 31u, "");
static_assert(ConstexprLog2(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)) == 63u, "");
// Test boundary between two logs // Test boundary between two logs
ASSERT_EQ(Log2(0x80000000u), 31u); ASSERT_EQ(Log2(0x80000000u), 31u);
ASSERT_EQ(Log2(0x7FFFFFFFu), 30u); ASSERT_EQ(Log2(0x7FFFFFFFu), 30u);
ASSERT_EQ(Log2(static_cast<uint64_t>(0x8000000000000000)), 63u); ASSERT_EQ(Log2(static_cast<uint64_t>(0x8000000000000000)), 63u);
ASSERT_EQ(Log2(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)), 62u); ASSERT_EQ(Log2(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)), 62u);
static_assert(ConstexprLog2(0x80000000u) == 31u, "");
static_assert(ConstexprLog2(0x7FFFFFFFu) == 30u, "");
static_assert(ConstexprLog2(static_cast<uint64_t>(0x8000000000000000)) == 63u, "");
static_assert(ConstexprLog2(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)) == 62u, "");
ASSERT_EQ(Log2(16u), 4u); ASSERT_EQ(Log2(16u), 4u);
ASSERT_EQ(Log2(15u), 3u); ASSERT_EQ(Log2(15u), 3u);
static_assert(ConstexprLog2(16u) == 4u, "");
static_assert(ConstexprLog2(15u) == 3u, "");
}
// Tests for Log2Ceil
// Each runtime Log2Ceil expectation below is mirrored by a static_assert on
// ConstexprLog2Ceil with the same input and expected value.
TEST(Math, Log2Ceil) {
// Test extrema
// 1 maps to 0; the all-ones 32-bit and 64-bit values are expected to round up to 32 and 64.
ASSERT_EQ(Log2Ceil(1u), 0u);
ASSERT_EQ(Log2Ceil(0xFFFFFFFFu), 32u);
ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)), 64u);
static_assert(ConstexprLog2Ceil(1u) == 0u, "");
static_assert(ConstexprLog2Ceil(0xFFFFFFFFu) == 32u, "");
static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)) == 64u, "");
// Test boundary between two logs
// An exact power of two (2^31, 2^63) and the value just below it share a result;
// the value just above it is expected to give one more.
ASSERT_EQ(Log2Ceil(0x80000001u), 32u);
ASSERT_EQ(Log2Ceil(0x80000000u), 31u);
ASSERT_EQ(Log2Ceil(0x7FFFFFFFu), 31u);
ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0x8000000000000001)), 64u);
ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0x8000000000000000)), 63u);
ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)), 63u);
static_assert(ConstexprLog2Ceil(0x80000001u) == 32u, "");
static_assert(ConstexprLog2Ceil(0x80000000u) == 31u, "");
static_assert(ConstexprLog2Ceil(0x7FFFFFFFu) == 31u, "");
static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0x8000000000000001)) == 64u, "");
static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0x8000000000000000)) == 63u, "");
static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)) == 63u, "");
// Same boundary pattern around a small power of two (16).
ASSERT_EQ(Log2Ceil(17u), 5u);
ASSERT_EQ(Log2Ceil(16u), 4u);
ASSERT_EQ(Log2Ceil(15u), 4u);
static_assert(ConstexprLog2Ceil(17u) == 5u, "");
static_assert(ConstexprLog2Ceil(16u) == 4u, "");
static_assert(ConstexprLog2Ceil(15u) == 4u, "");
} }
// Tests for IsPowerOfTwo // Tests for IsPowerOfTwo