From cb2938a1afd7ce82505e61e52df62a6e94832f84 Mon Sep 17 00:00:00 2001 From: Austin Eng Date: Fri, 17 Jul 2020 01:11:16 +0000 Subject: [PATCH] D3D12: Bucket descriptor allocation by powers of two WebGPU currently allows as many as 108 view descriptors per bind group. This is too many to have one descriptor allocator per size, so we need to bucket them by size. Bug: dawn:443, dawn:488 Change-Id: I4fc8cf7cd0dc8292bb6a8488fd2ceb7575e1e5f7 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24787 Reviewed-by: Austin Eng Reviewed-by: Bryan Bernhart Commit-Queue: Austin Eng --- src/common/Math.h | 16 +++++++++ src/dawn_native/d3d12/DeviceD3D12.cpp | 21 +++++++----- src/dawn_native/d3d12/DeviceD3D12.h | 19 ++++++----- src/tests/unittests/MathTests.cpp | 47 +++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 17 deletions(-) diff --git a/src/common/Math.h b/src/common/Math.h index db941f279a..c673785e26 100644 --- a/src/common/Math.h +++ b/src/common/Math.h @@ -31,6 +31,22 @@ uint32_t Log2(uint64_t value); bool IsPowerOfTwo(uint64_t n); uint64_t RoundUp(uint64_t n, uint64_t m); +constexpr uint32_t ConstexprLog2(uint64_t v) { + return v <= 1 ? 0 : 1 + ConstexprLog2(v / 2); +} + +constexpr uint32_t ConstexprLog2Ceil(uint64_t v) { + return v <= 1 ? 0 : ConstexprLog2(v - 1) + 1; +} + +inline uint32_t Log2Ceil(uint32_t v) { + return v <= 1 ? 0 : Log2(v - 1) + 1; +} + +inline uint32_t Log2Ceil(uint64_t v) { + return v <= 1 ? 
0 : Log2(v - 1) + 1; +} + uint64_t NextPowerOfTwo(uint64_t n); bool IsPtrAligned(const void* ptr, size_t alignment); void* AlignVoidPtr(void* ptr, size_t alignment); diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp index e3a4c3cf4f..bbca615dc4 100644 --- a/src/dawn_native/d3d12/DeviceD3D12.cpp +++ b/src/dawn_native/d3d12/DeviceD3D12.cpp @@ -90,16 +90,15 @@ namespace dawn_native { namespace d3d12 { mCommandAllocatorManager = std::make_unique(this); // Zero sized allocator is never requested and does not need to exist. - for (uint32_t countIndex = 1; countIndex <= kMaxViewDescriptorsPerBindGroup; countIndex++) { - mViewAllocators[countIndex] = std::make_unique<StagingDescriptorAllocator>( - this, countIndex, kShaderVisibleDescriptorHeapSize, + for (uint32_t countIndex = 0; countIndex < kNumViewDescriptorAllocators; countIndex++) { + mViewAllocators[countIndex + 1] = std::make_unique<StagingDescriptorAllocator>( + this, 1u << countIndex, kShaderVisibleDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - for (uint32_t countIndex = 1; countIndex <= kMaxSamplerDescriptorsPerBindGroup; - countIndex++) { - mSamplerAllocators[countIndex] = std::make_unique<StagingDescriptorAllocator>( - this, countIndex, kShaderVisibleDescriptorHeapSize, + for (uint32_t countIndex = 0; countIndex < kNumSamplerDescriptorAllocators; countIndex++) { + mSamplerAllocators[countIndex + 1] = std::make_unique<StagingDescriptorAllocator>( + this, 1u << countIndex, kShaderVisibleDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); } @@ -559,13 +558,17 @@ namespace dawn_native { namespace d3d12 { StagingDescriptorAllocator* Device::GetViewStagingDescriptorAllocator( uint32_t descriptorCount) const { ASSERT(descriptorCount <= kMaxViewDescriptorsPerBindGroup); - return mViewAllocators[descriptorCount].get(); + // This is Log2 of the next power of two, plus 1. + uint32_t allocatorIndex = descriptorCount == 0 ? 
0 : Log2Ceil(descriptorCount) + 1; + return mViewAllocators[allocatorIndex].get(); } StagingDescriptorAllocator* Device::GetSamplerStagingDescriptorAllocator( uint32_t descriptorCount) const { ASSERT(descriptorCount <= kMaxSamplerDescriptorsPerBindGroup); - return mSamplerAllocators[descriptorCount].get(); + // This is Log2 of the next power of two, plus 1. + uint32_t allocatorIndex = descriptorCount == 0 ? 0 : Log2Ceil(descriptorCount) + 1; + return mSamplerAllocators[allocatorIndex].get(); } StagingDescriptorAllocator* Device::GetRenderTargetViewAllocator() const { diff --git a/src/dawn_native/d3d12/DeviceD3D12.h b/src/dawn_native/d3d12/DeviceD3D12.h index efb9fd25d9..d4cb0819be 100644 --- a/src/dawn_native/d3d12/DeviceD3D12.h +++ b/src/dawn_native/d3d12/DeviceD3D12.h @@ -193,21 +193,24 @@ namespace dawn_native { namespace d3d12 { std::unique_ptr mResourceAllocatorManager; std::unique_ptr mResidencyManager; - // TODO(enga): Consider bucketing these if the count is too many. static constexpr uint32_t kMaxSamplerDescriptorsPerBindGroup = 3 * kMaxSamplersPerShaderStage; static constexpr uint32_t kMaxViewDescriptorsPerBindGroup = kMaxBindingsPerPipelineLayout - kMaxSamplerDescriptorsPerBindGroup; - // Index corresponds to the descriptor count in the range [0, - // kMaxSamplerDescriptorsPerBindGroup]. - std::array<std::unique_ptr<StagingDescriptorAllocator>, - kMaxSamplerDescriptorsPerBindGroup + 1> + static constexpr uint32_t kNumSamplerDescriptorAllocators = + ConstexprLog2Ceil(kMaxSamplerDescriptorsPerBindGroup) + 1; + static constexpr uint32_t kNumViewDescriptorAllocators = + ConstexprLog2Ceil(kMaxViewDescriptorsPerBindGroup) + 1; + + // Index is 0 when descriptorCount is 0 and Log2Ceil(descriptorCount) + 1 otherwise, + // where descriptorCount is in the range [0, kMaxViewDescriptorsPerBindGroup]. + std::array<std::unique_ptr<StagingDescriptorAllocator>, kNumViewDescriptorAllocators + 1> mViewAllocators; - // Index corresponds to the descriptor count in the range [0, - // kMaxViewDescriptorsPerBindGroup]. 
- std::array<std::unique_ptr<StagingDescriptorAllocator>, kMaxViewDescriptorsPerBindGroup + 1> + // Index is 0 when descriptorCount is 0 and Log2Ceil(descriptorCount) + 1 otherwise, + // where descriptorCount is in the range [0, kMaxSamplerDescriptorsPerBindGroup]. + std::array<std::unique_ptr<StagingDescriptorAllocator>, kNumSamplerDescriptorAllocators + 1> mSamplerAllocators; std::unique_ptr mRenderTargetViewAllocator; diff --git a/src/tests/unittests/MathTests.cpp b/src/tests/unittests/MathTests.cpp index a553f7b417..2294fe1aa0 100644 --- a/src/tests/unittests/MathTests.cpp +++ b/src/tests/unittests/MathTests.cpp @@ -37,14 +37,61 @@ TEST(Math, Log2) { ASSERT_EQ(Log2(0xFFFFFFFFu), 31u); ASSERT_EQ(Log2(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)), 63u); + static_assert(ConstexprLog2(1u) == 0u, ""); + static_assert(ConstexprLog2(0xFFFFFFFFu) == 31u, ""); + static_assert(ConstexprLog2(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)) == 63u, ""); + // Test boundary between two logs ASSERT_EQ(Log2(0x80000000u), 31u); ASSERT_EQ(Log2(0x7FFFFFFFu), 30u); ASSERT_EQ(Log2(static_cast<uint64_t>(0x8000000000000000)), 63u); ASSERT_EQ(Log2(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)), 62u); + static_assert(ConstexprLog2(0x80000000u) == 31u, ""); + static_assert(ConstexprLog2(0x7FFFFFFFu) == 30u, ""); + static_assert(ConstexprLog2(static_cast<uint64_t>(0x8000000000000000)) == 63u, ""); + static_assert(ConstexprLog2(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)) == 62u, ""); + ASSERT_EQ(Log2(16u), 4u); ASSERT_EQ(Log2(15u), 3u); + + static_assert(ConstexprLog2(16u) == 4u, ""); + static_assert(ConstexprLog2(15u) == 3u, ""); +} + +// Tests for Log2Ceil +TEST(Math, Log2Ceil) { + // Test extrema + ASSERT_EQ(Log2Ceil(1u), 0u); + ASSERT_EQ(Log2Ceil(0xFFFFFFFFu), 32u); + ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)), 64u); + + static_assert(ConstexprLog2Ceil(1u) == 0u, ""); + static_assert(ConstexprLog2Ceil(0xFFFFFFFFu) == 32u, ""); + static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF)) == 64u, ""); + + // Test boundary between two logs + ASSERT_EQ(Log2Ceil(0x80000001u), 32u); + ASSERT_EQ(Log2Ceil(0x80000000u), 31u); + ASSERT_EQ(Log2Ceil(0x7FFFFFFFu), 31u); + 
ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0x8000000000000001)), 64u); + ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0x8000000000000000)), 63u); + ASSERT_EQ(Log2Ceil(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)), 63u); + + static_assert(ConstexprLog2Ceil(0x80000001u) == 32u, ""); + static_assert(ConstexprLog2Ceil(0x80000000u) == 31u, ""); + static_assert(ConstexprLog2Ceil(0x7FFFFFFFu) == 31u, ""); + static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0x8000000000000001)) == 64u, ""); + static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0x8000000000000000)) == 63u, ""); + static_assert(ConstexprLog2Ceil(static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF)) == 63u, ""); + + ASSERT_EQ(Log2Ceil(17u), 5u); + ASSERT_EQ(Log2Ceil(16u), 4u); + ASSERT_EQ(Log2Ceil(15u), 4u); + + static_assert(ConstexprLog2Ceil(17u) == 5u, ""); + static_assert(ConstexprLog2Ceil(16u) == 4u, ""); + static_assert(ConstexprLog2Ceil(15u) == 4u, ""); } // Tests for IsPowerOfTwo