D3D12: work around buffer size insufficient issue for copy

D3D12 requires more buffer storage than it should. If the buffer used for B2T/T2B copy is big enough according to WebGPU spec but it doesn't meet D3D12's requirement, we need to workaround it via split the copy into two copies, in order to make B2T/T2B copy being done correctly on D3D12. Bug: dawn:1289, dawn:1278 Change-Id: Id88c580dce00553845817677a2d356690ef4acfe Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/86860 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Yunchao He <yunchao.he@intel.com>
2025-07-15 01:26:06 +00:00 · 2022-04-20 16:18:05 +00:00 · 2022-04-20 16:18:05 +00:00 · 803f033392
commit 803f033392
parent b3483b68f7
7 changed files with 119 additions and 25 deletions
--- a/src/dawn/native/CommandValidation.cpp
+++ b/src/dawn/native/CommandValidation.cpp
@ -111,13 +111,6 @@ namespace dawn::native {
               static_cast<uint64_t>(maxStart);
    }

-    template <typename A, typename B>
-    DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) {
-        static_assert(std::is_same<A, uint32_t>::value, "'a' must be uint32_t");
-        static_assert(std::is_same<B, uint32_t>::value, "'b' must be uint32_t");
-        return uint64_t(a) * uint64_t(b);
-    }
-
    ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo,
                                                       const Extent3D& copySize,
                                                       uint32_t bytesPerRow,
--- a/src/dawn/native/CommandValidation.h
+++ b/src/dawn/native/CommandValidation.h
@ -38,6 +38,13 @@ namespace dawn::native {
                                   uint64_t bufferOffset,
                                   uint64_t size);

+    template <typename A, typename B>
+    DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) {
+        static_assert(std::is_same<A, uint32_t>::value, "'a' must be uint32_t");
+        static_assert(std::is_same<B, uint32_t>::value, "'b' must be uint32_t");
+        return uint64_t(a) * uint64_t(b);
+    }
+
    ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo,
                                                       const Extent3D& copySize,
                                                       uint32_t bytesPerRow,
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@ -259,6 +259,15 @@ namespace dawn::native {
              "Initialize workgroup memory with OpConstantNull on Vulkan when the Vulkan extension "
              "VK_KHR_zero_initialize_workgroup_memory is supported.",
              "https://crbug.com/dawn/1302"}},
+            {Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings,
+             {"d3d12_split_buffer_texture_copy_for_rows_per_image_paddings",
+              "D3D12 requires more buffer storage than it should when rowsPerImage is greater than "
+              "copyHeight, which means there are pure padding row(s) on each image. In this "
+              "situation, the buffer used for B2T/T2B copy might be big enough according to "
+              "WebGPU's spec but it doesn't meet D3D12's requirement, then we need to workaround "
+              "it via split the copy operation into two copies, in order to make B2T/T2B copy "
+              "being done correctly on D3D12.",
+              "https://crbug.com/dawn/1289"}},

            // Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
        }};
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@ -67,6 +67,7 @@ namespace dawn::native {
        RecordDetailedTimingInTraceEvents,
        DisableTimestampQueryConversion,
        VulkanUseZeroInitializeWorkgroupMemoryExtension,
+        D3D12SplitBufferTextureCopyForRowsPerImagePaddings,

        EnumCount,
        InvalidEnum = EnumCount,
--- a/src/dawn/native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/DeviceD3D12.cpp
@ -612,6 +612,11 @@ namespace dawn::native::d3d12 {
                    true);
            }
        }
+
+        // Currently this workaround is needed on any D3D12 backend for some particular situations.
+        // But we may need to limit it if D3D12 runtime fixes the bug on its new release. See
+        // https://crbug.com/dawn/1289 for more information.
+        SetToggle(Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings, true);
    }

    MaybeError Device::WaitForIdleForDestruction() {
--- a/src/dawn/native/d3d12/UtilsD3D12.cpp
+++ b/src/dawn/native/d3d12/UtilsD3D12.cpp
@ -19,6 +19,7 @@
 #include <utility>

 #include "dawn/common/Assert.h"
+#include "dawn/native/CommandValidation.h"
 #include "dawn/native/Format.h"
 #include "dawn/native/d3d12/BufferD3D12.h"
 #include "dawn/native/d3d12/CommandRecordingContext.h"
@ -27,6 +28,63 @@

 namespace dawn::native::d3d12 {

+    namespace {
+
+        uint64_t RequiredCopySizeByD3D12(const uint32_t bytesPerRow,
+                                         const uint32_t rowsPerImage,
+                                         const Extent3D& copySize,
+                                         const TexelBlockInfo& blockInfo) {
+            uint64_t bytesPerImage = Safe32x32(bytesPerRow, rowsPerImage);
+
+            // Required copy size for B2T/T2B copy on D3D12 is smaller than (but very close to)
+            // depth * bytesPerImage. The latter is already checked at ComputeRequiredBytesInCopy()
+            // in CommandValidation.cpp.
+            uint64_t requiredCopySizeByD3D12 = bytesPerImage * (copySize.depthOrArrayLayers - 1);
+
+            // When calculating the required copy size for B2T/T2B copy, D3D12 doesn't respect
+            // rowsPerImage paddings on the last image for 3D texture, but it does respect
+            // bytesPerRow paddings on the last row.
+            ASSERT(blockInfo.width == 1);
+            ASSERT(blockInfo.height == 1);
+            uint64_t lastRowBytes = Safe32x32(blockInfo.byteSize, copySize.width);
+            ASSERT(rowsPerImage > copySize.height);
+            uint64_t lastImageBytesByD3D12 =
+                Safe32x32(bytesPerRow, rowsPerImage - 1) + lastRowBytes;
+
+            requiredCopySizeByD3D12 += lastImageBytesByD3D12;
+            return requiredCopySizeByD3D12;
+        }
+
+        // This function is used to access whether we need a workaround for D3D12's wrong algorithm
+        // of calculating required buffer size for B2T/T2B copy. The workaround is needed only when
+        //   - The corresponding toggle is enabled.
+        //   - It is a 3D texture (so the format is uncompressed).
+        //   - There are multiple depth images to be copied (copySize.depthOrArrayLayers > 1).
+        //   - It has rowsPerImage paddings (rowsPerImage > copySize.height).
+        //   - The buffer size doesn't meet D3D12's requirement.
+        bool NeedBufferSizeWorkaroundForBufferTextureCopyOnD3D12(const BufferCopy& bufferCopy,
+                                                                 const TextureCopy& textureCopy,
+                                                                 const Extent3D& copySize) {
+            TextureBase* texture = textureCopy.texture.Get();
+            Device* device = ToBackend(texture->GetDevice());
+
+            if (!device->IsToggleEnabled(
+                    Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings) ||
+                texture->GetDimension() != wgpu::TextureDimension::e3D ||
+                copySize.depthOrArrayLayers <= 1 || bufferCopy.rowsPerImage <= copySize.height) {
+                return false;
+            }
+
+            const TexelBlockInfo& blockInfo =
+                texture->GetFormat().GetAspectInfo(textureCopy.aspect).block;
+            uint64_t requiredCopySizeByD3D12 = RequiredCopySizeByD3D12(
+                bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, copySize, blockInfo);
+            return bufferCopy.buffer->GetAllocatedSize() - bufferCopy.offset <
+                   requiredCopySizeByD3D12;
+        }
+
+    }  // anonymous namespace
+
    ResultOrError<std::wstring> ConvertStringToWstring(const char* str) {
        size_t len = strlen(str);
        if (len == 0) {
@ -285,8 +343,36 @@ namespace dawn::native::d3d12 {
                                 const BufferCopy& bufferCopy,
                                 const TextureCopy& textureCopy,
                                 const Extent3D& copySize) {
-        RecordBufferTextureCopyWithBufferHandle(direction, commandList,
-                                                ToBackend(bufferCopy.buffer)->GetD3D12Resource(),
+        ID3D12Resource* bufferResource = ToBackend(bufferCopy.buffer)->GetD3D12Resource();
+
+        if (NeedBufferSizeWorkaroundForBufferTextureCopyOnD3D12(bufferCopy, textureCopy,
+                                                                copySize)) {
+            // Split the copy into two copies if the size of bufferCopy.buffer doesn't meet D3D12's
+            // requirement and a workaround is needed:
+            //   - The first copy will copy all depth images but the last depth image,
+            //   - The second copy will copy the last depth image.
+            Extent3D extentForAllButTheLastImage = copySize;
+            extentForAllButTheLastImage.depthOrArrayLayers -= 1;
+            RecordBufferTextureCopyWithBufferHandle(
+                direction, commandList, bufferResource, bufferCopy.offset, bufferCopy.bytesPerRow,
+                bufferCopy.rowsPerImage, textureCopy, extentForAllButTheLastImage);
+
+            Extent3D extentForTheLastImage = copySize;
+            extentForTheLastImage.depthOrArrayLayers = 1;
+
+            TextureCopy textureCopyForTheLastImage = textureCopy;
+            textureCopyForTheLastImage.origin.z += copySize.depthOrArrayLayers - 1;
+
+            uint64_t copiedBytes = bufferCopy.bytesPerRow * bufferCopy.rowsPerImage *
+                                   (copySize.depthOrArrayLayers - 1);
+            RecordBufferTextureCopyWithBufferHandle(
+                direction, commandList, bufferResource, bufferCopy.offset + copiedBytes,
+                bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, textureCopyForTheLastImage,
+                extentForTheLastImage);
+            return;
+        }
+
+        RecordBufferTextureCopyWithBufferHandle(direction, commandList, bufferResource,
                                                bufferCopy.offset, bufferCopy.bytesPerRow,
                                                bufferCopy.rowsPerImage, textureCopy, copySize);
    }
--- a/src/dawn/tests/end2end/RequiredBufferSizeInCopyTests.cpp
+++ b/src/dawn/tests/end2end/RequiredBufferSizeInCopyTests.cpp
@ -178,10 +178,6 @@ TEST_P(RequiredBufferSizeInCopyTests, BufferSizeOnBoundary) {
                        kBytesPerRow * (rowsPerImage - 1) + kBytesPerBlock * copySize.width;
        DoTest(size, copySize, rowsPerImage);

-        // TODO(crbug.com/dawn/1278, 1288, 1289): Required buffer size for copy is wrong on D3D12.
-        bool mayNeedWorkaround = GetParam().mTextureDimension == wgpu::TextureDimension::e3D &&
-                                 extraRowsPerImage > 0 && copySize.depthOrArrayLayers > 1;
-        DAWN_SUPPRESS_TEST_IF(IsD3D12() && mayNeedWorkaround);
        size -= kBytesPerBlock;
        DoTest(size, copySize, rowsPerImage);
    }
@ -190,25 +186,22 @@ TEST_P(RequiredBufferSizeInCopyTests, BufferSizeOnBoundary) {
 // The buffer doesn't have storage for any paddings on the last image. WebGPU spec doesn't require
 // storage for these paddings, and the copy operation will never access to these paddings. So it
 // should work.
-TEST_P(RequiredBufferSizeInCopyTests, MininumBufferSize) {
+TEST_P(RequiredBufferSizeInCopyTests, MinimumBufferSize) {
    wgpu::Extent3D copySize = kCopySize;
    copySize.depthOrArrayLayers = GetParam().mCopyDepth;
    const uint64_t extraRowsPerImage = GetParam().mExtraRowsPerImage;
    const uint64_t rowsPerImage = extraRowsPerImage + copySize.height;

-    // TODO(crbug.com/dawn/1278, 1288, 1289): Required buffer size for copy is wrong on D3D12.
-    bool mayNeedWorkaround = GetParam().mTextureDimension == wgpu::TextureDimension::e3D &&
-                             extraRowsPerImage > 0 && copySize.depthOrArrayLayers > 1;
-    DAWN_SUPPRESS_TEST_IF(IsD3D12() && mayNeedWorkaround);
    uint64_t size =
        kOffset + utils::RequiredBytesInCopy(kBytesPerRow, rowsPerImage, copySize, kFormat);
    DoTest(size, copySize, rowsPerImage);
 }

-DAWN_INSTANTIATE_TEST_P(RequiredBufferSizeInCopyTests,
-                        {D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(),
-                         VulkanBackend()},
-                        {Type::T2BCopy, Type::B2TCopy},
-                        {wgpu::TextureDimension::e3D, wgpu::TextureDimension::e2D},
-                        {2u, 1u},
-                        {1u, 0u});
+DAWN_INSTANTIATE_TEST_P(
+    RequiredBufferSizeInCopyTests,
+    {D3D12Backend(), D3D12Backend({"d3d12_split_buffer_texture_copy_for_rows_per_image_paddings"}),
+     MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()},
+    {Type::T2BCopy, Type::B2TCopy},
+    {wgpu::TextureDimension::e3D, wgpu::TextureDimension::e2D},
+    {2u, 1u},
+    {1u, 0u});