Implement new formula for requiredBytesInCopy

Changed upstream in:
- https://github.com/gpuweb/gpuweb/pull/1014
- https://github.com/gpuweb/gpuweb/pull/1130

Note that in some of the cases where width==0 || height==0 || depth==0,
this increases the number of linear data bytes required for a copy.
Since this is a corner case, no deprecation logic is added.

Removes a duplicated copy of this logic in TestUtils.cpp.

Bug: dawn:520
Change-Id: I3b3d079c6ef316df7d95ba5c349bf8de4646fa4d
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/30741
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
2025-12-13 15:16:16 +00:00 · 2020-10-23 21:21:33 +00:00
parent ca5aa235da
commit c9d0b492d5
10 changed files with 157 additions and 87 deletions
--- a/src/dawn_native/CommandValidation.cpp
+++ b/src/dawn_native/CommandValidation.cpp
@@ -381,47 +381,53 @@ namespace dawn_native {
               static_cast<uint64_t>(maxStart);
    }
    // Widening multiply: both 32-bit operands are promoted to 64 bits before the multiplication,
    // so the product ((2^32 - 1)^2 at most) always fits and can never wrap.
    // The function is a template guarded by static_asserts so that only arguments whose type is
    // exactly uint32_t are accepted; a wider argument is a compile error instead of being
    // silently narrowed.
    template <typename A, typename B>
    DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) {
        static_assert(std::is_same<A, uint32_t>::value, "'a' must be uint32_t");
        static_assert(std::is_same<B, uint32_t>::value, "'b' must be uint32_t");
        const uint64_t lhs = static_cast<uint64_t>(a);
        const uint64_t rhs = static_cast<uint64_t>(b);
        return lhs * rhs;
    }
    ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo,
                                                       const Extent3D& copySize,
                                                       uint32_t bytesPerRow,
                                                       uint32_t rowsPerImage) {
        ASSERT(copySize.width % blockInfo.width == 0);
        ASSERT(copySize.height % blockInfo.height == 0);
        uint32_t widthInBlocks = copySize.width / blockInfo.width;
        uint32_t heightInBlocks = copySize.height / blockInfo.height;
        uint64_t bytesInLastRow = Safe32x32(widthInBlocks, blockInfo.byteSize);
        uint64_t bytesPerImage = Safe32x32(bytesPerRow, rowsPerImage);
-        // Default value for rowsPerImage
+        if (copySize.depth == 0) {
        if (rowsPerImage == 0) {
            rowsPerImage = heightInBlocks;
        }
        ASSERT(rowsPerImage >= heightInBlocks);
        if (copySize.height > 1 || copySize.depth > 1) {
            ASSERT(bytesPerRow >= copySize.width / blockInfo.width * blockInfo.byteSize);
        }
        if (copySize.width == 0 || copySize.height == 0 || copySize.depth == 0) {
            return 0;
        }
-        ASSERT(copySize.height >= 1);
+        // Check for potential overflows for the rest of the computations. We have the following
-        ASSERT(copySize.depth >= 1);
+        // inequalities:
-
+        //
-        // bytesPerImage won't overflow since we're multiplying two uint32_t numbers
+        //   lastRowBytes <= bytesPerRow
-        uint64_t bytesPerImage = uint64_t(rowsPerImage) * bytesPerRow;
+        //   heightInBlocks <= rowsPerImage
-        // Provided that copySize.height > 1: bytesInLastSlice won't overflow since it's at most
+        //
-        // bytesPerImage. Otherwise the result is a multiplication of two uint32_t numbers.
+        // So:
-        uint64_t bytesInLastSlice =
+        //
-            uint64_t(bytesPerRow) * (heightInBlocks - 1) +
+        //   bytesInLastImage  = bytesPerRow * (heightInBlocks - 1) + bytesInLastRow
-            (uint64_t(copySize.width) / blockInfo.width * blockInfo.byteSize);
+        //                    <= bytesPerRow * heightInBlocks
-
+        //                    <= bytesPerRow * rowsPerImage
-        // This error cannot be thrown for copySize.depth = 1.
+        //                    <= bytesPerImage
-        // For copySize.depth > 1 we know that:
+        //
-        // requiredBytesInCopy >= (copySize.depth * bytesPerImage) / 2, so if
+        // This means that if the computation of depth * bytesPerImage doesn't overflow, none of the
-        // copySize.depth * bytesPerImage overflows uint64_t, then requiredBytesInCopy is definitely
+        // computations for requiredBytesInCopy will. (and it's not a very pessimizing check)
-        // too large to fit in the available data size.
+        if (bytesPerImage > std::numeric_limits<uint64_t>::max() / copySize.depth) {
-        if (std::numeric_limits<uint64_t>::max() / copySize.depth < bytesPerImage) {
+            return DAWN_VALIDATION_ERROR("requiredBytesInCopy is too large.");
            return DAWN_VALIDATION_ERROR("requiredBytesInCopy is too large");
        }
-        return bytesPerImage * (copySize.depth - 1) + bytesInLastSlice;
+
        uint64_t requiredBytesInCopy = bytesPerImage * (copySize.depth - 1);
        if (heightInBlocks > 0) {
            uint64_t bytesInLastImage = Safe32x32(bytesPerRow, heightInBlocks - 1) + bytesInLastRow;
            requiredBytesInCopy += bytesInLastImage;
        }
        return requiredBytesInCopy;
    }
    MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer,
@@ -436,7 +442,7 @@ namespace dawn_native {
        return {};
    }
-    MaybeError ValidateLinearTextureData(const TextureDataLayout& layout,
+    MaybeError ValidateLinearTextureData(TextureDataLayout layout,
                                         uint64_t byteSize,
                                         const TexelBlockInfo& blockInfo,
                                         const Extent3D& copyExtent) {
@@ -445,19 +451,32 @@ namespace dawn_native {
            return DAWN_VALIDATION_ERROR("Offset must be a multiple of the texel or block size");
        }
        ASSERT(copyExtent.width % blockInfo.width == 0);
        uint32_t widthInBlocks = copyExtent.width / blockInfo.width;
        ASSERT(copyExtent.height % blockInfo.height == 0);
        uint32_t heightInBlocks = copyExtent.height / blockInfo.height;
        // Default value for rowsPerImage
        if (layout.rowsPerImage == 0) {
            layout.rowsPerImage = heightInBlocks;
        }
        // Validation for other members in layout:
-        if ((copyExtent.height > 1 || copyExtent.depth > 1) &&
+        ASSERT(Safe32x32(widthInBlocks, blockInfo.byteSize) <=
-            layout.bytesPerRow < copyExtent.width / blockInfo.width * blockInfo.byteSize) {
+               std::numeric_limits<uint32_t>::max());
-            return DAWN_VALIDATION_ERROR(
+        uint32_t lastRowBytes = widthInBlocks * blockInfo.byteSize;
-                "bytesPerRow must not be less than the number of bytes per row");
+        if (lastRowBytes > layout.bytesPerRow) {
            if (copyExtent.height > 1 || copyExtent.depth > 1) {
                return DAWN_VALIDATION_ERROR("The byte size of a row must be <= bytesPerRow.");
            } else {
                // bytesPerRow is unused. Populate it with a valid value for later validation.
                layout.bytesPerRow = lastRowBytes;
            }
        }
        // TODO(tommek@google.com): to match the spec there should be another condition here
        // on rowsPerImage >= copyExtent.height if copyExtent.depth > 1.
        ASSERT(copyExtent.height % blockInfo.height == 0);
        uint32_t heightInBlocks = copyExtent.height / blockInfo.height;
        // Validation for the copy being in-bounds:
        if (layout.rowsPerImage != 0 && layout.rowsPerImage < heightInBlocks) {
            return DAWN_VALIDATION_ERROR(
@@ -476,7 +495,7 @@ namespace dawn_native {
            layout.offset <= byteSize && (requiredBytesInCopy <= (byteSize - layout.offset));
        if (!fitsInData) {
            return DAWN_VALIDATION_ERROR(
-                "Required size for texture data layout exceeds the given size");
+                "Required size for texture data layout exceeds the linear data size.");
        }
        return {};
--- a/src/dawn_native/CommandValidation.h
+++ b/src/dawn_native/CommandValidation.h
@@ -51,7 +51,7 @@ namespace dawn_native {
                                                       uint32_t bytesPerRow,
                                                       uint32_t rowsPerImage);
-    MaybeError ValidateLinearTextureData(const TextureDataLayout& layout,
+    MaybeError ValidateLinearTextureData(TextureDataLayout layout,
                                         uint64_t byteSize,
                                         const TexelBlockInfo& blockInfo,
                                         const Extent3D& copyExtent);
--- a/src/tests/DawnTest.cpp
+++ b/src/tests/DawnTest.cpp
@@ -27,6 +27,7 @@
 #include "utils/PlatformDebugLogger.h"
 #include "utils/SystemUtils.h"
 #include "utils/TerribleCommandBuffer.h"
 #include "utils/TestUtils.h"
 #include "utils/WGPUHelpers.h"
 #include <algorithm>
@@ -851,7 +852,10 @@ std::ostringstream& DawnTestBase::AddTextureExpectationImpl(const char* file,
        ASSERT(bytesPerRow == Align(bytesPerRow, kTextureBytesPerRowAlignment));
    }
-    uint32_t size = bytesPerRow * (height - 1) + width * dataSize;
+    uint32_t rowsPerImage = height;
    uint32_t depth = 1;
    uint32_t size =
        utils::RequiredBytesInCopy(bytesPerRow, rowsPerImage, width, height, depth, dataSize);
    // TODO(enga): We should have the map async alignment in Constants.h. Also, it should change to 8
    // for Float64Array.
--- a/src/tests/end2end/BufferZeroInitTests.cpp
+++ b/src/tests/end2end/BufferZeroInitTests.cpp
@@ -864,9 +864,9 @@ TEST_P(BufferZeroInitTest, CopyBufferToTexture) {
    const wgpu::TextureCopyView textureCopyView =
        utils::CreateTextureCopyView(texture, 0, {0, 0, 0});
-    const uint32_t requiredBufferSizeForCopy = utils::GetBytesInBufferTextureCopy(
+    const uint32_t rowsPerImage = kTextureSize.height;
-        kTextureFormat, kTextureSize.width, kTextureBytesPerRowAlignment, kTextureSize.width,
+    const uint32_t requiredBufferSizeForCopy = utils::RequiredBytesInCopy(
-        kTextureSize.depth);
+        kTextureBytesPerRowAlignment, rowsPerImage, kTextureSize, kTextureFormat);
    constexpr wgpu::BufferUsage kBufferUsage =
        wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
--- a/src/tests/end2end/CompressedTextureFormatTests.cpp
+++ b/src/tests/end2end/CompressedTextureFormatTests.cpp
@@ -52,18 +52,18 @@ class CompressedTextureBCFormatTest : public DawnTest {
    std::vector<uint8_t> UploadData(const CopyConfig& copyConfig) {
        uint32_t copyWidthInBlock = copyConfig.copyExtent3D.width / kBCBlockWidthInTexels;
        uint32_t copyHeightInBlock = copyConfig.copyExtent3D.height / kBCBlockHeightInTexels;
-        uint32_t rowPitchInBytes = 0;
+        uint32_t copyBytesPerRow = 0;
        if (copyConfig.bytesPerRowAlignment != 0) {
-            rowPitchInBytes = copyConfig.bytesPerRowAlignment;
+            copyBytesPerRow = copyConfig.bytesPerRowAlignment;
        } else {
-            rowPitchInBytes = copyWidthInBlock *
+            copyBytesPerRow = copyWidthInBlock *
                              utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format);
        }
        uint32_t copyRowsPerImage = copyConfig.rowsPerImage;
        if (copyRowsPerImage == 0) {
            copyRowsPerImage = copyHeightInBlock;
        }
-        uint32_t copyBytesPerImage = rowPitchInBytes * copyRowsPerImage;
+        uint32_t copyBytesPerImage = copyBytesPerRow * copyRowsPerImage;
        uint32_t uploadBufferSize =
            copyConfig.bufferOffset + copyBytesPerImage * copyConfig.copyExtent3D.depth;
@@ -75,7 +75,7 @@ class CompressedTextureBCFormatTest : public DawnTest {
            for (uint32_t h = 0; h < copyHeightInBlock; ++h) {
                for (uint32_t w = 0; w < copyWidthInBlock; ++w) {
                    uint32_t uploadBufferOffset = copyConfig.bufferOffset +
-                                                  copyBytesPerImage * layer + rowPitchInBytes * h +
+                                                  copyBytesPerImage * layer + copyBytesPerRow * h +
                                                  oneBlockCompressedTextureData.size() * w;
                    std::memcpy(&data[uploadBufferOffset], oneBlockCompressedTextureData.data(),
                                oneBlockCompressedTextureData.size() * sizeof(uint8_t));
--- a/src/tests/end2end/CopyTests.cpp
+++ b/src/tests/end2end/CopyTests.cpp
@@ -57,13 +57,14 @@ class CopyTests : public DawnTest {
    }
    static BufferSpec MinimumBufferSpec(uint32_t width,
-                                        uint32_t rowsPerImage,
+                                        uint32_t height,
                                        uint32_t arrayLayer = 1,
                                        bool testZeroRowsPerImage = true) {
        const uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(kTextureFormat, width);
-        const uint32_t totalBufferSize = utils::GetBytesInBufferTextureCopy(
+        const uint32_t rowsPerImage = height;
-            kTextureFormat, width, bytesPerRow, rowsPerImage, arrayLayer);
+        const uint32_t totalBufferSize = utils::RequiredBytesInCopy(
-        uint32_t appliedRowsPerImage = testZeroRowsPerImage ? 0 : rowsPerImage;
+            bytesPerRow, rowsPerImage, {width, height, arrayLayer}, kTextureFormat);
        uint32_t appliedRowsPerImage = testZeroRowsPerImage ? 0 : height;
        return {totalBufferSize, 0, bytesPerRow, appliedRowsPerImage};
    }
--- a/src/tests/end2end/TextureZeroInitTests.cpp
+++ b/src/tests/end2end/TextureZeroInitTests.cpp
@@ -145,10 +145,11 @@ TEST_P(TextureZeroInitTest, CopyMultipleTextureArrayLayersToBufferSource) {
    wgpu::Texture texture = device.CreateTexture(&descriptor);
    const uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(kColorFormat, kSize);
    const uint32_t rowsPerImage = kSize;
    wgpu::BufferDescriptor bufferDescriptor;
    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    bufferDescriptor.size =
+    bufferDescriptor.size = utils::RequiredBytesInCopy(bytesPerRow, rowsPerImage,
-        utils::GetBytesInBufferTextureCopy(kColorFormat, kSize, bytesPerRow, kSize, kArrayLayers);
+                                                       {kSize, kSize, kArrayLayers}, kColorFormat);
    wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor);
    const wgpu::BufferCopyView bufferCopyView =
--- a/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
+++ b/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
@@ -923,6 +923,34 @@ TEST_F(CopyCommandTest_T2B, Success) {
    }
 }
 // Edge cases around requiredBytesInCopy computation for empty copies
 // (copy extents in which at least one of width, height or depth is zero).
 //
 // The buffer sizes used below follow from:
 //   requiredBytesInCopy = bytesPerImage * (depth - 1)
 //                       + (heightInBlocks > 0 ? bytesPerRow * (heightInBlocks - 1) + bytesInLastRow
 //                                             : 0)
 // and requiredBytesInCopy = 0 when depth == 0.
 // NOTE(review): the arithmetic assumes the TestT2BCopy arguments following the buffer are
 // (offset = 0, bytesPerRow = 256, rowsPerImage = 4, copy extent), which gives
 // bytesPerImage = 256 * 4 = 1024 -- confirm against the TestT2BCopy signature.
 TEST_F(CopyCommandTest_T2B, Empty) {
    wgpu::Texture source =
        Create2DTexture(16, 16, 1, 2, wgpu::TextureFormat::RGBA8Unorm, wgpu::TextureUsage::CopySrc);
    // depth == 0: nothing is required, so a zero-sized buffer is accepted whatever the
    // width and height are.
    TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
                CreateBuffer(0, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 0, 0});
    TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
                CreateBuffer(0, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 0, 0});
    TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
                CreateBuffer(0, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 4, 0});
    // height == 0, depth == 2: one full image stride is still required even though the last
    // image contributes nothing: 1024 * (2 - 1) = 1024. One byte less must fail.
    TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
                CreateBuffer(1024, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 0, 2});
    TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0},
                CreateBuffer(1023, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 0, 2});
    // width == 0, height == 4, depth == 2: the last image still spans three full rows before its
    // (empty) last row: 1024 * (2 - 1) + 256 * (4 - 1) + 0 = 1792. One byte less must fail.
    TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
                CreateBuffer(1792, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 4, 2});
    TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0},
                CreateBuffer(1791, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 4, 2});
    // width == 0, height == 0, depth == 2: same requirement as the height == 0 case above,
    // 1024 * (2 - 1) = 1024. One byte less must fail.
    TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
                CreateBuffer(1024, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 0, 2});
    TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0},
                CreateBuffer(1023, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 0, 2});
 }
 // Test OOB conditions on the texture
 TEST_F(CopyCommandTest_T2B, OutOfBoundsOnTexture) {
    uint64_t bufferSize = BufferSizeForTextureCopy(4, 4, 1);
--- a/src/utils/TestUtils.cpp
+++ b/src/utils/TestUtils.cpp
@@ -25,27 +25,18 @@
 namespace utils {
    uint32_t GetMinimumBytesPerRow(wgpu::TextureFormat format, uint32_t width) {
-        const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
+        const uint32_t bytesPerBlock = utils::GetTexelBlockSizeInBytes(format);
-        return Align(bytesPerTexel * width, kTextureBytesPerRowAlignment);
+        return Align(bytesPerBlock * width, kTextureBytesPerRowAlignment);
    }
    // Computes the number of buffer bytes touched by a buffer<->texture copy of
    // `copyArrayLayerCount` images, each `rowsPerImage` rows tall with rows `bytesPerRow` apart.
    // Every image except the last occupies a full bytesPerRow * rowsPerImage stride; the last
    // image ends right after the `width` texels of its final row.
    // `rowsPerImage` must be non-zero (asserted). All arithmetic is done in uint32_t, so large
    // inputs -- or a copyArrayLayerCount of 0 -- wrap around rather than being reported.
    uint32_t GetBytesInBufferTextureCopy(wgpu::TextureFormat format,
                                         uint32_t width,
                                         uint32_t bytesPerRow,
                                         uint32_t rowsPerImage,
                                         uint32_t copyArrayLayerCount) {
        ASSERT(rowsPerImage > 0);

        const uint32_t texelSize = utils::GetTexelBlockSizeInBytes(format);
        const uint32_t imageStride = bytesPerRow * rowsPerImage;
        const uint32_t precedingImagesBytes = imageStride * (copyArrayLayerCount - 1);
        const uint32_t finalImageBytes = bytesPerRow * (rowsPerImage - 1) + texelSize * width;
        return precedingImagesBytes + finalImageBytes;
    }
    // TODO(jiawei.shao@intel.com): support compressed texture formats
    TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel(
        wgpu::TextureFormat format,
        wgpu::Extent3D textureSizeAtLevel0,
        uint32_t mipmapLevel,
        uint32_t rowsPerImage) {
        // TODO(jiawei.shao@intel.com): support compressed texture formats
        ASSERT(utils::GetTextureFormatBlockWidth(format) == 1);
        TextureDataCopyLayout layout;
        layout.mipSize = {textureSizeAtLevel0.width >> mipmapLevel,
@@ -56,9 +47,17 @@ namespace utils {
        uint32_t appliedRowsPerImage = rowsPerImage > 0 ? rowsPerImage : layout.mipSize.height;
        layout.bytesPerImage = layout.bytesPerRow * appliedRowsPerImage;
-        layout.byteLength =
+        // TODO(kainino@chromium.org): Remove this intermediate variable.
-            GetBytesInBufferTextureCopy(format, layout.mipSize.width, layout.bytesPerRow,
+        // It is currently needed because of an issue in the D3D12 copy splitter
-                                        appliedRowsPerImage, textureSizeAtLevel0.depth);
+        // (or maybe in D3D12 itself?) which requires there to be enough room in the
        // buffer for the last image to have a height of `rowsPerImage` instead of
        // the actual height.
        wgpu::Extent3D mipSizeWithHeightWorkaround = layout.mipSize;
        mipSizeWithHeightWorkaround.height =
            appliedRowsPerImage * utils::GetTextureFormatBlockHeight(format);
        layout.byteLength = RequiredBytesInCopy(layout.bytesPerRow, appliedRowsPerImage,
                                                mipSizeWithHeightWorkaround, format);
        const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
        layout.texelBlocksPerRow = layout.bytesPerRow / bytesPerTexel;
@@ -72,18 +71,35 @@ namespace utils {
                                 uint64_t rowsPerImage,
                                 wgpu::Extent3D copyExtent,
                                 wgpu::TextureFormat textureFormat) {
        if (copyExtent.width == 0 || copyExtent.height == 0 || copyExtent.depth == 0) {
            return 0;
        } else {
        uint32_t blockSize = utils::GetTexelBlockSizeInBytes(textureFormat);
        uint32_t blockWidth = utils::GetTextureFormatBlockWidth(textureFormat);
        uint32_t blockHeight = utils::GetTextureFormatBlockHeight(textureFormat);
        ASSERT(copyExtent.width % blockWidth == 0);
        uint32_t widthInBlocks = copyExtent.width / blockWidth;
        ASSERT(copyExtent.height % blockHeight == 0);
        uint32_t heightInBlocks = copyExtent.height / blockHeight;
        return RequiredBytesInCopy(bytesPerRow, rowsPerImage, widthInBlocks, heightInBlocks,
                                   copyExtent.depth, blockSize);
    }
    uint64_t RequiredBytesInCopy(uint64_t bytesPerRow,
                                 uint64_t rowsPerImage,
                                 uint64_t widthInBlocks,
                                 uint64_t heightInBlocks,
                                 uint64_t depth,
                                 uint64_t bytesPerBlock) {
        if (depth == 0) {
            return 0;
        }
        uint64_t bytesPerImage = bytesPerRow * rowsPerImage;
-            uint64_t bytesInLastSlice = bytesPerRow * (copyExtent.height / blockHeight - 1) +
+        uint64_t requiredBytesInCopy = bytesPerImage * (depth - 1);
-                                        (copyExtent.width / blockWidth * blockSize);
+        if (heightInBlocks != 0) {
-            return bytesPerImage * (copyExtent.depth - 1) + bytesInLastSlice;
+            uint64_t lastRowBytes = widthInBlocks * bytesPerBlock;
            uint64_t lastImageBytes = bytesPerRow * (heightInBlocks - 1) + lastRowBytes;
            requiredBytesInCopy += lastImageBytes;
        }
        return requiredBytesInCopy;
    }
    uint64_t GetTexelCountInCopyRegion(uint64_t bytesPerRow,
--- a/src/utils/TestUtils.h
+++ b/src/utils/TestUtils.h
@@ -30,11 +30,6 @@ namespace utils {
    };
    uint32_t GetMinimumBytesPerRow(wgpu::TextureFormat format, uint32_t width);
    uint32_t GetBytesInBufferTextureCopy(wgpu::TextureFormat format,
                                         uint32_t width,
                                         uint32_t bytesPerRow,
                                         uint32_t rowsPerImage,
                                         uint32_t copyArrayLayerCount);
    TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel(
        wgpu::TextureFormat format,
        wgpu::Extent3D textureSizeAtLevel0,
@@ -45,6 +40,12 @@ namespace utils {
                                 uint64_t rowsPerImage,
                                 wgpu::Extent3D copyExtent,
                                 wgpu::TextureFormat textureFormat);
    // Overload that takes the copy dimensions directly in texel blocks.
    // Per its definition in TestUtils.cpp it returns 0 when `depth` is 0; otherwise it returns
    //   bytesPerRow * rowsPerImage * (depth - 1)
    // plus, only when `heightInBlocks` is non-zero, the size of the last image:
    //   bytesPerRow * (heightInBlocks - 1) + widthInBlocks * bytesPerBlock
    // The computation is done in uint64_t with no overflow checking.
    uint64_t RequiredBytesInCopy(uint64_t bytesPerRow,
                                 uint64_t rowsPerImage,
                                 uint64_t widthInBlocks,
                                 uint64_t heightInBlocks,
                                 uint64_t depth,
                                 uint64_t bytesPerBlock);
    uint64_t GetTexelCountInCopyRegion(uint64_t bytesPerRow,
                                       uint64_t rowsPerImage,