Change rowsPerImage units from texels to blocks

Spec change: https://github.com/gpuweb/gpuweb/pull/958 Bug: dawn:520 Change-Id: I05c4792a2832d12cd68877f2b1b99cdf9ef26925 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/29981 Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Kai Ninomiya <kainino@chromium.org>
2025-12-15 16:16:08 +00:00 · 2020-10-12 23:13:53 +00:00
parent 51af1b428f
commit d1bca09f4a
17 changed files with 115 additions and 161 deletions
--- a/src/dawn_native/CommandBuffer.cpp
+++ b/src/dawn_native/CommandBuffer.cpp
@@ -166,22 +166,23 @@ namespace dawn_native {
            return false;
        }

-        if (copy->destination.rowsPerImage > copy->copySize.height) {
-            return false;
-        }
-
        const TextureBase* texture = copy->source.texture.Get();
        const TexelBlockInfo& blockInfo =
            texture->GetFormat().GetTexelBlockInfo(copy->source.aspect);
+        const uint64_t heightInBlocks = copy->copySize.height / blockInfo.blockHeight;
+
+        if (copy->destination.rowsPerImage > heightInBlocks) {
+            return false;
+        }
+
        const uint64_t copyTextureDataSizePerRow =
            copy->copySize.width / blockInfo.blockWidth * blockInfo.blockByteSize;
        if (copy->destination.bytesPerRow > copyTextureDataSizePerRow) {
            return false;
        }

-        const uint64_t overwrittenRangeSize = copyTextureDataSizePerRow *
-                                              (copy->copySize.height / blockInfo.blockHeight) *
-                                              copy->copySize.depth;
+        const uint64_t overwrittenRangeSize =
+            copyTextureDataSizePerRow * heightInBlocks * copy->copySize.depth;
        if (copy->destination.buffer->GetSize() > overwrittenRangeSize) {
            return false;
        }
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -648,10 +648,12 @@ namespace dawn_native {
                // copyExtent.height by blockHeight while the divisibility conditions are
                // checked in validating texture copy range.
                DAWN_TRY(ValidateTextureCopyRange(*destination, *copySize));
-                DAWN_TRY(ValidateLinearTextureData(
-                    source->layout, source->buffer->GetSize(),
-                    destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect),
-                    *copySize));
+            }
+            const TexelBlockInfo& blockInfo =
+                destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect);
+            if (GetDevice()->IsValidationEnabled()) {
+                DAWN_TRY(ValidateLinearTextureData(source->layout, source->buffer->GetSize(),
+                                                   blockInfo, *copySize));

                mTopLevelBuffers.insert(source->buffer);
                mTopLevelTextures.insert(destination->texture);
@@ -660,12 +662,11 @@ namespace dawn_native {
            // Compute default value for rowsPerImage
            uint32_t defaultedRowsPerImage = source->layout.rowsPerImage;
            if (defaultedRowsPerImage == 0) {
-                defaultedRowsPerImage = copySize->height;
+                ASSERT(copySize->height % blockInfo.blockHeight == 0);
+                defaultedRowsPerImage = copySize->height / blockInfo.blockHeight;
            }

            // In the case of one row copy bytesPerRow might not contain enough bytes
-            const TexelBlockInfo& blockInfo =
-                destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect);
            uint32_t bytesPerRow = source->layout.bytesPerRow;
            if (copySize->height <= 1 && copySize->depth <= 1) {
                bytesPerRow =
@@ -711,6 +712,10 @@ namespace dawn_native {
                // copyExtent.height by blockHeight while the divisibility conditions are
                // checked in validating texture copy range.
                DAWN_TRY(ValidateTextureCopyRange(*source, *copySize));
+            }
+            const TexelBlockInfo& blockInfo =
+                source->texture->GetFormat().GetTexelBlockInfo(source->aspect);
+            if (GetDevice()->IsValidationEnabled()) {
                DAWN_TRY(ValidateLinearTextureData(
                    destination->layout, destination->buffer->GetSize(),
                    source->texture->GetFormat().GetTexelBlockInfo(source->aspect), *copySize));
@@ -722,12 +727,11 @@ namespace dawn_native {
            // Compute default value for rowsPerImage
            uint32_t defaultedRowsPerImage = destination->layout.rowsPerImage;
            if (defaultedRowsPerImage == 0) {
-                defaultedRowsPerImage = copySize->height;
+                ASSERT(copySize->height % blockInfo.blockHeight == 0);
+                defaultedRowsPerImage = copySize->height / blockInfo.blockHeight;
            }

            // In the case of one row copy bytesPerRow might not contain enough bytes
-            const TexelBlockInfo& blockInfo =
-                source->texture->GetFormat().GetTexelBlockInfo(source->aspect);
            uint32_t bytesPerRow = destination->layout.bytesPerRow;
            if (copySize->height <= 1 && copySize->depth <= 1) {
                bytesPerRow =
--- a/src/dawn_native/CommandValidation.cpp
+++ b/src/dawn_native/CommandValidation.cpp
@@ -374,12 +374,15 @@ namespace dawn_native {
                                                       const Extent3D& copySize,
                                                       uint32_t bytesPerRow,
                                                       uint32_t rowsPerImage) {
+        ASSERT(copySize.height % blockInfo.blockHeight == 0);
+        uint32_t heightInBlocks = copySize.height / blockInfo.blockHeight;
+
        // Default value for rowsPerImage
        if (rowsPerImage == 0) {
-            rowsPerImage = copySize.height;
+            rowsPerImage = heightInBlocks;
        }

-        ASSERT(rowsPerImage >= copySize.height);
+        ASSERT(rowsPerImage >= heightInBlocks);
        if (copySize.height > 1 || copySize.depth > 1) {
            ASSERT(bytesPerRow >= copySize.width / blockInfo.blockWidth * blockInfo.blockByteSize);
        }
@@ -391,13 +394,12 @@ namespace dawn_native {
        ASSERT(copySize.height >= 1);
        ASSERT(copySize.depth >= 1);

-        uint32_t texelBlockRowsPerImage = rowsPerImage / blockInfo.blockHeight;
        // bytesPerImage won't overflow since we're multiplying two uint32_t numbers
-        uint64_t bytesPerImage = uint64_t(texelBlockRowsPerImage) * bytesPerRow;
+        uint64_t bytesPerImage = uint64_t(rowsPerImage) * bytesPerRow;
        // Provided that copySize.height > 1: bytesInLastSlice won't overflow since it's at most
        // bytesPerImage. Otherwise the result is a multiplication of two uint32_t numbers.
        uint64_t bytesInLastSlice =
-            uint64_t(bytesPerRow) * (copySize.height / blockInfo.blockHeight - 1) +
+            uint64_t(bytesPerRow) * (heightInBlocks - 1) +
            (uint64_t(copySize.width) / blockInfo.blockWidth * blockInfo.blockByteSize);

        // This error cannot be thrown for copySize.depth = 1.
@@ -428,11 +430,6 @@ namespace dawn_native {
                                         const TexelBlockInfo& blockInfo,
                                         const Extent3D& copyExtent) {
        // Validation for the texel block alignments:
-        if (layout.rowsPerImage % blockInfo.blockHeight != 0) {
-            return DAWN_VALIDATION_ERROR(
-                "rowsPerImage must be a multiple of compressed texture format block height");
-        }
-
        if (layout.offset % blockInfo.blockByteSize != 0) {
            return DAWN_VALIDATION_ERROR("Offset must be a multiple of the texel or block size");
        }
@@ -448,9 +445,13 @@ namespace dawn_native {
        // TODO(tommek@google.com): to match the spec there should be another condition here
        // on rowsPerImage >= copyExtent.height if copyExtent.depth > 1.

+        ASSERT(copyExtent.height % blockInfo.blockHeight == 0);
+        uint32_t heightInBlocks = copyExtent.height / blockInfo.blockHeight;
+
        // Validation for the copy being in-bounds:
-        if (layout.rowsPerImage != 0 && layout.rowsPerImage < copyExtent.height) {
-            return DAWN_VALIDATION_ERROR("rowsPerImage must not be less than the copy height.");
+        if (layout.rowsPerImage != 0 && layout.rowsPerImage < heightInBlocks) {
+            return DAWN_VALIDATION_ERROR(
+                "rowsPerImage must not be less than the copy height in blocks.");
        }

        // We compute required bytes in copy after validating texel block alignments
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@@ -32,12 +32,13 @@
 #include <cstring>

 namespace dawn_native {
+
    namespace {

        void CopyTextureData(uint8_t* dstPointer,
                             const uint8_t* srcPointer,
                             uint32_t depth,
-                             uint32_t rowsPerImageInBlock,
+                             uint32_t rowsPerImage,
                             uint64_t imageAdditionalStride,
                             uint32_t actualBytesPerRow,
                             uint32_t dstBytesPerRow,
@@ -48,7 +49,7 @@ namespace dawn_native {

            if (!copyWholeLayer) {  // copy row by row
                for (uint32_t d = 0; d < depth; ++d) {
-                    for (uint32_t h = 0; h < rowsPerImageInBlock; ++h) {
+                    for (uint32_t h = 0; h < rowsPerImage; ++h) {
                        memcpy(dstPointer, srcPointer, actualBytesPerRow);
                        dstPointer += dstBytesPerRow;
                        srcPointer += srcBytesPerRow;
@@ -56,7 +57,7 @@ namespace dawn_native {
                    srcPointer += imageAdditionalStride;
                }
            } else {
-                uint64_t layerSize = uint64_t(rowsPerImageInBlock) * actualBytesPerRow;
+                uint64_t layerSize = uint64_t(rowsPerImage) * actualBytesPerRow;
                if (!copyWholeData) {  // copy layer by layer
                    for (uint32_t d = 0; d < depth; ++d) {
                        memcpy(dstPointer, srcPointer, layerSize);
@@ -103,19 +104,18 @@ namespace dawn_native {
            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
            srcPointer += dataLayout.offset;

-            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / blockInfo.blockHeight;
-            uint32_t dataRowsPerImageInBlock = dataLayout.rowsPerImage / blockInfo.blockHeight;
-            if (dataRowsPerImageInBlock == 0) {
-                dataRowsPerImageInBlock = writeSizePixel.height / blockInfo.blockHeight;
+            uint32_t dataRowsPerImage = dataLayout.rowsPerImage;
+            if (dataRowsPerImage == 0) {
+                dataRowsPerImage = writeSizePixel.height / blockInfo.blockHeight;
            }

-            ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
+            ASSERT(dataRowsPerImage >= alignedRowsPerImage);
            uint64_t imageAdditionalStride =
-                dataLayout.bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
+                dataLayout.bytesPerRow * (dataRowsPerImage - alignedRowsPerImage);

-            CopyTextureData(dstPointer, srcPointer, writeSizePixel.depth,
-                            alignedRowsPerImageInBlock, imageAdditionalStride, alignedBytesPerRow,
-                            optimallyAlignedBytesPerRow, dataLayout.bytesPerRow);
+            CopyTextureData(dstPointer, srcPointer, writeSizePixel.depth, alignedRowsPerImage,
+                            imageAdditionalStride, alignedBytesPerRow, optimallyAlignedBytesPerRow,
+                            dataLayout.bytesPerRow);

            return uploadHandle;
        }
@@ -271,9 +271,11 @@ namespace dawn_native {
        // We are only copying the part of the data that will appear in the texture.
        // Note that validating texture copy range ensures that writeSizePixel->width and
        // writeSizePixel->height are multiples of blockWidth and blockHeight respectively.
+        ASSERT(writeSizePixel.width % blockInfo.blockWidth == 0);
+        ASSERT(writeSizePixel.height % blockInfo.blockHeight == 0);
        uint32_t alignedBytesPerRow =
-            (writeSizePixel.width) / blockInfo.blockWidth * blockInfo.blockByteSize;
-        uint32_t alignedRowsPerImage = writeSizePixel.height;
+            writeSizePixel.width / blockInfo.blockWidth * blockInfo.blockByteSize;
+        uint32_t alignedRowsPerImage = writeSizePixel.height / blockInfo.blockHeight;

        uint32_t optimalBytesPerRowAlignment = GetDevice()->GetOptimalBytesPerRowAlignment();
        uint32_t optimallyAlignedBytesPerRow =
@@ -445,38 +447,4 @@ namespace dawn_native {
            device->GetCurrentErrorScope());
    }

-    void CopyTextureData(uint8_t* dstPointer,
-                         const uint8_t* srcPointer,
-                         uint32_t depth,
-                         uint32_t rowsPerImageInBlock,
-                         uint64_t imageAdditionalStride,
-                         uint32_t actualBytesPerRow,
-                         uint32_t dstBytesPerRow,
-                         uint32_t srcBytesPerRow) {
-        bool copyWholeLayer =
-            actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow;
-        bool copyWholeData = copyWholeLayer && imageAdditionalStride == 0;
-
-        if (!copyWholeLayer) {  // copy row by row
-            for (uint32_t d = 0; d < depth; ++d) {
-                for (uint32_t h = 0; h < rowsPerImageInBlock; ++h) {
-                    memcpy(dstPointer, srcPointer, actualBytesPerRow);
-                    dstPointer += dstBytesPerRow;
-                    srcPointer += srcBytesPerRow;
-                }
-                srcPointer += imageAdditionalStride;
-            }
-        } else {
-            uint64_t layerSize = uint64_t(rowsPerImageInBlock) * actualBytesPerRow;
-            if (!copyWholeData) {  // copy layer by layer
-                for (uint32_t d = 0; d < depth; ++d) {
-                    memcpy(dstPointer, srcPointer, layerSize);
-                    dstPointer += layerSize;
-                    srcPointer += layerSize + imageAdditionalStride;
-                }
-            } else {  // do a single copy
-                memcpy(dstPointer, srcPointer, layerSize * depth);
-            }
-        }
-    }
 }  // namespace dawn_native
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -778,8 +778,7 @@ namespace dawn_native { namespace d3d12 {
                        copy->destination.bytesPerRow, copy->destination.rowsPerImage);

                    const uint64_t bytesPerSlice =
-                        copy->destination.bytesPerRow *
-                        (copy->destination.rowsPerImage / blockInfo.blockHeight);
+                        copy->destination.bytesPerRow * copy->destination.rowsPerImage;

                    // copySplits.copies2D[1] is always calculated for the second copy slice with
                    // extra "bytesPerSlice" copy offset compared with the first copy slice. So
--- a/src/dawn_native/d3d12/TextureCopySplitter.cpp
+++ b/src/dawn_native/d3d12/TextureCopySplitter.cpp
@@ -71,7 +71,7 @@ namespace dawn_native { namespace d3d12 {
        ASSERT(alignedOffset < offset);
        ASSERT(offset - alignedOffset < D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);

-        uint32_t slicePitch = bytesPerRow * (rowsPerImage / blockInfo.blockHeight);
+        uint32_t slicePitch = bytesPerRow * rowsPerImage;
        Origin3D texelOffset = ComputeTexelOffsets(
            blockInfo, static_cast<uint32_t>(offset - alignedOffset), bytesPerRow, slicePitch);

@@ -79,6 +79,7 @@ namespace dawn_native { namespace d3d12 {
            copySize.width / blockInfo.blockWidth * blockInfo.blockByteSize;
        uint32_t byteOffsetInRowPitch =
            texelOffset.x / blockInfo.blockWidth * blockInfo.blockByteSize;
+        uint32_t rowsPerImageInTexels = rowsPerImage * blockInfo.blockHeight;
        if (copyBytesPerRowPitch + byteOffsetInRowPitch <= bytesPerRow) {
            // The region's rows fit inside the bytes per row. In this case, extend the width of the
            // PlacedFootprint and copy the buffer with an offset location
@@ -111,7 +112,7 @@ namespace dawn_native { namespace d3d12 {

            copy.copies[0].bufferOffset = texelOffset;
            copy.copies[0].bufferSize.width = copySize.width + texelOffset.x;
-            copy.copies[0].bufferSize.height = rowsPerImage + texelOffset.y;
+            copy.copies[0].bufferSize.height = rowsPerImageInTexels + texelOffset.y;
            copy.copies[0].bufferSize.depth = copySize.depth + texelOffset.z;

            return copy;
@@ -163,7 +164,7 @@ namespace dawn_native { namespace d3d12 {

        copy.copies[0].bufferOffset = texelOffset;
        copy.copies[0].bufferSize.width = texelsPerRow;
-        copy.copies[0].bufferSize.height = rowsPerImage + texelOffset.y;
+        copy.copies[0].bufferSize.height = rowsPerImageInTexels + texelOffset.y;
        copy.copies[0].bufferSize.depth = copySize.depth + texelOffset.z;

        copy.copies[1].textureOffset.x = origin.x + copy.copies[0].copySize.width;
@@ -179,7 +180,8 @@ namespace dawn_native { namespace d3d12 {
        copy.copies[1].bufferOffset.y = texelOffset.y + blockInfo.blockHeight;
        copy.copies[1].bufferOffset.z = texelOffset.z;
        copy.copies[1].bufferSize.width = copy.copies[1].copySize.width;
-        copy.copies[1].bufferSize.height = rowsPerImage + texelOffset.y + blockInfo.blockHeight;
+        copy.copies[1].bufferSize.height =
+            rowsPerImageInTexels + texelOffset.y + blockInfo.blockHeight;
        copy.copies[1].bufferSize.depth = copySize.depth + texelOffset.z;

        return copy;
@@ -193,7 +195,7 @@ namespace dawn_native { namespace d3d12 {
                                               uint32_t rowsPerImage) {
        TextureCopySplits copies;

-        const uint64_t bytesPerSlice = bytesPerRow * (rowsPerImage / blockInfo.blockHeight);
+        const uint64_t bytesPerSlice = bytesPerRow * rowsPerImage;

        // The function ComputeTextureCopySplit() decides how to split the copy based on:
        // - the alignment of the buffer offset with D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512)
--- a/src/dawn_native/d3d12/TextureD3D12.cpp
+++ b/src/dawn_native/d3d12/TextureD3D12.cpp
@@ -970,7 +970,7 @@ namespace dawn_native { namespace d3d12 {
                    // compute d3d12 texture copy locations for texture and buffer
                    Extent3D copySize = GetMipLevelVirtualSize(level);

-                    uint32_t rowsPerImage = GetHeight();
+                    uint32_t rowsPerImage = GetHeight() / blockInfo.blockHeight;
                    Texture2DCopySplit copySplit = ComputeTextureCopySplit(
                        {0, 0, 0}, copySize, blockInfo, uploadHandle.startOffset, bytesPerRow,
                        rowsPerImage);
--- a/src/dawn_native/d3d12/UtilsD3D12.cpp
+++ b/src/dawn_native/d3d12/UtilsD3D12.cpp
@@ -189,7 +189,7 @@ namespace dawn_native { namespace d3d12 {
        const TextureCopySplits copySplits = ComputeTextureCopySplits(
            textureCopy.origin, copySize, blockInfo, offsetBytes, bytesPerRow, rowsPerImage);

-        const uint64_t bytesPerSlice = bytesPerRow * (rowsPerImage / blockInfo.blockHeight);
+        const uint64_t bytesPerSlice = bytesPerRow * rowsPerImage;

        // copySplits.copies2D[1] is always calculated for the second copy slice with
        // extra "bytesPerSlice" copy offset compared with the first copy slice. So
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@@ -301,8 +301,8 @@ namespace dawn_native { namespace metal {
        uint32_t blockSize = blockInfo.blockByteSize;
        uint32_t blockWidth = blockInfo.blockWidth;
        uint32_t blockHeight = blockInfo.blockHeight;
-        ASSERT(dataLayout.rowsPerImage == (copySizePixels.height));
-        ASSERT(dataLayout.bytesPerRow == (copySizePixels.width) / blockWidth * blockSize);
+        ASSERT(dataLayout.rowsPerImage == copySizePixels.height / blockHeight);
+        ASSERT(dataLayout.bytesPerRow == copySizePixels.width / blockWidth * blockSize);

        EnsureDestinationTextureInitialized(texture, *dst, copySizePixels);

@@ -314,8 +314,7 @@ namespace dawn_native { namespace metal {
            texture->ClampToMipLevelVirtualSize(dst->mipLevel, dst->origin, copySizePixels);
        const uint32_t copyBaseLayer = dst->origin.z;
        const uint32_t copyLayerCount = copySizePixels.depth;
-        const uint64_t bytesPerImage =
-            dataLayout.rowsPerImage * dataLayout.bytesPerRow / blockHeight;
+        const uint64_t bytesPerImage = dataLayout.rowsPerImage * dataLayout.bytesPerRow;

        MTLBlitOption blitOption = ComputeMTLBlitOption(texture->GetFormat(), dst->aspect);

--- a/src/dawn_native/metal/UtilsMetal.mm
+++ b/src/dawn_native/metal/UtilsMetal.mm
@@ -73,8 +73,7 @@ namespace dawn_native { namespace metal {

        // We work around this limitation by detecting when Metal would complain and copy the
        // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
-        uint32_t dataRowsPerImage = rowsPerImage / blockInfo.blockHeight;
-        uint32_t bytesPerImage = bytesPerRow * dataRowsPerImage;
+        uint32_t bytesPerImage = bytesPerRow * rowsPerImage;

        // Metal validation layer requires that if the texture's pixel format is a compressed
        // format, the sourceSize must be a multiple of the pixel format's block size or be
--- a/src/dawn_native/opengl/CommandBufferGL.cpp
+++ b/src/dawn_native/opengl/CommandBufferGL.cpp
@@ -534,7 +534,8 @@ namespace dawn_native { namespace opengl {
                    const TexelBlockInfo& blockInfo = formatInfo.GetTexelBlockInfo(dst.aspect);
                    gl.PixelStorei(GL_UNPACK_ROW_LENGTH, src.bytesPerRow / blockInfo.blockByteSize *
                                                             blockInfo.blockWidth);
-                    gl.PixelStorei(GL_UNPACK_IMAGE_HEIGHT, src.rowsPerImage);
+                    gl.PixelStorei(GL_UNPACK_IMAGE_HEIGHT,
+                                   src.rowsPerImage * blockInfo.blockHeight);

                    if (formatInfo.isCompressed) {
                        gl.PixelStorei(GL_UNPACK_COMPRESSED_BLOCK_SIZE, blockInfo.blockByteSize);
@@ -626,7 +627,7 @@ namespace dawn_native { namespace opengl {
                    const TexelBlockInfo& blockInfo = formatInfo.GetTexelBlockInfo(src.aspect);

                    gl.BindBuffer(GL_PIXEL_PACK_BUFFER, buffer->GetHandle());
-                    gl.PixelStorei(GL_PACK_IMAGE_HEIGHT, dst.rowsPerImage);
+                    gl.PixelStorei(GL_PACK_IMAGE_HEIGHT, dst.rowsPerImage * blockInfo.blockHeight);
                    gl.PixelStorei(GL_PACK_ROW_LENGTH, dst.bytesPerRow / blockInfo.blockByteSize);

                    GLenum glAttachment;
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -438,13 +438,13 @@ namespace dawn_native { namespace vulkan {
        dawn_native::Format format = srcCopy.texture->GetFormat();
        const TexelBlockInfo& blockInfo = format.GetTexelBlockInfo(srcCopy.aspect);
        ASSERT(copySize.width % blockInfo.blockWidth == 0);
+        uint32_t widthInBlocks = copySize.width / blockInfo.blockWidth;
        ASSERT(copySize.height % blockInfo.blockHeight == 0);
+        uint32_t heightInBlocks = copySize.height / blockInfo.blockHeight;

        // Create the temporary buffer. Note that We don't need to respect WebGPU's 256 alignment
        // because it isn't a hard constraint in Vulkan.
-        uint64_t tempBufferSize =
-            (copySize.width / blockInfo.blockWidth * copySize.height / blockInfo.blockHeight) *
-            blockInfo.blockByteSize;
+        uint64_t tempBufferSize = widthInBlocks * heightInBlocks * blockInfo.blockByteSize;
        BufferDescriptor tempBufferDescriptor;
        tempBufferDescriptor.size = tempBufferSize;
        tempBufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
@@ -454,7 +454,7 @@ namespace dawn_native { namespace vulkan {

        BufferCopy tempBufferCopy;
        tempBufferCopy.buffer = tempBuffer.Get();
-        tempBufferCopy.rowsPerImage = copySize.height;
+        tempBufferCopy.rowsPerImage = heightInBlocks;
        tempBufferCopy.offset = 0;
        tempBufferCopy.bytesPerRow =
            copySize.width / blockInfo.blockWidth * blockInfo.blockByteSize;
--- a/src/dawn_native/vulkan/UtilsVulkan.cpp
+++ b/src/dawn_native/vulkan/UtilsVulkan.cpp
@@ -112,7 +112,7 @@ namespace dawn_native { namespace vulkan {
        ASSERT(dataLayout.bytesPerRow % blockInfo.blockByteSize == 0);
        region.bufferRowLength =
            dataLayout.bytesPerRow / blockInfo.blockByteSize * blockInfo.blockWidth;
-        region.bufferImageHeight = dataLayout.rowsPerImage;
+        region.bufferImageHeight = dataLayout.rowsPerImage * blockInfo.blockHeight;

        region.imageSubresource.aspectMask = VulkanAspectMask(textureCopy.aspect);
        region.imageSubresource.mipLevel = textureCopy.mipLevel;
--- a/src/tests/end2end/CompressedTextureFormatTests.cpp
+++ b/src/tests/end2end/CompressedTextureFormatTests.cpp
@@ -59,11 +59,11 @@ class CompressedTextureBCFormatTest : public DawnTest {
            rowPitchInBytes = copyWidthInBlock *
                              utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format);
        }
-        uint32_t copyRowsPerImageInBlock = copyConfig.rowsPerImage / kBCBlockHeightInTexels;
-        if (copyRowsPerImageInBlock == 0) {
-            copyRowsPerImageInBlock = copyHeightInBlock;
+        uint32_t copyRowsPerImage = copyConfig.rowsPerImage;
+        if (copyRowsPerImage == 0) {
+            copyRowsPerImage = copyHeightInBlock;
        }
-        uint32_t copyBytesPerImage = rowPitchInBytes * copyRowsPerImageInBlock;
+        uint32_t copyBytesPerImage = rowPitchInBytes * copyRowsPerImage;
        uint32_t uploadBufferSize =
            copyConfig.bufferOffset + copyBytesPerImage * copyConfig.copyExtent3D.depth;

@@ -960,7 +960,7 @@ TEST_P(CompressedTextureBCFormatTest, LargeImageHeight) {
    config.textureDescriptor.size = {8, 8, 1};
    config.copyExtent3D = config.textureDescriptor.size;

-    config.rowsPerImage = config.textureDescriptor.size.height * 2;
+    config.rowsPerImage = config.textureDescriptor.size.height * 2 / kBCBlockHeightInTexels;

    for (wgpu::TextureFormat format : kBCFormats) {
        config.textureDescriptor.format = format;
@@ -1003,7 +1003,7 @@ TEST_P(CompressedTextureBCFormatTest, LargeImageHeightAndClampedCopyExtent) {

    config.copyExtent3D = {kCopyWidthAtLevel, kCopyHeightAtLevel, 1};

-    config.rowsPerImage = kCopyHeightAtLevel * 2;
+    config.rowsPerImage = kCopyHeightAtLevel * 2 / kBCBlockHeightInTexels;

    for (wgpu::TextureFormat format : kBCFormats) {
        config.textureDescriptor.format = format;
@@ -1149,7 +1149,7 @@ TEST_P(CompressedTextureWriteTextureTest, Basic) {
    config.copyOrigin3D = {4, 8, 0};
    config.copyExtent3D = {12, 16, 1};
    config.bytesPerRowAlignment = 511;
-    config.rowsPerImage = 20;
+    config.rowsPerImage = 5;

    for (wgpu::TextureFormat format : kBCFormats) {
        config.textureDescriptor.format = format;
@@ -1170,7 +1170,7 @@ TEST_P(CompressedTextureWriteTextureTest, WriteMultiple2DArrayLayers) {
    config.copyOrigin3D = {4, 8, 3};
    config.copyExtent3D = {12, 16, 6};
    config.bytesPerRowAlignment = 511;
-    config.rowsPerImage = 20;
+    config.rowsPerImage = 5;

    for (wgpu::TextureFormat format : kBCFormats) {
        config.textureDescriptor.format = format;
--- a/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
+++ b/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
@@ -1833,27 +1833,31 @@ TEST_F(CopyCommandTest_CompressedTextureFormats, BytesPerRow) {
    }
 }

-// Tests to verify that rowsPerImage must be a multiple of the compressed texture block height in
-// buffer-to-texture or texture-to-buffer copies with compressed texture formats.
-TEST_F(CopyCommandTest_CompressedTextureFormats, ImageHeight) {
+// rowsPerImage must be >= heightInBlocks.
+TEST_F(CopyCommandTest_CompressedTextureFormats, RowsPerImage) {
    wgpu::Buffer buffer =
-        CreateBuffer(512, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);
+        CreateBuffer(1024, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);

    for (wgpu::TextureFormat bcFormat : utils::kBCFormats) {
        wgpu::Texture texture = Create2DTexture(bcFormat);

        // Valid usages of rowsPerImage in B2T and T2B copies with compressed texture formats.
        {
-            constexpr uint32_t kValidImageHeight = 8;
-            TestBothTBCopies(utils::Expectation::Success, buffer, 0, 256, kValidImageHeight,
-                             texture, 0, {0, 0, 0}, {4, 4, 1});
+            constexpr uint32_t kValidRowsPerImage = 5;
+            TestBothTBCopies(utils::Expectation::Success, buffer, 0, 256, kValidRowsPerImage,
+                             texture, 0, {0, 0, 0}, {4, 16, 1});
+        }
+        {
+            constexpr uint32_t kValidRowsPerImage = 4;
+            TestBothTBCopies(utils::Expectation::Success, buffer, 0, 256, kValidRowsPerImage,
+                             texture, 0, {0, 0, 0}, {4, 16, 1});
        }

-        // Failures on invalid rowsPerImage.
+        // rowsPerImage is smaller than height.
        {
-            constexpr uint32_t kInvalidImageHeight = 3;
-            TestBothTBCopies(utils::Expectation::Failure, buffer, 0, 256, kInvalidImageHeight,
-                             texture, 0, {0, 0, 0}, {4, 4, 1});
+            constexpr uint32_t kInvalidRowsPerImage = 3;
+            TestBothTBCopies(utils::Expectation::Failure, buffer, 0, 256, kInvalidRowsPerImage,
+                             texture, 0, {0, 0, 0}, {4, 20, 1});
        }
    }
 }
@@ -1996,31 +2000,15 @@ TEST_F(CopyCommandTest_CompressedTextureFormats, CopyToMultipleArrayLayers) {
            12, 16, 1, 20, bcFormat, wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc);

        // Copy to all array layers
-        TestBothTBCopiesExactBufferSize(256, 16, texture, bcFormat, {0, 0, 0}, {12, 16, 20});
+        TestBothTBCopiesExactBufferSize(256, 4, texture, bcFormat, {0, 0, 0}, {12, 16, 20});

        // Copy to the highest array layer
-        TestBothTBCopiesExactBufferSize(256, 16, texture, bcFormat, {0, 0, 19}, {12, 16, 1});
+        TestBothTBCopiesExactBufferSize(256, 4, texture, bcFormat, {0, 0, 19}, {12, 16, 1});

        // Copy to array layers in the middle
-        TestBothTBCopiesExactBufferSize(256, 16, texture, bcFormat, {0, 0, 1}, {12, 16, 18});
+        TestBothTBCopiesExactBufferSize(256, 4, texture, bcFormat, {0, 0, 1}, {12, 16, 18});

        // Copy touching the texture corners with a non-packed rowsPerImage
-        TestBothTBCopiesExactBufferSize(256, 24, texture, bcFormat, {4, 4, 4}, {8, 12, 16});
-
-        // rowsPerImage needs to be a multiple of blockHeight
-        {
-            wgpu::Buffer source =
-                CreateBuffer(8192, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);
-            TestBothTBCopies(utils::Expectation::Failure, source, 0, 256, 6, texture, 0, {0, 0, 0},
-                             {4, 4, 1});
-        }
-
-        // rowsPerImage must be a multiple of blockHeight even with an empty copy
-        {
-            wgpu::Buffer source =
-                CreateBuffer(0, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);
-            TestBothTBCopies(utils::Expectation::Failure, source, 0, 256, 2, texture, 0, {0, 0, 0},
-                             {0, 0, 0});
-        }
+        TestBothTBCopiesExactBufferSize(256, 6, texture, bcFormat, {4, 4, 4}, {8, 12, 16});
    }
 }
--- a/src/tests/unittests/validation/QueueWriteTextureValidationTests.cpp
+++ b/src/tests/unittests/validation/QueueWriteTextureValidationTests.cpp
@@ -619,28 +619,28 @@ namespace {
        }
    }

-    // Tests to verify that rowsPerImage must be a multiple of the compressed texture block height
+    // rowsPerImage must be >= heightInBlocks.
    TEST_F(WriteTextureTest_CompressedTextureFormats, RowsPerImage) {
        for (wgpu::TextureFormat bcFormat : utils::kBCFormats) {
            wgpu::Texture texture = Create2DTexture(bcFormat);

            // Valid usages of rowsPerImage in WriteTexture with compressed texture formats.
            {
-                constexpr uint32_t kValidRowsPerImage = 8;
-                TestWriteTexture(512, 0, 256, kValidRowsPerImage, texture, 0, {0, 0, 0}, {4, 4, 1});
+                constexpr uint32_t kValidRowsPerImage = 5;
+                TestWriteTexture(1024, 0, 256, kValidRowsPerImage, texture, 0, {0, 0, 0},
+                                 {4, 16, 1});
            }
-
-            // 4 is the exact limit for rowsPerImage here.
            {
                constexpr uint32_t kValidRowsPerImage = 4;
-                TestWriteTexture(512, 0, 256, kValidRowsPerImage, texture, 0, {0, 0, 0}, {4, 4, 1});
+                TestWriteTexture(1024, 0, 256, kValidRowsPerImage, texture, 0, {0, 0, 0},
+                                 {4, 16, 1});
            }

            // Failure on invalid rowsPerImage.
            {
-                constexpr uint32_t kInvalidRowsPerImage = 2;
-                ASSERT_DEVICE_ERROR(TestWriteTexture(512, 0, 256, kInvalidRowsPerImage, texture, 0,
-                                                     {0, 0, 0}, {4, 4, 1}));
+                constexpr uint32_t kInvalidRowsPerImage = 3;
+                ASSERT_DEVICE_ERROR(TestWriteTexture(1024, 0, 256, kInvalidRowsPerImage, texture, 0,
+                                                     {0, 0, 0}, {4, 16, 1}));
            }
        }
    }
@@ -731,23 +731,16 @@ namespace {
                wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc);

            // Write to all array layers
-            TestWriteTextureExactDataSize(256, 16, texture, bcFormat, {0, 0, 0}, {12, 16, 20});
+            TestWriteTextureExactDataSize(256, 4, texture, bcFormat, {0, 0, 0}, {12, 16, 20});

            // Write to the highest array layer
-            TestWriteTextureExactDataSize(256, 16, texture, bcFormat, {0, 0, 19}, {12, 16, 1});
+            TestWriteTextureExactDataSize(256, 4, texture, bcFormat, {0, 0, 19}, {12, 16, 1});

            // Write to array layers in the middle
-            TestWriteTextureExactDataSize(256, 16, texture, bcFormat, {0, 0, 1}, {12, 16, 18});
+            TestWriteTextureExactDataSize(256, 4, texture, bcFormat, {0, 0, 1}, {12, 16, 18});

            // Write touching the texture corners with a non-packed rowsPerImage
-            TestWriteTextureExactDataSize(256, 24, texture, bcFormat, {4, 4, 4}, {8, 12, 16});
-
-            // rowsPerImage needs to be a multiple of blockHeight
-            ASSERT_DEVICE_ERROR(
-                TestWriteTexture(8192, 0, 256, 6, texture, 0, {0, 0, 0}, {4, 4, 1}));
-
-            // rowsPerImage must be a multiple of blockHeight even with an empty write
-            ASSERT_DEVICE_ERROR(TestWriteTexture(0, 0, 256, 2, texture, 0, {0, 0, 0}, {0, 0, 0}));
+            TestWriteTextureExactDataSize(256, 6, texture, bcFormat, {4, 4, 4}, {8, 12, 16});
        }
    }

--- a/src/utils/TestUtils.cpp
+++ b/src/utils/TestUtils.cpp
@@ -79,8 +79,7 @@ namespace utils {
            uint32_t blockWidth = utils::GetTextureFormatBlockWidth(textureFormat);
            uint32_t blockHeight = utils::GetTextureFormatBlockHeight(textureFormat);

-            uint64_t texelBlockRowsPerImage = rowsPerImage / blockHeight;
-            uint64_t bytesPerImage = bytesPerRow * texelBlockRowsPerImage;
+            uint64_t bytesPerImage = bytesPerRow * rowsPerImage;
            uint64_t bytesInLastSlice = bytesPerRow * (copyExtent.height / blockHeight - 1) +
                                        (copyExtent.width / blockWidth * blockSize);
            return bytesPerImage * (copyExtent.depth - 1) + bytesInLastSlice;