Optimizing memcpy inside Queue::WriteTexture

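Resolves the earlier TODO in the Metal and Vulkan WriteTexture upload paths: when the source and destination rows are already packed identically, whole layers (or the entire data) are now copied with a single memcpy instead of row by row. Bug: dawn:483 Change-Id: I19d961512caea1f9d502d7e58b56cd3846c9f7f3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25983 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Tomek Ponitka <tommek@google.com>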
2025-07-23 13:35:53 +00:00 · 2020-07-29 18:37:51 +00:00 · 2020-07-29 18:37:51 +00:00 · 2c8e1f2f11
commit 2c8e1f2f11
parent f93fa6acd9
5 changed files with 52 additions and 21 deletions
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@ -281,4 +281,38 @@ namespace dawn_native {
        return {};
    }

+    // Copies `depth` layers of texture data from srcPointer to dstPointer using as few memcpy
+    // calls as the two layouts allow. Each layer holds rowsPerImageInBlock rows, and
+    // actualBytesPerRow bytes of every row are copied. Consecutive rows start dstBytesPerRow
+    // bytes apart in the destination and srcBytesPerRow bytes apart in the source. After each
+    // layer the source skips imageAdditionalStride extra bytes; the destination has no such
+    // gap between layers.
+    void CopyTextureData(uint8_t* dstPointer,
+                         const uint8_t* srcPointer,
+                         uint32_t depth,
+                         uint32_t rowsPerImageInBlock,
+                         uint64_t imageAdditionalStride,
+                         uint32_t actualBytesPerRow,
+                         uint32_t dstBytesPerRow,
+                         uint32_t srcBytesPerRow) {
+        // Rows are back to back on both sides, so a whole layer is one contiguous range.
+        const bool copyWholeLayer =
+            actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow;
+        // Source layers are back to back as well, so all the data is one contiguous range.
+        const bool copyWholeData = copyWholeLayer && imageAdditionalStride == 0;
+
+        if (!copyWholeLayer) {  // copy row by row
+            for (uint32_t d = 0; d < depth; ++d) {
+                for (uint32_t h = 0; h < rowsPerImageInBlock; ++h) {
+                    memcpy(dstPointer, srcPointer, actualBytesPerRow);
+                    dstPointer += dstBytesPerRow;
+                    srcPointer += srcBytesPerRow;
+                }
+                // Skip the source rows that lie between two layers.
+                srcPointer += imageAdditionalStride;
+            }
+        } else {
+            // Widen before multiplying: the product of two uint32_t values is computed in
+            // 32 bits and would wrap around for layers of 4 GiB or more.
+            const uint64_t layerSize =
+                static_cast<uint64_t>(rowsPerImageInBlock) * actualBytesPerRow;
+            if (!copyWholeData) {  // copy layer by layer
+                for (uint32_t d = 0; d < depth; ++d) {
+                    memcpy(dstPointer, srcPointer, layerSize);
+                    dstPointer += layerSize;
+                    srcPointer += layerSize + imageAdditionalStride;
+                }
+            } else {  // do a single copy
+                memcpy(dstPointer, srcPointer, layerSize * depth);
+            }
+        }
+    }
 }  // namespace dawn_native
--- a/src/dawn_native/Queue.h
+++ b/src/dawn_native/Queue.h
@ -76,6 +76,16 @@ namespace dawn_native {
                                        const Extent3D* writeSize) const;
    };

+    // A helper function used in Queue::WriteTexture that copies texture data between two
+    // layouts. The destination data layout must not contain any additional rows per image.
+    void CopyTextureData(uint8_t* dstPointer,             // start of the destination data
+                         const uint8_t* srcPointer,       // start of the source data
+                         uint32_t depth,                  // number of layers to copy
+                         uint32_t rowsPerImageInBlock,    // rows copied for each layer
+                         uint64_t imageAdditionalStride,  // source bytes skipped after each layer
+                         uint32_t actualBytesPerRow,      // bytes copied for each row
+                         uint32_t dstBytesPerRow,         // bytes between starts of dst rows
+                         uint32_t srcBytesPerRow);        // bytes between starts of src rows
 }  // namespace dawn_native

 #endif  // DAWNNATIVE_QUEUE_H_
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@ -282,7 +282,6 @@ namespace dawn_native { namespace metal {
        ASSERT(dataLayout.rowsPerImage == (copySize.height));
        ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);

-        // TODO(tommek@google.com): Add tests for this in TextureZeroInitTests.
        EnsureDestinationTextureInitialized(texture, *dst, copySize);

        // Metal validation layer requires that if the texture's pixel format is a compressed
--- a/src/dawn_native/metal/QueueMTL.mm
+++ b/src/dawn_native/metal/QueueMTL.mm
@ -43,8 +43,6 @@ namespace dawn_native { namespace metal {
                                              newDataSize, device->GetPendingCommandSerial()));
            ASSERT(uploadHandle.mappedBuffer != nullptr);

-            // TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
-            // is already correctly packed.
            uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
            srcPointer += dataLayout->offset;
@ -58,14 +56,10 @@ namespace dawn_native { namespace metal {
            ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
            uint64_t imageAdditionalStride =
                dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
-            for (uint32_t d = 0; d < writeSize->depth; ++d) {
-                for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
-                    memcpy(dstPointer, srcPointer, alignedBytesPerRow);
-                    dstPointer += alignedBytesPerRow;
-                    srcPointer += dataLayout->bytesPerRow;
-                }
-                srcPointer += imageAdditionalStride;
-            }
+
+            CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
+                            imageAdditionalStride, alignedBytesPerRow, alignedBytesPerRow,
+                            dataLayout->bytesPerRow);

            return uploadHandle;
        }
--- a/src/dawn_native/vulkan/QueueVk.cpp
+++ b/src/dawn_native/vulkan/QueueVk.cpp
@ -52,8 +52,6 @@ namespace dawn_native { namespace vulkan {
                                              device->GetPendingCommandSerial()));
            ASSERT(uploadHandle.mappedBuffer != nullptr);

-            // TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
-            // is already correctly packed.
            uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
            srcPointer += dataLayout->offset;
@ -72,14 +70,10 @@ namespace dawn_native { namespace vulkan {
            ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
            uint64_t imageAdditionalStride =
                dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
-            for (uint32_t d = 0; d < writeSize->depth; ++d) {
-                for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
-                    memcpy(dstPointer, srcPointer, alignedBytesPerRow);
-                    dstPointer += optimallyAlignedBytesPerRow;
-                    srcPointer += dataLayout->bytesPerRow;
-                }
-                srcPointer += imageAdditionalStride;
-            }
+
+            CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
+                            imageAdditionalStride, alignedBytesPerRow, optimallyAlignedBytesPerRow,
+                            dataLayout->bytesPerRow);

            return uploadHandle;
        }