From 2c8e1f2f11fa2b294337285cadc835f3f3125d8e Mon Sep 17 00:00:00 2001 From: Tomek Ponitka Date: Wed, 29 Jul 2020 18:37:51 +0000 Subject: [PATCH] Optimizing memcpy inside Queue::WriteTexture Fixing an earlier TODO. Bug: dawn:483 Change-Id: I19d961512caea1f9d502d7e58b56cd3846c9f7f3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25983 Reviewed-by: Corentin Wallez Commit-Queue: Tomek Ponitka --- src/dawn_native/Queue.cpp | 34 ++++++++++++++++++++++++++++++ src/dawn_native/Queue.h | 10 +++++++++ src/dawn_native/metal/DeviceMTL.mm | 1 - src/dawn_native/metal/QueueMTL.mm | 14 ++++-------- src/dawn_native/vulkan/QueueVk.cpp | 14 ++++-------- 5 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp index efc716701b..372885bd92 100644 --- a/src/dawn_native/Queue.cpp +++ b/src/dawn_native/Queue.cpp @@ -281,4 +281,38 @@ namespace dawn_native { return {}; } + void CopyTextureData(uint8_t* dstPointer, + const uint8_t* srcPointer, + uint32_t depth, + uint32_t rowsPerImageInBlock, + uint64_t imageAdditionalStride, + uint32_t actualBytesPerRow, + uint32_t dstBytesPerRow, + uint32_t srcBytesPerRow) { + bool copyWholeLayer = + actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow; + bool copyWholeData = copyWholeLayer && imageAdditionalStride == 0; + + if (!copyWholeLayer) { // copy row by row + for (uint32_t d = 0; d < depth; ++d) { + for (uint32_t h = 0; h < rowsPerImageInBlock; ++h) { + memcpy(dstPointer, srcPointer, actualBytesPerRow); + dstPointer += dstBytesPerRow; + srcPointer += srcBytesPerRow; + } + srcPointer += imageAdditionalStride; + } + } else { + uint64_t layerSize = rowsPerImageInBlock * actualBytesPerRow; + if (!copyWholeData) { // copy layer by layer + for (uint32_t d = 0; d < depth; ++d) { + memcpy(dstPointer, srcPointer, layerSize); + dstPointer += layerSize; + srcPointer += layerSize + imageAdditionalStride; + } + } else { // do a single copy + memcpy(dstPointer, 
srcPointer, layerSize * depth); + } + } + } } // namespace dawn_native diff --git a/src/dawn_native/Queue.h b/src/dawn_native/Queue.h index 694d53b845..aad5175334 100644 --- a/src/dawn_native/Queue.h +++ b/src/dawn_native/Queue.h @@ -76,6 +76,16 @@ namespace dawn_native { const Extent3D* writeSize) const; }; + // A helper function used in Queue::WriteTexture. The destination data layout must not + // contain any additional rows per image. + void CopyTextureData(uint8_t* dstPointer, + const uint8_t* srcPointer, + uint32_t depth, + uint32_t rowsPerImageInBlock, + uint64_t imageAdditionalStride, + uint32_t actualBytesPerRow, + uint32_t dstBytesPerRow, + uint32_t srcBytesPerRow); } // namespace dawn_native #endif // DAWNNATIVE_QUEUE_H_ diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm index d5dbbc5ea5..13cb7dfd08 100644 --- a/src/dawn_native/metal/DeviceMTL.mm +++ b/src/dawn_native/metal/DeviceMTL.mm @@ -282,7 +282,6 @@ namespace dawn_native { namespace metal { ASSERT(dataLayout.rowsPerImage == (copySize.height)); ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize); - // TODO(tommek@google.com): Add tests for this in TextureZeroInitTests. EnsureDestinationTextureInitialized(texture, *dst, copySize); // Metal validation layer requires that if the texture's pixel format is a compressed diff --git a/src/dawn_native/metal/QueueMTL.mm b/src/dawn_native/metal/QueueMTL.mm index c0245b1b63..016e0cd69a 100644 --- a/src/dawn_native/metal/QueueMTL.mm +++ b/src/dawn_native/metal/QueueMTL.mm @@ -43,8 +43,6 @@ namespace dawn_native { namespace metal { newDataSize, device->GetPendingCommandSerial())); ASSERT(uploadHandle.mappedBuffer != nullptr); - // TODO(tommek@google.com): Add an optimization to do a single memcpy if the data - // is already correctly packed. 
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer); const uint8_t* srcPointer = static_cast<const uint8_t*>(data); srcPointer += dataLayout->offset; @@ -58,14 +56,10 @@ namespace dawn_native { namespace metal { ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock); uint64_t imageAdditionalStride = dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock); - for (uint32_t d = 0; d < writeSize->depth; ++d) { - for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) { - memcpy(dstPointer, srcPointer, alignedBytesPerRow); - dstPointer += alignedBytesPerRow; - srcPointer += dataLayout->bytesPerRow; - } - srcPointer += imageAdditionalStride; - } + + CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock, imageAdditionalStride, alignedBytesPerRow, alignedBytesPerRow, dataLayout->bytesPerRow); return uploadHandle; } diff --git a/src/dawn_native/vulkan/QueueVk.cpp b/src/dawn_native/vulkan/QueueVk.cpp index 99a05909a8..b19b2368c4 100644 --- a/src/dawn_native/vulkan/QueueVk.cpp +++ b/src/dawn_native/vulkan/QueueVk.cpp @@ -52,8 +52,6 @@ namespace dawn_native { namespace vulkan { device->GetPendingCommandSerial())); ASSERT(uploadHandle.mappedBuffer != nullptr); - // TODO(tommek@google.com): Add an optimization to do a single memcpy if the data - // is already correctly packed. 
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer); const uint8_t* srcPointer = static_cast<const uint8_t*>(data); srcPointer += dataLayout->offset; @@ -72,14 +70,10 @@ namespace dawn_native { namespace vulkan { ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock); uint64_t imageAdditionalStride = dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock); - for (uint32_t d = 0; d < writeSize->depth; ++d) { - for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) { - memcpy(dstPointer, srcPointer, alignedBytesPerRow); - dstPointer += optimallyAlignedBytesPerRow; - srcPointer += dataLayout->bytesPerRow; - } - srcPointer += imageAdditionalStride; - } + + CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock, imageAdditionalStride, alignedBytesPerRow, optimallyAlignedBytesPerRow, dataLayout->bytesPerRow); return uploadHandle; }