Optimizing memcpy inside Queue::WriteTexture
Fixing an earlier TODO. Bug: dawn:483 Change-Id: I19d961512caea1f9d502d7e58b56cd3846c9f7f3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25983 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Tomek Ponitka <tommek@google.com>
This commit is contained in:
parent
f93fa6acd9
commit
2c8e1f2f11
|
@ -281,4 +281,38 @@ namespace dawn_native {
|
|||
return {};
|
||||
}
|
||||
|
||||
// Copies `depth` images of texture data from srcPointer to dstPointer.
//
// Every image consists of rowsPerImageInBlock rows, and actualBytesPerRow bytes are copied
// for each row. Consecutive rows are srcBytesPerRow bytes apart in the source and
// dstBytesPerRow bytes apart in the destination. After each image the source pointer skips
// an extra imageAdditionalStride bytes, while the destination images follow each other
// directly (the destination must not contain any additional rows per image).
//
// When all three row pitches are equal the rows of an image are contiguous on both sides, so
// the copy is coalesced into one memcpy per image, or into a single memcpy for the whole
// data when imageAdditionalStride is also zero.
void CopyTextureData(uint8_t* dstPointer,
                     const uint8_t* srcPointer,
                     uint32_t depth,
                     uint32_t rowsPerImageInBlock,
                     uint64_t imageAdditionalStride,
                     uint32_t actualBytesPerRow,
                     uint32_t dstBytesPerRow,
                     uint32_t srcBytesPerRow) {
    // Nothing to copy. Returning here also avoids calling memcpy with a zero size.
    if (depth == 0 || rowsPerImageInBlock == 0 || actualBytesPerRow == 0) {
        return;
    }

    const bool copyWholeLayer =
        actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow;
    const bool copyWholeData = copyWholeLayer && imageAdditionalStride == 0;

    if (!copyWholeLayer) {  // copy row by row
        for (uint32_t d = 0; d < depth; ++d) {
            for (uint32_t h = 0; h < rowsPerImageInBlock; ++h) {
                memcpy(dstPointer, srcPointer, actualBytesPerRow);
                dstPointer += dstBytesPerRow;
                srcPointer += srcBytesPerRow;
            }
            srcPointer += imageAdditionalStride;
        }
        return;
    }

    // Widen before multiplying: the product of two uint32_t values would otherwise be
    // computed in 32 bits and wrap for layers of 4 GiB or more.
    const uint64_t layerSize = static_cast<uint64_t>(rowsPerImageInBlock) * actualBytesPerRow;

    if (!copyWholeData) {  // copy layer by layer
        for (uint32_t d = 0; d < depth; ++d) {
            memcpy(dstPointer, srcPointer, layerSize);
            dstPointer += layerSize;
            srcPointer += layerSize + imageAdditionalStride;
        }
    } else {  // do a single copy
        memcpy(dstPointer, srcPointer, layerSize * depth);
    }
}
|
||||
} // namespace dawn_native
|
||||
|
|
|
@ -76,6 +76,16 @@ namespace dawn_native {
|
|||
const Extent3D* writeSize) const;
|
||||
};
|
||||
|
||||
// A helper function used in Queue::WriteTexture. The destination data layout must not
|
||||
// contain any additional rows per image.
//
// Copies `depth` images of `rowsPerImageInBlock` rows each from srcPointer to dstPointer.
//   - actualBytesPerRow: number of bytes copied for every row.
//   - dstBytesPerRow / srcBytesPerRow: distance in bytes between the starts of consecutive
//     rows in the destination / source.
//   - imageAdditionalStride: extra bytes skipped in the source after each image.
// If the three bytes-per-row values are equal, each image is copied with one memcpy; if
// imageAdditionalStride is also 0, the whole data is copied with a single memcpy.
|
||||
void CopyTextureData(uint8_t* dstPointer,
|
||||
const uint8_t* srcPointer,
|
||||
uint32_t depth,
|
||||
uint32_t rowsPerImageInBlock,
|
||||
uint64_t imageAdditionalStride,
|
||||
uint32_t actualBytesPerRow,
|
||||
uint32_t dstBytesPerRow,
|
||||
uint32_t srcBytesPerRow);
|
||||
} // namespace dawn_native
|
||||
|
||||
#endif // DAWNNATIVE_QUEUE_H_
|
||||
|
|
|
@ -282,7 +282,6 @@ namespace dawn_native { namespace metal {
|
|||
ASSERT(dataLayout.rowsPerImage == (copySize.height));
|
||||
ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);
|
||||
|
||||
// TODO(tommek@google.com): Add tests for this in TextureZeroInitTests.
|
||||
EnsureDestinationTextureInitialized(texture, *dst, copySize);
|
||||
|
||||
// Metal validation layer requires that if the texture's pixel format is a compressed
|
||||
|
|
|
@ -43,8 +43,6 @@ namespace dawn_native { namespace metal {
|
|||
newDataSize, device->GetPendingCommandSerial()));
|
||||
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
||||
|
||||
// TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
|
||||
// is already correctly packed.
|
||||
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
|
||||
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
|
||||
srcPointer += dataLayout->offset;
|
||||
|
@ -58,14 +56,10 @@ namespace dawn_native { namespace metal {
|
|||
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
|
||||
uint64_t imageAdditionalStride =
|
||||
dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
|
||||
for (uint32_t d = 0; d < writeSize->depth; ++d) {
|
||||
for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
|
||||
memcpy(dstPointer, srcPointer, alignedBytesPerRow);
|
||||
dstPointer += alignedBytesPerRow;
|
||||
srcPointer += dataLayout->bytesPerRow;
|
||||
}
|
||||
srcPointer += imageAdditionalStride;
|
||||
}
|
||||
|
||||
CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
|
||||
imageAdditionalStride, alignedBytesPerRow, alignedBytesPerRow,
|
||||
dataLayout->bytesPerRow);
|
||||
|
||||
return uploadHandle;
|
||||
}
|
||||
|
|
|
@ -52,8 +52,6 @@ namespace dawn_native { namespace vulkan {
|
|||
device->GetPendingCommandSerial()));
|
||||
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
||||
|
||||
// TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
|
||||
// is already correctly packed.
|
||||
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
|
||||
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
|
||||
srcPointer += dataLayout->offset;
|
||||
|
@ -72,14 +70,10 @@ namespace dawn_native { namespace vulkan {
|
|||
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
|
||||
uint64_t imageAdditionalStride =
|
||||
dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
|
||||
for (uint32_t d = 0; d < writeSize->depth; ++d) {
|
||||
for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
|
||||
memcpy(dstPointer, srcPointer, alignedBytesPerRow);
|
||||
dstPointer += optimallyAlignedBytesPerRow;
|
||||
srcPointer += dataLayout->bytesPerRow;
|
||||
}
|
||||
srcPointer += imageAdditionalStride;
|
||||
}
|
||||
|
||||
CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
|
||||
imageAdditionalStride, alignedBytesPerRow, optimallyAlignedBytesPerRow,
|
||||
dataLayout->bytesPerRow);
|
||||
|
||||
return uploadHandle;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue