Optimizing memcpy inside Queue::WriteTexture

Fixing an earlier TODO.

Bug: dawn:483
Change-Id: I19d961512caea1f9d502d7e58b56cd3846c9f7f3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25983
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Tomek Ponitka <tommek@google.com>
This commit is contained in:
Tomek Ponitka 2020-07-29 18:37:51 +00:00 committed by Commit Bot service account
parent f93fa6acd9
commit 2c8e1f2f11
5 changed files with 52 additions and 21 deletions

View File

@ -281,4 +281,38 @@ namespace dawn_native {
return {};
}
// Copies a 3D block of texel rows from `srcPointer` into `dstPointer`,
// using the widest memcpy the layouts allow.
//
// dstPointer             - destination; assumed to have no extra rows per image
//                          (rows per image == rowsPerImageInBlock).
// srcPointer             - source data.
// depth                  - number of 2D image layers to copy.
// rowsPerImageInBlock    - rows of blocks per image layer.
// imageAdditionalStride  - extra source bytes between consecutive layers
//                          (beyond rowsPerImageInBlock * srcBytesPerRow).
// actualBytesPerRow      - payload bytes copied per row.
// dstBytesPerRow         - destination stride between rows.
// srcBytesPerRow         - source stride between rows.
void CopyTextureData(uint8_t* dstPointer,
                     const uint8_t* srcPointer,
                     uint32_t depth,
                     uint32_t rowsPerImageInBlock,
                     uint64_t imageAdditionalStride,
                     uint32_t actualBytesPerRow,
                     uint32_t dstBytesPerRow,
                     uint32_t srcBytesPerRow) {
    // If rows are tightly packed on both sides, a whole layer can be copied in
    // one memcpy; if layers are also contiguous, the entire data can be.
    bool copyWholeLayer =
        actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow;
    bool copyWholeData = copyWholeLayer && imageAdditionalStride == 0;

    if (!copyWholeLayer) {  // copy row by row
        for (uint32_t d = 0; d < depth; ++d) {
            for (uint32_t h = 0; h < rowsPerImageInBlock; ++h) {
                memcpy(dstPointer, srcPointer, actualBytesPerRow);
                dstPointer += dstBytesPerRow;
                srcPointer += srcBytesPerRow;
            }
            srcPointer += imageAdditionalStride;
        }
    } else {
        // Widen an operand before multiplying: a uint32_t * uint32_t product
        // wraps at 2^32 even though the result is stored in a uint64_t.
        uint64_t layerSize = uint64_t(rowsPerImageInBlock) * actualBytesPerRow;
        if (!copyWholeData) {  // copy layer by layer
            for (uint32_t d = 0; d < depth; ++d) {
                memcpy(dstPointer, srcPointer, layerSize);
                dstPointer += layerSize;
                srcPointer += layerSize + imageAdditionalStride;
            }
        } else {  // do a single copy
            memcpy(dstPointer, srcPointer, layerSize * depth);
        }
    }
}
} // namespace dawn_native

View File

@ -76,6 +76,16 @@ namespace dawn_native {
const Extent3D* writeSize) const;
};
// A helper function used in Queue::WriteTexture. The destination data layout must not
// contain any additional rows per image.
void CopyTextureData(uint8_t* dstPointer,
const uint8_t* srcPointer,
uint32_t depth,
uint32_t rowsPerImageInBlock,
uint64_t imageAdditionalStride,
uint32_t actualBytesPerRow,
uint32_t dstBytesPerRow,
uint32_t srcBytesPerRow);
} // namespace dawn_native
#endif // DAWNNATIVE_QUEUE_H_

View File

@ -282,7 +282,6 @@ namespace dawn_native { namespace metal {
ASSERT(dataLayout.rowsPerImage == (copySize.height));
ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);
// TODO(tommek@google.com): Add tests for this in TextureZeroInitTests.
EnsureDestinationTextureInitialized(texture, *dst, copySize);

// Metal validation layer requires that if the texture's pixel format is a compressed

View File

@ -43,8 +43,6 @@ namespace dawn_native { namespace metal {
newDataSize, device->GetPendingCommandSerial()));
ASSERT(uploadHandle.mappedBuffer != nullptr);
// TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
// is already correctly packed.
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
srcPointer += dataLayout->offset;
@ -58,14 +56,10 @@ namespace dawn_native { namespace metal {
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
uint64_t imageAdditionalStride =
    dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
-for (uint32_t d = 0; d < writeSize->depth; ++d) {
-    for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
-        memcpy(dstPointer, srcPointer, alignedBytesPerRow);
-        dstPointer += alignedBytesPerRow;
-        srcPointer += dataLayout->bytesPerRow;
-    }
-    srcPointer += imageAdditionalStride;
-}
+CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
+                imageAdditionalStride, alignedBytesPerRow, alignedBytesPerRow,
+                dataLayout->bytesPerRow);
return uploadHandle;
}

View File

@ -52,8 +52,6 @@ namespace dawn_native { namespace vulkan {
device->GetPendingCommandSerial()));
ASSERT(uploadHandle.mappedBuffer != nullptr);
// TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
// is already correctly packed.
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
srcPointer += dataLayout->offset;
@ -72,14 +70,10 @@ namespace dawn_native { namespace vulkan {
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
uint64_t imageAdditionalStride =
    dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
-for (uint32_t d = 0; d < writeSize->depth; ++d) {
-    for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
-        memcpy(dstPointer, srcPointer, alignedBytesPerRow);
-        dstPointer += optimallyAlignedBytesPerRow;
-        srcPointer += dataLayout->bytesPerRow;
-    }
-    srcPointer += imageAdditionalStride;
-}
+CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
+                imageAdditionalStride, alignedBytesPerRow, optimallyAlignedBytesPerRow,
+                dataLayout->bytesPerRow);
return uploadHandle;
}