Optimizing memcpy inside Queue::WriteTexture
Resolves an earlier TODO: do a single memcpy when the data is already correctly packed, instead of always copying row by row. Bug: dawn:483 Change-Id: I19d961512caea1f9d502d7e58b56cd3846c9f7f3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25983 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Tomek Ponitka <tommek@google.com>
This commit is contained in:
parent
f93fa6acd9
commit
2c8e1f2f11
|
@ -281,4 +281,38 @@ namespace dawn_native {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies `depth` images of `rowsPerImageInBlock` rows each from srcPointer to dstPointer,
// using the fewest memcpy calls the two layouts allow.
//
//   dstPointer            - start of the destination. Rows are dstBytesPerRow apart; the
//                           destination must not contain additional rows per image, so no
//                           extra stride is ever applied between destination images.
//   srcPointer            - start of the source. Rows are srcBytesPerRow apart.
//   depth                 - number of images to copy.
//   rowsPerImageInBlock   - number of rows copied for each image.
//   imageAdditionalStride - extra bytes skipped in the source after each image.
//   actualBytesPerRow     - number of bytes copied for each row.
//   dstBytesPerRow        - distance in bytes between consecutive destination rows.
//   srcBytesPerRow        - distance in bytes between consecutive source rows.
//
// Three strategies are used, from most general to fastest:
//   1. row by row, when the row pitches differ from the bytes actually copied;
//   2. image by image, when rows are tightly packed but the source has padding after
//      each image;
//   3. one single copy, when the whole data is tightly packed on both sides.
void CopyTextureData(uint8_t* dstPointer,
                     const uint8_t* srcPointer,
                     uint32_t depth,
                     uint32_t rowsPerImageInBlock,
                     uint64_t imageAdditionalStride,
                     uint32_t actualBytesPerRow,
                     uint32_t dstBytesPerRow,
                     uint32_t srcBytesPerRow) {
    // Nothing to copy: return before touching the pointers, so that memcpy is never called
    // with a zero size on pointers that may not be valid.
    if (depth == 0 || rowsPerImageInBlock == 0 || actualBytesPerRow == 0) {
        return;
    }

    const bool rowsArePacked =
        actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow;

    if (!rowsArePacked) {
        // Row pitches differ: copy row by row.
        for (uint32_t image = 0; image < depth; ++image) {
            for (uint32_t row = 0; row < rowsPerImageInBlock; ++row) {
                memcpy(dstPointer, srcPointer, actualBytesPerRow);
                dstPointer += dstBytesPerRow;
                srcPointer += srcBytesPerRow;
            }
            // Skip the padding rows that only exist in the source.
            srcPointer += imageAdditionalStride;
        }
        return;
    }

    // Widen before multiplying: the product of two uint32_t values is computed in 32 bits
    // and would wrap for images of 4 GiB or more before being stored in the 64-bit result.
    const uint64_t layerSize = static_cast<uint64_t>(rowsPerImageInBlock) * actualBytesPerRow;

    if (imageAdditionalStride != 0) {
        // Rows are packed but source images are padded: copy image by image.
        for (uint32_t image = 0; image < depth; ++image) {
            memcpy(dstPointer, srcPointer, layerSize);
            dstPointer += layerSize;
            srcPointer += layerSize + imageAdditionalStride;
        }
        return;
    }

    // Everything is tightly packed on both sides: do a single copy.
    memcpy(dstPointer, srcPointer, layerSize * depth);
}
|
||||||
} // namespace dawn_native
|
} // namespace dawn_native
|
||||||
|
|
|
@ -76,6 +76,16 @@ namespace dawn_native {
|
||||||
const Extent3D* writeSize) const;
|
const Extent3D* writeSize) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A helper function used in Queue::WriteTexture. The destination data layout must not
|
||||||
|
// contain any additional rows per image.
//
// Copies `depth` images of `rowsPerImageInBlock` rows each from srcPointer to dstPointer.
//   - actualBytesPerRow is the number of bytes copied for each row.
//   - dstBytesPerRow / srcBytesPerRow are the distances in bytes between consecutive rows
//     in the destination / source.
//   - imageAdditionalStride is the number of extra bytes skipped in the source after each
//     image; no such stride is applied to the destination.
// When the three bytes-per-row values are equal the copy is done one image at a time, and
// when imageAdditionalStride is also 0 it is done with a single memcpy.
|
||||||
|
void CopyTextureData(uint8_t* dstPointer,
|
||||||
|
const uint8_t* srcPointer,
|
||||||
|
uint32_t depth,
|
||||||
|
uint32_t rowsPerImageInBlock,
|
||||||
|
uint64_t imageAdditionalStride,
|
||||||
|
uint32_t actualBytesPerRow,
|
||||||
|
uint32_t dstBytesPerRow,
|
||||||
|
uint32_t srcBytesPerRow);
|
||||||
} // namespace dawn_native
|
} // namespace dawn_native
|
||||||
|
|
||||||
#endif // DAWNNATIVE_QUEUE_H_
|
#endif // DAWNNATIVE_QUEUE_H_
|
||||||
|
|
|
@ -282,7 +282,6 @@ namespace dawn_native { namespace metal {
|
||||||
ASSERT(dataLayout.rowsPerImage == (copySize.height));
|
ASSERT(dataLayout.rowsPerImage == (copySize.height));
|
||||||
ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);
|
ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);
|
||||||
|
|
||||||
// TODO(tommek@google.com): Add tests for this in TextureZeroInitTests.
|
|
||||||
EnsureDestinationTextureInitialized(texture, *dst, copySize);
|
EnsureDestinationTextureInitialized(texture, *dst, copySize);
|
||||||
|
|
||||||
// Metal validation layer requires that if the texture's pixel format is a compressed
|
// Metal validation layer requires that if the texture's pixel format is a compressed
|
||||||
|
|
|
@ -43,8 +43,6 @@ namespace dawn_native { namespace metal {
|
||||||
newDataSize, device->GetPendingCommandSerial()));
|
newDataSize, device->GetPendingCommandSerial()));
|
||||||
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
||||||
|
|
||||||
// TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
|
|
||||||
// is already correctly packed.
|
|
||||||
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
|
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
|
||||||
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
|
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
|
||||||
srcPointer += dataLayout->offset;
|
srcPointer += dataLayout->offset;
|
||||||
|
@ -58,14 +56,10 @@ namespace dawn_native { namespace metal {
|
||||||
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
|
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
|
||||||
uint64_t imageAdditionalStride =
|
uint64_t imageAdditionalStride =
|
||||||
dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
|
dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
|
||||||
for (uint32_t d = 0; d < writeSize->depth; ++d) {
|
|
||||||
for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
|
CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
|
||||||
memcpy(dstPointer, srcPointer, alignedBytesPerRow);
|
imageAdditionalStride, alignedBytesPerRow, alignedBytesPerRow,
|
||||||
dstPointer += alignedBytesPerRow;
|
dataLayout->bytesPerRow);
|
||||||
srcPointer += dataLayout->bytesPerRow;
|
|
||||||
}
|
|
||||||
srcPointer += imageAdditionalStride;
|
|
||||||
}
|
|
||||||
|
|
||||||
return uploadHandle;
|
return uploadHandle;
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,8 +52,6 @@ namespace dawn_native { namespace vulkan {
|
||||||
device->GetPendingCommandSerial()));
|
device->GetPendingCommandSerial()));
|
||||||
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
||||||
|
|
||||||
// TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
|
|
||||||
// is already correctly packed.
|
|
||||||
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
|
uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
|
||||||
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
|
const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
|
||||||
srcPointer += dataLayout->offset;
|
srcPointer += dataLayout->offset;
|
||||||
|
@ -72,14 +70,10 @@ namespace dawn_native { namespace vulkan {
|
||||||
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
|
ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
|
||||||
uint64_t imageAdditionalStride =
|
uint64_t imageAdditionalStride =
|
||||||
dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
|
dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
|
||||||
for (uint32_t d = 0; d < writeSize->depth; ++d) {
|
|
||||||
for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
|
CopyTextureData(dstPointer, srcPointer, writeSize->depth, alignedRowsPerImageInBlock,
|
||||||
memcpy(dstPointer, srcPointer, alignedBytesPerRow);
|
imageAdditionalStride, alignedBytesPerRow, optimallyAlignedBytesPerRow,
|
||||||
dstPointer += optimallyAlignedBytesPerRow;
|
dataLayout->bytesPerRow);
|
||||||
srcPointer += dataLayout->bytesPerRow;
|
|
||||||
}
|
|
||||||
srcPointer += imageAdditionalStride;
|
|
||||||
}
|
|
||||||
|
|
||||||
return uploadHandle;
|
return uploadHandle;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue