Optimize B2T and T2B copies with multiple array layers on Metal

This patch moves the computation of TextureBufferCopySplit out of the
loop over the copied texture array layers, so that the split is computed
only once for each buffer-to-texture (B2T) and texture-to-buffer (T2B)
copy command instead of once per array layer.

BUG=dawn:453
TEST=dawn_end2end_tests

Change-Id: I1fcfc972504f845467af92a77bc37870ed7b52a7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23720
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Jiawei Shao 2020-06-25 00:00:18 +00:00 committed by Commit Bot service account
parent 10e4a44ce6
commit 2a6cc4f8fb
1 changed file with 69 additions and 64 deletions

View File

@ -316,8 +316,8 @@ namespace dawn_native { namespace metal {
NSUInteger bufferOffset; NSUInteger bufferOffset;
NSUInteger bytesPerRow; NSUInteger bytesPerRow;
NSUInteger bytesPerImage; NSUInteger bytesPerImage;
MTLOrigin textureOrigin; Origin3D textureOrigin;
MTLSize copyExtent; Extent3D copyExtent;
}; };
uint32_t count = 0; uint32_t count = 0;
@ -328,7 +328,8 @@ namespace dawn_native { namespace metal {
return MTLOriginMake(origin.x, origin.y, origin.z); return MTLOriginMake(origin.x, origin.y, origin.z);
} }
TextureBufferCopySplit ComputeTextureBufferCopySplit(Origin3D origin, TextureBufferCopySplit ComputeTextureBufferCopySplit(wgpu::TextureDimension dimension,
Origin3D origin,
Extent3D copyExtent, Extent3D copyExtent,
Format textureFormat, Format textureFormat,
Extent3D virtualSizeAtLevel, Extent3D virtualSizeAtLevel,
@ -371,6 +372,8 @@ namespace dawn_native { namespace metal {
? (virtualSizeAtLevel.height - origin.y) ? (virtualSizeAtLevel.height - origin.y)
: copyExtent.height; : copyExtent.height;
ASSERT(dimension == wgpu::TextureDimension::e2D);
// Check whether buffer size is big enough. // Check whether buffer size is big enough.
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth; bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth;
if (!needWorkaround) { if (!needWorkaround) {
@ -378,9 +381,9 @@ namespace dawn_native { namespace metal {
copy.copies[0].bufferOffset = bufferOffset; copy.copies[0].bufferOffset = bufferOffset;
copy.copies[0].bytesPerRow = bytesPerRow; copy.copies[0].bytesPerRow = bytesPerRow;
copy.copies[0].bytesPerImage = bytesPerImage; copy.copies[0].bytesPerImage = bytesPerImage;
copy.copies[0].textureOrigin = MakeMTLOrigin(origin); copy.copies[0].textureOrigin = origin;
copy.copies[0].copyExtent = copy.copies[0].copyExtent = {clampedCopyExtentWidth, clampedCopyExtentHeight,
MTLSizeMake(clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth); copyExtent.depth};
return copy; return copy;
} }
@ -391,9 +394,9 @@ namespace dawn_native { namespace metal {
copy.copies[copy.count].bufferOffset = currentOffset; copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow; copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerImage; copy.copies[copy.count].bytesPerImage = bytesPerImage;
copy.copies[copy.count].textureOrigin = MakeMTLOrigin(origin); copy.copies[copy.count].textureOrigin = origin;
copy.copies[copy.count].copyExtent = MTLSizeMake( copy.copies[copy.count].copyExtent = {
clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1); clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1};
++copy.count; ++copy.count;
@ -407,12 +410,12 @@ namespace dawn_native { namespace metal {
copy.copies[copy.count].bufferOffset = currentOffset; copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow; copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1); copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
copy.copies[copy.count].textureOrigin = copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
MTLOriginMake(origin.x, origin.y, origin.z + copyExtent.depth - 1); origin.z + copyExtent.depth - 1};
ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height); ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height);
copy.copies[copy.count].copyExtent = MTLSizeMake( copy.copies[copy.count].copyExtent = {
clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1); clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1};
++copy.count; ++copy.count;
@ -431,11 +434,11 @@ namespace dawn_native { namespace metal {
copy.copies[copy.count].bufferOffset = currentOffset; copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = lastRowDataSize; copy.copies[copy.count].bytesPerRow = lastRowDataSize;
copy.copies[copy.count].bytesPerImage = lastRowDataSize; copy.copies[copy.count].bytesPerImage = lastRowDataSize;
copy.copies[copy.count].textureOrigin = copy.copies[copy.count].textureOrigin = {
MTLOriginMake(origin.x, origin.y + copyExtent.height - textureFormat.blockHeight, origin.x, origin.y + copyExtent.height - textureFormat.blockHeight,
origin.z + copyExtent.depth - 1); origin.z + copyExtent.depth - 1};
copy.copies[copy.count].copyExtent = copy.copies[copy.count].copyExtent = {clampedCopyExtentWidth, lastRowCopyExtentHeight,
MTLSizeMake(clampedCopyExtentWidth, lastRowCopyExtentHeight, 1); 1};
++copy.count; ++copy.count;
return copy; return copy;
@ -747,39 +750,40 @@ namespace dawn_native { namespace metal {
EnsureDestinationTextureInitialized(texture, copy->copySize, copy->destination); EnsureDestinationTextureInitialized(texture, copy->copySize, copy->destination);
Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(dst.mipLevel); const Extent3D virtualSizeAtLevel =
texture->GetMipLevelVirtualSize(dst.mipLevel);
uint64_t bufferOffsetForNextSlice = 0; Origin3D copyOrigin = dst.origin;
const uint64_t bytesPerSlice = src.bytesPerRow * src.rowsPerImage; copyOrigin.z = dst.arrayLayer;
const dawn_native::Extent3D copyOneLayerSize = {copySize.width, copySize.height,
1};
// TODO(jiawei.shao@intel.com): use dst.origin.z instead of dst.arrayLayer once
// GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
for (uint32_t copySlice = dst.arrayLayer;
copySlice < dst.arrayLayer + copySize.depth; ++copySlice) {
// TODO(jiawei.shao@intel.com): compute splitCopies once for all texture
// array layers when possible.
TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit( TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
dst.origin, copyOneLayerSize, texture->GetFormat(), virtualSizeAtLevel, texture->GetDimension(), copyOrigin, copySize, texture->GetFormat(),
buffer->GetSize(), bufferOffsetForNextSlice + src.offset, virtualSizeAtLevel, buffer->GetSize(), src.offset, src.bytesPerRow,
src.bytesPerRow, src.rowsPerImage); src.rowsPerImage);
for (uint32_t i = 0; i < splitCopies.count; ++i) { for (uint32_t i = 0; i < splitCopies.count; ++i) {
const TextureBufferCopySplit::CopyInfo& copyInfo = const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i];
splitCopies.copies[i];
const uint32_t copyBaseLayer = copyInfo.textureOrigin.z;
const uint32_t copyLayerCount = copyInfo.copyExtent.depth;
const MTLOrigin textureOrigin =
MTLOriginMake(copyInfo.textureOrigin.x, copyInfo.textureOrigin.y, 0);
const MTLSize copyExtent =
MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);
uint64_t bufferOffset = copyInfo.bufferOffset;
for (uint32_t copyLayer = copyBaseLayer;
copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
[commandContext->EnsureBlit() copyFromBuffer:buffer->GetMTLBuffer() [commandContext->EnsureBlit() copyFromBuffer:buffer->GetMTLBuffer()
sourceOffset:copyInfo.bufferOffset sourceOffset:bufferOffset
sourceBytesPerRow:copyInfo.bytesPerRow sourceBytesPerRow:copyInfo.bytesPerRow
sourceBytesPerImage:copyInfo.bytesPerImage sourceBytesPerImage:copyInfo.bytesPerImage
sourceSize:copyInfo.copyExtent sourceSize:copyExtent
toTexture:texture->GetMTLTexture() toTexture:texture->GetMTLTexture()
destinationSlice:copySlice destinationSlice:copyLayer
destinationLevel:dst.mipLevel destinationLevel:dst.mipLevel
destinationOrigin:copyInfo.textureOrigin]; destinationOrigin:textureOrigin];
bufferOffset += copyInfo.bytesPerImage;
} }
bufferOffsetForNextSlice += bytesPerSlice;
} }
break; break;
@ -796,38 +800,39 @@ namespace dawn_native { namespace metal {
EnsureSourceTextureInitialized(texture, copy->copySize, copy->source); EnsureSourceTextureInitialized(texture, copy->copySize, copy->source);
Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel); Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel);
Origin3D copyOrigin = src.origin;
uint64_t bufferOffsetForNextSlice = 0; copyOrigin.z = src.arrayLayer;
const uint64_t bytesPerSlice = dst.bytesPerRow * dst.rowsPerImage;
const dawn_native::Extent3D copyOneLayerSize = {copySize.width, copySize.height,
1};
// TODO(jiawei.shao@intel.com): use src.origin.z instead of src.arrayLayer once
// GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
for (uint32_t copySlice = src.arrayLayer;
copySlice < src.arrayLayer + copySize.depth; ++copySlice) {
// TODO(jiawei.shao@intel.com): compute splitCopies once for all texture
// array layers when possible.
TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit( TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
src.origin, copyOneLayerSize, texture->GetFormat(), virtualSizeAtLevel, texture->GetDimension(), copyOrigin, copySize, texture->GetFormat(),
buffer->GetSize(), bufferOffsetForNextSlice + dst.offset, virtualSizeAtLevel, buffer->GetSize(), dst.offset, dst.bytesPerRow,
dst.bytesPerRow, dst.rowsPerImage); dst.rowsPerImage);
for (uint32_t i = 0; i < splitCopies.count; ++i) { for (uint32_t i = 0; i < splitCopies.count; ++i) {
const TextureBufferCopySplit::CopyInfo& copyInfo = const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i];
splitCopies.copies[i];
const uint32_t copyBaseLayer = copyInfo.textureOrigin.z;
const uint32_t copyLayerCount = copyInfo.copyExtent.depth;
const MTLOrigin textureOrigin =
MTLOriginMake(copyInfo.textureOrigin.x, copyInfo.textureOrigin.y, 0);
const MTLSize copyExtent =
MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);
uint64_t bufferOffset = copyInfo.bufferOffset;
for (uint32_t copyLayer = copyBaseLayer;
copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
[commandContext->EnsureBlit() copyFromTexture:texture->GetMTLTexture() [commandContext->EnsureBlit() copyFromTexture:texture->GetMTLTexture()
sourceSlice:copySlice sourceSlice:copyLayer
sourceLevel:src.mipLevel sourceLevel:src.mipLevel
sourceOrigin:copyInfo.textureOrigin sourceOrigin:textureOrigin
sourceSize:copyInfo.copyExtent sourceSize:copyExtent
toBuffer:buffer->GetMTLBuffer() toBuffer:buffer->GetMTLBuffer()
destinationOffset:copyInfo.bufferOffset destinationOffset:bufferOffset
destinationBytesPerRow:copyInfo.bytesPerRow destinationBytesPerRow:copyInfo.bytesPerRow
destinationBytesPerImage:copyInfo.bytesPerImage]; destinationBytesPerImage:copyInfo.bytesPerImage];
bufferOffset += copyInfo.bytesPerImage;
} }
bufferOffsetForNextSlice += bytesPerSlice;
} }
break; break;
} }