Optimize B2T and T2B copies with multiple array layers on Metal
This patch moves the computation of TextureBufferCopySplit out of the loop over the copied texture array layers, so that the split is computed only once for each B2T (buffer-to-texture) and T2B (texture-to-buffer) copy command.

BUG=dawn:453
TEST=dawn_end2end_tests

Change-Id: I1fcfc972504f845467af92a77bc37870ed7b52a7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23720
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
parent
10e4a44ce6
commit
2a6cc4f8fb
|
@ -316,8 +316,8 @@ namespace dawn_native { namespace metal {
|
||||||
NSUInteger bufferOffset;
|
NSUInteger bufferOffset;
|
||||||
NSUInteger bytesPerRow;
|
NSUInteger bytesPerRow;
|
||||||
NSUInteger bytesPerImage;
|
NSUInteger bytesPerImage;
|
||||||
MTLOrigin textureOrigin;
|
Origin3D textureOrigin;
|
||||||
MTLSize copyExtent;
|
Extent3D copyExtent;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32_t count = 0;
|
uint32_t count = 0;
|
||||||
|
@ -328,7 +328,8 @@ namespace dawn_native { namespace metal {
|
||||||
return MTLOriginMake(origin.x, origin.y, origin.z);
|
return MTLOriginMake(origin.x, origin.y, origin.z);
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureBufferCopySplit ComputeTextureBufferCopySplit(Origin3D origin,
|
TextureBufferCopySplit ComputeTextureBufferCopySplit(wgpu::TextureDimension dimension,
|
||||||
|
Origin3D origin,
|
||||||
Extent3D copyExtent,
|
Extent3D copyExtent,
|
||||||
Format textureFormat,
|
Format textureFormat,
|
||||||
Extent3D virtualSizeAtLevel,
|
Extent3D virtualSizeAtLevel,
|
||||||
|
@ -371,6 +372,8 @@ namespace dawn_native { namespace metal {
|
||||||
? (virtualSizeAtLevel.height - origin.y)
|
? (virtualSizeAtLevel.height - origin.y)
|
||||||
: copyExtent.height;
|
: copyExtent.height;
|
||||||
|
|
||||||
|
ASSERT(dimension == wgpu::TextureDimension::e2D);
|
||||||
|
|
||||||
// Check whether buffer size is big enough.
|
// Check whether buffer size is big enough.
|
||||||
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth;
|
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth;
|
||||||
if (!needWorkaround) {
|
if (!needWorkaround) {
|
||||||
|
@ -378,9 +381,9 @@ namespace dawn_native { namespace metal {
|
||||||
copy.copies[0].bufferOffset = bufferOffset;
|
copy.copies[0].bufferOffset = bufferOffset;
|
||||||
copy.copies[0].bytesPerRow = bytesPerRow;
|
copy.copies[0].bytesPerRow = bytesPerRow;
|
||||||
copy.copies[0].bytesPerImage = bytesPerImage;
|
copy.copies[0].bytesPerImage = bytesPerImage;
|
||||||
copy.copies[0].textureOrigin = MakeMTLOrigin(origin);
|
copy.copies[0].textureOrigin = origin;
|
||||||
copy.copies[0].copyExtent =
|
copy.copies[0].copyExtent = {clampedCopyExtentWidth, clampedCopyExtentHeight,
|
||||||
MTLSizeMake(clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth);
|
copyExtent.depth};
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -391,9 +394,9 @@ namespace dawn_native { namespace metal {
|
||||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
copy.copies[copy.count].bufferOffset = currentOffset;
|
||||||
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
||||||
copy.copies[copy.count].bytesPerImage = bytesPerImage;
|
copy.copies[copy.count].bytesPerImage = bytesPerImage;
|
||||||
copy.copies[copy.count].textureOrigin = MakeMTLOrigin(origin);
|
copy.copies[copy.count].textureOrigin = origin;
|
||||||
copy.copies[copy.count].copyExtent = MTLSizeMake(
|
copy.copies[copy.count].copyExtent = {
|
||||||
clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1);
|
clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1};
|
||||||
|
|
||||||
++copy.count;
|
++copy.count;
|
||||||
|
|
||||||
|
@ -407,12 +410,12 @@ namespace dawn_native { namespace metal {
|
||||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
copy.copies[copy.count].bufferOffset = currentOffset;
|
||||||
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
||||||
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
|
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
|
||||||
copy.copies[copy.count].textureOrigin =
|
copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
|
||||||
MTLOriginMake(origin.x, origin.y, origin.z + copyExtent.depth - 1);
|
origin.z + copyExtent.depth - 1};
|
||||||
|
|
||||||
ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height);
|
ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height);
|
||||||
copy.copies[copy.count].copyExtent = MTLSizeMake(
|
copy.copies[copy.count].copyExtent = {
|
||||||
clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1);
|
clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1};
|
||||||
|
|
||||||
++copy.count;
|
++copy.count;
|
||||||
|
|
||||||
|
@ -431,11 +434,11 @@ namespace dawn_native { namespace metal {
|
||||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
copy.copies[copy.count].bufferOffset = currentOffset;
|
||||||
copy.copies[copy.count].bytesPerRow = lastRowDataSize;
|
copy.copies[copy.count].bytesPerRow = lastRowDataSize;
|
||||||
copy.copies[copy.count].bytesPerImage = lastRowDataSize;
|
copy.copies[copy.count].bytesPerImage = lastRowDataSize;
|
||||||
copy.copies[copy.count].textureOrigin =
|
copy.copies[copy.count].textureOrigin = {
|
||||||
MTLOriginMake(origin.x, origin.y + copyExtent.height - textureFormat.blockHeight,
|
origin.x, origin.y + copyExtent.height - textureFormat.blockHeight,
|
||||||
origin.z + copyExtent.depth - 1);
|
origin.z + copyExtent.depth - 1};
|
||||||
copy.copies[copy.count].copyExtent =
|
copy.copies[copy.count].copyExtent = {clampedCopyExtentWidth, lastRowCopyExtentHeight,
|
||||||
MTLSizeMake(clampedCopyExtentWidth, lastRowCopyExtentHeight, 1);
|
1};
|
||||||
++copy.count;
|
++copy.count;
|
||||||
|
|
||||||
return copy;
|
return copy;
|
||||||
|
@ -747,39 +750,40 @@ namespace dawn_native { namespace metal {
|
||||||
|
|
||||||
EnsureDestinationTextureInitialized(texture, copy->copySize, copy->destination);
|
EnsureDestinationTextureInitialized(texture, copy->copySize, copy->destination);
|
||||||
|
|
||||||
Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(dst.mipLevel);
|
const Extent3D virtualSizeAtLevel =
|
||||||
|
texture->GetMipLevelVirtualSize(dst.mipLevel);
|
||||||
|
|
||||||
uint64_t bufferOffsetForNextSlice = 0;
|
Origin3D copyOrigin = dst.origin;
|
||||||
const uint64_t bytesPerSlice = src.bytesPerRow * src.rowsPerImage;
|
copyOrigin.z = dst.arrayLayer;
|
||||||
|
|
||||||
const dawn_native::Extent3D copyOneLayerSize = {copySize.width, copySize.height,
|
|
||||||
1};
|
|
||||||
// TODO(jiawei.shao@intel.com): use dst.origin.z instead of dst.arrayLayer once
|
|
||||||
// GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
|
|
||||||
for (uint32_t copySlice = dst.arrayLayer;
|
|
||||||
copySlice < dst.arrayLayer + copySize.depth; ++copySlice) {
|
|
||||||
// TODO(jiawei.shao@intel.com): compute splitCopies once for all texture
|
|
||||||
// array layers when possible.
|
|
||||||
TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
|
TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
|
||||||
dst.origin, copyOneLayerSize, texture->GetFormat(), virtualSizeAtLevel,
|
texture->GetDimension(), copyOrigin, copySize, texture->GetFormat(),
|
||||||
buffer->GetSize(), bufferOffsetForNextSlice + src.offset,
|
virtualSizeAtLevel, buffer->GetSize(), src.offset, src.bytesPerRow,
|
||||||
src.bytesPerRow, src.rowsPerImage);
|
src.rowsPerImage);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < splitCopies.count; ++i) {
|
for (uint32_t i = 0; i < splitCopies.count; ++i) {
|
||||||
const TextureBufferCopySplit::CopyInfo& copyInfo =
|
const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i];
|
||||||
splitCopies.copies[i];
|
|
||||||
|
const uint32_t copyBaseLayer = copyInfo.textureOrigin.z;
|
||||||
|
const uint32_t copyLayerCount = copyInfo.copyExtent.depth;
|
||||||
|
const MTLOrigin textureOrigin =
|
||||||
|
MTLOriginMake(copyInfo.textureOrigin.x, copyInfo.textureOrigin.y, 0);
|
||||||
|
const MTLSize copyExtent =
|
||||||
|
MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);
|
||||||
|
|
||||||
|
uint64_t bufferOffset = copyInfo.bufferOffset;
|
||||||
|
for (uint32_t copyLayer = copyBaseLayer;
|
||||||
|
copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
|
||||||
[commandContext->EnsureBlit() copyFromBuffer:buffer->GetMTLBuffer()
|
[commandContext->EnsureBlit() copyFromBuffer:buffer->GetMTLBuffer()
|
||||||
sourceOffset:copyInfo.bufferOffset
|
sourceOffset:bufferOffset
|
||||||
sourceBytesPerRow:copyInfo.bytesPerRow
|
sourceBytesPerRow:copyInfo.bytesPerRow
|
||||||
sourceBytesPerImage:copyInfo.bytesPerImage
|
sourceBytesPerImage:copyInfo.bytesPerImage
|
||||||
sourceSize:copyInfo.copyExtent
|
sourceSize:copyExtent
|
||||||
toTexture:texture->GetMTLTexture()
|
toTexture:texture->GetMTLTexture()
|
||||||
destinationSlice:copySlice
|
destinationSlice:copyLayer
|
||||||
destinationLevel:dst.mipLevel
|
destinationLevel:dst.mipLevel
|
||||||
destinationOrigin:copyInfo.textureOrigin];
|
destinationOrigin:textureOrigin];
|
||||||
|
bufferOffset += copyInfo.bytesPerImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
bufferOffsetForNextSlice += bytesPerSlice;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -796,38 +800,39 @@ namespace dawn_native { namespace metal {
|
||||||
EnsureSourceTextureInitialized(texture, copy->copySize, copy->source);
|
EnsureSourceTextureInitialized(texture, copy->copySize, copy->source);
|
||||||
|
|
||||||
Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel);
|
Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel);
|
||||||
|
Origin3D copyOrigin = src.origin;
|
||||||
uint64_t bufferOffsetForNextSlice = 0;
|
copyOrigin.z = src.arrayLayer;
|
||||||
const uint64_t bytesPerSlice = dst.bytesPerRow * dst.rowsPerImage;
|
|
||||||
|
|
||||||
const dawn_native::Extent3D copyOneLayerSize = {copySize.width, copySize.height,
|
|
||||||
1};
|
|
||||||
// TODO(jiawei.shao@intel.com): use src.origin.z instead of src.arrayLayer once
|
|
||||||
// GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
|
|
||||||
for (uint32_t copySlice = src.arrayLayer;
|
|
||||||
copySlice < src.arrayLayer + copySize.depth; ++copySlice) {
|
|
||||||
// TODO(jiawei.shao@intel.com): compute splitCopies once for all texture
|
|
||||||
// array layers when possible.
|
|
||||||
TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
|
TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
|
||||||
src.origin, copyOneLayerSize, texture->GetFormat(), virtualSizeAtLevel,
|
texture->GetDimension(), copyOrigin, copySize, texture->GetFormat(),
|
||||||
buffer->GetSize(), bufferOffsetForNextSlice + dst.offset,
|
virtualSizeAtLevel, buffer->GetSize(), dst.offset, dst.bytesPerRow,
|
||||||
dst.bytesPerRow, dst.rowsPerImage);
|
dst.rowsPerImage);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < splitCopies.count; ++i) {
|
for (uint32_t i = 0; i < splitCopies.count; ++i) {
|
||||||
const TextureBufferCopySplit::CopyInfo& copyInfo =
|
const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i];
|
||||||
splitCopies.copies[i];
|
|
||||||
|
const uint32_t copyBaseLayer = copyInfo.textureOrigin.z;
|
||||||
|
const uint32_t copyLayerCount = copyInfo.copyExtent.depth;
|
||||||
|
const MTLOrigin textureOrigin =
|
||||||
|
MTLOriginMake(copyInfo.textureOrigin.x, copyInfo.textureOrigin.y, 0);
|
||||||
|
const MTLSize copyExtent =
|
||||||
|
MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);
|
||||||
|
|
||||||
|
uint64_t bufferOffset = copyInfo.bufferOffset;
|
||||||
|
for (uint32_t copyLayer = copyBaseLayer;
|
||||||
|
copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
|
||||||
[commandContext->EnsureBlit() copyFromTexture:texture->GetMTLTexture()
|
[commandContext->EnsureBlit() copyFromTexture:texture->GetMTLTexture()
|
||||||
sourceSlice:copySlice
|
sourceSlice:copyLayer
|
||||||
sourceLevel:src.mipLevel
|
sourceLevel:src.mipLevel
|
||||||
sourceOrigin:copyInfo.textureOrigin
|
sourceOrigin:textureOrigin
|
||||||
sourceSize:copyInfo.copyExtent
|
sourceSize:copyExtent
|
||||||
toBuffer:buffer->GetMTLBuffer()
|
toBuffer:buffer->GetMTLBuffer()
|
||||||
destinationOffset:copyInfo.bufferOffset
|
destinationOffset:bufferOffset
|
||||||
destinationBytesPerRow:copyInfo.bytesPerRow
|
destinationBytesPerRow:copyInfo.bytesPerRow
|
||||||
destinationBytesPerImage:copyInfo.bytesPerImage];
|
destinationBytesPerImage:copyInfo.bytesPerImage];
|
||||||
|
bufferOffset += copyInfo.bytesPerImage;
|
||||||
}
|
}
|
||||||
bufferOffsetForNextSlice += bytesPerSlice;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue