Workaround Metal Validation Issue for copying 3D textures to a buffer.

The metal validation layer complains when copying from a 3D texture to
a buffer if the stride for the destination buffer is larger then 2048
bytes.

Bug: dawn:1430
Change-Id: I6ba4508d71610c35dfb0fab7d2bebc91d37504e3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/113426
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Gregg Tavares <gman@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Brandon Jones <bajones@chromium.org>
This commit is contained in:
Gregg Tavares 2022-12-22 01:30:22 +00:00 committed by Dawn LUCI CQ
parent 0244804193
commit 6609f9cfb9
3 changed files with 110 additions and 55 deletions

View File

@ -15,6 +15,7 @@
#ifndef SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_ #ifndef SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
#define SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_ #define SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
#include "dawn/common/StackContainer.h"
#include "dawn/native/dawn_platform.h" #include "dawn/native/dawn_platform.h"
#include "dawn/native/metal/DeviceMTL.h" #include "dawn/native/metal/DeviceMTL.h"
#include "dawn/native/metal/ShaderModuleMTL.h" #include "dawn/native/metal/ShaderModuleMTL.h"
@ -34,9 +35,21 @@ namespace dawn::native::metal {
MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction); MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction);
struct TextureBufferCopySplit { struct TextureBufferCopySplit {
static constexpr uint32_t kMaxTextureBufferCopyRegions = 3; // Avoid allocations except in the worse case. Most cases require at most 3 regions.
static constexpr uint32_t kNumCommonTextureBufferCopyRegions = 3;
struct CopyInfo { struct CopyInfo {
CopyInfo(NSUInteger bufferOffset,
NSUInteger bytesPerRow,
NSUInteger bytesPerImage,
Origin3D textureOrigin,
Extent3D copyExtent)
: bufferOffset(bufferOffset),
bytesPerRow(bytesPerRow),
bytesPerImage(bytesPerImage),
textureOrigin(textureOrigin),
copyExtent(copyExtent) {}
NSUInteger bufferOffset; NSUInteger bufferOffset;
NSUInteger bytesPerRow; NSUInteger bytesPerRow;
NSUInteger bytesPerImage; NSUInteger bytesPerImage;
@ -44,12 +57,11 @@ struct TextureBufferCopySplit {
Extent3D copyExtent; Extent3D copyExtent;
}; };
uint32_t count = 0; StackVector<CopyInfo, kNumCommonTextureBufferCopyRegions> copies;
std::array<CopyInfo, kMaxTextureBufferCopyRegions> copies;
auto begin() const { return copies.begin(); } auto begin() const { return copies->begin(); }
auto end() const { return copies->end(); }
auto end() const { return copies.begin() + count; } void push_back(const CopyInfo& copyInfo) { copies->push_back(copyInfo); }
}; };
TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture, TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,

View File

@ -179,12 +179,15 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
const Format textureFormat = texture->GetFormat(); const Format textureFormat = texture->GetFormat();
const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block; const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block;
// When copying textures from/to an unpacked buffer, the Metal validation layer doesn't // When copying textures from/to an unpacked buffer, the Metal validation layer has 3
// compute the correct range when checking if the buffer is big enough to contain the // issues.
// data for the whole copy. Instead of looking at the position of the last texel in the //
// buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage / // 1. The metal validation layer doesn't compute the correct range when checking if the
// format.blockHeight) * copySize.depthOrArrayLayers. For example considering the pixel // buffer is big enough to contain the data for the whole copy. Instead of looking at
// buffer below where in memory, each row data (D) of the texture is followed by some // the position of the last texel in the buffer, it computes the volume of the 3D box
// with bytesPerRow * (rowsPerImage / format.blockHeight) * copySize.depthOrArrayLayers.
// For example considering the pixel buffer below where in memory, each row data (D) of
// the texture is followed by some
// padding data (P): // padding data (P):
// |DDDDDDD|PP| // |DDDDDDD|PP|
// |DDDDDDD|PP| // |DDDDDDD|PP|
@ -196,6 +199,28 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
// We work around this limitation by detecting when Metal would complain and copy the // We work around this limitation by detecting when Metal would complain and copy the
// last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage. // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
// 2. Metal requires `destinationBytesPerRow` is less than or equal to the size
// of the maximum texture dimension in bytes.
// 3. Some Metal Drivers (Intel Pre MacOS 13.1?) Incorrectly calculation the size
// needed for the destination buffer. Their calculation is something like
//
// sizeNeeded = bufferOffset + desintationBytesPerImage * numImages +
// destinationBytesPerRow * (numRows - 1) +
// bytesPerPixel * width
//
// where as it should be
//
// sizeNeeded = bufferOffset + desintationBytesPerImage * (numImages - 1) +
// destinationBytesPerRow * (numRows - 1) +
// bytesPerPixel * width
//
// since you won't actually go to the next image if there is only 1 image.
//
// The workaround is if you're only copying a single row then pass 0 for
// destinationBytesPerImage
uint32_t bytesPerImage = bytesPerRow * rowsPerImage; uint32_t bytesPerImage = bytesPerRow * rowsPerImage;
// Metal validation layer requires that if the texture's pixel format is a compressed // Metal validation layer requires that if the texture's pixel format is a compressed
@ -205,32 +230,63 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
const Extent3D clampedCopyExtent = const Extent3D clampedCopyExtent =
texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent); texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
// Check whether buffer size is big enough. // Note: all current GPUs have a 3D texture size limit of 2048 and otherwise 16348
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers; // for non-3D textures except for Apple2 GPUs (iPhone6) which has a non-3D texture
if (!needWorkaround) { // limit of 8192. Dawn doesn't support Apple2 GPUs
copy.count = 1; // See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
copy.copies[0].bufferOffset = bufferOffset; const uint32_t kMetalMax3DTextureDimensions = 2048u;
copy.copies[0].bytesPerRow = bytesPerRow; const uint32_t kMetalMaxNon3DTextureDimensions = 16384u;
copy.copies[0].bytesPerImage = bytesPerImage; uint32_t maxTextureDimension = texture->GetDimension() == wgpu::TextureDimension::e3D
copy.copies[0].textureOrigin = origin; ? kMetalMax3DTextureDimensions
copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height, : kMetalMaxNon3DTextureDimensions;
copyExtent.depthOrArrayLayers}; uint32_t bytesPerPixel = blockInfo.byteSize;
uint32_t maxBytesPerRow = maxTextureDimension * bytesPerPixel;
bool needCopyRowByRow = bytesPerRow > maxBytesPerRow;
if (needCopyRowByRow) {
// handle workaround case 2
// Since we're copying a row at a time bytesPerRow shouldn't matter but just to
// try to have it make sense, pass correct or max valid value
const uint32_t localBytesPerRow = std::min(bytesPerRow, maxBytesPerRow);
const uint32_t localBytesPerImage = 0; // workaround case 3
ASSERT(copyExtent.height % blockInfo.height == 0);
ASSERT(copyExtent.width % blockInfo.width == 0);
const uint32_t blockRows = copyExtent.height / blockInfo.height;
for (uint32_t slice = 0; slice < copyExtent.depthOrArrayLayers; ++slice) {
for (uint32_t blockRow = 0; blockRow < blockRows; ++blockRow) {
copy.push_back(TextureBufferCopySplit::CopyInfo(
bufferOffset + slice * rowsPerImage * bytesPerRow + blockRow * bytesPerRow,
localBytesPerRow, localBytesPerImage,
{origin.x, origin.y + blockRow * blockInfo.height, origin.z + slice},
{clampedCopyExtent.width, blockInfo.height, 1}));
}
}
return copy; return copy;
} }
// Check whether buffer size is big enough.
bool needCopyLastImageAndLastRowSeparately =
bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
if (!needCopyLastImageAndLastRowSeparately) {
const uint32_t localBytesPerImage =
copyExtent.depthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
copy.push_back(TextureBufferCopySplit::CopyInfo(
bufferOffset, bytesPerRow, localBytesPerImage, origin,
{clampedCopyExtent.width, clampedCopyExtent.height, copyExtent.depthOrArrayLayers}));
return copy;
}
// handle workaround case 1
uint64_t currentOffset = bufferOffset; uint64_t currentOffset = bufferOffset;
// Doing all the copy except the last image. // Doing all the copy except the last image.
if (copyExtent.depthOrArrayLayers > 1) { if (copyExtent.depthOrArrayLayers > 1) {
copy.copies[copy.count].bufferOffset = currentOffset; const uint32_t localDepthOrArrayLayers = copyExtent.depthOrArrayLayers - 1;
copy.copies[copy.count].bytesPerRow = bytesPerRow; const uint32_t localBytesPerImage =
copy.copies[copy.count].bytesPerImage = bytesPerImage; localDepthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
copy.copies[copy.count].textureOrigin = origin; copy.push_back(TextureBufferCopySplit::CopyInfo(
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height, currentOffset, bytesPerRow, localBytesPerImage, origin,
copyExtent.depthOrArrayLayers - 1}; {clampedCopyExtent.width, clampedCopyExtent.height, localDepthOrArrayLayers}));
++copy.count;
// Update offset to copy to the last image. // Update offset to copy to the last image.
currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage; currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage;
} }
@ -238,18 +294,13 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
// Doing all the copy in last image except the last row. // Doing all the copy in last image except the last row.
uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height; uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height;
if (copyBlockRowCount > 1) { if (copyBlockRowCount > 1) {
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
origin.z + copyExtent.depthOrArrayLayers - 1};
ASSERT(copyExtent.height - blockInfo.height < ASSERT(copyExtent.height - blockInfo.height <
texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height); texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height);
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, const uint32_t localBytesPerImage = 0; // workaround case 3
copyExtent.height - blockInfo.height, 1}; copy.push_back(TextureBufferCopySplit::CopyInfo(
currentOffset, bytesPerRow, localBytesPerImage,
++copy.count; {origin.x, origin.y, origin.z + copyExtent.depthOrArrayLayers - 1},
{clampedCopyExtent.width, copyExtent.height - blockInfo.height, 1}));
// Update offset to copy to the last row. // Update offset to copy to the last row.
currentOffset += (copyBlockRowCount - 1) * bytesPerRow; currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
@ -258,18 +309,16 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
// Doing the last row copy with the exact number of bytes in last row. // Doing the last row copy with the exact number of bytes in last row.
// Workaround this issue in a way just like the copy to a 1D texture. // Workaround this issue in a way just like the copy to a 1D texture.
uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize; uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize;
uint32_t lastImageDataSize = 0; // workaround case 3
uint32_t lastRowCopyExtentHeight = uint32_t lastRowCopyExtentHeight =
blockInfo.height + clampedCopyExtent.height - copyExtent.height; blockInfo.height + clampedCopyExtent.height - copyExtent.height;
ASSERT(lastRowCopyExtentHeight <= blockInfo.height); ASSERT(lastRowCopyExtentHeight <= blockInfo.height);
copy.copies[copy.count].bufferOffset = currentOffset; copy.push_back(
copy.copies[copy.count].bytesPerRow = lastRowDataSize; TextureBufferCopySplit::CopyInfo(currentOffset, lastRowDataSize, lastImageDataSize,
copy.copies[copy.count].bytesPerImage = lastRowDataSize; {origin.x, origin.y + copyExtent.height - blockInfo.height,
copy.copies[copy.count].textureOrigin = {origin.x, origin.z + copyExtent.depthOrArrayLayers - 1},
origin.y + copyExtent.height - blockInfo.height, {clampedCopyExtent.width, lastRowCopyExtentHeight, 1}));
origin.z + copyExtent.depthOrArrayLayers - 1};
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1};
++copy.count;
return copy; return copy;
} }

View File

@ -119,12 +119,6 @@ crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextur
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba16float";dstFormat="rgba16float";dimension="2d" [ Failure ] crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba16float";dstFormat="rgba16float";dimension="2d" [ Failure ]
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba32float";dstFormat="rgba32float";dimension="2d" [ Failure ] crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba32float";dstFormat="rgba32float";dimension="2d" [ Failure ]
################################################################################
# Failing with Metal validation layers
################################################################################
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="CopyB2T";checkMethod="FullCopyT2B";* [ Failure ]
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="WriteTexture";checkMethod="PartialCopyT2B";* [ Failure ]
################################################################################ ################################################################################
# Large and slow tests # Large and slow tests
# KEEP # KEEP