Workaround Metal Validation Issue for copying 3D textures to a buffer.

The metal validation layer complains when copying from a 3D texture to
a buffer if the stride for the destination buffer is larger then 2048
bytes.

Bug: dawn:1430
Change-Id: I6ba4508d71610c35dfb0fab7d2bebc91d37504e3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/113426
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Gregg Tavares <gman@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Brandon Jones <bajones@chromium.org>
This commit is contained in:
Gregg Tavares 2022-12-22 01:30:22 +00:00 committed by Dawn LUCI CQ
parent 0244804193
commit 6609f9cfb9
3 changed files with 110 additions and 55 deletions

View File

@ -15,6 +15,7 @@
#ifndef SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
#define SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
#include "dawn/common/StackContainer.h"
#include "dawn/native/dawn_platform.h"
#include "dawn/native/metal/DeviceMTL.h"
#include "dawn/native/metal/ShaderModuleMTL.h"
@ -34,9 +35,21 @@ namespace dawn::native::metal {
MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction);
struct TextureBufferCopySplit {
static constexpr uint32_t kMaxTextureBufferCopyRegions = 3;
// Avoid allocations except in the worse case. Most cases require at most 3 regions.
static constexpr uint32_t kNumCommonTextureBufferCopyRegions = 3;
struct CopyInfo {
CopyInfo(NSUInteger bufferOffset,
NSUInteger bytesPerRow,
NSUInteger bytesPerImage,
Origin3D textureOrigin,
Extent3D copyExtent)
: bufferOffset(bufferOffset),
bytesPerRow(bytesPerRow),
bytesPerImage(bytesPerImage),
textureOrigin(textureOrigin),
copyExtent(copyExtent) {}
NSUInteger bufferOffset;
NSUInteger bytesPerRow;
NSUInteger bytesPerImage;
@ -44,12 +57,11 @@ struct TextureBufferCopySplit {
Extent3D copyExtent;
};
uint32_t count = 0;
std::array<CopyInfo, kMaxTextureBufferCopyRegions> copies;
StackVector<CopyInfo, kNumCommonTextureBufferCopyRegions> copies;
auto begin() const { return copies.begin(); }
auto end() const { return copies.begin() + count; }
auto begin() const { return copies->begin(); }
auto end() const { return copies->end(); }
void push_back(const CopyInfo& copyInfo) { copies->push_back(copyInfo); }
};
TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,

View File

@ -179,12 +179,15 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
const Format textureFormat = texture->GetFormat();
const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block;
// When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
// compute the correct range when checking if the buffer is big enough to contain the
// data for the whole copy. Instead of looking at the position of the last texel in the
// buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage /
// format.blockHeight) * copySize.depthOrArrayLayers. For example considering the pixel
// buffer below where in memory, each row data (D) of the texture is followed by some
// When copying textures from/to an unpacked buffer, the Metal validation layer has 3
// issues.
//
// 1. The metal validation layer doesn't compute the correct range when checking if the
// buffer is big enough to contain the data for the whole copy. Instead of looking at
// the position of the last texel in the buffer, it computes the volume of the 3D box
// with bytesPerRow * (rowsPerImage / format.blockHeight) * copySize.depthOrArrayLayers.
// For example considering the pixel buffer below where in memory, each row data (D) of
// the texture is followed by some
// padding data (P):
// |DDDDDDD|PP|
// |DDDDDDD|PP|
@ -196,6 +199,28 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
// We work around this limitation by detecting when Metal would complain and copy the
// last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
// 2. Metal requires `destinationBytesPerRow` is less than or equal to the size
// of the maximum texture dimension in bytes.
// 3. Some Metal Drivers (Intel Pre MacOS 13.1?) Incorrectly calculation the size
// needed for the destination buffer. Their calculation is something like
//
// sizeNeeded = bufferOffset + desintationBytesPerImage * numImages +
// destinationBytesPerRow * (numRows - 1) +
// bytesPerPixel * width
//
// where as it should be
//
// sizeNeeded = bufferOffset + desintationBytesPerImage * (numImages - 1) +
// destinationBytesPerRow * (numRows - 1) +
// bytesPerPixel * width
//
// since you won't actually go to the next image if there is only 1 image.
//
// The workaround is if you're only copying a single row then pass 0 for
// destinationBytesPerImage
uint32_t bytesPerImage = bytesPerRow * rowsPerImage;
// Metal validation layer requires that if the texture's pixel format is a compressed
@ -205,32 +230,63 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
const Extent3D clampedCopyExtent =
texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
// Check whether buffer size is big enough.
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
if (!needWorkaround) {
copy.count = 1;
copy.copies[0].bufferOffset = bufferOffset;
copy.copies[0].bytesPerRow = bytesPerRow;
copy.copies[0].bytesPerImage = bytesPerImage;
copy.copies[0].textureOrigin = origin;
copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
copyExtent.depthOrArrayLayers};
// Note: all current GPUs have a 3D texture size limit of 2048 and otherwise 16348
// for non-3D textures except for Apple2 GPUs (iPhone6) which has a non-3D texture
// limit of 8192. Dawn doesn't support Apple2 GPUs
// See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
const uint32_t kMetalMax3DTextureDimensions = 2048u;
const uint32_t kMetalMaxNon3DTextureDimensions = 16384u;
uint32_t maxTextureDimension = texture->GetDimension() == wgpu::TextureDimension::e3D
? kMetalMax3DTextureDimensions
: kMetalMaxNon3DTextureDimensions;
uint32_t bytesPerPixel = blockInfo.byteSize;
uint32_t maxBytesPerRow = maxTextureDimension * bytesPerPixel;
bool needCopyRowByRow = bytesPerRow > maxBytesPerRow;
if (needCopyRowByRow) {
// handle workaround case 2
// Since we're copying a row at a time bytesPerRow shouldn't matter but just to
// try to have it make sense, pass correct or max valid value
const uint32_t localBytesPerRow = std::min(bytesPerRow, maxBytesPerRow);
const uint32_t localBytesPerImage = 0; // workaround case 3
ASSERT(copyExtent.height % blockInfo.height == 0);
ASSERT(copyExtent.width % blockInfo.width == 0);
const uint32_t blockRows = copyExtent.height / blockInfo.height;
for (uint32_t slice = 0; slice < copyExtent.depthOrArrayLayers; ++slice) {
for (uint32_t blockRow = 0; blockRow < blockRows; ++blockRow) {
copy.push_back(TextureBufferCopySplit::CopyInfo(
bufferOffset + slice * rowsPerImage * bytesPerRow + blockRow * bytesPerRow,
localBytesPerRow, localBytesPerImage,
{origin.x, origin.y + blockRow * blockInfo.height, origin.z + slice},
{clampedCopyExtent.width, blockInfo.height, 1}));
}
}
return copy;
}
// Check whether buffer size is big enough.
bool needCopyLastImageAndLastRowSeparately =
bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
if (!needCopyLastImageAndLastRowSeparately) {
const uint32_t localBytesPerImage =
copyExtent.depthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
copy.push_back(TextureBufferCopySplit::CopyInfo(
bufferOffset, bytesPerRow, localBytesPerImage, origin,
{clampedCopyExtent.width, clampedCopyExtent.height, copyExtent.depthOrArrayLayers}));
return copy;
}
// handle workaround case 1
uint64_t currentOffset = bufferOffset;
// Doing all the copy except the last image.
if (copyExtent.depthOrArrayLayers > 1) {
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerImage;
copy.copies[copy.count].textureOrigin = origin;
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
copyExtent.depthOrArrayLayers - 1};
++copy.count;
const uint32_t localDepthOrArrayLayers = copyExtent.depthOrArrayLayers - 1;
const uint32_t localBytesPerImage =
localDepthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
copy.push_back(TextureBufferCopySplit::CopyInfo(
currentOffset, bytesPerRow, localBytesPerImage, origin,
{clampedCopyExtent.width, clampedCopyExtent.height, localDepthOrArrayLayers}));
// Update offset to copy to the last image.
currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage;
}
@ -238,18 +294,13 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
// Doing all the copy in last image except the last row.
uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height;
if (copyBlockRowCount > 1) {
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
origin.z + copyExtent.depthOrArrayLayers - 1};
ASSERT(copyExtent.height - blockInfo.height <
texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height);
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width,
copyExtent.height - blockInfo.height, 1};
++copy.count;
const uint32_t localBytesPerImage = 0; // workaround case 3
copy.push_back(TextureBufferCopySplit::CopyInfo(
currentOffset, bytesPerRow, localBytesPerImage,
{origin.x, origin.y, origin.z + copyExtent.depthOrArrayLayers - 1},
{clampedCopyExtent.width, copyExtent.height - blockInfo.height, 1}));
// Update offset to copy to the last row.
currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
@ -258,18 +309,16 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
// Doing the last row copy with the exact number of bytes in last row.
// Workaround this issue in a way just like the copy to a 1D texture.
uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize;
uint32_t lastImageDataSize = 0; // workaround case 3
uint32_t lastRowCopyExtentHeight =
blockInfo.height + clampedCopyExtent.height - copyExtent.height;
ASSERT(lastRowCopyExtentHeight <= blockInfo.height);
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = lastRowDataSize;
copy.copies[copy.count].bytesPerImage = lastRowDataSize;
copy.copies[copy.count].textureOrigin = {origin.x,
origin.y + copyExtent.height - blockInfo.height,
origin.z + copyExtent.depthOrArrayLayers - 1};
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1};
++copy.count;
copy.push_back(
TextureBufferCopySplit::CopyInfo(currentOffset, lastRowDataSize, lastImageDataSize,
{origin.x, origin.y + copyExtent.height - blockInfo.height,
origin.z + copyExtent.depthOrArrayLayers - 1},
{clampedCopyExtent.width, lastRowCopyExtentHeight, 1}));
return copy;
}

View File

@ -119,12 +119,6 @@ crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextur
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba16float";dstFormat="rgba16float";dimension="2d" [ Failure ]
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba32float";dstFormat="rgba32float";dimension="2d" [ Failure ]
################################################################################
# Failing with Metal validation layers
################################################################################
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="CopyB2T";checkMethod="FullCopyT2B";* [ Failure ]
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="WriteTexture";checkMethod="PartialCopyT2B";* [ Failure ]
################################################################################
# Large and slow tests
# KEEP