Workaround Metal Validation Issue for copying 3D textures to a buffer.
The Metal validation layer complains when copying from a 3D texture to a buffer if the stride for the destination buffer is larger than 2048 bytes. Bug: dawn:1430 Change-Id: I6ba4508d71610c35dfb0fab7d2bebc91d37504e3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/113426 Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Gregg Tavares <gman@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: Brandon Jones <bajones@chromium.org>
This commit is contained in:
parent
0244804193
commit
6609f9cfb9
|
@ -15,6 +15,7 @@
|
|||
#ifndef SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
|
||||
#define SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
|
||||
|
||||
#include "dawn/common/StackContainer.h"
|
||||
#include "dawn/native/dawn_platform.h"
|
||||
#include "dawn/native/metal/DeviceMTL.h"
|
||||
#include "dawn/native/metal/ShaderModuleMTL.h"
|
||||
|
@ -34,9 +35,21 @@ namespace dawn::native::metal {
|
|||
MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction);
|
||||
|
||||
struct TextureBufferCopySplit {
|
||||
static constexpr uint32_t kMaxTextureBufferCopyRegions = 3;
|
||||
// Avoid allocations except in the worst case. Most cases require at most 3 regions.
|
||||
static constexpr uint32_t kNumCommonTextureBufferCopyRegions = 3;
|
||||
|
||||
struct CopyInfo {
|
||||
CopyInfo(NSUInteger bufferOffset,
|
||||
NSUInteger bytesPerRow,
|
||||
NSUInteger bytesPerImage,
|
||||
Origin3D textureOrigin,
|
||||
Extent3D copyExtent)
|
||||
: bufferOffset(bufferOffset),
|
||||
bytesPerRow(bytesPerRow),
|
||||
bytesPerImage(bytesPerImage),
|
||||
textureOrigin(textureOrigin),
|
||||
copyExtent(copyExtent) {}
|
||||
|
||||
NSUInteger bufferOffset;
|
||||
NSUInteger bytesPerRow;
|
||||
NSUInteger bytesPerImage;
|
||||
|
@ -44,12 +57,11 @@ struct TextureBufferCopySplit {
|
|||
Extent3D copyExtent;
|
||||
};
|
||||
|
||||
uint32_t count = 0;
|
||||
std::array<CopyInfo, kMaxTextureBufferCopyRegions> copies;
|
||||
StackVector<CopyInfo, kNumCommonTextureBufferCopyRegions> copies;
|
||||
|
||||
auto begin() const { return copies.begin(); }
|
||||
|
||||
auto end() const { return copies.begin() + count; }
|
||||
auto begin() const { return copies->begin(); }
|
||||
auto end() const { return copies->end(); }
|
||||
void push_back(const CopyInfo& copyInfo) { copies->push_back(copyInfo); }
|
||||
};
|
||||
|
||||
TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||
|
|
|
@ -179,12 +179,15 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
|||
const Format textureFormat = texture->GetFormat();
|
||||
const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block;
|
||||
|
||||
// When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
|
||||
// compute the correct range when checking if the buffer is big enough to contain the
|
||||
// data for the whole copy. Instead of looking at the position of the last texel in the
|
||||
// buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage /
|
||||
// format.blockHeight) * copySize.depthOrArrayLayers. For example considering the pixel
|
||||
// buffer below where in memory, each row data (D) of the texture is followed by some
|
||||
// When copying textures from/to an unpacked buffer, the Metal validation layer has 3
|
||||
// issues.
|
||||
//
|
||||
// 1. The metal validation layer doesn't compute the correct range when checking if the
|
||||
// buffer is big enough to contain the data for the whole copy. Instead of looking at
|
||||
// the position of the last texel in the buffer, it computes the volume of the 3D box
|
||||
// with bytesPerRow * (rowsPerImage / format.blockHeight) * copySize.depthOrArrayLayers.
|
||||
// For example considering the pixel buffer below where in memory, each row data (D) of
|
||||
// the texture is followed by some
|
||||
// padding data (P):
|
||||
// |DDDDDDD|PP|
|
||||
// |DDDDDDD|PP|
|
||||
|
@ -196,6 +199,28 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
|||
|
||||
// We work around this limitation by detecting when Metal would complain and copy the
|
||||
// last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
|
||||
|
||||
// 2. Metal requires that `destinationBytesPerRow` be less than or equal to the size
|
||||
// of the maximum texture dimension in bytes.
|
||||
|
||||
// 3. Some Metal drivers (Intel, pre-macOS 13.1?) incorrectly calculate the size
|
||||
// needed for the destination buffer. Their calculation is something like
|
||||
//
|
||||
// sizeNeeded = bufferOffset + destinationBytesPerImage * numImages +
|
||||
// destinationBytesPerRow * (numRows - 1) +
|
||||
// bytesPerPixel * width
|
||||
//
|
||||
// whereas it should be
|
||||
//
|
||||
// sizeNeeded = bufferOffset + destinationBytesPerImage * (numImages - 1) +
|
||||
// destinationBytesPerRow * (numRows - 1) +
|
||||
// bytesPerPixel * width
|
||||
//
|
||||
// since you won't actually go to the next image if there is only 1 image.
|
||||
//
|
||||
// The workaround is if you're only copying a single row then pass 0 for
|
||||
// destinationBytesPerImage
|
||||
|
||||
uint32_t bytesPerImage = bytesPerRow * rowsPerImage;
|
||||
|
||||
// Metal validation layer requires that if the texture's pixel format is a compressed
|
||||
|
@ -205,32 +230,63 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
|||
const Extent3D clampedCopyExtent =
|
||||
texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
|
||||
|
||||
// Check whether buffer size is big enough.
|
||||
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
|
||||
if (!needWorkaround) {
|
||||
copy.count = 1;
|
||||
copy.copies[0].bufferOffset = bufferOffset;
|
||||
copy.copies[0].bytesPerRow = bytesPerRow;
|
||||
copy.copies[0].bytesPerImage = bytesPerImage;
|
||||
copy.copies[0].textureOrigin = origin;
|
||||
copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
|
||||
copyExtent.depthOrArrayLayers};
|
||||
// Note: all current GPUs have a 3D texture size limit of 2048 and otherwise 16384
|
||||
// for non-3D textures except for Apple2 GPUs (iPhone6) which have a non-3D texture
|
||||
// limit of 8192. Dawn doesn't support Apple2 GPUs
|
||||
// See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||
const uint32_t kMetalMax3DTextureDimensions = 2048u;
|
||||
const uint32_t kMetalMaxNon3DTextureDimensions = 16384u;
|
||||
uint32_t maxTextureDimension = texture->GetDimension() == wgpu::TextureDimension::e3D
|
||||
? kMetalMax3DTextureDimensions
|
||||
: kMetalMaxNon3DTextureDimensions;
|
||||
uint32_t bytesPerPixel = blockInfo.byteSize;
|
||||
uint32_t maxBytesPerRow = maxTextureDimension * bytesPerPixel;
|
||||
|
||||
bool needCopyRowByRow = bytesPerRow > maxBytesPerRow;
|
||||
if (needCopyRowByRow) {
|
||||
// handle workaround case 2
|
||||
// Since we're copying a row at a time bytesPerRow shouldn't matter but just to
|
||||
// try to have it make sense, pass correct or max valid value
|
||||
const uint32_t localBytesPerRow = std::min(bytesPerRow, maxBytesPerRow);
|
||||
const uint32_t localBytesPerImage = 0; // workaround case 3
|
||||
ASSERT(copyExtent.height % blockInfo.height == 0);
|
||||
ASSERT(copyExtent.width % blockInfo.width == 0);
|
||||
const uint32_t blockRows = copyExtent.height / blockInfo.height;
|
||||
for (uint32_t slice = 0; slice < copyExtent.depthOrArrayLayers; ++slice) {
|
||||
for (uint32_t blockRow = 0; blockRow < blockRows; ++blockRow) {
|
||||
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||
bufferOffset + slice * rowsPerImage * bytesPerRow + blockRow * bytesPerRow,
|
||||
localBytesPerRow, localBytesPerImage,
|
||||
{origin.x, origin.y + blockRow * blockInfo.height, origin.z + slice},
|
||||
{clampedCopyExtent.width, blockInfo.height, 1}));
|
||||
}
|
||||
}
|
||||
return copy;
|
||||
}
|
||||
|
||||
// Check whether buffer size is big enough.
|
||||
bool needCopyLastImageAndLastRowSeparately =
|
||||
bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
|
||||
if (!needCopyLastImageAndLastRowSeparately) {
|
||||
const uint32_t localBytesPerImage =
|
||||
copyExtent.depthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
|
||||
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||
bufferOffset, bytesPerRow, localBytesPerImage, origin,
|
||||
{clampedCopyExtent.width, clampedCopyExtent.height, copyExtent.depthOrArrayLayers}));
|
||||
return copy;
|
||||
}
|
||||
|
||||
// handle workaround case 1
|
||||
uint64_t currentOffset = bufferOffset;
|
||||
|
||||
// Doing all the copy except the last image.
|
||||
if (copyExtent.depthOrArrayLayers > 1) {
|
||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
||||
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
||||
copy.copies[copy.count].bytesPerImage = bytesPerImage;
|
||||
copy.copies[copy.count].textureOrigin = origin;
|
||||
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
|
||||
copyExtent.depthOrArrayLayers - 1};
|
||||
|
||||
++copy.count;
|
||||
|
||||
const uint32_t localDepthOrArrayLayers = copyExtent.depthOrArrayLayers - 1;
|
||||
const uint32_t localBytesPerImage =
|
||||
localDepthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
|
||||
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||
currentOffset, bytesPerRow, localBytesPerImage, origin,
|
||||
{clampedCopyExtent.width, clampedCopyExtent.height, localDepthOrArrayLayers}));
|
||||
// Update offset to copy to the last image.
|
||||
currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage;
|
||||
}
|
||||
|
@ -238,18 +294,13 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
|||
// Doing all the copy in last image except the last row.
|
||||
uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height;
|
||||
if (copyBlockRowCount > 1) {
|
||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
||||
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
||||
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
|
||||
copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
|
||||
origin.z + copyExtent.depthOrArrayLayers - 1};
|
||||
|
||||
ASSERT(copyExtent.height - blockInfo.height <
|
||||
texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height);
|
||||
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width,
|
||||
copyExtent.height - blockInfo.height, 1};
|
||||
|
||||
++copy.count;
|
||||
const uint32_t localBytesPerImage = 0; // workaround case 3
|
||||
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||
currentOffset, bytesPerRow, localBytesPerImage,
|
||||
{origin.x, origin.y, origin.z + copyExtent.depthOrArrayLayers - 1},
|
||||
{clampedCopyExtent.width, copyExtent.height - blockInfo.height, 1}));
|
||||
|
||||
// Update offset to copy to the last row.
|
||||
currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
|
||||
|
@ -258,18 +309,16 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
|||
// Doing the last row copy with the exact number of bytes in last row.
|
||||
// Work around this issue in the same way as the copy to a 1D texture.
|
||||
uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize;
|
||||
uint32_t lastImageDataSize = 0; // workaround case 3
|
||||
uint32_t lastRowCopyExtentHeight =
|
||||
blockInfo.height + clampedCopyExtent.height - copyExtent.height;
|
||||
ASSERT(lastRowCopyExtentHeight <= blockInfo.height);
|
||||
|
||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
||||
copy.copies[copy.count].bytesPerRow = lastRowDataSize;
|
||||
copy.copies[copy.count].bytesPerImage = lastRowDataSize;
|
||||
copy.copies[copy.count].textureOrigin = {origin.x,
|
||||
origin.y + copyExtent.height - blockInfo.height,
|
||||
origin.z + copyExtent.depthOrArrayLayers - 1};
|
||||
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1};
|
||||
++copy.count;
|
||||
copy.push_back(
|
||||
TextureBufferCopySplit::CopyInfo(currentOffset, lastRowDataSize, lastImageDataSize,
|
||||
{origin.x, origin.y + copyExtent.height - blockInfo.height,
|
||||
origin.z + copyExtent.depthOrArrayLayers - 1},
|
||||
{clampedCopyExtent.width, lastRowCopyExtentHeight, 1}));
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
|
|
@ -119,12 +119,6 @@ crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextur
|
|||
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba16float";dstFormat="rgba16float";dimension="2d" [ Failure ]
|
||||
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba32float";dstFormat="rgba32float";dimension="2d" [ Failure ]
|
||||
|
||||
################################################################################
|
||||
# Failing with Metal validation layers
|
||||
################################################################################
|
||||
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="CopyB2T";checkMethod="FullCopyT2B";* [ Failure ]
|
||||
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="WriteTexture";checkMethod="PartialCopyT2B";* [ Failure ]
|
||||
|
||||
################################################################################
|
||||
# Large and slow tests
|
||||
# KEEP
|
||||
|
|
Loading…
Reference in New Issue