Workaround Metal Validation Issue for copying 3D textures to a buffer.
The metal validation layer complains when copying from a 3D texture to a buffer if the stride for the destination buffer is larger then 2048 bytes. Bug: dawn:1430 Change-Id: I6ba4508d71610c35dfb0fab7d2bebc91d37504e3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/113426 Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Gregg Tavares <gman@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: Brandon Jones <bajones@chromium.org>
This commit is contained in:
parent
0244804193
commit
6609f9cfb9
|
@ -15,6 +15,7 @@
|
||||||
#ifndef SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
|
#ifndef SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
|
||||||
#define SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
|
#define SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_
|
||||||
|
|
||||||
|
#include "dawn/common/StackContainer.h"
|
||||||
#include "dawn/native/dawn_platform.h"
|
#include "dawn/native/dawn_platform.h"
|
||||||
#include "dawn/native/metal/DeviceMTL.h"
|
#include "dawn/native/metal/DeviceMTL.h"
|
||||||
#include "dawn/native/metal/ShaderModuleMTL.h"
|
#include "dawn/native/metal/ShaderModuleMTL.h"
|
||||||
|
@ -34,9 +35,21 @@ namespace dawn::native::metal {
|
||||||
MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction);
|
MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction);
|
||||||
|
|
||||||
struct TextureBufferCopySplit {
|
struct TextureBufferCopySplit {
|
||||||
static constexpr uint32_t kMaxTextureBufferCopyRegions = 3;
|
// Avoid allocations except in the worse case. Most cases require at most 3 regions.
|
||||||
|
static constexpr uint32_t kNumCommonTextureBufferCopyRegions = 3;
|
||||||
|
|
||||||
struct CopyInfo {
|
struct CopyInfo {
|
||||||
|
CopyInfo(NSUInteger bufferOffset,
|
||||||
|
NSUInteger bytesPerRow,
|
||||||
|
NSUInteger bytesPerImage,
|
||||||
|
Origin3D textureOrigin,
|
||||||
|
Extent3D copyExtent)
|
||||||
|
: bufferOffset(bufferOffset),
|
||||||
|
bytesPerRow(bytesPerRow),
|
||||||
|
bytesPerImage(bytesPerImage),
|
||||||
|
textureOrigin(textureOrigin),
|
||||||
|
copyExtent(copyExtent) {}
|
||||||
|
|
||||||
NSUInteger bufferOffset;
|
NSUInteger bufferOffset;
|
||||||
NSUInteger bytesPerRow;
|
NSUInteger bytesPerRow;
|
||||||
NSUInteger bytesPerImage;
|
NSUInteger bytesPerImage;
|
||||||
|
@ -44,12 +57,11 @@ struct TextureBufferCopySplit {
|
||||||
Extent3D copyExtent;
|
Extent3D copyExtent;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32_t count = 0;
|
StackVector<CopyInfo, kNumCommonTextureBufferCopyRegions> copies;
|
||||||
std::array<CopyInfo, kMaxTextureBufferCopyRegions> copies;
|
|
||||||
|
|
||||||
auto begin() const { return copies.begin(); }
|
auto begin() const { return copies->begin(); }
|
||||||
|
auto end() const { return copies->end(); }
|
||||||
auto end() const { return copies.begin() + count; }
|
void push_back(const CopyInfo& copyInfo) { copies->push_back(copyInfo); }
|
||||||
};
|
};
|
||||||
|
|
||||||
TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||||
|
|
|
@ -179,12 +179,15 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||||
const Format textureFormat = texture->GetFormat();
|
const Format textureFormat = texture->GetFormat();
|
||||||
const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block;
|
const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block;
|
||||||
|
|
||||||
// When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
|
// When copying textures from/to an unpacked buffer, the Metal validation layer has 3
|
||||||
// compute the correct range when checking if the buffer is big enough to contain the
|
// issues.
|
||||||
// data for the whole copy. Instead of looking at the position of the last texel in the
|
//
|
||||||
// buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage /
|
// 1. The metal validation layer doesn't compute the correct range when checking if the
|
||||||
// format.blockHeight) * copySize.depthOrArrayLayers. For example considering the pixel
|
// buffer is big enough to contain the data for the whole copy. Instead of looking at
|
||||||
// buffer below where in memory, each row data (D) of the texture is followed by some
|
// the position of the last texel in the buffer, it computes the volume of the 3D box
|
||||||
|
// with bytesPerRow * (rowsPerImage / format.blockHeight) * copySize.depthOrArrayLayers.
|
||||||
|
// For example considering the pixel buffer below where in memory, each row data (D) of
|
||||||
|
// the texture is followed by some
|
||||||
// padding data (P):
|
// padding data (P):
|
||||||
// |DDDDDDD|PP|
|
// |DDDDDDD|PP|
|
||||||
// |DDDDDDD|PP|
|
// |DDDDDDD|PP|
|
||||||
|
@ -196,6 +199,28 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||||
|
|
||||||
// We work around this limitation by detecting when Metal would complain and copy the
|
// We work around this limitation by detecting when Metal would complain and copy the
|
||||||
// last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
|
// last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
|
||||||
|
|
||||||
|
// 2. Metal requires `destinationBytesPerRow` is less than or equal to the size
|
||||||
|
// of the maximum texture dimension in bytes.
|
||||||
|
|
||||||
|
// 3. Some Metal Drivers (Intel Pre MacOS 13.1?) Incorrectly calculation the size
|
||||||
|
// needed for the destination buffer. Their calculation is something like
|
||||||
|
//
|
||||||
|
// sizeNeeded = bufferOffset + desintationBytesPerImage * numImages +
|
||||||
|
// destinationBytesPerRow * (numRows - 1) +
|
||||||
|
// bytesPerPixel * width
|
||||||
|
//
|
||||||
|
// where as it should be
|
||||||
|
//
|
||||||
|
// sizeNeeded = bufferOffset + desintationBytesPerImage * (numImages - 1) +
|
||||||
|
// destinationBytesPerRow * (numRows - 1) +
|
||||||
|
// bytesPerPixel * width
|
||||||
|
//
|
||||||
|
// since you won't actually go to the next image if there is only 1 image.
|
||||||
|
//
|
||||||
|
// The workaround is if you're only copying a single row then pass 0 for
|
||||||
|
// destinationBytesPerImage
|
||||||
|
|
||||||
uint32_t bytesPerImage = bytesPerRow * rowsPerImage;
|
uint32_t bytesPerImage = bytesPerRow * rowsPerImage;
|
||||||
|
|
||||||
// Metal validation layer requires that if the texture's pixel format is a compressed
|
// Metal validation layer requires that if the texture's pixel format is a compressed
|
||||||
|
@ -205,32 +230,63 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||||
const Extent3D clampedCopyExtent =
|
const Extent3D clampedCopyExtent =
|
||||||
texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
|
texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
|
||||||
|
|
||||||
// Check whether buffer size is big enough.
|
// Note: all current GPUs have a 3D texture size limit of 2048 and otherwise 16348
|
||||||
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
|
// for non-3D textures except for Apple2 GPUs (iPhone6) which has a non-3D texture
|
||||||
if (!needWorkaround) {
|
// limit of 8192. Dawn doesn't support Apple2 GPUs
|
||||||
copy.count = 1;
|
// See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||||
copy.copies[0].bufferOffset = bufferOffset;
|
const uint32_t kMetalMax3DTextureDimensions = 2048u;
|
||||||
copy.copies[0].bytesPerRow = bytesPerRow;
|
const uint32_t kMetalMaxNon3DTextureDimensions = 16384u;
|
||||||
copy.copies[0].bytesPerImage = bytesPerImage;
|
uint32_t maxTextureDimension = texture->GetDimension() == wgpu::TextureDimension::e3D
|
||||||
copy.copies[0].textureOrigin = origin;
|
? kMetalMax3DTextureDimensions
|
||||||
copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
|
: kMetalMaxNon3DTextureDimensions;
|
||||||
copyExtent.depthOrArrayLayers};
|
uint32_t bytesPerPixel = blockInfo.byteSize;
|
||||||
|
uint32_t maxBytesPerRow = maxTextureDimension * bytesPerPixel;
|
||||||
|
|
||||||
|
bool needCopyRowByRow = bytesPerRow > maxBytesPerRow;
|
||||||
|
if (needCopyRowByRow) {
|
||||||
|
// handle workaround case 2
|
||||||
|
// Since we're copying a row at a time bytesPerRow shouldn't matter but just to
|
||||||
|
// try to have it make sense, pass correct or max valid value
|
||||||
|
const uint32_t localBytesPerRow = std::min(bytesPerRow, maxBytesPerRow);
|
||||||
|
const uint32_t localBytesPerImage = 0; // workaround case 3
|
||||||
|
ASSERT(copyExtent.height % blockInfo.height == 0);
|
||||||
|
ASSERT(copyExtent.width % blockInfo.width == 0);
|
||||||
|
const uint32_t blockRows = copyExtent.height / blockInfo.height;
|
||||||
|
for (uint32_t slice = 0; slice < copyExtent.depthOrArrayLayers; ++slice) {
|
||||||
|
for (uint32_t blockRow = 0; blockRow < blockRows; ++blockRow) {
|
||||||
|
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||||
|
bufferOffset + slice * rowsPerImage * bytesPerRow + blockRow * bytesPerRow,
|
||||||
|
localBytesPerRow, localBytesPerImage,
|
||||||
|
{origin.x, origin.y + blockRow * blockInfo.height, origin.z + slice},
|
||||||
|
{clampedCopyExtent.width, blockInfo.height, 1}));
|
||||||
|
}
|
||||||
|
}
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check whether buffer size is big enough.
|
||||||
|
bool needCopyLastImageAndLastRowSeparately =
|
||||||
|
bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
|
||||||
|
if (!needCopyLastImageAndLastRowSeparately) {
|
||||||
|
const uint32_t localBytesPerImage =
|
||||||
|
copyExtent.depthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
|
||||||
|
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||||
|
bufferOffset, bytesPerRow, localBytesPerImage, origin,
|
||||||
|
{clampedCopyExtent.width, clampedCopyExtent.height, copyExtent.depthOrArrayLayers}));
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle workaround case 1
|
||||||
uint64_t currentOffset = bufferOffset;
|
uint64_t currentOffset = bufferOffset;
|
||||||
|
|
||||||
// Doing all the copy except the last image.
|
// Doing all the copy except the last image.
|
||||||
if (copyExtent.depthOrArrayLayers > 1) {
|
if (copyExtent.depthOrArrayLayers > 1) {
|
||||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
const uint32_t localDepthOrArrayLayers = copyExtent.depthOrArrayLayers - 1;
|
||||||
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
const uint32_t localBytesPerImage =
|
||||||
copy.copies[copy.count].bytesPerImage = bytesPerImage;
|
localDepthOrArrayLayers == 1 ? 0 : bytesPerImage; // workaround case 3
|
||||||
copy.copies[copy.count].textureOrigin = origin;
|
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||||
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
|
currentOffset, bytesPerRow, localBytesPerImage, origin,
|
||||||
copyExtent.depthOrArrayLayers - 1};
|
{clampedCopyExtent.width, clampedCopyExtent.height, localDepthOrArrayLayers}));
|
||||||
|
|
||||||
++copy.count;
|
|
||||||
|
|
||||||
// Update offset to copy to the last image.
|
// Update offset to copy to the last image.
|
||||||
currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage;
|
currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage;
|
||||||
}
|
}
|
||||||
|
@ -238,18 +294,13 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||||
// Doing all the copy in last image except the last row.
|
// Doing all the copy in last image except the last row.
|
||||||
uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height;
|
uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height;
|
||||||
if (copyBlockRowCount > 1) {
|
if (copyBlockRowCount > 1) {
|
||||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
|
||||||
copy.copies[copy.count].bytesPerRow = bytesPerRow;
|
|
||||||
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
|
|
||||||
copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
|
|
||||||
origin.z + copyExtent.depthOrArrayLayers - 1};
|
|
||||||
|
|
||||||
ASSERT(copyExtent.height - blockInfo.height <
|
ASSERT(copyExtent.height - blockInfo.height <
|
||||||
texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height);
|
texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height);
|
||||||
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width,
|
const uint32_t localBytesPerImage = 0; // workaround case 3
|
||||||
copyExtent.height - blockInfo.height, 1};
|
copy.push_back(TextureBufferCopySplit::CopyInfo(
|
||||||
|
currentOffset, bytesPerRow, localBytesPerImage,
|
||||||
++copy.count;
|
{origin.x, origin.y, origin.z + copyExtent.depthOrArrayLayers - 1},
|
||||||
|
{clampedCopyExtent.width, copyExtent.height - blockInfo.height, 1}));
|
||||||
|
|
||||||
// Update offset to copy to the last row.
|
// Update offset to copy to the last row.
|
||||||
currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
|
currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
|
||||||
|
@ -258,18 +309,16 @@ TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
|
||||||
// Doing the last row copy with the exact number of bytes in last row.
|
// Doing the last row copy with the exact number of bytes in last row.
|
||||||
// Workaround this issue in a way just like the copy to a 1D texture.
|
// Workaround this issue in a way just like the copy to a 1D texture.
|
||||||
uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize;
|
uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize;
|
||||||
|
uint32_t lastImageDataSize = 0; // workaround case 3
|
||||||
uint32_t lastRowCopyExtentHeight =
|
uint32_t lastRowCopyExtentHeight =
|
||||||
blockInfo.height + clampedCopyExtent.height - copyExtent.height;
|
blockInfo.height + clampedCopyExtent.height - copyExtent.height;
|
||||||
ASSERT(lastRowCopyExtentHeight <= blockInfo.height);
|
ASSERT(lastRowCopyExtentHeight <= blockInfo.height);
|
||||||
|
|
||||||
copy.copies[copy.count].bufferOffset = currentOffset;
|
copy.push_back(
|
||||||
copy.copies[copy.count].bytesPerRow = lastRowDataSize;
|
TextureBufferCopySplit::CopyInfo(currentOffset, lastRowDataSize, lastImageDataSize,
|
||||||
copy.copies[copy.count].bytesPerImage = lastRowDataSize;
|
{origin.x, origin.y + copyExtent.height - blockInfo.height,
|
||||||
copy.copies[copy.count].textureOrigin = {origin.x,
|
origin.z + copyExtent.depthOrArrayLayers - 1},
|
||||||
origin.y + copyExtent.height - blockInfo.height,
|
{clampedCopyExtent.width, lastRowCopyExtentHeight, 1}));
|
||||||
origin.z + copyExtent.depthOrArrayLayers - 1};
|
|
||||||
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1};
|
|
||||||
++copy.count;
|
|
||||||
|
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
|
@ -119,12 +119,6 @@ crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextur
|
||||||
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba16float";dstFormat="rgba16float";dimension="2d" [ Failure ]
|
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba16float";dstFormat="rgba16float";dimension="2d" [ Failure ]
|
||||||
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba32float";dstFormat="rgba32float";dimension="2d" [ Failure ]
|
crbug.com/dawn/1107 [ intel mac ] webgpu:api,operation,command_buffer,copyTextureToTexture:color_textures,non_compressed,non_array:srcFormat="rgba32float";dstFormat="rgba32float";dimension="2d" [ Failure ]
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# Failing with Metal validation layers
|
|
||||||
################################################################################
|
|
||||||
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="CopyB2T";checkMethod="FullCopyT2B";* [ Failure ]
|
|
||||||
crbug.com/dawn/1430 [ mac dawn-backend-validation ] webgpu:api,operation,command_buffer,image_copy:rowsPerImage_and_bytesPerRow:initMethod="WriteTexture";checkMethod="PartialCopyT2B";* [ Failure ]
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# Large and slow tests
|
# Large and slow tests
|
||||||
# KEEP
|
# KEEP
|
||||||
|
|
Loading…
Reference in New Issue