Implement new formula for requiredBytesInCopy

Changed upstream in:
- https://github.com/gpuweb/gpuweb/pull/1014
- https://github.com/gpuweb/gpuweb/pull/1130

Note that in some of the cases where width==0 || height==0 || depth==0,
this increases the number of linear data bytes required for a copy.
Since this is a corner case, no deprecation logic is added.

Removes a duplicated copy of this logic in TestUtils.cpp.

Bug: dawn:520
Change-Id: I3b3d079c6ef316df7d95ba5c349bf8de4646fa4d
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/30741
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
This commit is contained in:
Kai Ninomiya 2020-10-23 21:21:33 +00:00 committed by Commit Bot service account
parent ca5aa235da
commit c9d0b492d5
10 changed files with 157 additions and 87 deletions

View File

@ -381,47 +381,53 @@ namespace dawn_native {
static_cast<uint64_t>(maxStart);
}
// Multiplies two 32-bit unsigned values using 64-bit arithmetic. The largest possible product,
// (2^32 - 1)^2, is below 2^64, so the result can never wrap. The static_asserts reject any
// argument type other than uint32_t at compile time.
template <typename A, typename B>
DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) {
    static_assert(std::is_same<A, uint32_t>::value, "'a' must be uint32_t");
    static_assert(std::is_same<B, uint32_t>::value, "'b' must be uint32_t");
    // Widen both operands before multiplying so the product is computed in 64 bits.
    const uint64_t lhs = a;
    const uint64_t rhs = b;
    return lhs * rhs;
}
ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo,
const Extent3D& copySize,
uint32_t bytesPerRow,
uint32_t rowsPerImage) {
ASSERT(copySize.width % blockInfo.width == 0);
ASSERT(copySize.height % blockInfo.height == 0);
uint32_t widthInBlocks = copySize.width / blockInfo.width;
uint32_t heightInBlocks = copySize.height / blockInfo.height;
uint64_t bytesInLastRow = Safe32x32(widthInBlocks, blockInfo.byteSize);
uint64_t bytesPerImage = Safe32x32(bytesPerRow, rowsPerImage);
// Default value for rowsPerImage
if (rowsPerImage == 0) {
rowsPerImage = heightInBlocks;
}
ASSERT(rowsPerImage >= heightInBlocks);
if (copySize.height > 1 || copySize.depth > 1) {
ASSERT(bytesPerRow >= copySize.width / blockInfo.width * blockInfo.byteSize);
}
if (copySize.width == 0 || copySize.height == 0 || copySize.depth == 0) {
if (copySize.depth == 0) {
return 0;
}
ASSERT(copySize.height >= 1);
ASSERT(copySize.depth >= 1);
// bytesPerImage won't overflow since we're multiplying two uint32_t numbers
uint64_t bytesPerImage = uint64_t(rowsPerImage) * bytesPerRow;
// Provided that copySize.height > 1: bytesInLastSlice won't overflow since it's at most
// bytesPerImage. Otherwise the result is a multiplication of two uint32_t numbers.
uint64_t bytesInLastSlice =
uint64_t(bytesPerRow) * (heightInBlocks - 1) +
(uint64_t(copySize.width) / blockInfo.width * blockInfo.byteSize);
// This error cannot be thrown for copySize.depth = 1.
// For copySize.depth > 1 we know that:
// requiredBytesInCopy >= (copySize.depth * bytesPerImage) / 2, so if
// copySize.depth * bytesPerImage overflows uint64_t, then requiredBytesInCopy is definitely
// too large to fit in the available data size.
if (std::numeric_limits<uint64_t>::max() / copySize.depth < bytesPerImage) {
return DAWN_VALIDATION_ERROR("requiredBytesInCopy is too large");
// Check for potential overflows for the rest of the computations. We have the following
// inequalities:
//
// lastRowBytes <= bytesPerRow
// heightInBlocks <= rowsPerImage
//
// So:
//
// bytesInLastImage = bytesPerRow * (heightInBlocks - 1) + bytesInLastRow
// <= bytesPerRow * heightInBlocks
// <= bytesPerRow * rowsPerImage
// <= bytesPerImage
//
// This means that if the computation of depth * bytesPerImage doesn't overflow, none of the
// computations for requiredBytesInCopy will. (and it's not a very pessimizing check)
if (bytesPerImage > std::numeric_limits<uint64_t>::max() / copySize.depth) {
return DAWN_VALIDATION_ERROR("requiredBytesInCopy is too large.");
}
return bytesPerImage * (copySize.depth - 1) + bytesInLastSlice;
uint64_t requiredBytesInCopy = bytesPerImage * (copySize.depth - 1);
if (heightInBlocks > 0) {
uint64_t bytesInLastImage = Safe32x32(bytesPerRow, heightInBlocks - 1) + bytesInLastRow;
requiredBytesInCopy += bytesInLastImage;
}
return requiredBytesInCopy;
}
MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer,
@ -436,7 +442,7 @@ namespace dawn_native {
return {};
}
MaybeError ValidateLinearTextureData(const TextureDataLayout& layout,
MaybeError ValidateLinearTextureData(TextureDataLayout layout,
uint64_t byteSize,
const TexelBlockInfo& blockInfo,
const Extent3D& copyExtent) {
@ -445,19 +451,32 @@ namespace dawn_native {
return DAWN_VALIDATION_ERROR("Offset must be a multiple of the texel or block size");
}
ASSERT(copyExtent.width % blockInfo.width == 0);
uint32_t widthInBlocks = copyExtent.width / blockInfo.width;
ASSERT(copyExtent.height % blockInfo.height == 0);
uint32_t heightInBlocks = copyExtent.height / blockInfo.height;
// Default value for rowsPerImage
if (layout.rowsPerImage == 0) {
layout.rowsPerImage = heightInBlocks;
}
// Validation for other members in layout:
if ((copyExtent.height > 1 || copyExtent.depth > 1) &&
layout.bytesPerRow < copyExtent.width / blockInfo.width * blockInfo.byteSize) {
return DAWN_VALIDATION_ERROR(
"bytesPerRow must not be less than the number of bytes per row");
ASSERT(Safe32x32(widthInBlocks, blockInfo.byteSize) <=
std::numeric_limits<uint32_t>::max());
uint32_t lastRowBytes = widthInBlocks * blockInfo.byteSize;
if (lastRowBytes > layout.bytesPerRow) {
if (copyExtent.height > 1 || copyExtent.depth > 1) {
return DAWN_VALIDATION_ERROR("The byte size of a row must be <= bytesPerRow.");
} else {
// bytesPerRow is unused. Populate it with a valid value for later validation.
layout.bytesPerRow = lastRowBytes;
}
}
// TODO(tommek@google.com): to match the spec there should be another condition here
// on rowsPerImage >= copyExtent.height if copyExtent.depth > 1.
ASSERT(copyExtent.height % blockInfo.height == 0);
uint32_t heightInBlocks = copyExtent.height / blockInfo.height;
// Validation for the copy being in-bounds:
if (layout.rowsPerImage != 0 && layout.rowsPerImage < heightInBlocks) {
return DAWN_VALIDATION_ERROR(
@ -476,7 +495,7 @@ namespace dawn_native {
layout.offset <= byteSize && (requiredBytesInCopy <= (byteSize - layout.offset));
if (!fitsInData) {
return DAWN_VALIDATION_ERROR(
"Required size for texture data layout exceeds the given size");
"Required size for texture data layout exceeds the linear data size.");
}
return {};

View File

@ -51,7 +51,7 @@ namespace dawn_native {
uint32_t bytesPerRow,
uint32_t rowsPerImage);
MaybeError ValidateLinearTextureData(const TextureDataLayout& layout,
MaybeError ValidateLinearTextureData(TextureDataLayout layout,
uint64_t byteSize,
const TexelBlockInfo& blockInfo,
const Extent3D& copyExtent);

View File

@ -27,6 +27,7 @@
#include "utils/PlatformDebugLogger.h"
#include "utils/SystemUtils.h"
#include "utils/TerribleCommandBuffer.h"
#include "utils/TestUtils.h"
#include "utils/WGPUHelpers.h"
#include <algorithm>
@ -851,7 +852,10 @@ std::ostringstream& DawnTestBase::AddTextureExpectationImpl(const char* file,
ASSERT(bytesPerRow == Align(bytesPerRow, kTextureBytesPerRowAlignment));
}
uint32_t size = bytesPerRow * (height - 1) + width * dataSize;
uint32_t rowsPerImage = height;
uint32_t depth = 1;
uint32_t size =
utils::RequiredBytesInCopy(bytesPerRow, rowsPerImage, width, height, depth, dataSize);
// TODO(enga): We should have the map async alignment in Constants.h. Also, it should change to 8
// for Float64Array.

View File

@ -864,9 +864,9 @@ TEST_P(BufferZeroInitTest, CopyBufferToTexture) {
const wgpu::TextureCopyView textureCopyView =
utils::CreateTextureCopyView(texture, 0, {0, 0, 0});
const uint32_t requiredBufferSizeForCopy = utils::GetBytesInBufferTextureCopy(
kTextureFormat, kTextureSize.width, kTextureBytesPerRowAlignment, kTextureSize.width,
kTextureSize.depth);
const uint32_t rowsPerImage = kTextureSize.height;
const uint32_t requiredBufferSizeForCopy = utils::RequiredBytesInCopy(
kTextureBytesPerRowAlignment, rowsPerImage, kTextureSize, kTextureFormat);
constexpr wgpu::BufferUsage kBufferUsage =
wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;

View File

@ -52,18 +52,18 @@ class CompressedTextureBCFormatTest : public DawnTest {
std::vector<uint8_t> UploadData(const CopyConfig& copyConfig) {
uint32_t copyWidthInBlock = copyConfig.copyExtent3D.width / kBCBlockWidthInTexels;
uint32_t copyHeightInBlock = copyConfig.copyExtent3D.height / kBCBlockHeightInTexels;
uint32_t rowPitchInBytes = 0;
uint32_t copyBytesPerRow = 0;
if (copyConfig.bytesPerRowAlignment != 0) {
rowPitchInBytes = copyConfig.bytesPerRowAlignment;
copyBytesPerRow = copyConfig.bytesPerRowAlignment;
} else {
rowPitchInBytes = copyWidthInBlock *
copyBytesPerRow = copyWidthInBlock *
utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format);
}
uint32_t copyRowsPerImage = copyConfig.rowsPerImage;
if (copyRowsPerImage == 0) {
copyRowsPerImage = copyHeightInBlock;
}
uint32_t copyBytesPerImage = rowPitchInBytes * copyRowsPerImage;
uint32_t copyBytesPerImage = copyBytesPerRow * copyRowsPerImage;
uint32_t uploadBufferSize =
copyConfig.bufferOffset + copyBytesPerImage * copyConfig.copyExtent3D.depth;
@ -75,7 +75,7 @@ class CompressedTextureBCFormatTest : public DawnTest {
for (uint32_t h = 0; h < copyHeightInBlock; ++h) {
for (uint32_t w = 0; w < copyWidthInBlock; ++w) {
uint32_t uploadBufferOffset = copyConfig.bufferOffset +
copyBytesPerImage * layer + rowPitchInBytes * h +
copyBytesPerImage * layer + copyBytesPerRow * h +
oneBlockCompressedTextureData.size() * w;
std::memcpy(&data[uploadBufferOffset], oneBlockCompressedTextureData.data(),
oneBlockCompressedTextureData.size() * sizeof(uint8_t));

View File

@ -57,13 +57,14 @@ class CopyTests : public DawnTest {
}
static BufferSpec MinimumBufferSpec(uint32_t width,
uint32_t rowsPerImage,
uint32_t height,
uint32_t arrayLayer = 1,
bool testZeroRowsPerImage = true) {
const uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(kTextureFormat, width);
const uint32_t totalBufferSize = utils::GetBytesInBufferTextureCopy(
kTextureFormat, width, bytesPerRow, rowsPerImage, arrayLayer);
uint32_t appliedRowsPerImage = testZeroRowsPerImage ? 0 : rowsPerImage;
const uint32_t rowsPerImage = height;
const uint32_t totalBufferSize = utils::RequiredBytesInCopy(
bytesPerRow, rowsPerImage, {width, height, arrayLayer}, kTextureFormat);
uint32_t appliedRowsPerImage = testZeroRowsPerImage ? 0 : height;
return {totalBufferSize, 0, bytesPerRow, appliedRowsPerImage};
}

View File

@ -145,10 +145,11 @@ TEST_P(TextureZeroInitTest, CopyMultipleTextureArrayLayersToBufferSource) {
wgpu::Texture texture = device.CreateTexture(&descriptor);
const uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(kColorFormat, kSize);
const uint32_t rowsPerImage = kSize;
wgpu::BufferDescriptor bufferDescriptor;
bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
bufferDescriptor.size =
utils::GetBytesInBufferTextureCopy(kColorFormat, kSize, bytesPerRow, kSize, kArrayLayers);
bufferDescriptor.size = utils::RequiredBytesInCopy(bytesPerRow, rowsPerImage,
{kSize, kSize, kArrayLayers}, kColorFormat);
wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor);
const wgpu::BufferCopyView bufferCopyView =

View File

@ -923,6 +923,34 @@ TEST_F(CopyCommandTest_T2B, Success) {
}
}
// Edge cases around requiredBytesInCopy computation for empty copies, i.e. copies whose extent
// has a zero width, height and/or depth.
// NOTE(review): the trailing TestT2BCopy arguments are read here as
// (bufferOffset = 0, bytesPerRow = 256, rowsPerImage = 4, copyExtent) - confirm against the
// helper's signature. The byte counts quoted in the comments below rely on that reading.
TEST_F(CopyCommandTest_T2B, Empty) {
wgpu::Texture source =
Create2DTexture(16, 16, 1, 2, wgpu::TextureFormat::RGBA8Unorm, wgpu::TextureUsage::CopySrc);
// Last extent component is 0: a zero-sized buffer is expected to be accepted whatever the
// first two components are.
TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
CreateBuffer(0, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 0, 0});
TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
CreateBuffer(0, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 0, 0});
TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
CreateBuffer(0, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 4, 0});
// Extent {4, 0, 2}: 1024 bytes (256 * 4 * (2 - 1)) succeed, one byte less fails.
TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
CreateBuffer(1024, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 0, 2});
TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0},
CreateBuffer(1023, wgpu::BufferUsage::CopyDst), 0, 256, 4, {4, 0, 2});
// Extent {0, 4, 2}: 1792 bytes (256 * 4 * (2 - 1) + 256 * (4 - 1) + 0) succeed, one byte less
// fails.
TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
CreateBuffer(1792, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 4, 2});
TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0},
CreateBuffer(1791, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 4, 2});
// Extent {0, 0, 2}: 1024 bytes (256 * 4 * (2 - 1)) succeed, one byte less fails.
TestT2BCopy(utils::Expectation::Success, source, 0, {0, 0, 0},
CreateBuffer(1024, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 0, 2});
TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0},
CreateBuffer(1023, wgpu::BufferUsage::CopyDst), 0, 256, 4, {0, 0, 2});
}
// Test OOB conditions on the texture
TEST_F(CopyCommandTest_T2B, OutOfBoundsOnTexture) {
uint64_t bufferSize = BufferSizeForTextureCopy(4, 4, 1);

View File

@ -25,27 +25,18 @@
namespace utils {
uint32_t GetMinimumBytesPerRow(wgpu::TextureFormat format, uint32_t width) {
const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
return Align(bytesPerTexel * width, kTextureBytesPerRowAlignment);
const uint32_t bytesPerBlock = utils::GetTexelBlockSizeInBytes(format);
return Align(bytesPerBlock * width, kTextureBytesPerRowAlignment);
}
uint32_t GetBytesInBufferTextureCopy(wgpu::TextureFormat format,
uint32_t width,
uint32_t bytesPerRow,
uint32_t rowsPerImage,
uint32_t copyArrayLayerCount) {
ASSERT(rowsPerImage > 0);
const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
const uint32_t bytesAtLastImage = bytesPerRow * (rowsPerImage - 1) + bytesPerTexel * width;
return bytesPerRow * rowsPerImage * (copyArrayLayerCount - 1) + bytesAtLastImage;
}
// TODO(jiawei.shao@intel.com): support compressed texture formats
TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel(
wgpu::TextureFormat format,
wgpu::Extent3D textureSizeAtLevel0,
uint32_t mipmapLevel,
uint32_t rowsPerImage) {
// TODO(jiawei.shao@intel.com): support compressed texture formats
ASSERT(utils::GetTextureFormatBlockWidth(format) == 1);
TextureDataCopyLayout layout;
layout.mipSize = {textureSizeAtLevel0.width >> mipmapLevel,
@ -56,9 +47,17 @@ namespace utils {
uint32_t appliedRowsPerImage = rowsPerImage > 0 ? rowsPerImage : layout.mipSize.height;
layout.bytesPerImage = layout.bytesPerRow * appliedRowsPerImage;
layout.byteLength =
GetBytesInBufferTextureCopy(format, layout.mipSize.width, layout.bytesPerRow,
appliedRowsPerImage, textureSizeAtLevel0.depth);
// TODO(kainino@chromium.org): Remove this intermediate variable.
// It is currently needed because of an issue in the D3D12 copy splitter
// (or maybe in D3D12 itself?) which requires there to be enough room in the
// buffer for the last image to have a height of `rowsPerImage` instead of
// the actual height.
wgpu::Extent3D mipSizeWithHeightWorkaround = layout.mipSize;
mipSizeWithHeightWorkaround.height =
appliedRowsPerImage * utils::GetTextureFormatBlockHeight(format);
layout.byteLength = RequiredBytesInCopy(layout.bytesPerRow, appliedRowsPerImage,
mipSizeWithHeightWorkaround, format);
const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
layout.texelBlocksPerRow = layout.bytesPerRow / bytesPerTexel;
@ -72,18 +71,35 @@ namespace utils {
uint64_t rowsPerImage,
wgpu::Extent3D copyExtent,
wgpu::TextureFormat textureFormat) {
if (copyExtent.width == 0 || copyExtent.height == 0 || copyExtent.depth == 0) {
return 0;
} else {
uint32_t blockSize = utils::GetTexelBlockSizeInBytes(textureFormat);
uint32_t blockWidth = utils::GetTextureFormatBlockWidth(textureFormat);
uint32_t blockHeight = utils::GetTextureFormatBlockHeight(textureFormat);
uint32_t blockSize = utils::GetTexelBlockSizeInBytes(textureFormat);
uint32_t blockWidth = utils::GetTextureFormatBlockWidth(textureFormat);
uint32_t blockHeight = utils::GetTextureFormatBlockHeight(textureFormat);
ASSERT(copyExtent.width % blockWidth == 0);
uint32_t widthInBlocks = copyExtent.width / blockWidth;
ASSERT(copyExtent.height % blockHeight == 0);
uint32_t heightInBlocks = copyExtent.height / blockHeight;
return RequiredBytesInCopy(bytesPerRow, rowsPerImage, widthInBlocks, heightInBlocks,
copyExtent.depth, blockSize);
}
uint64_t bytesPerImage = bytesPerRow * rowsPerImage;
uint64_t bytesInLastSlice = bytesPerRow * (copyExtent.height / blockHeight - 1) +
(copyExtent.width / blockWidth * blockSize);
return bytesPerImage * (copyExtent.depth - 1) + bytesInLastSlice;
// Computes the number of linear-data bytes touched by a copy described in block units.
//
//  - bytesPerRow / rowsPerImage: layout strides of the linear data.
//  - widthInBlocks / heightInBlocks / depth: size of the copy.
//  - bytesPerBlock: byte size of one texel block.
//
// Returns 0 when depth is 0. Otherwise every image except the last one counts for a full
// bytesPerRow * rowsPerImage stride; the last image, when it has at least one row, counts for
// (heightInBlocks - 1) full rows plus one tightly packed row of widthInBlocks blocks.
uint64_t RequiredBytesInCopy(uint64_t bytesPerRow,
                             uint64_t rowsPerImage,
                             uint64_t widthInBlocks,
                             uint64_t heightInBlocks,
                             uint64_t depth,
                             uint64_t bytesPerBlock) {
    // No images at all: nothing is read or written.
    if (depth == 0) {
        return 0;
    }

    // All images but the last occupy a whole image stride each.
    const uint64_t leadingImagesBytes = (bytesPerRow * rowsPerImage) * (depth - 1);

    // A last image without rows contributes nothing (and avoids heightInBlocks - 1 wrapping).
    if (heightInBlocks == 0) {
        return leadingImagesBytes;
    }

    // The final row is not padded out to bytesPerRow.
    const uint64_t leadingRowsBytes = bytesPerRow * (heightInBlocks - 1);
    const uint64_t finalRowBytes = widthInBlocks * bytesPerBlock;
    return leadingImagesBytes + (leadingRowsBytes + finalRowBytes);
}
uint64_t GetTexelCountInCopyRegion(uint64_t bytesPerRow,

View File

@ -30,11 +30,6 @@ namespace utils {
};
uint32_t GetMinimumBytesPerRow(wgpu::TextureFormat format, uint32_t width);
uint32_t GetBytesInBufferTextureCopy(wgpu::TextureFormat format,
uint32_t width,
uint32_t bytesPerRow,
uint32_t rowsPerImage,
uint32_t copyArrayLayerCount);
TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel(
wgpu::TextureFormat format,
wgpu::Extent3D textureSizeAtLevel0,
@ -45,6 +40,12 @@ namespace utils {
uint64_t rowsPerImage,
wgpu::Extent3D copyExtent,
wgpu::TextureFormat textureFormat);
// Returns the number of linear-data bytes covered by a copy expressed in texel blocks.
// The result is 0 when depth is 0. Otherwise it is
//   bytesPerRow * rowsPerImage * (depth - 1)
// plus, only when heightInBlocks is non-zero,
//   bytesPerRow * (heightInBlocks - 1) + widthInBlocks * bytesPerBlock.
uint64_t RequiredBytesInCopy(uint64_t bytesPerRow,
uint64_t rowsPerImage,
uint64_t widthInBlocks,
uint64_t heightInBlocks,
uint64_t depth,
uint64_t bytesPerBlock);
uint64_t GetTexelCountInCopyRegion(uint64_t bytesPerRow,
uint64_t rowsPerImage,