Impl 3DTexture copy splitter for empty first row issue on D3D12

If alignment adjustment leaves an empty row at the beginning of a copy
region, the copy region produced by the 2D texture splitter is incorrect
for 3D textures, because every depth slice except the first one would
wrongly skip one row. We need to recompute such a copy region by
modifying it and adding two more copy regions to handle the empty
first row.

The idea of recomputation is:
  - modify this copy region and don't copy the last row in order to
    make its bufferSize.height not exceed rowsPerImage,
  - use one more copy region to handle the last row of each depth
    slice except the last depth slice,
  - use another copy region to handle the last row of the last depth
    slice.

Bug: dawn:547

Change-Id: Ib2f6019963ed29d62a9f13d7316b5f04801db8c9
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/52341
Commit-Queue: Yunchao He <yunchao.he@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Yunchao He 2021-06-08 19:56:17 +00:00 committed by Dawn LUCI CQ
parent f858843fa7
commit 84d200e6dd
3 changed files with 176 additions and 26 deletions

View File

@ -31,8 +31,26 @@ namespace dawn_native { namespace d3d12 {
return {byteOffsetX / blockInfo.byteSize * blockInfo.width, return {byteOffsetX / blockInfo.byteSize * blockInfo.width,
byteOffsetY / bytesPerRow * blockInfo.height, 0}; byteOffsetY / bytesPerRow * blockInfo.height, 0};
} }
// Returns the byte offset in the buffer of the first texel that a copy region
// actually copies: the region's aligned base offset plus the byte distance
// described by its texel-space |bufferOffset|. This is the inverse of the
// texel-offset computation done when a byte offset is split into an aligned
// offset and a texel offset.
//
//   blockInfo     - texel block dimensions and byte size of the format.
//   bytesPerRow   - byte pitch between consecutive block rows in the buffer.
//   alignedOffset - aligned byte offset the copy region starts from.
//   bufferOffset  - texel offset of the copied data from |alignedOffset|;
//                   its z component must be 0.
uint64_t OffsetToFirstCopiedTexel(const TexelBlockInfo& blockInfo,
                                  uint32_t bytesPerRow,
                                  uint64_t alignedOffset,
                                  Origin3D bufferOffset) {
    ASSERT(bufferOffset.z == 0);

    // Bytes skipped horizontally inside the first copied row.
    const auto bytesIntoRow = bufferOffset.x * blockInfo.byteSize / blockInfo.width;
    // Bytes skipped by the whole block rows that precede the first copied row.
    const auto bytesOfSkippedRows = bufferOffset.y * bytesPerRow / blockInfo.height;

    return alignedOffset + bytesIntoRow + bytesOfSkippedRows;
}
// Rounds |offset| down to the closest multiple of
// D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT that does not exceed it.
//
// The parameter is 64-bit on purpose: callers pass 64-bit buffer offsets, and
// a narrower parameter would silently truncate offsets of 4 GiB and above
// before the mask is applied.
//
// NOTE(review): the mask trick assumes the alignment constant is a power of
// two (it is expected to be 512) -- confirm against the D3D12 headers.
uint64_t AlignDownForDataPlacement(uint64_t offset) {
    return offset & ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1);
}
} // namespace } // namespace
// Reserves the next unused entry of |copies| and returns a pointer to it,
// bumping |count|. The caller is expected to fill in the returned entry.
// Asserts that the fixed-size array still has room.
TextureCopySubresource::CopyInfo* TextureCopySubresource::AddCopy() {
    ASSERT(count < kMaxTextureCopyRegions);
    CopyInfo* const slot = &copies[count];
    ++count;
    return slot;
}
TextureCopySubresource Compute2DTextureCopySubresource(Origin3D origin, TextureCopySubresource Compute2DTextureCopySubresource(Origin3D origin,
Extent3D copySize, Extent3D copySize,
const TexelBlockInfo& blockInfo, const TexelBlockInfo& blockInfo,
@ -44,8 +62,7 @@ namespace dawn_native { namespace d3d12 {
// The copies must be 512-aligned. To do this, we calculate the first 512-aligned address // The copies must be 512-aligned. To do this, we calculate the first 512-aligned address
// preceding our data. // preceding our data.
uint64_t alignedOffset = uint64_t alignedOffset = AlignDownForDataPlacement(offset);
offset & ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1);
// If the provided offset to the data was already 512-aligned, we can simply copy the data // If the provided offset to the data was already 512-aligned, we can simply copy the data
// without further translation. // without further translation.
@ -182,8 +199,7 @@ namespace dawn_native { namespace d3d12 {
uint64_t offsetForCopy1 = uint64_t offsetForCopy1 =
offset + copy.copies[0].copySize.width / blockInfo.width * blockInfo.byteSize; offset + copy.copies[0].copySize.width / blockInfo.width * blockInfo.byteSize;
uint64_t alignedOffsetForCopy1 = uint64_t alignedOffsetForCopy1 = AlignDownForDataPlacement(offsetForCopy1);
offsetForCopy1 & ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1);
Origin3D texelOffsetForCopy1 = ComputeTexelOffsets( Origin3D texelOffsetForCopy1 = ComputeTexelOffsets(
blockInfo, static_cast<uint32_t>(offsetForCopy1 - alignedOffsetForCopy1), bytesPerRow); blockInfo, static_cast<uint32_t>(offsetForCopy1 - alignedOffsetForCopy1), bytesPerRow);
@ -257,6 +273,148 @@ namespace dawn_native { namespace d3d12 {
return copies; return copies;
} }
// Rewrites copy region |i| of |copy|, which has an empty first row caused by the alignment
// adjustment, into three copy regions that are correct for a 3D texture. Region |i| is
// modified in place and two more regions are appended to |copy| via AddCopy().
//
//   origin       - origin of the copy in the texture, in texels.
//   copySize     - extent of the whole copy, in texels.
//   blockInfo    - texel block dimensions and byte size of the texture format.
//   bytesPerRow  - byte pitch between consecutive block rows in the buffer.
//   rowsPerImage - number of block rows per depth slice in the buffer.
//   copy         - the copy regions computed so far; must have room for two more entries.
//   i            - index in |copy| of the region to fix.
//
// NOTE(review): a copy that is a single block row high (copySize.height == blockInfo.height)
// is not handled here: copy 0 becomes empty and copy1->bufferOffset.y underflows. Callers
// need to special-case it.
void Recompute3DTextureCopyRegionsForBlockWithEmptyFirstRow(Origin3D origin,
                                                            Extent3D copySize,
                                                            const TexelBlockInfo& blockInfo,
                                                            uint32_t bytesPerRow,
                                                            uint32_t rowsPerImage,
                                                            TextureCopySubresource& copy,
                                                            uint32_t i) {
    // Let's assign data and show why the copy region generated by the 2D splitter is
    // incorrect if there is an empty row at the beginning of the copy block.
    // Assuming that bytesPerRow is 256 and we are doing a B2T copy, and copy size is {width: 2,
    // height: 4, depthOrArrayLayers: 3}. Then the data layout in buffer is demonstrated
    // as below:
    //
    //               |<----- bytes per row ------>|
    //
    //               |----------------------------|
    //  row (N - 1)  |                            |
    //  row N        |                 ++~~~~~~~~~|
    //  row (N + 1)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 2)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 3)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 4)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 5)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 6)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 7)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 8)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 9)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 10) |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
    //  row (N + 11) |~~~~~~~~~~~~~~~~~++         |
    //               |----------------------------|
    //
    // The copy we mean to do is the following:
    //
    //   - image 0: row N to row (N + 3),
    //   - image 1: row (N + 4) to row (N + 7),
    //   - image 2: row (N + 8) to row (N + 11).
    //
    // Note that alignedOffset is at the beginning of row (N - 1), while buffer offset makes
    // the copy start at row N. Row (N - 1) is the empty row between alignedOffset and offset.
    //
    // The 2D copy region of image 0 we received from Compute2DTextureCopySubresource() is
    // the following:
    //
    //              |-------------------|
    //  row (N - 1) |                   |
    //  row N       |                 ++|
    //  row (N + 1) |~~~~~~~~~~~~~~~~~++|
    //  row (N + 2) |~~~~~~~~~~~~~~~~~++|
    //  row (N + 3) |~~~~~~~~~~~~~~~~~++|
    //              |-------------------|
    //
    // However, if we simply expand the copy region of image 0 to all depth ranges of a 3D
    // texture, we will copy 5 rows every time, and every first row of each slice will be
    // skipped. As a result, the copied data will be:
    //
    //   - image 0: row N to row (N + 3), which is correct. Row (N - 1) is skipped.
    //   - image 1: row (N + 5) to row (N + 8) because row (N + 4) is skipped. It is incorrect.
    //
    // Likewise, all the images that follow will be incorrect because we wrongly keep skipping
    // one row for each depth slice.
    //
    // Solution: split the copy region into two copies: copy 3 (rowsPerImage - 1) rows and
    // expand to all depth slices in the first copy. 3 rows + one skipped row = 4 rows, which
    // equals rowsPerImage. Then copy the last row in the second copy. However, the copy
    // block of the last row of the last image may be out of bounds (see the details below),
    // so we need an extra copy for the very last row.

    // Copy 0: copy 3 rows, not 4 rows.
    //                _____________________
    //               /                    /|
    //              /                    / |
    //              |-------------------|  |
    //  row (N - 1) |                   |  |
    //  row N       |                 ++|  |
    //  row (N + 1) |~~~~~~~~~~~~~~~~~++| /
    //  row (N + 2) |~~~~~~~~~~~~~~~~~++|/
    //              |-------------------|

    // Copy 1: move down two rows and copy the last row on image 0, and expand to
    // copySize.depthOrArrayLayers - 1 depth slices. Note that if we expand it to all depth
    // slices, the last copy block will be row (N + 9) to row (N + 12). Row (N + 11) might
    // be the last row of the entire buffer. Then row (N + 12) will be out of bounds.
    //                _____________________
    //               /                    /|
    //              /                    / |
    //              |-------------------|  |
    //  row (N + 1) |                   |  |
    //  row (N + 2) |                   |  |
    //  row (N + 3) |                 ++| /
    //  row (N + 4) |~~~~~~~~~~~~~~~~~~~|/
    //              |-------------------|
    //
    // Copy 2: copy the last row of the last image.
    //              |-------------------|
    //  row (N + 11)|                 ++|
    //              |-------------------|

    // Copy 0: copy all block rows but the last one (copySize.height - blockInfo.height texels).
    TextureCopySubresource::CopyInfo& copy0 = copy.copies[i];
    copy0.copySize.height = copySize.height - blockInfo.height;
    // bufferSize is expressed in texels while rowsPerImage counts block rows, so convert.
    copy0.bufferSize.height = rowsPerImage * blockInfo.height;

    // Copy 1: move down 2 rows and copy the last row on image 0, and expand to all depth slices
    // but the last one.
    TextureCopySubresource::CopyInfo* copy1 = copy.AddCopy();
    *copy1 = copy0;
    copy1->alignedOffset += 2 * bytesPerRow;
    // The last row sits copySize.height - blockInfo.height texels below the first row of this
    // region in the texture. Advance from copy 0's texture offset (instead of overwriting it)
    // so that a non-zero origin.y is preserved.
    copy1->textureOffset.y += copySize.height - blockInfo.height;
    // Offset two rows from the copy height for the bufferOffset (See the figure above):
    //   - one for the row we advanced in the buffer: row (N + 4).
    //   - one for the last row we want to copy: row (N + 3) itself.
    copy1->bufferOffset.y = copySize.height - 2 * blockInfo.height;
    copy1->copySize.height = blockInfo.height;
    copy1->copySize.depthOrArrayLayers--;
    copy1->bufferSize.depthOrArrayLayers--;

    // Copy 2: copy the last row of the last image.
    uint64_t offsetForCopy0 = OffsetToFirstCopiedTexel(blockInfo, bytesPerRow,
                                                       copy0.alignedOffset, copy0.bufferOffset);
    // Number of block rows between the first row copied by copy 0 and the last row of the last
    // image: the rows copy 0 covers in one image plus all the preceding images.
    uint64_t blockRowsToLastRowOfLastImage =
        static_cast<uint64_t>(copy0.copySize.height / blockInfo.height) +
        static_cast<uint64_t>(rowsPerImage) * (copySize.depthOrArrayLayers - 1);
    uint64_t offsetForLastRowOfLastImage =
        offsetForCopy0 + bytesPerRow * blockRowsToLastRowOfLastImage;
    uint64_t alignedOffsetForLastRowOfLastImage =
        AlignDownForDataPlacement(offsetForLastRowOfLastImage);
    Origin3D texelOffsetForLastRowOfLastImage = ComputeTexelOffsets(
        blockInfo,
        static_cast<uint32_t>(offsetForLastRowOfLastImage - alignedOffsetForLastRowOfLastImage),
        bytesPerRow);

    TextureCopySubresource::CopyInfo* copy2 = copy.AddCopy();
    copy2->alignedOffset = alignedOffsetForLastRowOfLastImage;
    copy2->textureOffset = copy1->textureOffset;
    copy2->textureOffset.z = origin.z + copySize.depthOrArrayLayers - 1;
    copy2->copySize = copy1->copySize;
    copy2->copySize.depthOrArrayLayers = 1;
    copy2->bufferOffset = texelOffsetForLastRowOfLastImage;
    copy2->bufferSize.width = copy1->bufferSize.width;
    // Copy 2 covers exactly one block row.
    ASSERT(copy2->copySize.height == blockInfo.height);
    copy2->bufferSize.height = copy2->bufferOffset.y + copy2->copySize.height;
    copy2->bufferSize.depthOrArrayLayers = 1;
}
TextureCopySubresource Compute3DTextureCopySplits(Origin3D origin, TextureCopySubresource Compute3DTextureCopySplits(Origin3D origin,
Extent3D copySize, Extent3D copySize,
const TexelBlockInfo& blockInfo, const TexelBlockInfo& blockInfo,
@ -291,29 +449,27 @@ namespace dawn_native { namespace d3d12 {
return copySubresource; return copySubresource;
} }
bool needRecompute = false;
uint32_t rowsPerImageInTexels = rowsPerImage * blockInfo.height; uint32_t rowsPerImageInTexels = rowsPerImage * blockInfo.height;
for (uint32_t i = 0; i < copySubresource.count; ++i) { // The copy region(s) generated by Compute2DTextureCopySubresource might be incorrect.
// However, we may append a couple more copy regions in the for loop below. We don't need
// to revise these new added copy regions.
uint32_t originalCopyCount = copySubresource.count;
for (uint32_t i = 0; i < originalCopyCount; ++i) {
// There can be one empty row at most in a copy region. // There can be one empty row at most in a copy region.
ASSERT(copySubresource.copies[i].bufferSize.height <= rowsPerImage + blockInfo.height); ASSERT(copySubresource.copies[i].bufferSize.height <= rowsPerImage + blockInfo.height);
Extent3D& bufferSize = copySubresource.copies[i].bufferSize; Extent3D& bufferSize = copySubresource.copies[i].bufferSize;
if (bufferSize.height > rowsPerImageInTexels) { if (bufferSize.height > rowsPerImageInTexels) {
needRecompute = true; ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
} else if (bufferSize.height < rowsPerImageInTexels) { Recompute3DTextureCopyRegionsForBlockWithEmptyFirstRow(
origin, copySize, blockInfo, bytesPerRow, rowsPerImage, copySubresource, i);
// TODO(crbug.com/dawn/547): recompute copy regions when copySize.height is 1.
} else {
// If we are copying multiple depth slices, we should skip rowsPerImageInTexels rows // If we are copying multiple depth slices, we should skip rowsPerImageInTexels rows
// at least for each slice even though we only copy partial rows in each slice // for each slice even though we only copy partial rows in each slice sometimes.
// sometimes.
bufferSize.height = rowsPerImageInTexels; bufferSize.height = rowsPerImageInTexels;
} }
} }
if (!needRecompute) {
return copySubresource;
}
// TODO(yunchao.he@intel.com): recompute copy regions for special cases for 3D textures,
// and return the revised copy regions.
ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
return copySubresource; return copySubresource;
} }
}} // namespace dawn_native::d3d12 }} // namespace dawn_native::d3d12

View File

@ -28,7 +28,7 @@ namespace dawn_native {
namespace dawn_native { namespace d3d12 { namespace dawn_native { namespace d3d12 {
struct TextureCopySubresource { struct TextureCopySubresource {
static constexpr unsigned int kMaxTextureCopyRegions = 2; static constexpr unsigned int kMaxTextureCopyRegions = 4;
struct CopyInfo { struct CopyInfo {
uint64_t alignedOffset = 0; uint64_t alignedOffset = 0;
@ -39,6 +39,8 @@ namespace dawn_native { namespace d3d12 {
Extent3D copySize; Extent3D copySize;
}; };
CopyInfo* AddCopy();
uint32_t count = 0; uint32_t count = 0;
std::array<CopyInfo, kMaxTextureCopyRegions> copies; std::array<CopyInfo, kMaxTextureCopyRegions> copies;
}; };

View File

@ -1054,8 +1054,6 @@ TEST_P(CopyTests_T2B, Texture3DSubRegion) {
} }
TEST_P(CopyTests_T2B, Texture3DNoSplitRowDataWithEmptyFirstRow) { TEST_P(CopyTests_T2B, Texture3DNoSplitRowDataWithEmptyFirstRow) {
DAWN_TEST_UNSUPPORTED_IF(IsD3D12()); // TODO(crbug.com/dawn/547): Implement on D3D12.
constexpr uint32_t kWidth = 2; constexpr uint32_t kWidth = 2;
constexpr uint32_t kHeight = 4; constexpr uint32_t kHeight = 4;
constexpr uint32_t kDepth = 3; constexpr uint32_t kDepth = 3;
@ -1095,8 +1093,6 @@ TEST_P(CopyTests_T2B, Texture3DSplitRowDataWithoutEmptyFirstRow) {
} }
TEST_P(CopyTests_T2B, Texture3DSplitRowDataWithEmptyFirstRow) { TEST_P(CopyTests_T2B, Texture3DSplitRowDataWithEmptyFirstRow) {
DAWN_TEST_UNSUPPORTED_IF(IsD3D12()); // TODO(crbug.com/dawn/547): Implement on D3D12.
constexpr uint32_t kWidth = 39; constexpr uint32_t kWidth = 39;
constexpr uint32_t kHeight = 4; constexpr uint32_t kHeight = 4;
constexpr uint32_t kDepth = 3; constexpr uint32_t kDepth = 3;
@ -1637,8 +1633,6 @@ TEST_P(CopyTests_B2T, Texture3DSubRegion) {
} }
TEST_P(CopyTests_B2T, Texture3DNoSplitRowDataWithEmptyFirstRow) { TEST_P(CopyTests_B2T, Texture3DNoSplitRowDataWithEmptyFirstRow) {
DAWN_TEST_UNSUPPORTED_IF(IsD3D12()); // TODO(crbug.com/dawn/547): Implement on D3D12.
constexpr uint32_t kWidth = 2; constexpr uint32_t kWidth = 2;
constexpr uint32_t kHeight = 4; constexpr uint32_t kHeight = 4;
constexpr uint32_t kDepth = 3; constexpr uint32_t kDepth = 3;
@ -1678,8 +1672,6 @@ TEST_P(CopyTests_B2T, Texture3DSplitRowDataWithoutEmptyFirstRow) {
} }
TEST_P(CopyTests_B2T, Texture3DSplitRowDataWithEmptyFirstRow) { TEST_P(CopyTests_B2T, Texture3DSplitRowDataWithEmptyFirstRow) {
DAWN_TEST_UNSUPPORTED_IF(IsD3D12()); // TODO(crbug.com/dawn/547): Implement on D3D12.
constexpr uint32_t kWidth = 39; constexpr uint32_t kWidth = 39;
constexpr uint32_t kHeight = 4; constexpr uint32_t kHeight = 4;
constexpr uint32_t kDepth = 3; constexpr uint32_t kDepth = 3;