mirror of
				https://github.com/encounter/dawn-cmake.git
				synced 2025-10-26 11:40:29 +00:00 
			
		
		
		
	Impl 3DTexture copy splitter for empty first row issue on D3D12
If there is an empty row at the beginning of a copy region due to
alignment adjustment, the copy region produced by the 2D texture
splitter is incorrect for 3D textures, because every depth slice
(except the first one) wrongly skips one row. We need to recompute
such a copy region by modifying it and adding two more copy regions
to handle the empty-first-row issue.
The idea of recomputation is:
  - modify this copy region and don't copy the last row in order to
    make its bufferSize.height not exceed rowsPerImage,
  - use one more copy region to handle the last row of each depth
    slice except the last depth slice,
  - use another copy region to handle the last row of the last depth
    slice.
Bug: dawn:547
Change-Id: Ib2f6019963ed29d62a9f13d7316b5f04801db8c9
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/52341
Commit-Queue: Yunchao He <yunchao.he@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
			
			
This commit is contained in:
		
							parent
							
								
									f858843fa7
								
							
						
					
					
						commit
						84d200e6dd
					
				| @ -31,8 +31,26 @@ namespace dawn_native { namespace d3d12 { | ||||
|             return {byteOffsetX / blockInfo.byteSize * blockInfo.width, | ||||
|                     byteOffsetY / bytesPerRow * blockInfo.height, 0}; | ||||
|         } | ||||
| 
 | ||||
|         // Converts a copy region's (alignedOffset, bufferOffset) pair back into a single byte
 | ||||
|         // offset: alignedOffset plus the bytes covered by bufferOffset.x within a row plus the
 | ||||
|         // bytes covered by the bufferOffset.y rows that precede the copied data.
 | ||||
|         // bufferOffset.z must be 0.
 | ||||
|         uint64_t OffsetToFirstCopiedTexel(const TexelBlockInfo& blockInfo, | ||||
|                                           uint32_t bytesPerRow, | ||||
|                                           uint64_t alignedOffset, | ||||
|                                           Origin3D bufferOffset) { | ||||
|             ASSERT(bufferOffset.z == 0); | ||||
| 
 | ||||
|             // Horizontal part: texels -> blocks -> bytes.
 | ||||
|             const auto bytesIntoRow = bufferOffset.x * blockInfo.byteSize / blockInfo.width; | ||||
|             // Vertical part: texel rows -> block rows -> bytes.
 | ||||
|             const auto bytesOfPrecedingRows = bufferOffset.y * bytesPerRow / blockInfo.height; | ||||
| 
 | ||||
|             return alignedOffset + bytesIntoRow + bytesOfPrecedingRows; | ||||
|         } | ||||
| 
 | ||||
|         // Rounds |offset| down by clearing the bits below
 | ||||
|         // D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, yielding the aligned address that precedes (or
 | ||||
|         // equals) |offset|. The parameter is 64-bit because every caller passes a uint64_t buffer
 | ||||
|         // offset; a 32-bit parameter would silently truncate offsets of 4 GiB and above before
 | ||||
|         // the mask is applied.
 | ||||
|         uint64_t AlignDownForDataPlacement(uint64_t offset) { | ||||
|             return offset & ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1); | ||||
|         } | ||||
|     }  // namespace
 | ||||
| 
 | ||||
|     // Hands out the next unused entry of |copies| and bumps |count|. The returned pointer refers
 | ||||
|     // to storage inside this TextureCopySubresource. Asserts that fewer than
 | ||||
|     // kMaxTextureCopyRegions entries are in use.
 | ||||
|     TextureCopySubresource::CopyInfo* TextureCopySubresource::AddCopy() { | ||||
|         ASSERT(this->count < kMaxTextureCopyRegions); | ||||
|         CopyInfo* const nextCopy = &this->copies[this->count]; | ||||
|         this->count += 1; | ||||
|         return nextCopy; | ||||
|     } | ||||
| 
 | ||||
|     TextureCopySubresource Compute2DTextureCopySubresource(Origin3D origin, | ||||
|                                                            Extent3D copySize, | ||||
|                                                            const TexelBlockInfo& blockInfo, | ||||
| @ -44,8 +62,7 @@ namespace dawn_native { namespace d3d12 { | ||||
| 
 | ||||
|         // The copies must be 512-aligned. To do this, we calculate the first 512-aligned address
 | ||||
|         // preceding our data.
 | ||||
|         uint64_t alignedOffset = | ||||
|             offset & ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1); | ||||
|         uint64_t alignedOffset = AlignDownForDataPlacement(offset); | ||||
| 
 | ||||
|         // If the provided offset to the data was already 512-aligned, we can simply copy the data
 | ||||
|         // without further translation.
 | ||||
| @ -182,8 +199,7 @@ namespace dawn_native { namespace d3d12 { | ||||
| 
 | ||||
|         uint64_t offsetForCopy1 = | ||||
|             offset + copy.copies[0].copySize.width / blockInfo.width * blockInfo.byteSize; | ||||
|         uint64_t alignedOffsetForCopy1 = | ||||
|             offsetForCopy1 & ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1); | ||||
|         uint64_t alignedOffsetForCopy1 = AlignDownForDataPlacement(offsetForCopy1); | ||||
|         Origin3D texelOffsetForCopy1 = ComputeTexelOffsets( | ||||
|             blockInfo, static_cast<uint32_t>(offsetForCopy1 - alignedOffsetForCopy1), bytesPerRow); | ||||
| 
 | ||||
| @ -257,6 +273,148 @@ namespace dawn_native { namespace d3d12 { | ||||
|         return copies; | ||||
|     } | ||||
| 
 | ||||
|     // Rewrites copy.copies[i], a copy region whose buffer footprint starts with one empty row,
 | ||||
|     // so that it can be expanded to all depth slices of a 3D texture. copy.copies[i] is shrunk
 | ||||
|     // to skip the last row of each slice, and two more regions are appended to |copy| through
 | ||||
|     // AddCopy(): one for the last row of every slice but the last, one for the last row of the
 | ||||
|     // last slice.
 | ||||
|     //
 | ||||
|     //   - origin, copySize: the texture origin and extent of the whole copy, in texels.
 | ||||
|     //   - bytesPerRow: buffer pitch of one row of blocks.
 | ||||
|     //   - rowsPerImage: number of block rows per depth slice (the caller multiplies it by
 | ||||
|     //     blockInfo.height to get texel rows).
 | ||||
|     //   - copy, i: the split result to patch and the index of the region to recompute.
 | ||||
|     void Recompute3DTextureCopyRegionsForBlockWithEmptyFirstRow(Origin3D origin, | ||||
|                                                                 Extent3D copySize, | ||||
|                                                                 const TexelBlockInfo& blockInfo, | ||||
|                                                                 uint32_t bytesPerRow, | ||||
|                                                                 uint32_t rowsPerImage, | ||||
|                                                                 TextureCopySubresource& copy, | ||||
|                                                                 uint32_t i) { | ||||
|         // Let's assign data and show why the copy region generated by
 | ||||
|         // Compute2DTextureCopySubresource is incorrect if there is an empty row at the beginning
 | ||||
|         // of the copy block. Assume that bytesPerRow is 256, that we are doing a B2T copy, and
 | ||||
|         // that the copy size is {width: 2, height: 4, depthOrArrayLayers: 3}. Then the data
 | ||||
|         // layout in the buffer is as shown below:
 | ||||
|         //
 | ||||
|         //               |<----- bytes per row ------>|
 | ||||
|         //
 | ||||
|         //               |----------------------------|
 | ||||
|         //  row (N - 1)  |                            |
 | ||||
|         //  row N        |                 ++~~~~~~~~~|
 | ||||
|         //  row (N + 1)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 2)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 3)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 4)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 5)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 6)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 7)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 8)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 9)  |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 10) |~~~~~~~~~~~~~~~~~++~~~~~~~~~|
 | ||||
|         //  row (N + 11) |~~~~~~~~~~~~~~~~~++         |
 | ||||
|         //               |----------------------------|
 | ||||
| 
 | ||||
|         // The copy we mean to do is the following:
 | ||||
|         //
 | ||||
|         //   - image 0: row N to row (N + 3),
 | ||||
|         //   - image 1: row (N + 4) to row (N + 7),
 | ||||
|         //   - image 2: row (N + 8) to row (N + 11).
 | ||||
|         //
 | ||||
|         // Note that alignedOffset is at the beginning of row (N - 1), while the buffer offset
 | ||||
|         // makes the copy start at row N. Row (N - 1) is the empty row between alignedOffset and
 | ||||
|         // offset.
 | ||||
|         //
 | ||||
|         // The 2D copy region of image 0 we received from Compute2DTextureCopySubresource() is
 | ||||
|         // the following:
 | ||||
|         //
 | ||||
|         //              |-------------------|
 | ||||
|         //  row (N - 1) |                   |
 | ||||
|         //  row N       |                 ++|
 | ||||
|         //  row (N + 1) |~~~~~~~~~~~~~~~~~++|
 | ||||
|         //  row (N + 2) |~~~~~~~~~~~~~~~~~++|
 | ||||
|         //  row (N + 3) |~~~~~~~~~~~~~~~~~++|
 | ||||
|         //              |-------------------|
 | ||||
|         //
 | ||||
|         // However, if we simply expand the copy region of image 0 to all depth slices of a 3D
 | ||||
|         // texture, we will copy 5 rows every time, and the first row of each slice will be
 | ||||
|         // skipped. As a result, the copied data will be:
 | ||||
|         //
 | ||||
|         //   - image 0: row N to row (N + 3), which is correct. Row (N - 1) is skipped.
 | ||||
|         //   - image 1: row (N + 5) to row (N + 8) because row (N + 4) is skipped. It is incorrect.
 | ||||
|         //
 | ||||
|         // Likewise, all the following images will be incorrect because we wrongly keep skipping
 | ||||
|         // one row for each depth slice.
 | ||||
|         //
 | ||||
|         // Solution: split the copy region into two copies: copy 3 (rowsPerImage - 1) rows and
 | ||||
|         // expand to all depth slices in the first copy. 3 rows + one skipped row = 4 rows, which
 | ||||
|         // equals rowsPerImage. Then copy the last row in the second copy. However, the copy
 | ||||
|         // block of the last row of the last image may be out of bounds (see the details below),
 | ||||
|         // so we need an extra copy for the very last row.
 | ||||
| 
 | ||||
|         // Copy 0: copy 3 rows, not 4 rows.
 | ||||
|         //                _____________________
 | ||||
|         //               /                    /|
 | ||||
|         //              /                    / |
 | ||||
|         //              |-------------------|  |
 | ||||
|         //  row (N - 1) |                   |  |
 | ||||
|         //  row N       |                 ++|  |
 | ||||
|         //  row (N + 1) |~~~~~~~~~~~~~~~~~++| /
 | ||||
|         //  row (N + 2) |~~~~~~~~~~~~~~~~~++|/
 | ||||
|         //              |-------------------|
 | ||||
| 
 | ||||
|         // Copy 1: move down two rows and copy the last row on image 0, and expand to
 | ||||
|         // copySize.depthOrArrayLayers - 1 depth slices. Note that if we expand it to all depth
 | ||||
|         // slices, the last copy block will be row (N + 9) to row (N + 12). Row (N + 11) might
 | ||||
|         // be the last row of the entire buffer. Then row (N + 12) will be out of bounds.
 | ||||
|         //                _____________________
 | ||||
|         //               /                    /|
 | ||||
|         //              /                    / |
 | ||||
|         //              |-------------------|  |
 | ||||
|         //  row (N + 1) |                   |  |
 | ||||
|         //  row (N + 2) |                   |  |
 | ||||
|         //  row (N + 3) |                 ++| /
 | ||||
|         //  row (N + 4) |~~~~~~~~~~~~~~~~~~~|/
 | ||||
|         //              |-------------------|
 | ||||
|         //
 | ||||
|         //  Copy 2: copy the last row of the last image.
 | ||||
|         //              |-------------------|
 | ||||
|         //  row (N + 11)|                 ++|
 | ||||
|         //              |-------------------|
 | ||||
| 
 | ||||
|         // Copy 0: copy all rows of blocks but the last one (copySize.height - blockInfo.height
 | ||||
|         // texel rows).
 | ||||
|         TextureCopySubresource::CopyInfo& copy0 = copy.copies[i]; | ||||
|         copy0.copySize.height = copySize.height - blockInfo.height; | ||||
|         // bufferSize is expressed in texels while rowsPerImage counts rows of blocks, so convert
 | ||||
|         // to texel rows to match what the caller compares bufferSize.height against.
 | ||||
|         copy0.bufferSize.height = rowsPerImage * blockInfo.height; | ||||
| 
 | ||||
|         // Copy 1: move down 2 rows and copy the last row on image 0, and expand to all depth slices
 | ||||
|         // but the last one.
 | ||||
|         TextureCopySubresource::CopyInfo* copy1 = copy.AddCopy(); | ||||
|         *copy1 = copy0; | ||||
|         copy1->alignedOffset += 2 * bytesPerRow; | ||||
|         copy1->textureOffset.y = copySize.height - blockInfo.height; | ||||
|         // Offset two rows from the copy height for the bufferOffset (See the figure above):
 | ||||
|         //   - one for the row we advanced in the buffer: row (N + 4).
 | ||||
|         //   - one for the last row we want to copy: row (N + 3) itself.
 | ||||
|         copy1->bufferOffset.y = copySize.height - 2 * blockInfo.height; | ||||
|         copy1->copySize.height = blockInfo.height; | ||||
|         copy1->copySize.depthOrArrayLayers--; | ||||
|         copy1->bufferSize.depthOrArrayLayers--; | ||||
| 
 | ||||
|         // Copy 2: copy the last row of the last image.
 | ||||
|         uint64_t offsetForCopy0 = OffsetToFirstCopiedTexel(blockInfo, bytesPerRow, | ||||
|                                                            copy0.alignedOffset, copy0.bufferOffset); | ||||
|         // bytesPerRow is the pitch of one row of blocks, so the texel height of copy 0 is
 | ||||
|         // converted to rows of blocks first. The product is computed in 64 bits so that it
 | ||||
|         // cannot wrap before being added to the 64-bit offset.
 | ||||
|         uint64_t offsetForLastRowOfLastImage = | ||||
|             offsetForCopy0 + | ||||
|             static_cast<uint64_t>(bytesPerRow) * | ||||
|                 (copy0.copySize.height / blockInfo.height + | ||||
|                  rowsPerImage * (copySize.depthOrArrayLayers - 1)); | ||||
|         uint64_t alignedOffsetForLastRowOfLastImage = | ||||
|             AlignDownForDataPlacement(offsetForLastRowOfLastImage); | ||||
|         Origin3D texelOffsetForLastRowOfLastImage = ComputeTexelOffsets( | ||||
|             blockInfo, | ||||
|             static_cast<uint32_t>(offsetForLastRowOfLastImage - alignedOffsetForLastRowOfLastImage), | ||||
|             bytesPerRow); | ||||
| 
 | ||||
|         TextureCopySubresource::CopyInfo* copy2 = copy.AddCopy(); | ||||
|         copy2->alignedOffset = alignedOffsetForLastRowOfLastImage; | ||||
|         copy2->textureOffset = copy1->textureOffset; | ||||
|         copy2->textureOffset.z = origin.z + copySize.depthOrArrayLayers - 1; | ||||
|         copy2->copySize = copy1->copySize; | ||||
|         copy2->copySize.depthOrArrayLayers = 1; | ||||
|         copy2->bufferOffset = texelOffsetForLastRowOfLastImage; | ||||
|         copy2->bufferSize.width = copy1->bufferSize.width; | ||||
|         // Copy 2 covers exactly one row of blocks, i.e. blockInfo.height texel rows.
 | ||||
|         ASSERT(copy2->copySize.height == blockInfo.height); | ||||
|         copy2->bufferSize.height = copy2->bufferOffset.y + copy2->copySize.height; | ||||
|         copy2->bufferSize.depthOrArrayLayers = 1; | ||||
|     } | ||||
| 
 | ||||
|     TextureCopySubresource Compute3DTextureCopySplits(Origin3D origin, | ||||
|                                                       Extent3D copySize, | ||||
|                                                       const TexelBlockInfo& blockInfo, | ||||
| @ -291,29 +449,27 @@ namespace dawn_native { namespace d3d12 { | ||||
|             return copySubresource; | ||||
|         } | ||||
| 
 | ||||
|         bool needRecompute = false; | ||||
|         uint32_t rowsPerImageInTexels = rowsPerImage * blockInfo.height; | ||||
|         for (uint32_t i = 0; i < copySubresource.count; ++i) { | ||||
|         // The copy region(s) generated by Compute2DTextureCopySubresource might be incorrect.
 | ||||
|         // However, we may append a couple more copy regions in the for loop below. We don't need
 | ||||
|         // to revise these new added copy regions.
 | ||||
|         uint32_t originalCopyCount = copySubresource.count; | ||||
|         for (uint32_t i = 0; i < originalCopyCount; ++i) { | ||||
|             // There can be one empty row at most in a copy region.
 | ||||
|             ASSERT(copySubresource.copies[i].bufferSize.height <= rowsPerImage + blockInfo.height); | ||||
|             Extent3D& bufferSize = copySubresource.copies[i].bufferSize; | ||||
|             if (bufferSize.height > rowsPerImageInTexels) { | ||||
|                 needRecompute = true; | ||||
|             } else if (bufferSize.height < rowsPerImageInTexels) { | ||||
|                 ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); | ||||
|                 Recompute3DTextureCopyRegionsForBlockWithEmptyFirstRow( | ||||
|                     origin, copySize, blockInfo, bytesPerRow, rowsPerImage, copySubresource, i); | ||||
|                 // TODO(crbug.com/dawn/547): recompute copy regions when copySize.height is 1.
 | ||||
|             } else { | ||||
|                 // If we are copying multiple depth slices, we should skip rowsPerImageInTexels rows
 | ||||
|                 // at least for each slice even though we only copy partial rows in each slice
 | ||||
|                 // sometimes.
 | ||||
|                 // for each slice even though we only copy partial rows in each slice sometimes.
 | ||||
|                 bufferSize.height = rowsPerImageInTexels; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (!needRecompute) { | ||||
|             return copySubresource; | ||||
|         } | ||||
| 
 | ||||
|         // TODO(yunchao.he@intel.com): recompute copy regions for special cases for 3D textures,
 | ||||
|         // and return the revised copy regions.
 | ||||
|         ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); | ||||
|         return copySubresource; | ||||
|     } | ||||
| }}  // namespace dawn_native::d3d12
 | ||||
|  | ||||
| @ -28,7 +28,7 @@ namespace dawn_native { | ||||
| namespace dawn_native { namespace d3d12 { | ||||
| 
 | ||||
|     struct TextureCopySubresource { | ||||
|         static constexpr unsigned int kMaxTextureCopyRegions = 2; | ||||
|         static constexpr unsigned int kMaxTextureCopyRegions = 4; | ||||
| 
 | ||||
|         struct CopyInfo { | ||||
|             uint64_t alignedOffset = 0; | ||||
| @ -39,6 +39,8 @@ namespace dawn_native { namespace d3d12 { | ||||
|             Extent3D copySize; | ||||
|         }; | ||||
| 
 | ||||
|         CopyInfo* AddCopy(); | ||||
| 
 | ||||
|         uint32_t count = 0; | ||||
|         std::array<CopyInfo, kMaxTextureCopyRegions> copies; | ||||
|     }; | ||||
|  | ||||
| @ -1054,8 +1054,6 @@ TEST_P(CopyTests_T2B, Texture3DSubRegion) { | ||||
| } | ||||
| 
 | ||||
| TEST_P(CopyTests_T2B, Texture3DNoSplitRowDataWithEmptyFirstRow) { | ||||
|     DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
 | ||||
| 
 | ||||
|     constexpr uint32_t kWidth = 2; | ||||
|     constexpr uint32_t kHeight = 4; | ||||
|     constexpr uint32_t kDepth = 3; | ||||
| @ -1095,8 +1093,6 @@ TEST_P(CopyTests_T2B, Texture3DSplitRowDataWithoutEmptyFirstRow) { | ||||
| } | ||||
| 
 | ||||
| TEST_P(CopyTests_T2B, Texture3DSplitRowDataWithEmptyFirstRow) { | ||||
|     DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
 | ||||
| 
 | ||||
|     constexpr uint32_t kWidth = 39; | ||||
|     constexpr uint32_t kHeight = 4; | ||||
|     constexpr uint32_t kDepth = 3; | ||||
| @ -1637,8 +1633,6 @@ TEST_P(CopyTests_B2T, Texture3DSubRegion) { | ||||
| } | ||||
| 
 | ||||
| TEST_P(CopyTests_B2T, Texture3DNoSplitRowDataWithEmptyFirstRow) { | ||||
|     DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
 | ||||
| 
 | ||||
|     constexpr uint32_t kWidth = 2; | ||||
|     constexpr uint32_t kHeight = 4; | ||||
|     constexpr uint32_t kDepth = 3; | ||||
| @ -1678,8 +1672,6 @@ TEST_P(CopyTests_B2T, Texture3DSplitRowDataWithoutEmptyFirstRow) { | ||||
| } | ||||
| 
 | ||||
| TEST_P(CopyTests_B2T, Texture3DSplitRowDataWithEmptyFirstRow) { | ||||
|     DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
 | ||||
| 
 | ||||
|     constexpr uint32_t kWidth = 39; | ||||
|     constexpr uint32_t kHeight = 4; | ||||
|     constexpr uint32_t kDepth = 3; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user