diff --git a/src/dawn/native/CommandValidation.cpp b/src/dawn/native/CommandValidation.cpp index 56e226bb5a..e1af879eba 100644 --- a/src/dawn/native/CommandValidation.cpp +++ b/src/dawn/native/CommandValidation.cpp @@ -111,13 +111,6 @@ namespace dawn::native { static_cast(maxStart); } - template - DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) { - static_assert(std::is_same::value, "'a' must be uint32_t"); - static_assert(std::is_same::value, "'b' must be uint32_t"); - return uint64_t(a) * uint64_t(b); - } - ResultOrError ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo, const Extent3D& copySize, uint32_t bytesPerRow, diff --git a/src/dawn/native/CommandValidation.h b/src/dawn/native/CommandValidation.h index 91b98cf3e0..477a3f58d5 100644 --- a/src/dawn/native/CommandValidation.h +++ b/src/dawn/native/CommandValidation.h @@ -38,6 +38,13 @@ namespace dawn::native { uint64_t bufferOffset, uint64_t size); + template + DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) { + static_assert(std::is_same::value, "'a' must be uint32_t"); + static_assert(std::is_same::value, "'b' must be uint32_t"); + return uint64_t(a) * uint64_t(b); + } + ResultOrError ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo, const Extent3D& copySize, uint32_t bytesPerRow, diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp index a807c41150..eb473c2300 100644 --- a/src/dawn/native/Toggles.cpp +++ b/src/dawn/native/Toggles.cpp @@ -259,6 +259,15 @@ namespace dawn::native { "Initialize workgroup memory with OpConstantNull on Vulkan when the Vulkan extension " "VK_KHR_zero_initialize_workgroup_memory is supported.", "https://crbug.com/dawn/1302"}}, + {Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings, + {"d3d12_split_buffer_texture_copy_for_rows_per_image_paddings", + "D3D12 requires more buffer storage than it should when rowsPerImage is greater than " + "copyHeight, which means there are pure padding row(s) on each image. In this " + "situation, the buffer used for B2T/T2B copy might be big enough according to " + "WebGPU's spec but it doesn't meet D3D12's requirement, then we need to workaround " + "it via split the copy operation into two copies, in order to make B2T/T2B copy " + "being done correctly on D3D12.", + "https://crbug.com/dawn/1289"}}, // Comment to separate the }} so it is clearer what to copy-paste to add a toggle. }}; diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h index 656a6026f5..6e6c90f10a 100644 --- a/src/dawn/native/Toggles.h +++ b/src/dawn/native/Toggles.h @@ -67,6 +67,7 @@ namespace dawn::native { RecordDetailedTimingInTraceEvents, DisableTimestampQueryConversion, VulkanUseZeroInitializeWorkgroupMemoryExtension, + D3D12SplitBufferTextureCopyForRowsPerImagePaddings, EnumCount, InvalidEnum = EnumCount, diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp index 8af38371e5..722eb9a3a0 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.cpp +++ b/src/dawn/native/d3d12/DeviceD3D12.cpp @@ -612,6 +612,11 @@ namespace dawn::native::d3d12 { true); } } + + // Currently this workaround is needed on any D3D12 backend for some particular situations. + // But we may need to limit it if D3D12 runtime fixes the bug on its new release. See + // https://crbug.com/dawn/1289 for more information. + SetToggle(Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings, true); } MaybeError Device::WaitForIdleForDestruction() { diff --git a/src/dawn/native/d3d12/UtilsD3D12.cpp b/src/dawn/native/d3d12/UtilsD3D12.cpp index aa79a713d8..e559f37939 100644 --- a/src/dawn/native/d3d12/UtilsD3D12.cpp +++ b/src/dawn/native/d3d12/UtilsD3D12.cpp @@ -19,6 +19,7 @@ #include #include "dawn/common/Assert.h" +#include "dawn/native/CommandValidation.h" #include "dawn/native/Format.h" #include "dawn/native/d3d12/BufferD3D12.h" #include "dawn/native/d3d12/CommandRecordingContext.h" @@ -27,6 +28,63 @@ namespace dawn::native::d3d12 { + namespace { + + uint64_t RequiredCopySizeByD3D12(const uint32_t bytesPerRow, + const uint32_t rowsPerImage, + const Extent3D& copySize, + const TexelBlockInfo& blockInfo) { + uint64_t bytesPerImage = Safe32x32(bytesPerRow, rowsPerImage); + + // Required copy size for B2T/T2B copy on D3D12 is smaller than (but very close to) + // depth * bytesPerImage. The latter is already checked at ComputeRequiredBytesInCopy() + // in CommandValidation.cpp. + uint64_t requiredCopySizeByD3D12 = bytesPerImage * (copySize.depthOrArrayLayers - 1); + + // When calculating the required copy size for B2T/T2B copy, D3D12 doesn't respect + // rowsPerImage paddings on the last image for 3D texture, but it does respect + // bytesPerRow paddings on the last row. + ASSERT(blockInfo.width == 1); + ASSERT(blockInfo.height == 1); + uint64_t lastRowBytes = Safe32x32(blockInfo.byteSize, copySize.width); + ASSERT(rowsPerImage > copySize.height); + uint64_t lastImageBytesByD3D12 = + Safe32x32(bytesPerRow, rowsPerImage - 1) + lastRowBytes; + + requiredCopySizeByD3D12 += lastImageBytesByD3D12; + return requiredCopySizeByD3D12; + } + + // This function is used to access whether we need a workaround for D3D12's wrong algorithm + // of calculating required buffer size for B2T/T2B copy. The workaround is needed only when + // - The corresponding toggle is enabled. + // - It is a 3D texture (so the format is uncompressed). + // - There are multiple depth images to be copied (copySize.depthOrArrayLayers > 1). + // - It has rowsPerImage paddings (rowsPerImage > copySize.height). + // - The buffer size doesn't meet D3D12's requirement. + bool NeedBufferSizeWorkaroundForBufferTextureCopyOnD3D12(const BufferCopy& bufferCopy, + const TextureCopy& textureCopy, + const Extent3D& copySize) { + TextureBase* texture = textureCopy.texture.Get(); + Device* device = ToBackend(texture->GetDevice()); + + if (!device->IsToggleEnabled( + Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings) || + texture->GetDimension() != wgpu::TextureDimension::e3D || + copySize.depthOrArrayLayers <= 1 || bufferCopy.rowsPerImage <= copySize.height) { + return false; + } + + const TexelBlockInfo& blockInfo = + texture->GetFormat().GetAspectInfo(textureCopy.aspect).block; + uint64_t requiredCopySizeByD3D12 = RequiredCopySizeByD3D12( + bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, copySize, blockInfo); + return bufferCopy.buffer->GetAllocatedSize() - bufferCopy.offset < + requiredCopySizeByD3D12; + } + + } // anonymous namespace + ResultOrError ConvertStringToWstring(const char* str) { size_t len = strlen(str); if (len == 0) { @@ -285,8 +343,36 @@ namespace dawn::native::d3d12 { const BufferCopy& bufferCopy, const TextureCopy& textureCopy, const Extent3D& copySize) { - RecordBufferTextureCopyWithBufferHandle(direction, commandList, - ToBackend(bufferCopy.buffer)->GetD3D12Resource(), + ID3D12Resource* bufferResource = ToBackend(bufferCopy.buffer)->GetD3D12Resource(); + + if (NeedBufferSizeWorkaroundForBufferTextureCopyOnD3D12(bufferCopy, textureCopy, + copySize)) { + // Split the copy into two copies if the size of bufferCopy.buffer doesn't meet D3D12's + // requirement and a workaround is needed: + // - The first copy will copy all depth images but the last depth image, + // - The second copy will copy the last depth image. + Extent3D extentForAllButTheLastImage = copySize; + extentForAllButTheLastImage.depthOrArrayLayers -= 1; + RecordBufferTextureCopyWithBufferHandle( + direction, commandList, bufferResource, bufferCopy.offset, bufferCopy.bytesPerRow, + bufferCopy.rowsPerImage, textureCopy, extentForAllButTheLastImage); + + Extent3D extentForTheLastImage = copySize; + extentForTheLastImage.depthOrArrayLayers = 1; + + TextureCopy textureCopyForTheLastImage = textureCopy; + textureCopyForTheLastImage.origin.z += copySize.depthOrArrayLayers - 1; + + uint64_t copiedBytes = bufferCopy.bytesPerRow * bufferCopy.rowsPerImage * + (copySize.depthOrArrayLayers - 1); + RecordBufferTextureCopyWithBufferHandle( + direction, commandList, bufferResource, bufferCopy.offset + copiedBytes, + bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, textureCopyForTheLastImage, + extentForTheLastImage); + return; + } + + RecordBufferTextureCopyWithBufferHandle(direction, commandList, bufferResource, bufferCopy.offset, bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, textureCopy, copySize); } diff --git a/src/dawn/tests/end2end/RequiredBufferSizeInCopyTests.cpp b/src/dawn/tests/end2end/RequiredBufferSizeInCopyTests.cpp index 1ec0f01b5a..ef0608c49c 100644 --- a/src/dawn/tests/end2end/RequiredBufferSizeInCopyTests.cpp +++ b/src/dawn/tests/end2end/RequiredBufferSizeInCopyTests.cpp @@ -178,10 +178,6 @@ TEST_P(RequiredBufferSizeInCopyTests, BufferSizeOnBoundary) { kBytesPerRow * (rowsPerImage - 1) + kBytesPerBlock * copySize.width; DoTest(size, copySize, rowsPerImage); - // TODO(crbug.com/dawn/1278, 1288, 1289): Required buffer size for copy is wrong on D3D12. - bool mayNeedWorkaround = GetParam().mTextureDimension == wgpu::TextureDimension::e3D && - extraRowsPerImage > 0 && copySize.depthOrArrayLayers > 1; - DAWN_SUPPRESS_TEST_IF(IsD3D12() && mayNeedWorkaround); size -= kBytesPerBlock; DoTest(size, copySize, rowsPerImage); } @@ -190,25 +186,22 @@ TEST_P(RequiredBufferSizeInCopyTests, BufferSizeOnBoundary) { // The buffer doesn't have storage for any paddings on the last image. WebGPU spec doesn't require // storage for these paddings, and the copy operation will never access to these paddings. So it // should work. -TEST_P(RequiredBufferSizeInCopyTests, MininumBufferSize) { +TEST_P(RequiredBufferSizeInCopyTests, MinimumBufferSize) { wgpu::Extent3D copySize = kCopySize; copySize.depthOrArrayLayers = GetParam().mCopyDepth; const uint64_t extraRowsPerImage = GetParam().mExtraRowsPerImage; const uint64_t rowsPerImage = extraRowsPerImage + copySize.height; - // TODO(crbug.com/dawn/1278, 1288, 1289): Required buffer size for copy is wrong on D3D12. - bool mayNeedWorkaround = GetParam().mTextureDimension == wgpu::TextureDimension::e3D && - extraRowsPerImage > 0 && copySize.depthOrArrayLayers > 1; - DAWN_SUPPRESS_TEST_IF(IsD3D12() && mayNeedWorkaround); uint64_t size = kOffset + utils::RequiredBytesInCopy(kBytesPerRow, rowsPerImage, copySize, kFormat); DoTest(size, copySize, rowsPerImage); } -DAWN_INSTANTIATE_TEST_P(RequiredBufferSizeInCopyTests, - {D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), - VulkanBackend()}, - {Type::T2BCopy, Type::B2TCopy}, - {wgpu::TextureDimension::e3D, wgpu::TextureDimension::e2D}, - {2u, 1u}, - {1u, 0u}); +DAWN_INSTANTIATE_TEST_P( + RequiredBufferSizeInCopyTests, + {D3D12Backend(), D3D12Backend({"d3d12_split_buffer_texture_copy_for_rows_per_image_paddings"}), + MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()}, + {Type::T2BCopy, Type::B2TCopy}, + {wgpu::TextureDimension::e3D, wgpu::TextureDimension::e2D}, + {2u, 1u}, + {1u, 0u});