From 4b9ebb146567404e616ac68cfbb46e7798eff4d3 Mon Sep 17 00:00:00 2001 From: Jiawei Shao Date: Sat, 6 Aug 2022 04:36:08 +0000 Subject: [PATCH] D3D12: Workaround buffer-texture copy with depth-stencil on some platforms On the D3D12 platforms that don't support programmable sample positions, the source box specifying a portion of the depth texture must all be 0, or an error and a device lost will occur. This patch adds a workaround for this issue by splitting the original buffer-texture copy into two copies: 1. copy from the source resource into a temporary buffer at offset 0 2. copy from the temporary buffer at offset 0 into the destination resource. In the next patch we will fix the corresponding issue in Queue.WriteTexture. Note that on newer version of D3D12 the restrictions about D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT and D3D12_TEXTURE_DATA_PITCH_ALIGNMENT have all been lifted out, so the workaround added in this patch will also be disabled on the platforms that don't support programmable sample positions but the restrictions about D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT and D3D12_TEXTURE_DATA_PITCH_ALIGNMENT are no longer available. Bug: dawn:727 Test: dawn_end2end_test Change-Id: I9f1d848a0eeac5bd52c9219af6992a2821307746 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97182 Commit-Queue: Jiawei Shao Reviewed-by: Austin Eng Kokoro: Kokoro --- src/dawn/native/Toggles.cpp | 11 ++- src/dawn/native/Toggles.h | 1 + src/dawn/native/d3d12/CommandBufferD3D12.cpp | 93 +++++++++++++++++++ src/dawn/native/d3d12/D3D12Info.cpp | 6 ++ src/dawn/native/d3d12/D3D12Info.h | 1 + src/dawn/native/d3d12/DeviceD3D12.cpp | 8 ++ src/dawn/native/d3d12/TextureD3D12.cpp | 4 + src/dawn/native/d3d12/TextureD3D12.h | 1 + src/dawn/tests/end2end/CopyTests.cpp | 3 - .../tests/end2end/DepthStencilCopyTests.cpp | 33 +++---- 10 files changed, 134 insertions(+), 27 deletions(-) diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp index 5c90de2aa4..3c1ae30ebf 100644 --- a/src/dawn/native/Toggles.cpp +++ b/src/dawn/native/Toggles.cpp @@ -182,9 +182,8 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{ "Split texture-to-texture copy into two copies: copy from source texture into a temporary " "buffer, and copy from the temporary buffer into the destination texture under specific " "situations. This workaround is by default enabled on some Intel GPUs which have a driver " - "bug " - "in the execution of CopyTextureRegion() when we copy with the formats whose texel block " - "sizes are less than 4 bytes from a greater mip level to a smaller mip level on D3D12 " + "bug in the execution of CopyTextureRegion() when we copy with the formats whose texel " + "block sizes are less than 4 bytes from a greater mip level to a smaller mip level on D3D12 " "backends.", "https://crbug.com/1161355"}}, {Toggle::EmitHLSLDebugSymbols, @@ -290,6 +289,12 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{ "issue. We can workaround this security issue via allocating extra memory and limiting its " "access in itself.", "https://crbug.com/dawn/949"}}, + {Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset, + {"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_copy_with_non_zero_buffer_offset", + "Split buffer-texture copy into two copies: do first copy with a temporary buffer at offset " + "0, then copy from the temporary buffer to the destination. Now this toggle must be enabled " + "on the D3D12 platforms where programmable MSAA is not supported.", + "https://crbug.com/dawn/727"}}, // Comment to separate the }} so it is clearer what to copy-paste to add a toggle. }}; } // anonymous namespace diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h index ceb8bf7075..3ea47dc22c 100644 --- a/src/dawn/native/Toggles.h +++ b/src/dawn/native/Toggles.h @@ -75,6 +75,7 @@ enum class Toggle { D3D12DontSetClearValueOnDepthTextureCreation, D3D12AlwaysUseTypelessFormatsForCastableTexture, D3D12AllocateExtraMemoryFor2DArrayTexture, + D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset, EnumCount, InvalidEnum = EnumCount, diff --git a/src/dawn/native/d3d12/CommandBufferD3D12.cpp b/src/dawn/native/d3d12/CommandBufferD3D12.cpp index 08e2bcbe16..08aad3418e 100644 --- a/src/dawn/native/d3d12/CommandBufferD3D12.cpp +++ b/src/dawn/native/d3d12/CommandBufferD3D12.cpp @@ -225,6 +225,87 @@ MaybeError RecordCopyTextureWithTemporaryBuffer(CommandRecordingContext* recordi return {}; } +bool ShouldCopyUsingTemporaryBuffer(DeviceBase* device, + const BufferCopy& bufferCopy, + const TextureCopy& textureCopy) { + // Currently we only need the workaround for some D3D12 platforms. + if (device->IsToggleEnabled( + Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset)) { + if ((ToBackend(textureCopy.texture)->GetD3D12ResourceFlags() & + D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) && + bufferCopy.offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT > 0) { + return true; + } + } + return false; +} + +MaybeError RecordBufferTextureCopyWithTemporaryBuffer(CommandRecordingContext* recordingContext, + BufferTextureCopyDirection copyDirection, + const BufferCopy& bufferCopy, + const TextureCopy& textureCopy, + const Extent3D& copySize) { + dawn::native::Format format = textureCopy.texture->GetFormat(); + const TexelBlockInfo& blockInfo = format.GetAspectInfo(textureCopy.aspect).block; + + // Create tempBuffer + // The size of temporary buffer isn't needed to be a multiple of 4 because we don't + // need to set mappedAtCreation to be true. + auto tempBufferSize = ComputeRequiredBytesInCopy(blockInfo, copySize, bufferCopy.bytesPerRow, + bufferCopy.rowsPerImage); + + BufferDescriptor tempBufferDescriptor; + tempBufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; + tempBufferDescriptor.size = tempBufferSize.AcquireSuccess(); + Device* device = ToBackend(textureCopy.texture->GetDevice()); + Ref tempBufferBase; + DAWN_TRY_ASSIGN(tempBufferBase, device->CreateBuffer(&tempBufferDescriptor)); + // D3D12 aligns the entire buffer to at least 64KB, so the virtual address of tempBuffer will + // always be aligned to D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512). + Ref tempBuffer = ToBackend(std::move(tempBufferBase)); + ASSERT(tempBuffer->GetVA() % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT == 0); + + BufferCopy tempBufferCopy; + tempBufferCopy.buffer = tempBuffer; + tempBufferCopy.offset = 0; + tempBufferCopy.bytesPerRow = bufferCopy.bytesPerRow; + tempBufferCopy.rowsPerImage = bufferCopy.rowsPerImage; + + tempBuffer->TrackUsageAndTransitionNow(recordingContext, wgpu::BufferUsage::CopyDst); + + ID3D12GraphicsCommandList* commandList = recordingContext->GetCommandList(); + switch (copyDirection) { + case BufferTextureCopyDirection::B2T: { + commandList->CopyBufferRegion(tempBuffer->GetD3D12Resource(), 0, + ToBackend(bufferCopy.buffer)->GetD3D12Resource(), + bufferCopy.offset, tempBufferDescriptor.size); + tempBuffer->TrackUsageAndTransitionNow(recordingContext, wgpu::BufferUsage::CopySrc); + RecordBufferTextureCopy(BufferTextureCopyDirection::B2T, + recordingContext->GetCommandList(), tempBufferCopy, textureCopy, + copySize); + break; + } + case BufferTextureCopyDirection::T2B: { + RecordBufferTextureCopy(BufferTextureCopyDirection::T2B, + recordingContext->GetCommandList(), tempBufferCopy, textureCopy, + copySize); + tempBuffer->TrackUsageAndTransitionNow(recordingContext, wgpu::BufferUsage::CopySrc); + commandList->CopyBufferRegion(ToBackend(bufferCopy.buffer)->GetD3D12Resource(), + bufferCopy.offset, tempBuffer->GetD3D12Resource(), 0, + tempBufferDescriptor.size); + break; + } + default: + UNREACHABLE(); + break; + } + + // Save tempBuffer into recordingContext + recordingContext->AddToTempBuffers(std::move(tempBuffer)); + + return {}; +} + void RecordNumWorkgroupsForDispatch(ID3D12GraphicsCommandList* commandList, ComputePipeline* pipeline, DispatchCmd* dispatch) { @@ -733,6 +814,12 @@ MaybeError CommandBuffer::RecordCommands(CommandRecordingContext* commandContext texture->TrackUsageAndTransitionNow(commandContext, wgpu::TextureUsage::CopyDst, subresources); + if (ShouldCopyUsingTemporaryBuffer(GetDevice(), copy->source, copy->destination)) { + DAWN_TRY(RecordBufferTextureCopyWithTemporaryBuffer( + commandContext, BufferTextureCopyDirection::B2T, copy->source, + copy->destination, copy->copySize)); + break; + } RecordBufferTextureCopy(BufferTextureCopyDirection::B2T, commandList, copy->source, copy->destination, copy->copySize); @@ -760,6 +847,12 @@ MaybeError CommandBuffer::RecordCommands(CommandRecordingContext* commandContext subresources); buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst); + if (ShouldCopyUsingTemporaryBuffer(GetDevice(), copy->destination, copy->source)) { + DAWN_TRY(RecordBufferTextureCopyWithTemporaryBuffer( + commandContext, BufferTextureCopyDirection::T2B, copy->destination, + copy->source, copy->copySize)); + break; + } RecordBufferTextureCopy(BufferTextureCopyDirection::T2B, commandList, copy->destination, copy->source, copy->copySize); diff --git a/src/dawn/native/d3d12/D3D12Info.cpp b/src/dawn/native/d3d12/D3D12Info.cpp index 6bbb865956..3d3470c173 100644 --- a/src/dawn/native/d3d12/D3D12Info.cpp +++ b/src/dawn/native/d3d12/D3D12Info.cpp @@ -44,6 +44,12 @@ ResultOrError GatherDeviceInfo(const Adapter& adapter) { "ID3D12Device::CheckFeatureSupport")); info.resourceHeapTier = featureOptions.ResourceHeapTier; + D3D12_FEATURE_DATA_D3D12_OPTIONS2 featureOptions2 = {}; + if (SUCCEEDED(adapter.GetDevice()->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS2, &featureOptions2, sizeof(featureOptions2)))) { + info.programmableSamplePositionsTier = featureOptions2.ProgrammableSamplePositionsTier; + } + D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureOptions3 = {}; if (SUCCEEDED(adapter.GetDevice()->CheckFeatureSupport( D3D12_FEATURE_D3D12_OPTIONS3, &featureOptions3, sizeof(featureOptions3)))) { diff --git a/src/dawn/native/d3d12/D3D12Info.h b/src/dawn/native/d3d12/D3D12Info.h index d985f55411..f81e28de11 100644 --- a/src/dawn/native/d3d12/D3D12Info.h +++ b/src/dawn/native/d3d12/D3D12Info.h @@ -35,6 +35,7 @@ struct D3D12DeviceInfo { bool supportsSharedResourceCapabilityTier1; bool supportsDP4a; bool supportsCastingFullyTypedFormat; + uint32_t programmableSamplePositionsTier; }; ResultOrError GatherDeviceInfo(const Adapter& adapter); diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp index 498fef2c67..5663ca2e7e 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.cpp +++ b/src/dawn/native/d3d12/DeviceD3D12.cpp @@ -640,6 +640,12 @@ void Device::InitTogglesFromDriver() { SetToggle(Toggle::D3D12AlwaysUseTypelessFormatsForCastableTexture, !GetDeviceInfo().supportsCastingFullyTypedFormat); + // The restriction on the source box specifying a portion of the depth stencil texture in + // CopyTextureRegion() is only available on the D3D12 platforms which doesn't support + // programmable sample positions. + SetToggle(Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset, + GetDeviceInfo().programmableSamplePositionsTier == 0); + // Disable optimizations when using FXC // See https://crbug.com/dawn/1203 SetToggle(Toggle::FxcOptimizations, false); @@ -673,6 +679,8 @@ void Device::InitTogglesFromDriver() { // Currently this workaround is needed on any D3D12 backend for some particular situations. // But we may need to limit it if D3D12 runtime fixes the bug on its new release. See // https://crbug.com/dawn/1289 for more information. + // TODO(dawn:1289): Unset this toggle when we skip the split on the buffer-texture copy + // on the platforms where UnrestrictedBufferTextureCopyPitchSupported is true. SetToggle(Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings, true); // This workaround is only needed on Intel Gen12LP with driver prior to 30.0.101.1960. diff --git a/src/dawn/native/d3d12/TextureD3D12.cpp b/src/dawn/native/d3d12/TextureD3D12.cpp index 5f6492097c..41bc0a099c 100644 --- a/src/dawn/native/d3d12/TextureD3D12.cpp +++ b/src/dawn/native/d3d12/TextureD3D12.cpp @@ -706,6 +706,10 @@ ID3D12Resource* Texture::GetD3D12Resource() const { return mResourceAllocation.GetD3D12Resource(); } +D3D12_RESOURCE_FLAGS Texture::GetD3D12ResourceFlags() const { + return mD3D12ResourceFlags; +} + DXGI_FORMAT Texture::GetD3D12CopyableSubresourceFormat(Aspect aspect) const { ASSERT(GetFormat().aspects & aspect); diff --git a/src/dawn/native/d3d12/TextureD3D12.h b/src/dawn/native/d3d12/TextureD3D12.h index c67aff2ef6..3e49bc232b 100644 --- a/src/dawn/native/d3d12/TextureD3D12.h +++ b/src/dawn/native/d3d12/TextureD3D12.h @@ -58,6 +58,7 @@ class Texture final : public TextureBase { DXGI_FORMAT GetD3D12Format() const; ID3D12Resource* GetD3D12Resource() const; DXGI_FORMAT GetD3D12CopyableSubresourceFormat(Aspect aspect) const; + D3D12_RESOURCE_FLAGS GetD3D12ResourceFlags() const; D3D12_RENDER_TARGET_VIEW_DESC GetRTVDescriptor(const Format& format, uint32_t mipLevel, diff --git a/src/dawn/tests/end2end/CopyTests.cpp b/src/dawn/tests/end2end/CopyTests.cpp index 8b89e373f3..ed7fb59997 100644 --- a/src/dawn/tests/end2end/CopyTests.cpp +++ b/src/dawn/tests/end2end/CopyTests.cpp @@ -1028,9 +1028,6 @@ TEST_P(CopyTests_T2B, CopyOneRowWithDepth32Float) { // depth. DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("disable_depth_read")); - // TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers. - DAWN_SUPPRESS_TEST_IF(IsD3D12()); - constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::Depth32Float; constexpr uint32_t kPixelsPerRow = 4u; diff --git a/src/dawn/tests/end2end/DepthStencilCopyTests.cpp b/src/dawn/tests/end2end/DepthStencilCopyTests.cpp index d6431c8a6d..1bcd45117b 100644 --- a/src/dawn/tests/end2end/DepthStencilCopyTests.cpp +++ b/src/dawn/tests/end2end/DepthStencilCopyTests.cpp @@ -543,10 +543,6 @@ TEST_P(DepthCopyTests, FromDepthAspect) { // Test copying the depth-only aspect into a buffer at a non-zero offset. TEST_P(DepthCopyTests, FromDepthAspectToBufferAtNonZeroOffset) { - // TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug - // in the implementation of texture-to-buffer copies with depth stencil textures on D3D12. - DAWN_SUPPRESS_TEST_IF(IsD3D12()); - constexpr float kInitDepth = 0.2f; constexpr uint32_t kWidth = 4; constexpr uint32_t kHeight = 4; @@ -661,10 +657,6 @@ TEST_P(DepthCopyFromBufferTests, BufferToNonRenderableDepthAspectAtNonZeroOffset // Test copying the depth-only aspect from a buffer at a non-zero offset. TEST_P(DepthCopyFromBufferTests, BufferToRenderableDepthAspectAtNonZeroOffset) { constexpr std::array kBufferCopyOffsets = {8, 512}; - - // TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug - // in the implementation of texture-to-buffer copies with depth stencil textures on D3D12. - DAWN_SUPPRESS_TEST_IF(IsD3D12()); constexpr bool kIsRenderable = true; for (uint32_t offset : kBufferCopyOffsets) { DoTest(offset, kIsRenderable); @@ -875,10 +867,6 @@ TEST_P(StencilCopyTests, FromStencilAspect) { // Test copying the stencil-only aspect into a buffer at a non-zero offset TEST_P(StencilCopyTests, FromStencilAspectAtNonZeroOffset) { - // TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug - // in the implementation of texture-to-buffer copies with depth stencil textures on D3D12. - DAWN_SUPPRESS_TEST_IF(IsD3D12()); - constexpr uint32_t kWidth = 4; constexpr uint32_t kHeight = 4; constexpr uint32_t kTestLevel = 0; @@ -905,10 +893,6 @@ TEST_P(StencilCopyTests, ToStencilAspect) { // Test copying to the stencil-aspect of a texture at non-zero offset TEST_P(StencilCopyTests, ToStencilAspectAtNonZeroOffset) { - // TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug - // in the implementation of texture-to-buffer copies with depth stencil textures on D3D12. - DAWN_SUPPRESS_TEST_IF(IsD3D12()); - constexpr std::array kBufferCopyOffsets = {8, 512}; for (uint32_t offset : kBufferCopyOffsets) { DoCopyToStencilTest(offset); @@ -924,19 +908,26 @@ DAWN_INSTANTIATE_TEST_P(DepthStencilCopyTests, utils::kDepthAndStencilFormats.end())); DAWN_INSTANTIATE_TEST_P(DepthCopyTests, - {D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), - VulkanBackend()}, + {D3D12Backend(), + D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_" + "copy_with_non_zero_buffer_offset"}), + MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()}, std::vector(kValidDepthCopyTextureFormats.begin(), kValidDepthCopyTextureFormats.end())); DAWN_INSTANTIATE_TEST_P(DepthCopyFromBufferTests, - {D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), - VulkanBackend()}, + {D3D12Backend(), + D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_" + "copy_with_non_zero_buffer_offset"}), + MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()}, std::vector(kValidDepthCopyFromBufferFormats.begin(), kValidDepthCopyFromBufferFormats.end())); DAWN_INSTANTIATE_TEST_P(StencilCopyTests, - {D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), + {D3D12Backend(), + D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_" + "copy_with_non_zero_buffer_offset"}), + MetalBackend(), OpenGLBackend(), OpenGLESBackend(), // Test with the vulkan_use_s8 toggle forced on and off. VulkanBackend({"vulkan_use_s8"}, {}), VulkanBackend({}, {"vulkan_use_s8"})},