D3D12: Workaround buffer-texture copy with depth-stencil on some platforms

On the D3D12 platforms that don't support programmable sample positions,
the source box specifying a portion of the depth texture must all be 0, or
an error and a device lost will occur. This patch adds a workaround for this
issue by splitting the original buffer-texture copy into two copies:
1. copy from the source resource into a temporary buffer at offset 0
2. copy from the temporary buffer at offset 0 into the destination resource.

In the next patch we will fix the corresponding issue in Queue.WriteTexture.

Note that on newer version of D3D12 the restrictions about
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT and D3D12_TEXTURE_DATA_PITCH_ALIGNMENT
have all been lifted out, so the workaround added in this patch will also be
disabled on the platforms that don't support programmable sample positions
but the restrictions about D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT and
D3D12_TEXTURE_DATA_PITCH_ALIGNMENT are no longer available.

Bug: dawn:727
Test: dawn_end2end_test
Change-Id: I9f1d848a0eeac5bd52c9219af6992a2821307746
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97182
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
Jiawei Shao 2022-08-06 04:36:08 +00:00 committed by Dawn LUCI CQ
parent 40647c72af
commit 4b9ebb1465
10 changed files with 134 additions and 27 deletions

View File

@ -182,9 +182,8 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"Split texture-to-texture copy into two copies: copy from source texture into a temporary "
"buffer, and copy from the temporary buffer into the destination texture under specific "
"situations. This workaround is by default enabled on some Intel GPUs which have a driver "
"bug "
"in the execution of CopyTextureRegion() when we copy with the formats whose texel block "
"sizes are less than 4 bytes from a greater mip level to a smaller mip level on D3D12 "
"bug in the execution of CopyTextureRegion() when we copy with the formats whose texel "
"block sizes are less than 4 bytes from a greater mip level to a smaller mip level on D3D12 "
"backends.",
"https://crbug.com/1161355"}},
{Toggle::EmitHLSLDebugSymbols,
@ -290,6 +289,12 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"issue. We can workaround this security issue via allocating extra memory and limiting its "
"access in itself.",
"https://crbug.com/dawn/949"}},
{Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
{"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_copy_with_non_zero_buffer_offset",
"Split buffer-texture copy into two copies: do first copy with a temporary buffer at offset "
"0, then copy from the temporary buffer to the destination. Now this toggle must be enabled "
"on the D3D12 platforms where programmable MSAA is not supported.",
"https://crbug.com/dawn/727"}},
// Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
}};
} // anonymous namespace

View File

@ -75,6 +75,7 @@ enum class Toggle {
D3D12DontSetClearValueOnDepthTextureCreation,
D3D12AlwaysUseTypelessFormatsForCastableTexture,
D3D12AllocateExtraMemoryFor2DArrayTexture,
D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
EnumCount,
InvalidEnum = EnumCount,

View File

@ -225,6 +225,87 @@ MaybeError RecordCopyTextureWithTemporaryBuffer(CommandRecordingContext* recordi
return {};
}
bool ShouldCopyUsingTemporaryBuffer(DeviceBase* device,
const BufferCopy& bufferCopy,
const TextureCopy& textureCopy) {
// Currently we only need the workaround for some D3D12 platforms.
if (device->IsToggleEnabled(
Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset)) {
if ((ToBackend(textureCopy.texture)->GetD3D12ResourceFlags() &
D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) &&
bufferCopy.offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT > 0) {
return true;
}
}
return false;
}
MaybeError RecordBufferTextureCopyWithTemporaryBuffer(CommandRecordingContext* recordingContext,
BufferTextureCopyDirection copyDirection,
const BufferCopy& bufferCopy,
const TextureCopy& textureCopy,
const Extent3D& copySize) {
dawn::native::Format format = textureCopy.texture->GetFormat();
const TexelBlockInfo& blockInfo = format.GetAspectInfo(textureCopy.aspect).block;
// Create tempBuffer
// The size of temporary buffer isn't needed to be a multiple of 4 because we don't
// need to set mappedAtCreation to be true.
auto tempBufferSize = ComputeRequiredBytesInCopy(blockInfo, copySize, bufferCopy.bytesPerRow,
bufferCopy.rowsPerImage);
BufferDescriptor tempBufferDescriptor;
tempBufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
tempBufferDescriptor.size = tempBufferSize.AcquireSuccess();
Device* device = ToBackend(textureCopy.texture->GetDevice());
Ref<BufferBase> tempBufferBase;
DAWN_TRY_ASSIGN(tempBufferBase, device->CreateBuffer(&tempBufferDescriptor));
// D3D12 aligns the entire buffer to at least 64KB, so the virtual address of tempBuffer will
// always be aligned to D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512).
Ref<Buffer> tempBuffer = ToBackend(std::move(tempBufferBase));
ASSERT(tempBuffer->GetVA() % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT == 0);
BufferCopy tempBufferCopy;
tempBufferCopy.buffer = tempBuffer;
tempBufferCopy.offset = 0;
tempBufferCopy.bytesPerRow = bufferCopy.bytesPerRow;
tempBufferCopy.rowsPerImage = bufferCopy.rowsPerImage;
tempBuffer->TrackUsageAndTransitionNow(recordingContext, wgpu::BufferUsage::CopyDst);
ID3D12GraphicsCommandList* commandList = recordingContext->GetCommandList();
switch (copyDirection) {
case BufferTextureCopyDirection::B2T: {
commandList->CopyBufferRegion(tempBuffer->GetD3D12Resource(), 0,
ToBackend(bufferCopy.buffer)->GetD3D12Resource(),
bufferCopy.offset, tempBufferDescriptor.size);
tempBuffer->TrackUsageAndTransitionNow(recordingContext, wgpu::BufferUsage::CopySrc);
RecordBufferTextureCopy(BufferTextureCopyDirection::B2T,
recordingContext->GetCommandList(), tempBufferCopy, textureCopy,
copySize);
break;
}
case BufferTextureCopyDirection::T2B: {
RecordBufferTextureCopy(BufferTextureCopyDirection::T2B,
recordingContext->GetCommandList(), tempBufferCopy, textureCopy,
copySize);
tempBuffer->TrackUsageAndTransitionNow(recordingContext, wgpu::BufferUsage::CopySrc);
commandList->CopyBufferRegion(ToBackend(bufferCopy.buffer)->GetD3D12Resource(),
bufferCopy.offset, tempBuffer->GetD3D12Resource(), 0,
tempBufferDescriptor.size);
break;
}
default:
UNREACHABLE();
break;
}
// Save tempBuffer into recordingContext
recordingContext->AddToTempBuffers(std::move(tempBuffer));
return {};
}
void RecordNumWorkgroupsForDispatch(ID3D12GraphicsCommandList* commandList,
ComputePipeline* pipeline,
DispatchCmd* dispatch) {
@ -733,6 +814,12 @@ MaybeError CommandBuffer::RecordCommands(CommandRecordingContext* commandContext
texture->TrackUsageAndTransitionNow(commandContext, wgpu::TextureUsage::CopyDst,
subresources);
if (ShouldCopyUsingTemporaryBuffer(GetDevice(), copy->source, copy->destination)) {
DAWN_TRY(RecordBufferTextureCopyWithTemporaryBuffer(
commandContext, BufferTextureCopyDirection::B2T, copy->source,
copy->destination, copy->copySize));
break;
}
RecordBufferTextureCopy(BufferTextureCopyDirection::B2T, commandList, copy->source,
copy->destination, copy->copySize);
@ -760,6 +847,12 @@ MaybeError CommandBuffer::RecordCommands(CommandRecordingContext* commandContext
subresources);
buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst);
if (ShouldCopyUsingTemporaryBuffer(GetDevice(), copy->destination, copy->source)) {
DAWN_TRY(RecordBufferTextureCopyWithTemporaryBuffer(
commandContext, BufferTextureCopyDirection::T2B, copy->destination,
copy->source, copy->copySize));
break;
}
RecordBufferTextureCopy(BufferTextureCopyDirection::T2B, commandList,
copy->destination, copy->source, copy->copySize);

View File

@ -44,6 +44,12 @@ ResultOrError<D3D12DeviceInfo> GatherDeviceInfo(const Adapter& adapter) {
"ID3D12Device::CheckFeatureSupport"));
info.resourceHeapTier = featureOptions.ResourceHeapTier;
D3D12_FEATURE_DATA_D3D12_OPTIONS2 featureOptions2 = {};
if (SUCCEEDED(adapter.GetDevice()->CheckFeatureSupport(
D3D12_FEATURE_D3D12_OPTIONS2, &featureOptions2, sizeof(featureOptions2)))) {
info.programmableSamplePositionsTier = featureOptions2.ProgrammableSamplePositionsTier;
}
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureOptions3 = {};
if (SUCCEEDED(adapter.GetDevice()->CheckFeatureSupport(
D3D12_FEATURE_D3D12_OPTIONS3, &featureOptions3, sizeof(featureOptions3)))) {

View File

@ -35,6 +35,7 @@ struct D3D12DeviceInfo {
bool supportsSharedResourceCapabilityTier1;
bool supportsDP4a;
bool supportsCastingFullyTypedFormat;
uint32_t programmableSamplePositionsTier;
};
ResultOrError<D3D12DeviceInfo> GatherDeviceInfo(const Adapter& adapter);

View File

@ -640,6 +640,12 @@ void Device::InitTogglesFromDriver() {
SetToggle(Toggle::D3D12AlwaysUseTypelessFormatsForCastableTexture,
!GetDeviceInfo().supportsCastingFullyTypedFormat);
// The restriction on the source box specifying a portion of the depth stencil texture in
// CopyTextureRegion() is only available on the D3D12 platforms which doesn't support
// programmable sample positions.
SetToggle(Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
GetDeviceInfo().programmableSamplePositionsTier == 0);
// Disable optimizations when using FXC
// See https://crbug.com/dawn/1203
SetToggle(Toggle::FxcOptimizations, false);
@ -673,6 +679,8 @@ void Device::InitTogglesFromDriver() {
// Currently this workaround is needed on any D3D12 backend for some particular situations.
// But we may need to limit it if D3D12 runtime fixes the bug on its new release. See
// https://crbug.com/dawn/1289 for more information.
// TODO(dawn:1289): Unset this toggle when we skip the split on the buffer-texture copy
// on the platforms where UnrestrictedBufferTextureCopyPitchSupported is true.
SetToggle(Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings, true);
// This workaround is only needed on Intel Gen12LP with driver prior to 30.0.101.1960.

View File

@ -706,6 +706,10 @@ ID3D12Resource* Texture::GetD3D12Resource() const {
return mResourceAllocation.GetD3D12Resource();
}
D3D12_RESOURCE_FLAGS Texture::GetD3D12ResourceFlags() const {
return mD3D12ResourceFlags;
}
DXGI_FORMAT Texture::GetD3D12CopyableSubresourceFormat(Aspect aspect) const {
ASSERT(GetFormat().aspects & aspect);

View File

@ -58,6 +58,7 @@ class Texture final : public TextureBase {
DXGI_FORMAT GetD3D12Format() const;
ID3D12Resource* GetD3D12Resource() const;
DXGI_FORMAT GetD3D12CopyableSubresourceFormat(Aspect aspect) const;
D3D12_RESOURCE_FLAGS GetD3D12ResourceFlags() const;
D3D12_RENDER_TARGET_VIEW_DESC GetRTVDescriptor(const Format& format,
uint32_t mipLevel,

View File

@ -1028,9 +1028,6 @@ TEST_P(CopyTests_T2B, CopyOneRowWithDepth32Float) {
// depth.
DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("disable_depth_read"));
// TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers.
DAWN_SUPPRESS_TEST_IF(IsD3D12());
constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::Depth32Float;
constexpr uint32_t kPixelsPerRow = 4u;

View File

@ -543,10 +543,6 @@ TEST_P(DepthCopyTests, FromDepthAspect) {
// Test copying the depth-only aspect into a buffer at a non-zero offset.
TEST_P(DepthCopyTests, FromDepthAspectToBufferAtNonZeroOffset) {
// TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug
// in the implementation of texture-to-buffer copies with depth stencil textures on D3D12.
DAWN_SUPPRESS_TEST_IF(IsD3D12());
constexpr float kInitDepth = 0.2f;
constexpr uint32_t kWidth = 4;
constexpr uint32_t kHeight = 4;
@ -661,10 +657,6 @@ TEST_P(DepthCopyFromBufferTests, BufferToNonRenderableDepthAspectAtNonZeroOffset
// Test copying the depth-only aspect from a buffer at a non-zero offset.
TEST_P(DepthCopyFromBufferTests, BufferToRenderableDepthAspectAtNonZeroOffset) {
constexpr std::array<uint32_t, 2> kBufferCopyOffsets = {8, 512};
// TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug
// in the implementation of texture-to-buffer copies with depth stencil textures on D3D12.
DAWN_SUPPRESS_TEST_IF(IsD3D12());
constexpr bool kIsRenderable = true;
for (uint32_t offset : kBufferCopyOffsets) {
DoTest(offset, kIsRenderable);
@ -875,10 +867,6 @@ TEST_P(StencilCopyTests, FromStencilAspect) {
// Test copying the stencil-only aspect into a buffer at a non-zero offset
TEST_P(StencilCopyTests, FromStencilAspectAtNonZeroOffset) {
// TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug
// in the implementation of texture-to-buffer copies with depth stencil textures on D3D12.
DAWN_SUPPRESS_TEST_IF(IsD3D12());
constexpr uint32_t kWidth = 4;
constexpr uint32_t kHeight = 4;
constexpr uint32_t kTestLevel = 0;
@ -905,10 +893,6 @@ TEST_P(StencilCopyTests, ToStencilAspect) {
// Test copying to the stencil-aspect of a texture at non-zero offset
TEST_P(StencilCopyTests, ToStencilAspectAtNonZeroOffset) {
// TODO(crbug.com/dawn/727): currently this test fails on many D3D12 drivers as there is a bug
// in the implementation of texture-to-buffer copies with depth stencil textures on D3D12.
DAWN_SUPPRESS_TEST_IF(IsD3D12());
constexpr std::array<uint32_t, 2> kBufferCopyOffsets = {8, 512};
for (uint32_t offset : kBufferCopyOffsets) {
DoCopyToStencilTest(offset);
@ -924,19 +908,26 @@ DAWN_INSTANTIATE_TEST_P(DepthStencilCopyTests,
utils::kDepthAndStencilFormats.end()));
DAWN_INSTANTIATE_TEST_P(DepthCopyTests,
{D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(),
VulkanBackend()},
{D3D12Backend(),
D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_"
"copy_with_non_zero_buffer_offset"}),
MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()},
std::vector<wgpu::TextureFormat>(kValidDepthCopyTextureFormats.begin(),
kValidDepthCopyTextureFormats.end()));
DAWN_INSTANTIATE_TEST_P(DepthCopyFromBufferTests,
{D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(),
VulkanBackend()},
{D3D12Backend(),
D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_"
"copy_with_non_zero_buffer_offset"}),
MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()},
std::vector<wgpu::TextureFormat>(kValidDepthCopyFromBufferFormats.begin(),
kValidDepthCopyFromBufferFormats.end()));
DAWN_INSTANTIATE_TEST_P(StencilCopyTests,
{D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(),
{D3D12Backend(),
D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_"
"copy_with_non_zero_buffer_offset"}),
MetalBackend(), OpenGLBackend(), OpenGLESBackend(),
// Test with the vulkan_use_s8 toggle forced on and off.
VulkanBackend({"vulkan_use_s8"}, {}),
VulkanBackend({}, {"vulkan_use_s8"})},