D3D12: work around buffer size insufficient issue for copy

D3D12 requires more buffer storage than it should. If the buffer used
for B2T/T2B copy is big enough according to WebGPU spec but it doesn't
meet D3D12's requirement, we need to workaround it via split the copy
into two copies, in order to make B2T/T2B copy being done correctly on
D3D12.

Bug: dawn:1289, dawn:1278

Change-Id: Id88c580dce00553845817677a2d356690ef4acfe
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/86860
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Yunchao He <yunchao.he@intel.com>
This commit is contained in:
Yunchao He 2022-04-20 16:18:05 +00:00 committed by Dawn LUCI CQ
parent b3483b68f7
commit 803f033392
7 changed files with 119 additions and 25 deletions

View File

@ -111,13 +111,6 @@ namespace dawn::native {
static_cast<uint64_t>(maxStart); static_cast<uint64_t>(maxStart);
} }
template <typename A, typename B>
DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) {
static_assert(std::is_same<A, uint32_t>::value, "'a' must be uint32_t");
static_assert(std::is_same<B, uint32_t>::value, "'b' must be uint32_t");
return uint64_t(a) * uint64_t(b);
}
ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo, ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo,
const Extent3D& copySize, const Extent3D& copySize,
uint32_t bytesPerRow, uint32_t bytesPerRow,

View File

@ -38,6 +38,13 @@ namespace dawn::native {
uint64_t bufferOffset, uint64_t bufferOffset,
uint64_t size); uint64_t size);
template <typename A, typename B>
DAWN_FORCE_INLINE uint64_t Safe32x32(A a, B b) {
static_assert(std::is_same<A, uint32_t>::value, "'a' must be uint32_t");
static_assert(std::is_same<B, uint32_t>::value, "'b' must be uint32_t");
return uint64_t(a) * uint64_t(b);
}
ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo, ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockInfo,
const Extent3D& copySize, const Extent3D& copySize,
uint32_t bytesPerRow, uint32_t bytesPerRow,

View File

@ -259,6 +259,15 @@ namespace dawn::native {
"Initialize workgroup memory with OpConstantNull on Vulkan when the Vulkan extension " "Initialize workgroup memory with OpConstantNull on Vulkan when the Vulkan extension "
"VK_KHR_zero_initialize_workgroup_memory is supported.", "VK_KHR_zero_initialize_workgroup_memory is supported.",
"https://crbug.com/dawn/1302"}}, "https://crbug.com/dawn/1302"}},
{Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings,
{"d3d12_split_buffer_texture_copy_for_rows_per_image_paddings",
"D3D12 requires more buffer storage than it should when rowsPerImage is greater than "
"copyHeight, which means there are pure padding row(s) on each image. In this "
"situation, the buffer used for B2T/T2B copy might be big enough according to "
"WebGPU's spec but it doesn't meet D3D12's requirement, then we need to workaround "
"it via split the copy operation into two copies, in order to make B2T/T2B copy "
"being done correctly on D3D12.",
"https://crbug.com/dawn/1289"}},
// Comment to separate the }} so it is clearer what to copy-paste to add a toggle. // Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
}}; }};

View File

@ -67,6 +67,7 @@ namespace dawn::native {
RecordDetailedTimingInTraceEvents, RecordDetailedTimingInTraceEvents,
DisableTimestampQueryConversion, DisableTimestampQueryConversion,
VulkanUseZeroInitializeWorkgroupMemoryExtension, VulkanUseZeroInitializeWorkgroupMemoryExtension,
D3D12SplitBufferTextureCopyForRowsPerImagePaddings,
EnumCount, EnumCount,
InvalidEnum = EnumCount, InvalidEnum = EnumCount,

View File

@ -612,6 +612,11 @@ namespace dawn::native::d3d12 {
true); true);
} }
} }
// Currently this workaround is needed on any D3D12 backend for some particular situations.
// But we may need to limit it if D3D12 runtime fixes the bug on its new release. See
// https://crbug.com/dawn/1289 for more information.
SetToggle(Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings, true);
} }
MaybeError Device::WaitForIdleForDestruction() { MaybeError Device::WaitForIdleForDestruction() {

View File

@ -19,6 +19,7 @@
#include <utility> #include <utility>
#include "dawn/common/Assert.h" #include "dawn/common/Assert.h"
#include "dawn/native/CommandValidation.h"
#include "dawn/native/Format.h" #include "dawn/native/Format.h"
#include "dawn/native/d3d12/BufferD3D12.h" #include "dawn/native/d3d12/BufferD3D12.h"
#include "dawn/native/d3d12/CommandRecordingContext.h" #include "dawn/native/d3d12/CommandRecordingContext.h"
@ -27,6 +28,63 @@
namespace dawn::native::d3d12 { namespace dawn::native::d3d12 {
namespace {
uint64_t RequiredCopySizeByD3D12(const uint32_t bytesPerRow,
const uint32_t rowsPerImage,
const Extent3D& copySize,
const TexelBlockInfo& blockInfo) {
uint64_t bytesPerImage = Safe32x32(bytesPerRow, rowsPerImage);
// Required copy size for B2T/T2B copy on D3D12 is smaller than (but very close to)
// depth * bytesPerImage. The latter is already checked at ComputeRequiredBytesInCopy()
// in CommandValidation.cpp.
uint64_t requiredCopySizeByD3D12 = bytesPerImage * (copySize.depthOrArrayLayers - 1);
// When calculating the required copy size for B2T/T2B copy, D3D12 doesn't respect
// rowsPerImage paddings on the last image for 3D texture, but it does respect
// bytesPerRow paddings on the last row.
ASSERT(blockInfo.width == 1);
ASSERT(blockInfo.height == 1);
uint64_t lastRowBytes = Safe32x32(blockInfo.byteSize, copySize.width);
ASSERT(rowsPerImage > copySize.height);
uint64_t lastImageBytesByD3D12 =
Safe32x32(bytesPerRow, rowsPerImage - 1) + lastRowBytes;
requiredCopySizeByD3D12 += lastImageBytesByD3D12;
return requiredCopySizeByD3D12;
}
// This function is used to access whether we need a workaround for D3D12's wrong algorithm
// of calculating required buffer size for B2T/T2B copy. The workaround is needed only when
// - The corresponding toggle is enabled.
// - It is a 3D texture (so the format is uncompressed).
// - There are multiple depth images to be copied (copySize.depthOrArrayLayers > 1).
// - It has rowsPerImage paddings (rowsPerImage > copySize.height).
// - The buffer size doesn't meet D3D12's requirement.
bool NeedBufferSizeWorkaroundForBufferTextureCopyOnD3D12(const BufferCopy& bufferCopy,
const TextureCopy& textureCopy,
const Extent3D& copySize) {
TextureBase* texture = textureCopy.texture.Get();
Device* device = ToBackend(texture->GetDevice());
if (!device->IsToggleEnabled(
Toggle::D3D12SplitBufferTextureCopyForRowsPerImagePaddings) ||
texture->GetDimension() != wgpu::TextureDimension::e3D ||
copySize.depthOrArrayLayers <= 1 || bufferCopy.rowsPerImage <= copySize.height) {
return false;
}
const TexelBlockInfo& blockInfo =
texture->GetFormat().GetAspectInfo(textureCopy.aspect).block;
uint64_t requiredCopySizeByD3D12 = RequiredCopySizeByD3D12(
bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, copySize, blockInfo);
return bufferCopy.buffer->GetAllocatedSize() - bufferCopy.offset <
requiredCopySizeByD3D12;
}
} // anonymous namespace
ResultOrError<std::wstring> ConvertStringToWstring(const char* str) { ResultOrError<std::wstring> ConvertStringToWstring(const char* str) {
size_t len = strlen(str); size_t len = strlen(str);
if (len == 0) { if (len == 0) {
@ -285,8 +343,36 @@ namespace dawn::native::d3d12 {
const BufferCopy& bufferCopy, const BufferCopy& bufferCopy,
const TextureCopy& textureCopy, const TextureCopy& textureCopy,
const Extent3D& copySize) { const Extent3D& copySize) {
RecordBufferTextureCopyWithBufferHandle(direction, commandList, ID3D12Resource* bufferResource = ToBackend(bufferCopy.buffer)->GetD3D12Resource();
ToBackend(bufferCopy.buffer)->GetD3D12Resource(),
if (NeedBufferSizeWorkaroundForBufferTextureCopyOnD3D12(bufferCopy, textureCopy,
copySize)) {
// Split the copy into two copies if the size of bufferCopy.buffer doesn't meet D3D12's
// requirement and a workaround is needed:
// - The first copy will copy all depth images but the last depth image,
// - The second copy will copy the last depth image.
Extent3D extentForAllButTheLastImage = copySize;
extentForAllButTheLastImage.depthOrArrayLayers -= 1;
RecordBufferTextureCopyWithBufferHandle(
direction, commandList, bufferResource, bufferCopy.offset, bufferCopy.bytesPerRow,
bufferCopy.rowsPerImage, textureCopy, extentForAllButTheLastImage);
Extent3D extentForTheLastImage = copySize;
extentForTheLastImage.depthOrArrayLayers = 1;
TextureCopy textureCopyForTheLastImage = textureCopy;
textureCopyForTheLastImage.origin.z += copySize.depthOrArrayLayers - 1;
uint64_t copiedBytes = bufferCopy.bytesPerRow * bufferCopy.rowsPerImage *
(copySize.depthOrArrayLayers - 1);
RecordBufferTextureCopyWithBufferHandle(
direction, commandList, bufferResource, bufferCopy.offset + copiedBytes,
bufferCopy.bytesPerRow, bufferCopy.rowsPerImage, textureCopyForTheLastImage,
extentForTheLastImage);
return;
}
RecordBufferTextureCopyWithBufferHandle(direction, commandList, bufferResource,
bufferCopy.offset, bufferCopy.bytesPerRow, bufferCopy.offset, bufferCopy.bytesPerRow,
bufferCopy.rowsPerImage, textureCopy, copySize); bufferCopy.rowsPerImage, textureCopy, copySize);
} }

View File

@ -178,10 +178,6 @@ TEST_P(RequiredBufferSizeInCopyTests, BufferSizeOnBoundary) {
kBytesPerRow * (rowsPerImage - 1) + kBytesPerBlock * copySize.width; kBytesPerRow * (rowsPerImage - 1) + kBytesPerBlock * copySize.width;
DoTest(size, copySize, rowsPerImage); DoTest(size, copySize, rowsPerImage);
// TODO(crbug.com/dawn/1278, 1288, 1289): Required buffer size for copy is wrong on D3D12.
bool mayNeedWorkaround = GetParam().mTextureDimension == wgpu::TextureDimension::e3D &&
extraRowsPerImage > 0 && copySize.depthOrArrayLayers > 1;
DAWN_SUPPRESS_TEST_IF(IsD3D12() && mayNeedWorkaround);
size -= kBytesPerBlock; size -= kBytesPerBlock;
DoTest(size, copySize, rowsPerImage); DoTest(size, copySize, rowsPerImage);
} }
@ -190,25 +186,22 @@ TEST_P(RequiredBufferSizeInCopyTests, BufferSizeOnBoundary) {
// The buffer doesn't have storage for any paddings on the last image. WebGPU spec doesn't require // The buffer doesn't have storage for any paddings on the last image. WebGPU spec doesn't require
// storage for these paddings, and the copy operation will never access to these paddings. So it // storage for these paddings, and the copy operation will never access to these paddings. So it
// should work. // should work.
TEST_P(RequiredBufferSizeInCopyTests, MininumBufferSize) { TEST_P(RequiredBufferSizeInCopyTests, MinimumBufferSize) {
wgpu::Extent3D copySize = kCopySize; wgpu::Extent3D copySize = kCopySize;
copySize.depthOrArrayLayers = GetParam().mCopyDepth; copySize.depthOrArrayLayers = GetParam().mCopyDepth;
const uint64_t extraRowsPerImage = GetParam().mExtraRowsPerImage; const uint64_t extraRowsPerImage = GetParam().mExtraRowsPerImage;
const uint64_t rowsPerImage = extraRowsPerImage + copySize.height; const uint64_t rowsPerImage = extraRowsPerImage + copySize.height;
// TODO(crbug.com/dawn/1278, 1288, 1289): Required buffer size for copy is wrong on D3D12.
bool mayNeedWorkaround = GetParam().mTextureDimension == wgpu::TextureDimension::e3D &&
extraRowsPerImage > 0 && copySize.depthOrArrayLayers > 1;
DAWN_SUPPRESS_TEST_IF(IsD3D12() && mayNeedWorkaround);
uint64_t size = uint64_t size =
kOffset + utils::RequiredBytesInCopy(kBytesPerRow, rowsPerImage, copySize, kFormat); kOffset + utils::RequiredBytesInCopy(kBytesPerRow, rowsPerImage, copySize, kFormat);
DoTest(size, copySize, rowsPerImage); DoTest(size, copySize, rowsPerImage);
} }
DAWN_INSTANTIATE_TEST_P(RequiredBufferSizeInCopyTests, DAWN_INSTANTIATE_TEST_P(
{D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), RequiredBufferSizeInCopyTests,
VulkanBackend()}, {D3D12Backend(), D3D12Backend({"d3d12_split_buffer_texture_copy_for_rows_per_image_paddings"}),
{Type::T2BCopy, Type::B2TCopy}, MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()},
{wgpu::TextureDimension::e3D, wgpu::TextureDimension::e2D}, {Type::T2BCopy, Type::B2TCopy},
{2u, 1u}, {wgpu::TextureDimension::e3D, wgpu::TextureDimension::e2D},
{1u, 0u}); {2u, 1u},
{1u, 0u});