Optimize B2T and T2B copies with multiple texture layers on D3D12

This patch optimizes the implementation of buffer-to-texture and
texture-to-buffer copies on D3D12 backends by computing the
TextureCopySplits once for all the 2D texture array layers in the
copy instead of computing them once per layer. See the comments in
the function D3D12::ComputeTextureCopySplits() for more details.

BUG=dawn:453
TEST=dawn_end2end_tests

Change-Id: I1b66d24d2418147957fbe03e2c25144bd043a62e
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24643
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
This commit is contained in:
Jiawei Shao
2020-07-15 08:33:39 +00:00
committed by Commit Bot service account
parent b31f5e717e
commit 3b17f0bde8
6 changed files with 321 additions and 99 deletions

View File

@@ -746,6 +746,60 @@ TEST_P(CopyTests_T2B, Texture2DArrayRegionNonzeroRowsPerImage) {
DoTest(textureSpec, bufferSpec, {kWidth, kHeight, kCopyLayers});
}
// Exercises a special code path in the D3D12 backends that is taken when
// (BytesPerRow * RowsPerImage) is not a multiple of 512. The copy spans several layers of a 2D
// array texture, with a buffer whose rowsPerImage is one row larger than the copy height and
// whose offset has been shifted.
TEST_P(CopyTests_T2B, Texture2DArrayRegionWithOffsetOddRowsPerImage) {
    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
    // with swiftshader.
    DAWN_SKIP_TEST_IF(IsSwiftshader());

    // Texture dimensions and the range of array layers covered by the copy.
    constexpr uint32_t kTexWidth = 64;
    constexpr uint32_t kTexHeight = 128;
    constexpr uint32_t kTotalLayers = 8u;
    constexpr uint32_t kFirstLayer = 2u;
    constexpr uint32_t kLayersToCopy = 5u;

    // One extra row per image on top of the copy height, which makes rowsPerImage odd.
    constexpr uint32_t kPaddedRowsPerImage = kTexHeight + 1;

    // Amount added to both the buffer offset and the buffer size.
    constexpr uint32_t kExtraOffset = 128u;

    TextureSpec textureSpec;
    textureSpec.level = 0;
    textureSpec.textureSize = {kTexWidth, kTexHeight, kTotalLayers};
    textureSpec.copyOrigin = {0, 0, kFirstLayer};

    BufferSpec bufferSpec =
        MinimumBufferSpec(kTexWidth, kPaddedRowsPerImage, kLayersToCopy, false);
    bufferSpec.rowsPerImage = kPaddedRowsPerImage;
    bufferSpec.size += kExtraOffset;
    bufferSpec.offset += kExtraOffset;

    DoTest(textureSpec, bufferSpec, {kTexWidth, kTexHeight, kLayersToCopy});
}
// Exercises a special code path in the D3D12 backends that is taken when
// (BytesPerRow * RowsPerImage) is a multiple of 512. The copy spans several layers of a 2D array
// texture, with a buffer whose rowsPerImage is two rows larger than the copy height and whose
// offset has been shifted.
TEST_P(CopyTests_T2B, Texture2DArrayRegionWithOffsetEvenRowsPerImage) {
    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
    // with swiftshader.
    DAWN_SKIP_TEST_IF(IsSwiftshader());

    // Texture dimensions and the range of array layers covered by the copy.
    constexpr uint32_t kTexWidth = 64;
    constexpr uint32_t kTexHeight = 128;
    constexpr uint32_t kTotalLayers = 8u;
    constexpr uint32_t kFirstLayer = 2u;
    constexpr uint32_t kLayersToCopy = 4u;

    // Two extra rows per image on top of the copy height, which keeps rowsPerImage even.
    constexpr uint32_t kPaddedRowsPerImage = kTexHeight + 2;

    // Amount added to both the buffer offset and the buffer size.
    constexpr uint32_t kExtraOffset = 128u;

    TextureSpec textureSpec;
    textureSpec.level = 0;
    textureSpec.textureSize = {kTexWidth, kTexHeight, kTotalLayers};
    textureSpec.copyOrigin = {0, 0, kFirstLayer};

    BufferSpec bufferSpec =
        MinimumBufferSpec(kTexWidth, kPaddedRowsPerImage, kLayersToCopy, false);
    bufferSpec.rowsPerImage = kPaddedRowsPerImage;
    bufferSpec.size += kExtraOffset;
    bufferSpec.offset += kExtraOffset;

    DoTest(textureSpec, bufferSpec, {kTexWidth, kTexHeight, kLayersToCopy});
}
DAWN_INSTANTIATE_TEST(CopyTests_T2B,
D3D12Backend(),
MetalBackend(),
@@ -1094,6 +1148,60 @@ TEST_P(CopyTests_B2T, Texture2DArrayRegionNonzeroRowsPerImage) {
DoTest(textureSpec, bufferSpec, {kWidth, kHeight, kCopyLayers});
}
// Exercises a special code path in the D3D12 backends that is taken when
// (BytesPerRow * RowsPerImage) is not a multiple of 512. The copy spans several layers of a 2D
// array texture, with a buffer whose rowsPerImage is one row larger than the copy height and
// whose offset has been shifted.
TEST_P(CopyTests_B2T, Texture2DArrayRegionWithOffsetOddRowsPerImage) {
    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
    // with swiftshader.
    DAWN_SKIP_TEST_IF(IsSwiftshader());

    // Texture dimensions and the range of array layers covered by the copy.
    constexpr uint32_t kTexWidth = 64;
    constexpr uint32_t kTexHeight = 128;
    constexpr uint32_t kTotalLayers = 8u;
    constexpr uint32_t kFirstLayer = 2u;
    constexpr uint32_t kLayersToCopy = 5u;

    // One extra row per image on top of the copy height, which makes rowsPerImage odd.
    constexpr uint32_t kPaddedRowsPerImage = kTexHeight + 1;

    // Amount added to both the buffer offset and the buffer size.
    constexpr uint32_t kExtraOffset = 128u;

    TextureSpec textureSpec;
    textureSpec.level = 0;
    textureSpec.textureSize = {kTexWidth, kTexHeight, kTotalLayers};
    textureSpec.copyOrigin = {0, 0, kFirstLayer};

    BufferSpec bufferSpec =
        MinimumBufferSpec(kTexWidth, kPaddedRowsPerImage, kLayersToCopy, false);
    bufferSpec.rowsPerImage = kPaddedRowsPerImage;
    bufferSpec.size += kExtraOffset;
    bufferSpec.offset += kExtraOffset;

    DoTest(textureSpec, bufferSpec, {kTexWidth, kTexHeight, kLayersToCopy});
}
// Exercises a special code path in the D3D12 backends that is taken when
// (BytesPerRow * RowsPerImage) is a multiple of 512. The copy spans several layers of a 2D array
// texture, with a buffer whose rowsPerImage is two rows larger than the copy height and whose
// offset has been shifted.
TEST_P(CopyTests_B2T, Texture2DArrayRegionWithOffsetEvenRowsPerImage) {
    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
    // with swiftshader.
    DAWN_SKIP_TEST_IF(IsSwiftshader());

    // Texture dimensions and the range of array layers covered by the copy.
    constexpr uint32_t kTexWidth = 64;
    constexpr uint32_t kTexHeight = 128;
    constexpr uint32_t kTotalLayers = 8u;
    constexpr uint32_t kFirstLayer = 2u;
    constexpr uint32_t kLayersToCopy = 5u;

    // Two extra rows per image on top of the copy height, which keeps rowsPerImage even.
    constexpr uint32_t kPaddedRowsPerImage = kTexHeight + 2;

    // Amount added to both the buffer offset and the buffer size.
    constexpr uint32_t kExtraOffset = 128u;

    TextureSpec textureSpec;
    textureSpec.level = 0;
    textureSpec.textureSize = {kTexWidth, kTexHeight, kTotalLayers};
    textureSpec.copyOrigin = {0, 0, kFirstLayer};

    BufferSpec bufferSpec =
        MinimumBufferSpec(kTexWidth, kPaddedRowsPerImage, kLayersToCopy, false);
    bufferSpec.rowsPerImage = kPaddedRowsPerImage;
    bufferSpec.size += kExtraOffset;
    bufferSpec.offset += kExtraOffset;

    DoTest(textureSpec, bufferSpec, {kTexWidth, kTexHeight, kLayersToCopy});
}
DAWN_INSTANTIATE_TEST(CopyTests_B2T,
D3D12Backend(),
MetalBackend(),

View File

@@ -44,7 +44,7 @@ namespace {
};
// Check that each copy region fits inside the buffer footprint
void ValidateFootprints(const TextureCopySplit& copySplit) {
void ValidateFootprints(const Texture2DCopySplit& copySplit) {
for (uint32_t i = 0; i < copySplit.count; ++i) {
const auto& copy = copySplit.copies[i];
ASSERT_LE(copy.bufferOffset.x + copy.copySize.width, copy.bufferSize.width);
@@ -54,7 +54,7 @@ namespace {
}
// Check that the offset is aligned
void ValidateOffset(const TextureCopySplit& copySplit) {
void ValidateOffset(const Texture2DCopySplit& copySplit) {
ASSERT_TRUE(Align(copySplit.offset, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT) ==
copySplit.offset);
}
@@ -64,7 +64,7 @@ namespace {
}
// Check that no pair of copy regions intersect each other
void ValidateDisjoint(const TextureCopySplit& copySplit) {
void ValidateDisjoint(const Texture2DCopySplit& copySplit) {
for (uint32_t i = 0; i < copySplit.count; ++i) {
const auto& a = copySplit.copies[i];
for (uint32_t j = i + 1; j < copySplit.count; ++j) {
@@ -84,7 +84,8 @@ namespace {
}
// Check that the union of the copy regions exactly covers the texture region
void ValidateTextureBounds(const TextureSpec& textureSpec, const TextureCopySplit& copySplit) {
void ValidateTextureBounds(const TextureSpec& textureSpec,
const Texture2DCopySplit& copySplit) {
ASSERT_TRUE(copySplit.count > 0);
uint32_t minX = copySplit.copies[0].textureOffset.x;
@@ -114,7 +115,7 @@ namespace {
// Validate that the number of pixels copied is exactly equal to the number of pixels in the
// texture region
void ValidatePixelCount(const TextureSpec& textureSpec, const TextureCopySplit& copySplit) {
void ValidatePixelCount(const TextureSpec& textureSpec, const Texture2DCopySplit& copySplit) {
uint32_t count = 0;
for (uint32_t i = 0; i < copySplit.count; ++i) {
const auto& copy = copySplit.copies[i];
@@ -126,7 +127,7 @@ namespace {
// Check that every buffer offset is at the correct pixel location
void ValidateBufferOffset(const TextureSpec& textureSpec,
const BufferSpec& bufferSpec,
const TextureCopySplit& copySplit) {
const Texture2DCopySplit& copySplit) {
ASSERT_TRUE(copySplit.count > 0);
uint32_t texelsPerBlock = textureSpec.blockWidth * textureSpec.blockHeight;
@@ -161,7 +162,7 @@ namespace {
void ValidateCopySplit(const TextureSpec& textureSpec,
const BufferSpec& bufferSpec,
const TextureCopySplit& copySplit) {
const Texture2DCopySplit& copySplit) {
ValidateFootprints(copySplit);
ValidateOffset(copySplit);
ValidateDisjoint(copySplit);
@@ -184,7 +185,7 @@ namespace {
return os;
}
std::ostream& operator<<(std::ostream& os, const TextureCopySplit& copySplit) {
std::ostream& operator<<(std::ostream& os, const Texture2DCopySplit& copySplit) {
os << "CopySplit" << std::endl;
for (uint32_t i = 0; i < copySplit.count; ++i) {
const auto& copy = copySplit.copies[i];
@@ -286,14 +287,14 @@ namespace {
class CopySplitTest : public testing::Test {
protected:
TextureCopySplit DoTest(const TextureSpec& textureSpec, const BufferSpec& bufferSpec) {
Texture2DCopySplit DoTest(const TextureSpec& textureSpec, const BufferSpec& bufferSpec) {
ASSERT(textureSpec.width % textureSpec.blockWidth == 0 &&
textureSpec.height % textureSpec.blockHeight == 0);
dawn_native::Format fakeFormat = {};
fakeFormat.blockWidth = textureSpec.blockWidth;
fakeFormat.blockHeight = textureSpec.blockHeight;
fakeFormat.blockByteSize = textureSpec.texelBlockSizeInBytes;
TextureCopySplit copySplit = ComputeTextureCopySplit(
Texture2DCopySplit copySplit = ComputeTextureCopySplit(
{textureSpec.x, textureSpec.y, textureSpec.z},
{textureSpec.width, textureSpec.height, textureSpec.depth}, fakeFormat,
bufferSpec.offset, bufferSpec.bytesPerRow, bufferSpec.rowsPerImage);
@@ -305,7 +306,7 @@ class CopySplitTest : public testing::Test {
TEST_F(CopySplitTest, General) {
for (TextureSpec textureSpec : kBaseTextureSpecs) {
for (BufferSpec bufferSpec : BaseBufferSpecs(textureSpec)) {
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -325,7 +326,7 @@ TEST_F(CopySplitTest, TextureWidth) {
}
textureSpec.width = val;
for (BufferSpec bufferSpec : BaseBufferSpecs(textureSpec)) {
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -346,7 +347,7 @@ TEST_F(CopySplitTest, TextureHeight) {
}
textureSpec.height = val;
for (BufferSpec bufferSpec : BaseBufferSpecs(textureSpec)) {
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -364,7 +365,7 @@ TEST_F(CopySplitTest, TextureX) {
for (uint32_t val : kCheckValues) {
textureSpec.x = val;
for (BufferSpec bufferSpec : BaseBufferSpecs(textureSpec)) {
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -382,7 +383,7 @@ TEST_F(CopySplitTest, TextureY) {
for (uint32_t val : kCheckValues) {
textureSpec.y = val;
for (BufferSpec bufferSpec : BaseBufferSpecs(textureSpec)) {
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -400,7 +401,7 @@ TEST_F(CopySplitTest, TexelSize) {
for (uint32_t texelSize : {4, 8, 16, 32, 64}) {
textureSpec.texelBlockSizeInBytes = texelSize;
for (BufferSpec bufferSpec : BaseBufferSpecs(textureSpec)) {
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -419,7 +420,7 @@ TEST_F(CopySplitTest, BufferOffset) {
for (uint32_t val : kCheckValues) {
bufferSpec.offset = textureSpec.texelBlockSizeInBytes * val;
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -439,7 +440,7 @@ TEST_F(CopySplitTest, RowPitch) {
for (uint32_t i = 0; i < 5; ++i) {
bufferSpec.bytesPerRow = baseRowPitch + i * 256;
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec
@@ -459,7 +460,7 @@ TEST_F(CopySplitTest, ImageHeight) {
for (uint32_t i = 0; i < 5; ++i) {
bufferSpec.rowsPerImage = baseImageHeight + i * 256;
TextureCopySplit copySplit = DoTest(textureSpec, bufferSpec);
Texture2DCopySplit copySplit = DoTest(textureSpec, bufferSpec);
if (HasFatalFailure()) {
std::ostringstream message;
message << "Failed generating splits: " << textureSpec << ", " << bufferSpec