From ea2d558479e03ce1f4edeb69dead8976b0dfb1c6 Mon Sep 17 00:00:00 2001
From: Jiawei Shao <jiawei.shao@intel.com>
Date: Wed, 10 Jul 2019 23:58:13 +0000
Subject: [PATCH] Support BC5 formats on Metal

This patch adds support for BC5 formats on Metal, along with
dawn_end2end_tests that verify Dawn works correctly when (bufferSize -
bufferOffset < bytesPerImage * copyExtent.depth), which is a special
case of buffer-to-texture and texture-to-buffer copies on Metal.

BUG=dawn:42
TEST=dawn_end2end_tests

Change-Id: I27c384d0d8d2bb908f1ad15c2451fd23c1313598
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/8720
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
---
 src/dawn_native/metal/CommandBufferMTL.mm     | 68 +++++++++++++------
 src/dawn_native/metal/TextureMTL.mm           |  6 ++
 .../end2end/CompressedTextureFormatTests.cpp  | 61 +++++++++++++++--
 3 files changed, 111 insertions(+), 24 deletions(-)

diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 74efdebb05..efb3555e1b 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -314,6 +314,7 @@ namespace dawn_native { namespace metal {
         TextureBufferCopySplit ComputeTextureBufferCopySplit(Origin3D origin,
                                                              Extent3D copyExtent,
                                                              Format textureFormat,
+                                                             Extent3D virtualSizeAtLevel,
                                                              uint64_t bufferSize,
                                                              uint64_t bufferOffset,
                                                              uint32_t rowPitch,
@@ -323,9 +324,10 @@ namespace dawn_native { namespace metal {
             // When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
             // compute the correct range when checking if the buffer is big enough to contain the
             // data for the whole copy. Instead of looking at the position of the last texel in the
-            // buffer, it computes the volume of the 3D box with rowPitch * imageHeight *
-            // copySize.depth. For example considering the pixel buffer below where in memory, each
-            // row data (D) of the texture is followed by some padding data (P):
+            // buffer, it computes the volume of the 3D box with rowPitch * (imageHeight /
+            // format.blockHeight) * copySize.depth. For example considering the pixel buffer below
+            // where in memory, each row data (D) of the texture is followed by some padding data
+            // (P):
             //     |DDDDDDD|PP|
             //     |DDDDDDD|PP|
             //     |DDDDDDD|PP|
@@ -336,7 +338,21 @@ namespace dawn_native { namespace metal {
 
             // We work around this limitation by detecting when Metal would complain and copy the
             // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
-            uint32_t bytesPerImage = rowPitch * imageHeight;
+            uint32_t rowPitchCountPerImage = imageHeight / textureFormat.blockHeight;
+            uint32_t bytesPerImage = rowPitch * rowPitchCountPerImage;
+
+            // Metal validation layer requires that if the texture's pixel format is a compressed
+            // format, the sourceSize must be a multiple of the pixel format's block size or be
+            // clamped to the edge of the texture if the block extends outside the bounds of a
+            // texture.
+            uint32_t clampedCopyExtentWidth =
+                (origin.x + copyExtent.width > virtualSizeAtLevel.width)
+                    ? (virtualSizeAtLevel.width - origin.x)
+                    : copyExtent.width;
+            uint32_t clampedCopyExtentHeight =
+                (origin.y + copyExtent.height > virtualSizeAtLevel.height)
+                    ? (virtualSizeAtLevel.height - origin.y)
+                    : copyExtent.height;
 
             // Check whether buffer size is big enough.
             bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth;
@@ -347,7 +363,7 @@ namespace dawn_native { namespace metal {
                 copy.copies[0].bytesPerImage = bytesPerImage;
                 copy.copies[0].textureOrigin = MTLOriginMake(origin.x, origin.y, origin.z);
                 copy.copies[0].copyExtent =
-                    MTLSizeMake(copyExtent.width, copyExtent.height, copyExtent.depth);
+                    MTLSizeMake(clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth);
                 return copy;
             }
 
@@ -359,8 +375,8 @@ namespace dawn_native { namespace metal {
                 copy.copies[copy.count].bytesPerRow = rowPitch;
                 copy.copies[copy.count].bytesPerImage = bytesPerImage;
                 copy.copies[copy.count].textureOrigin = MTLOriginMake(origin.x, origin.y, origin.z);
-                copy.copies[copy.count].copyExtent =
-                    MTLSizeMake(copyExtent.width, copyExtent.height, copyExtent.depth - 1);
+                copy.copies[copy.count].copyExtent = MTLSizeMake(
+                    clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1);
 
                 ++copy.count;
 
@@ -369,30 +385,40 @@ namespace dawn_native { namespace metal {
             }
 
             // Doing all the copy in last image except the last row.
-            if (copyExtent.height > 1) {
+            uint32_t copyBlockRowCount = copyExtent.height / textureFormat.blockHeight;
+            if (copyBlockRowCount > 1) {
                 copy.copies[copy.count].bufferOffset = currentOffset;
                 copy.copies[copy.count].bytesPerRow = rowPitch;
-                copy.copies[copy.count].bytesPerImage = rowPitch * (imageHeight - 1);
+                copy.copies[copy.count].bytesPerImage = rowPitch * (copyBlockRowCount - 1);
                 copy.copies[copy.count].textureOrigin =
                     MTLOriginMake(origin.x, origin.y, origin.z + copyExtent.depth - 1);
-                copy.copies[copy.count].copyExtent =
-                    MTLSizeMake(copyExtent.width, copyExtent.height - 1, 1);
+
+                ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height);
+                copy.copies[copy.count].copyExtent = MTLSizeMake(
+                    clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1);
 
                 ++copy.count;
 
                 // Update offset to copy to the last row.
-                currentOffset += (copyExtent.height - 1) * rowPitch;
+                currentOffset += (copyBlockRowCount - 1) * rowPitch;
             }
 
             // Doing the last row copy with the exact number of bytes in last row.
             // Workaround this issue in a way just like the copy to a 1D texture.
-            uint32_t lastRowDataSize = copyExtent.width * textureFormat.blockByteSize;
+            uint32_t lastRowDataSize =
+                (copyExtent.width / textureFormat.blockWidth) * textureFormat.blockByteSize;
+            uint32_t lastRowCopyExtentHeight =
+                textureFormat.blockHeight + clampedCopyExtentHeight - copyExtent.height;
+            ASSERT(lastRowCopyExtentHeight <= textureFormat.blockHeight);
+
             copy.copies[copy.count].bufferOffset = currentOffset;
             copy.copies[copy.count].bytesPerRow = lastRowDataSize;
             copy.copies[copy.count].bytesPerImage = lastRowDataSize;
-            copy.copies[copy.count].textureOrigin = MTLOriginMake(
-                origin.x, origin.y + copyExtent.height - 1, origin.z + copyExtent.depth - 1);
-            copy.copies[copy.count].copyExtent = MTLSizeMake(copyExtent.width, 1, 1);
+            copy.copies[copy.count].textureOrigin =
+                MTLOriginMake(origin.x, origin.y + copyExtent.height - textureFormat.blockHeight,
+                              origin.z + copyExtent.depth - 1);
+            copy.copies[copy.count].copyExtent =
+                MTLSizeMake(clampedCopyExtentWidth, lastRowCopyExtentHeight, 1);
             ++copy.count;
 
             return copy;
@@ -446,9 +472,10 @@ namespace dawn_native { namespace metal {
                     Buffer* buffer = ToBackend(src.buffer.Get());
                     Texture* texture = ToBackend(dst.texture.Get());
 
+                    Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(dst.mipLevel);
                     TextureBufferCopySplit splittedCopies = ComputeTextureBufferCopySplit(
-                        dst.origin, copySize, texture->GetFormat(), buffer->GetSize(), src.offset,
-                        src.rowPitch, src.imageHeight);
+                        dst.origin, copySize, texture->GetFormat(), virtualSizeAtLevel,
+                        buffer->GetSize(), src.offset, src.rowPitch, src.imageHeight);
 
                     encoders.EnsureBlit(commandBuffer);
                     for (uint32_t i = 0; i < splittedCopies.count; ++i) {
@@ -473,9 +500,10 @@ namespace dawn_native { namespace metal {
                     Texture* texture = ToBackend(src.texture.Get());
                     Buffer* buffer = ToBackend(dst.buffer.Get());
 
+                    Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel);
                     TextureBufferCopySplit splittedCopies = ComputeTextureBufferCopySplit(
-                        src.origin, copySize, texture->GetFormat(), buffer->GetSize(), dst.offset,
-                        dst.rowPitch, dst.imageHeight);
+                        src.origin, copySize, texture->GetFormat(), virtualSizeAtLevel,
+                        buffer->GetSize(), dst.offset, dst.rowPitch, dst.imageHeight);
 
                     encoders.EnsureBlit(commandBuffer);
                     for (uint32_t i = 0; i < splittedCopies.count; ++i) {
diff --git a/src/dawn_native/metal/TextureMTL.mm b/src/dawn_native/metal/TextureMTL.mm
index 0efe780e17..900d762921 100644
--- a/src/dawn_native/metal/TextureMTL.mm
+++ b/src/dawn_native/metal/TextureMTL.mm
@@ -215,6 +215,12 @@ namespace dawn_native { namespace metal {
             case dawn::TextureFormat::Depth24PlusStencil8:
                 return MTLPixelFormatDepth32Float_Stencil8;
 
+            // TODO(jiawei.shao@intel.com): support all BC formats
+            case dawn::TextureFormat::BC5RGSnorm:
+                return MTLPixelFormatBC5_RGSnorm;
+            case dawn::TextureFormat::BC5RGUnorm:
+                return MTLPixelFormatBC5_RGUnorm;
+
             default:
                 UNREACHABLE();
         }
diff --git a/src/tests/end2end/CompressedTextureFormatTests.cpp b/src/tests/end2end/CompressedTextureFormatTests.cpp
index 704af0ffdf..d44c196c94 100644
--- a/src/tests/end2end/CompressedTextureFormatTests.cpp
+++ b/src/tests/end2end/CompressedTextureFormatTests.cpp
@@ -55,6 +55,7 @@ struct CopyConfig {
     uint32_t baseArrayLayer = 0;
     uint32_t bufferOffset = 0;
     uint32_t rowPitchAlignment = kTextureRowPitchAlignment;
+    uint32_t imageHeight = 0;
 };
 
 class CompressedTextureBCFormatTest : public DawnTest {
@@ -102,8 +103,9 @@ class CompressedTextureBCFormatTest : public DawnTest {
         // Copy texture data from a staging buffer to the destination texture.
         dawn::Buffer stagingBuffer = utils::CreateBufferFromData(
             device, uploadData.data(), uploadBufferSize, dawn::BufferUsageBit::CopySrc);
-        dawn::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
-            stagingBuffer, copyConfig.bufferOffset, copyConfig.rowPitchAlignment, 0);
+        dawn::BufferCopyView bufferCopyView =
+            utils::CreateBufferCopyView(stagingBuffer, copyConfig.bufferOffset,
+                                        copyConfig.rowPitchAlignment, copyConfig.imageHeight);
         dawn::TextureCopyView textureCopyView =
             utils::CreateTextureCopyView(bcCompressedTexture, copyConfig.baseMipmapLevel,
                                          copyConfig.baseArrayLayer, copyConfig.copyOrigin3D);
@@ -654,5 +656,56 @@ TEST_P(CompressedTextureBCFormatTest, RowPitchEqualToSlicePitch) {
     }
 }
 
-// TODO(jiawei.shao@intel.com): support BC formats on Metal and OpenGL backend
-DAWN_INSTANTIATE_TEST(CompressedTextureBCFormatTest, D3D12Backend, VulkanBackend);
+// Test the workaround in the B2T copies when (bufferSize - bufferOffset < bytesPerImage *
+// copyExtent.depth) on the Metal backend. As copyExtent.depth can only be 1 for BC formats, the
+// Metal backend uses two copies to implement such a copy.
+TEST_P(CompressedTextureBCFormatTest, LargeImageHeight) {
+    CopyConfig config;
+    config.textureWidthLevel0 = 8;
+    config.textureHeightLevel0 = 8;
+    config.copyExtent3D = {config.textureWidthLevel0, config.textureHeightLevel0, 1};
+
+    config.imageHeight = config.textureHeightLevel0 * 2;
+
+    for (dawn::TextureFormat format : kBCFormats) {
+        config.format = format;
+        TestCopyRegionIntoBCFormatTextures(config);
+    }
+}
+
+// Test the workaround in the B2T copies when (bufferSize - bufferOffset < bytesPerImage *
+// copyExtent.depth) and copyExtent needs to be clamped.
+TEST_P(CompressedTextureBCFormatTest, LargeImageHeightAndClampedCopyExtent) {
+    CopyConfig config;
+    config.textureHeightLevel0 = 56;
+    config.textureWidthLevel0 = 56;
+    config.rowPitchAlignment = kTextureRowPitchAlignment;
+
+    constexpr uint32_t kMipmapLevelCount = 3;
+    config.mipmapLevelCount = kMipmapLevelCount;
+    config.baseMipmapLevel = kMipmapLevelCount - 1;
+
+    // The actual size of the texture at mipmap level == 2 is not a multiple of 4, so padding is
+    // required in the copies.
+    const uint32_t kActualWidthAtLevel = config.textureWidthLevel0 >> config.baseMipmapLevel;
+    const uint32_t kActualHeightAtLevel = config.textureHeightLevel0 >> config.baseMipmapLevel;
+    ASSERT(kActualWidthAtLevel % kBCBlockWidthInTexels != 0);
+    ASSERT(kActualHeightAtLevel % kBCBlockHeightInTexels != 0);
+
+    const uint32_t kCopyWidthAtLevel = (kActualWidthAtLevel + kBCBlockWidthInTexels - 1) /
+                                       kBCBlockWidthInTexels * kBCBlockWidthInTexels;
+    const uint32_t kCopyHeightAtLevel = (kActualHeightAtLevel + kBCBlockHeightInTexels - 1) /
+                                        kBCBlockHeightInTexels * kBCBlockHeightInTexels;
+
+    config.copyExtent3D = {kCopyWidthAtLevel, kCopyHeightAtLevel, 1};
+
+    config.imageHeight = kCopyHeightAtLevel * 2;
+
+    for (dawn::TextureFormat format : kBCFormats) {
+        config.format = format;
+        TestCopyRegionIntoBCFormatTextures(config);
+    }
+}
+
+// TODO(jiawei.shao@intel.com): support BC formats on OpenGL backend
+DAWN_INSTANTIATE_TEST(CompressedTextureBCFormatTest, D3D12Backend, MetalBackend, VulkanBackend);