Implement depth-only/stencil-only copies on Vulkan and Metal

Bug: dawn:439
Change-Id: I07ab014f4f13b73c09b2eecc48cd38b06d88166a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24684
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jiawei Shao <jiawei.shao@intel.com>
Commit-Queue: Austin Eng <enga@chromium.org>
2025-08-22 03:32:13 +00:00 · 2020-08-04 19:46:37 +00:00 · 2020-08-04 19:46:37 +00:00 · 0a4342793e
commit 0a4342793e
parent e84a1b1376
16 changed files with 506 additions and 79 deletions
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@ -723,6 +723,7 @@ namespace dawn_native {
            copy->destination.texture = destination->texture;
            copy->destination.origin = destination->origin;
            copy->destination.mipLevel = destination->mipLevel;
+            copy->destination.aspect = destination->aspect;
            copy->copySize = *copySize;

            return {};
@ -778,6 +779,7 @@ namespace dawn_native {
            copy->source.texture = source->texture;
            copy->source.origin = source->origin;
            copy->source.mipLevel = source->mipLevel;
+            copy->source.aspect = source->aspect;
            copy->destination.buffer = destination->buffer;
            copy->destination.offset = destination->layout.offset;
            copy->destination.bytesPerRow = destination->layout.bytesPerRow;
@ -826,9 +828,11 @@ namespace dawn_native {
            copy->source.texture = source->texture;
            copy->source.origin = source->origin;
            copy->source.mipLevel = source->mipLevel;
+            copy->source.aspect = source->aspect;
            copy->destination.texture = destination->texture;
            copy->destination.origin = destination->origin;
            copy->destination.mipLevel = destination->mipLevel;
+            copy->destination.aspect = destination->aspect;
            copy->copySize = *copySize;

            return {};
--- a/src/dawn_native/Commands.h
+++ b/src/dawn_native/Commands.h
@ -105,6 +105,7 @@ namespace dawn_native {
        Ref<TextureBase> texture;
        uint32_t mipLevel;
        Origin3D origin;  // Texels / array layer
+        wgpu::TextureAspect aspect;
    };

    struct CopyBufferToBufferCmd {
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@ -622,6 +622,9 @@ namespace dawn_native { namespace metal {
                        const MTLSize copyExtent =
                            MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);

+                        MTLBlitOption blitOption =
+                            ComputeMTLBlitOption(texture->GetFormat(), dst.aspect);
+
                        uint64_t bufferOffset = copyInfo.bufferOffset;
                        for (uint32_t copyLayer = copyBaseLayer;
                             copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
@ -633,7 +636,8 @@ namespace dawn_native { namespace metal {
                                                               toTexture:texture->GetMTLTexture()
                                                        destinationSlice:copyLayer
                                                        destinationLevel:dst.mipLevel
-                                                       destinationOrigin:textureOrigin];
+                                                       destinationOrigin:textureOrigin
+                                                                 options:blitOption];
                            bufferOffset += copyInfo.bytesPerImage;
                        }
                    }
@ -668,6 +672,9 @@ namespace dawn_native { namespace metal {
                        const MTLSize copyExtent =
                            MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);

+                        MTLBlitOption blitOption =
+                            ComputeMTLBlitOption(texture->GetFormat(), src.aspect);
+
                        uint64_t bufferOffset = copyInfo.bufferOffset;
                        for (uint32_t copyLayer = copyBaseLayer;
                             copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
@ -679,7 +686,8 @@ namespace dawn_native { namespace metal {
                                                                 toBuffer:buffer->GetMTLBuffer()
                                                        destinationOffset:bufferOffset
                                                   destinationBytesPerRow:copyInfo.bytesPerRow
-                                                 destinationBytesPerImage:copyInfo.bytesPerImage];
+                                                 destinationBytesPerImage:copyInfo.bytesPerImage
+                                                                  options:blitOption];
                            bufferOffset += copyInfo.bytesPerImage;
                        }
                    }
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@ -276,9 +276,10 @@ namespace dawn_native { namespace metal {

        // This function assumes data is perfectly aligned. Otherwise, it might be necessary
        // to split copying to several stages: see ComputeTextureBufferCopySplit.
-        uint32_t blockSize = dst->texture->GetFormat().blockByteSize;
-        uint32_t blockWidth = dst->texture->GetFormat().blockWidth;
-        uint32_t blockHeight = dst->texture->GetFormat().blockHeight;
+        const TexelBlockInfo& blockInfo = texture->GetFormat().GetTexelBlockInfo(dst->aspect);
+        uint32_t blockSize = blockInfo.blockByteSize;
+        uint32_t blockWidth = blockInfo.blockWidth;
+        uint32_t blockHeight = blockInfo.blockHeight;
        ASSERT(dataLayout.rowsPerImage == (copySize.height));
        ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);

@ -295,6 +296,8 @@ namespace dawn_native { namespace metal {
        const uint64_t bytesPerImage =
            dataLayout.rowsPerImage * dataLayout.bytesPerRow / blockHeight;

+        MTLBlitOption blitOption = ComputeMTLBlitOption(texture->GetFormat(), dst->aspect);
+
        uint64_t bufferOffset = dataLayout.offset;
        for (uint32_t copyLayer = copyBaseLayer; copyLayer < copyBaseLayer + copyLayerCount;
             ++copyLayer) {
@ -307,7 +310,8 @@ namespace dawn_native { namespace metal {
                          toTexture:texture->GetMTLTexture()
                   destinationSlice:copyLayer
                   destinationLevel:dst->mipLevel
-                  destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)];
+                  destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)
+                            options:blitOption];

            bufferOffset += bytesPerImage;
        }
--- a/src/dawn_native/metal/QueueMTL.mm
+++ b/src/dawn_native/metal/QueueMTL.mm
@ -33,10 +33,10 @@ namespace dawn_native { namespace metal {
            uint32_t alignedBytesPerRow,
            uint32_t alignedRowsPerImage,
            const TextureDataLayout* dataLayout,
-            const Format& textureFormat,
+            const TexelBlockInfo& blockInfo,
            const Extent3D* writeSize) {
            uint32_t newDataSize = ComputeRequiredBytesInCopy(
-                textureFormat, *writeSize, alignedBytesPerRow, alignedRowsPerImage);
+                blockInfo, *writeSize, alignedBytesPerRow, alignedRowsPerImage);

            UploadHandle uploadHandle;
            DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
@ -47,10 +47,10 @@ namespace dawn_native { namespace metal {
            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
            srcPointer += dataLayout->offset;

-            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / textureFormat.blockHeight;
-            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / textureFormat.blockHeight;
+            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / blockInfo.blockHeight;
+            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / blockInfo.blockHeight;
            if (dataRowsPerImageInBlock == 0) {
-                dataRowsPerImageInBlock = writeSize->height / textureFormat.blockHeight;
+                dataRowsPerImageInBlock = writeSize->height / blockInfo.blockHeight;
            }

            ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
@ -91,19 +91,20 @@ namespace dawn_native { namespace metal {
                                       size_t dataSize,
                                       const TextureDataLayout* dataLayout,
                                       const Extent3D* writeSize) {
-        uint32_t blockSize = destination->texture->GetFormat().blockByteSize;
-        uint32_t blockWidth = destination->texture->GetFormat().blockWidth;
+        const TexelBlockInfo& blockInfo =
+            destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect);
+
        // We are only copying the part of the data that will appear in the texture.
        // Note that validating texture copy range ensures that writeSize->width and
        // writeSize->height are multiples of blockWidth and blockHeight respectively.
-        uint32_t alignedBytesPerRow = (writeSize->width) / blockWidth * blockSize;
+        uint32_t alignedBytesPerRow =
+            (writeSize->width) / blockInfo.blockWidth * blockInfo.blockByteSize;
        uint32_t alignedRowsPerImage = writeSize->height;

        UploadHandle uploadHandle;
-        DAWN_TRY_ASSIGN(uploadHandle,
-                        UploadTextureDataAligningBytesPerRow(
-                            GetDevice(), data, dataSize, alignedBytesPerRow, alignedRowsPerImage,
-                            dataLayout, destination->texture->GetFormat(), writeSize));
+        DAWN_TRY_ASSIGN(uploadHandle, UploadTextureDataAligningBytesPerRow(
+                                          GetDevice(), data, dataSize, alignedBytesPerRow,
+                                          alignedRowsPerImage, dataLayout, blockInfo, writeSize));

        TextureDataLayout passDataLayout = *dataLayout;
        passDataLayout.offset = uploadHandle.startOffset;
@ -114,6 +115,7 @@ namespace dawn_native { namespace metal {
        textureCopy.texture = destination->texture;
        textureCopy.mipLevel = destination->mipLevel;
        textureCopy.origin = destination->origin;
+        textureCopy.aspect = destination->aspect;

        return ToBackend(GetDevice())
            ->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, &textureCopy,
--- a/src/dawn_native/metal/UtilsMetal.h
+++ b/src/dawn_native/metal/UtilsMetal.h
@ -53,6 +53,8 @@ namespace dawn_native { namespace metal {
                                             const TextureCopy& dst,
                                             const Extent3D& size);

+    MTLBlitOption ComputeMTLBlitOption(const Format& format, wgpu::TextureAspect aspect);
+
 }}  // namespace dawn_native::metal

 #endif  // DAWNNATIVE_METAL_UTILSMETAL_H_
--- a/src/dawn_native/metal/UtilsMetal.mm
+++ b/src/dawn_native/metal/UtilsMetal.mm
@ -164,4 +164,21 @@ namespace dawn_native { namespace metal {
        }
    }

+    MTLBlitOption ComputeMTLBlitOption(const Format& format, wgpu::TextureAspect aspect) {
+        constexpr Aspect kDepthStencil = Aspect::Depth | Aspect::Stencil;
+        if ((format.aspects & kDepthStencil) == kDepthStencil) {
+            // We only provide a blit option if the format has both depth and stencil.
+            // It is invalid to provide a blit option otherwise.
+            switch (aspect) {
+                case wgpu::TextureAspect::DepthOnly:
+                    return MTLBlitOptionDepthFromDepthStencil;
+                case wgpu::TextureAspect::StencilOnly:
+                    return MTLBlitOptionStencilFromDepthStencil;
+                default:
+                    UNREACHABLE();
+            }
+        }
+        return MTLBlitOptionNone;
+    }
+
 }}  // namespace dawn_native::metal
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@ -68,7 +68,7 @@ namespace dawn_native { namespace vulkan {
            // TODO(jiawei.shao@intel.com): support 1D and 3D textures
            ASSERT(srcTexture->GetDimension() == wgpu::TextureDimension::e2D &&
                   dstTexture->GetDimension() == wgpu::TextureDimension::e2D);
-            region.srcSubresource.aspectMask = srcTexture->GetVkAspectMask();
+            region.srcSubresource.aspectMask = srcTexture->GetVkAspectMask(srcCopy.aspect);
            region.srcSubresource.mipLevel = srcCopy.mipLevel;
            region.srcSubresource.baseArrayLayer = srcCopy.origin.z;
            region.srcSubresource.layerCount = copySize.depth;
@ -77,7 +77,7 @@ namespace dawn_native { namespace vulkan {
            region.srcOffset.y = srcCopy.origin.y;
            region.srcOffset.z = 0;

-            region.dstSubresource.aspectMask = dstTexture->GetVkAspectMask();
+            region.dstSubresource.aspectMask = dstTexture->GetVkAspectMask(dstCopy.aspect);
            region.dstSubresource.mipLevel = dstCopy.mipLevel;
            region.dstSubresource.baseArrayLayer = dstCopy.origin.z;
            region.dstSubresource.layerCount = copySize.depth;
--- a/src/dawn_native/vulkan/QueueVk.cpp
+++ b/src/dawn_native/vulkan/QueueVk.cpp
@ -36,10 +36,10 @@ namespace dawn_native { namespace vulkan {
            uint32_t optimallyAlignedBytesPerRow,
            uint32_t alignedRowsPerImage,
            const TextureDataLayout* dataLayout,
-            const Format& textureFormat,
+            const TexelBlockInfo& blockInfo,
            const Extent3D* writeSize) {
            uint32_t newDataSize = ComputeRequiredBytesInCopy(
-                textureFormat, *writeSize, optimallyAlignedBytesPerRow, alignedRowsPerImage);
+                blockInfo, *writeSize, optimallyAlignedBytesPerRow, alignedRowsPerImage);

            uint64_t optimalOffsetAlignment =
                ToBackend(device)
@ -56,10 +56,10 @@ namespace dawn_native { namespace vulkan {
            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
            srcPointer += dataLayout->offset;

-            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / textureFormat.blockHeight;
-            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / textureFormat.blockHeight;
+            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / blockInfo.blockHeight;
+            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / blockInfo.blockHeight;
            if (dataRowsPerImageInBlock == 0) {
-                dataRowsPerImageInBlock = writeSize->height / textureFormat.blockHeight;
+                dataRowsPerImageInBlock = writeSize->height / blockInfo.blockHeight;
            }

            uint64_t additionalOffset =
@ -110,12 +110,14 @@ namespace dawn_native { namespace vulkan {
                                       size_t dataSize,
                                       const TextureDataLayout* dataLayout,
                                       const Extent3D* writeSize) {
-        uint32_t blockSize = destination->texture->GetFormat().blockByteSize;
-        uint32_t blockWidth = destination->texture->GetFormat().blockWidth;
+        const TexelBlockInfo& blockInfo =
+            destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect);
+
        // We are only copying the part of the data that will appear in the texture.
        // Note that validating texture copy range ensures that writeSize->width and
        // writeSize->height are multiples of blockWidth and blockHeight respectively.
-        uint32_t alignedBytesPerRow = (writeSize->width) / blockWidth * blockSize;
+        uint32_t alignedBytesPerRow =
+            (writeSize->width) / blockInfo.blockWidth * blockInfo.blockByteSize;
        uint32_t alignedRowsPerImage = writeSize->height;

        uint32_t optimalBytesPerRowAlignment =
@ -126,11 +128,10 @@ namespace dawn_native { namespace vulkan {
            Align(alignedBytesPerRow, optimalBytesPerRowAlignment);

        UploadHandle uploadHandle;
-        DAWN_TRY_ASSIGN(
-            uploadHandle,
-            UploadTextureDataAligningBytesPerRow(
-                GetDevice(), data, dataSize, alignedBytesPerRow, optimallyAlignedBytesPerRow,
-                alignedRowsPerImage, dataLayout, destination->texture->GetFormat(), writeSize));
+        DAWN_TRY_ASSIGN(uploadHandle, UploadTextureDataAligningBytesPerRow(
+                                          GetDevice(), data, dataSize, alignedBytesPerRow,
+                                          optimallyAlignedBytesPerRow, alignedRowsPerImage,
+                                          dataLayout, blockInfo, writeSize));

        TextureDataLayout passDataLayout = *dataLayout;
        passDataLayout.offset = uploadHandle.startOffset;
@ -141,6 +142,7 @@ namespace dawn_native { namespace vulkan {
        textureCopy.texture = destination->texture;
        textureCopy.mipLevel = destination->mipLevel;
        textureCopy.origin = destination->origin;
+        textureCopy.aspect = destination->aspect;

        return ToBackend(GetDevice())
            ->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, &textureCopy,
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@ -669,8 +669,21 @@ namespace dawn_native { namespace vulkan {
        return mHandle;
    }

-    VkImageAspectFlags Texture::GetVkAspectMask() const {
+    VkImageAspectFlags Texture::GetVkAspectMask(wgpu::TextureAspect aspect) const {
+        // TODO(enga): These masks could be precomputed.
+        switch (aspect) {
+            case wgpu::TextureAspect::All:
                return VulkanAspectMask(GetFormat().aspects);
+            case wgpu::TextureAspect::DepthOnly:
+                ASSERT(GetFormat().aspects & Aspect::Depth);
+                return VulkanAspectMask(Aspect::Depth);
+            case wgpu::TextureAspect::StencilOnly:
+                ASSERT(GetFormat().aspects & Aspect::Stencil);
+                return VulkanAspectMask(Aspect::Stencil);
+            default:
+                UNREACHABLE();
+                return 0;
+        }
    }

    void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
@ -872,7 +885,7 @@ namespace dawn_native { namespace vulkan {
        TransitionUsageNow(recordingContext, wgpu::TextureUsage::CopyDst, range);
        if (GetFormat().isRenderable) {
            VkImageSubresourceRange imageRange = {};
-            imageRange.aspectMask = GetVkAspectMask();
+            imageRange.aspectMask = GetVkAspectMask(wgpu::TextureAspect::All);
            imageRange.levelCount = 1;
            imageRange.layerCount = 1;

@ -943,10 +956,12 @@ namespace dawn_native { namespace vulkan {
                        continue;
                    }

+                    ASSERT(GetFormat().aspects == Aspect::Color);
                    dawn_native::TextureCopy textureCopy;
                    textureCopy.texture = this;
                    textureCopy.origin = {0, 0, layer};
                    textureCopy.mipLevel = level;
+                    textureCopy.aspect = wgpu::TextureAspect::All;

                    VkBufferImageCopy region =
                        ComputeBufferImageCopyRegion(bufferCopy, textureCopy, copySize);
--- a/src/dawn_native/vulkan/TextureVk.h
+++ b/src/dawn_native/vulkan/TextureVk.h
@ -59,7 +59,7 @@ namespace dawn_native { namespace vulkan {
                                               VkImage nativeImage);

        VkImage GetHandle() const;
-        VkImageAspectFlags GetVkAspectMask() const;
+        VkImageAspectFlags GetVkAspectMask(wgpu::TextureAspect aspect) const;

        // Transitions the texture to be used as `usage`, recording any necessary barrier in
        // `commands`.
--- a/src/dawn_native/vulkan/UtilsVulkan.cpp
+++ b/src/dawn_native/vulkan/UtilsVulkan.cpp
@ -84,12 +84,14 @@ namespace dawn_native { namespace vulkan {

        region.bufferOffset = dataLayout.offset;
        // In Vulkan the row length is in texels while it is in bytes for Dawn
-        const Format& format = texture->GetFormat();
-        ASSERT(dataLayout.bytesPerRow % format.blockByteSize == 0);
-        region.bufferRowLength = dataLayout.bytesPerRow / format.blockByteSize * format.blockWidth;
+        const TexelBlockInfo& blockInfo =
+            texture->GetFormat().GetTexelBlockInfo(textureCopy.aspect);
+        ASSERT(dataLayout.bytesPerRow % blockInfo.blockByteSize == 0);
+        region.bufferRowLength =
+            dataLayout.bytesPerRow / blockInfo.blockByteSize * blockInfo.blockWidth;
        region.bufferImageHeight = dataLayout.rowsPerImage;

-        region.imageSubresource.aspectMask = texture->GetVkAspectMask();
+        region.imageSubresource.aspectMask = texture->GetVkAspectMask(textureCopy.aspect);
        region.imageSubresource.mipLevel = textureCopy.mipLevel;

        switch (textureCopy.texture->GetDimension()) {
--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@ -276,6 +276,7 @@ source_set("dawn_end2end_tests_sources") {
    "end2end/DebugMarkerTests.cpp",
    "end2end/DeprecatedAPITests.cpp",
    "end2end/DepthSamplingTests.cpp",
+    "end2end/DepthStencilCopyTests.cpp",
    "end2end/DepthStencilStateTests.cpp",
    "end2end/DestroyTests.cpp",
    "end2end/DeviceLostTests.cpp",
--- a/src/tests/DawnTest.cpp
+++ b/src/tests/DawnTest.cpp
@ -926,8 +926,9 @@ std::ostringstream& DawnTestBase::AddBufferExpectation(const char* file,
    return *(mDeferredExpectations.back().message.get());
 }

-std::ostringstream& DawnTestBase::AddTextureExpectation(const char* file,
+std::ostringstream& DawnTestBase::AddTextureExpectationImpl(const char* file,
                                                            int line,
+                                                            detail::Expectation* expectation,
                                                            const wgpu::Texture& texture,
                                                            uint32_t x,
                                                            uint32_t y,
@ -935,17 +936,26 @@ std::ostringstream& DawnTestBase::AddTextureExpectation(const char* file,
                                                            uint32_t height,
                                                            uint32_t level,
                                                            uint32_t slice,
-                                                        uint32_t pixelSize,
-                                                        detail::Expectation* expectation) {
-    uint32_t bytesPerRow = Align(width * pixelSize, kTextureBytesPerRowAlignment);
-    uint32_t size = bytesPerRow * (height - 1) + width * pixelSize;
+                                                            wgpu::TextureAspect aspect,
+                                                            uint32_t dataSize,
+                                                            uint32_t bytesPerRow) {
+    if (bytesPerRow == 0) {
+        bytesPerRow = Align(width * dataSize, kTextureBytesPerRowAlignment);
+    } else {
+        ASSERT(bytesPerRow >= width * dataSize);
+        ASSERT(bytesPerRow == Align(bytesPerRow, kTextureBytesPerRowAlignment));
+    }

-    auto readback = ReserveReadback(size);
+    uint32_t size = bytesPerRow * (height - 1) + width * dataSize;
+
+    // TODO(enga): We should have the map async alignment in Constants.h. Also, it should change to 8
+    // for Float64Array.
+    auto readback = ReserveReadback(Align(size, 4));

    // We need to enqueue the copy immediately because by the time we resolve the expectation,
    // the texture might have been modified.
    wgpu::TextureCopyView textureCopyView =
-        utils::CreateTextureCopyView(texture, level, {x, y, slice});
+        utils::CreateTextureCopyView(texture, level, {x, y, slice}, aspect);
    wgpu::BufferCopyView bufferCopyView =
        utils::CreateBufferCopyView(readback.buffer, readback.offset, bytesPerRow, 0);
    wgpu::Extent3D copySize = {width, height, 1};
@ -962,7 +972,7 @@ std::ostringstream& DawnTestBase::AddTextureExpectation(const char* file,
    deferred.readbackSlot = readback.slot;
    deferred.readbackOffset = readback.offset;
    deferred.size = size;
-    deferred.rowBytes = width * pixelSize;
+    deferred.rowBytes = width * dataSize;
    deferred.bytesPerRow = bytesPerRow;
    deferred.expectation.reset(expectation);

--- a/src/tests/DawnTest.h
+++ b/src/tests/DawnTest.h
@ -61,22 +61,19 @@

 // Test a pixel of the mip level 0 of a 2D texture.
 #define EXPECT_PIXEL_RGBA8_EQ(expected, texture, x, y) \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, 1, 1, 0, 0, sizeof(RGBA8), \
-                          new ::detail::ExpectEq<RGBA8>(expected))
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y)

 #define EXPECT_TEXTURE_RGBA8_EQ(expected, texture, x, y, width, height, level, slice) \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, width, height, level, slice, \
-                          sizeof(RGBA8),                                                  \
-                          new ::detail::ExpectEq<RGBA8>(expected, (width) * (height)))
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y, width, height, level, slice)

 #define EXPECT_PIXEL_FLOAT_EQ(expected, texture, x, y) \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, 1, 1, 0, 0, sizeof(float), \
-                          new ::detail::ExpectEq<float>(expected))
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y)

 #define EXPECT_TEXTURE_FLOAT_EQ(expected, texture, x, y, width, height, level, slice) \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, width, height, level, slice, \
-                          sizeof(float),                                                  \
-                          new ::detail::ExpectEq<float>(expected, (width) * (height)))
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y, width, height, level, slice)
+
+// TODO(enga): Migrate other texture expectation helpers to this common one.
+#define EXPECT_TEXTURE_EQ(...) AddTextureExpectation(__FILE__, __LINE__, __VA_ARGS__)

 // Should only be used to test validation of function that can't be tested by regular validation
 // tests;
@ -163,6 +160,9 @@ namespace utils {

 namespace detail {
    class Expectation;
+
+    template <typename T>
+    class ExpectEq;
 }  // namespace detail

 namespace dawn_wire {
@ -281,17 +281,39 @@ class DawnTestBase {
                                             uint64_t offset,
                                             uint64_t size,
                                             detail::Expectation* expectation);
+
+    template <typename T>
    std::ostringstream& AddTextureExpectation(const char* file,
                                              int line,
+                                              const T* expectedData,
                                              const wgpu::Texture& texture,
                                              uint32_t x,
                                              uint32_t y,
-                                              uint32_t width,
-                                              uint32_t height,
-                                              uint32_t level,
-                                              uint32_t slice,
-                                              uint32_t pixelSize,
-                                              detail::Expectation* expectation);
+                                              uint32_t width = 1,
+                                              uint32_t height = 1,
+                                              uint32_t level = 0,
+                                              uint32_t slice = 0,
+                                              wgpu::TextureAspect aspect = wgpu::TextureAspect::All,
+                                              uint32_t bytesPerRow = 0) {
+        return AddTextureExpectationImpl(
+            file, line, new detail::ExpectEq<T>(expectedData, width * height), texture, x, y, width,
+            height, level, slice, aspect, sizeof(T), bytesPerRow);
+    }
+
+    template <typename T>
+    std::ostringstream& AddTextureExpectation(const char* file,
+                                              int line,
+                                              const T& expectedData,
+                                              const wgpu::Texture& texture,
+                                              uint32_t x,
+                                              uint32_t y,
+                                              uint32_t level = 0,
+                                              uint32_t slice = 0,
+                                              wgpu::TextureAspect aspect = wgpu::TextureAspect::All,
+                                              uint32_t bytesPerRow = 0) {
+        return AddTextureExpectationImpl(file, line, new detail::ExpectEq<T>(expectedData), texture,
+                                         x, y, 1, 1, level, slice, aspect, sizeof(T), bytesPerRow);
+    }

    void WaitABit();
    void FlushWire();
@ -323,6 +345,20 @@ class DawnTestBase {
    bool mExpectError = false;
    bool mError = false;

+    std::ostringstream& AddTextureExpectationImpl(const char* file,
+                                                  int line,
+                                                  detail::Expectation* expectation,
+                                                  const wgpu::Texture& texture,
+                                                  uint32_t x,
+                                                  uint32_t y,
+                                                  uint32_t width,
+                                                  uint32_t height,
+                                                  uint32_t level,
+                                                  uint32_t slice,
+                                                  wgpu::TextureAspect aspect,
+                                                  uint32_t dataSize,
+                                                  uint32_t bytesPerRow);
+
    // MapRead buffers used to get data for the expectations
    struct ReadbackSlot {
        wgpu::Buffer buffer;
--- a/src/tests/end2end/DepthStencilCopyTests.cpp
+++ b/src/tests/end2end/DepthStencilCopyTests.cpp
@ -0,0 +1,323 @@
+// Copyright 2020 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tests/DawnTest.h"
+
+#include <array>
+#include "common/Constants.h"
+#include "common/Math.h"
+#include "utils/ComboRenderPipelineDescriptor.h"
+#include "utils/TextureFormatUtils.h"
+#include "utils/WGPUHelpers.h"
+
+class DepthStencilCopyTests : public DawnTest {  // Fixture with shaders shared by the tests.
+  protected:
+    void SetUp() override {
+        DawnTest::SetUp();
+
+        // Vertex stage: two triangles forming a square in the bottom left quarter of the screen.
+        mVertexModule = utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
+    #version 450
+    void main() {
+        const vec2 pos[6] = vec2[6](vec2(-1.f, -1.f), vec2(0.f, -1.f), vec2(-1.f,  0.f),
+                                    vec2(-1.f,  0.f), vec2(0.f, -1.f), vec2( 0.f,  0.f));
+        gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+    })");
+
+        mFragmentModule = utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
+    #version 450
+    void main() {
+        gl_FragDepth = 0.3;
+    })");  // Fragment stage: writes a constant depth of 0.3 and has no color output.
+    }
+
+    static constexpr float kWrittenDepthValue = 0.3;  // Same value as gl_FragDepth above.
+
+    wgpu::ShaderModule mVertexModule;    // Created in SetUp().
+    wgpu::ShaderModule mFragmentModule;  // Created in SetUp().
+};
+
+// Render known depth values and read them back through a depth-only texture-to-buffer copy.
+TEST_P(DepthStencilCopyTests, FromDepthAspect) {
+    // Describe a small depth texture that can be rendered to and copied from.
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+    wgpu::TextureDescriptor depthTexDesc = {};
+    depthTexDesc.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+    depthTexDesc.format = wgpu::TextureFormat::Depth32Float;
+    depthTexDesc.size = {kWidth, kHeight, 1};
+    wgpu::Texture depthTexture = device.CreateTexture(&depthTexDesc);
+
+    // The render pass clears the depth attachment to 0 before drawing.
+    utils::ComboRenderPassDescriptor passDesc({}, depthTexture.CreateView());
+    passDesc.cDepthStencilAttachmentInfo.clearDepth = 0.f;
+
+    // Depth-only pipeline: the shared shaders write depth 0.3 over the bottom-left quad.
+    utils::ComboRenderPipelineDescriptor pipelineDesc(device);
+    pipelineDesc.colorStateCount = 0;
+    pipelineDesc.depthStencilState = &pipelineDesc.cDepthStencilState;
+    pipelineDesc.cDepthStencilState.depthWriteEnabled = true;
+    pipelineDesc.cDepthStencilState.format = depthTexDesc.format;
+    pipelineDesc.cFragmentStage.module = mFragmentModule;
+    pipelineDesc.vertexStage.module = mVertexModule;
+
+    wgpu::RenderPipeline depthPipeline = device.CreateRenderPipeline(&pipelineDesc);
+
+    // Record and submit the draw of the quad (6 vertices, two triangles).
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::RenderPassEncoder renderPass = encoder.BeginRenderPass(&passDesc);
+    renderPass.SetPipeline(depthPipeline);
+    renderPass.Draw(6);
+    renderPass.EndPass();
+
+    wgpu::CommandBuffer commandBuffer = encoder.Finish();
+    queue.Submit(1, &commandBuffer);
+
+    // Depth 0.3 is expected only in the bottom left quad; everywhere else stays at 0.
+    std::vector<float> expectedDepth = {
+        0.0, 0.0, 0.0, 0.0,  //
+        0.0, 0.0, 0.0, 0.0,  //
+        0.3, 0.3, 0.0, 0.0,  //
+        0.3, 0.3, 0.0, 0.0,  //
+    };
+
+    // The expectation itself issues the CopyTextureToBuffer, so it is the actual test.
+    EXPECT_TEXTURE_EQ(expectedDepth.data(), depthTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::DepthOnly);
+}
+
+// Render known stencil values and read them back through a stencil-only texture-to-buffer copy.
+TEST_P(DepthStencilCopyTests, FromStencilAspect) {
+    // TODO(enga): Figure out why this fails on Linux Vulkan Intel
+    DAWN_SKIP_TEST_IF(IsLinux() && IsVulkan() && IsIntel());
+
+    // Describe a small depth-stencil texture that can be rendered to and copied from.
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+    wgpu::TextureDescriptor stencilTexDesc = {};
+    stencilTexDesc.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+    stencilTexDesc.format = wgpu::TextureFormat::Depth24PlusStencil8;
+    stencilTexDesc.size = {kWidth, kHeight, 1};
+    wgpu::Texture depthStencilTexture = device.CreateTexture(&stencilTexDesc);
+
+    // The render pass clears the stencil attachment to 0 on load.
+    utils::ComboRenderPassDescriptor passDesc({}, depthStencilTexture.CreateView());
+    passDesc.cDepthStencilAttachmentInfo.clearStencil = 0;
+
+    // Render pipeline whose front-face stencil pass operation is IncrementClamp, so passing
+    // fragments of the bottom-left quad bump the stencil value.
+    utils::ComboRenderPipelineDescriptor pipelineDesc(device);
+    pipelineDesc.colorStateCount = 0;
+    pipelineDesc.depthStencilState = &pipelineDesc.cDepthStencilState;
+    pipelineDesc.cDepthStencilState.stencilFront.passOp =
+        wgpu::StencilOperation::IncrementClamp;
+    pipelineDesc.cDepthStencilState.format = stencilTexDesc.format;
+    pipelineDesc.cFragmentStage.module = mFragmentModule;
+    pipelineDesc.vertexStage.module = mVertexModule;
+
+    wgpu::RenderPipeline stencilPipeline = device.CreateRenderPipeline(&pipelineDesc);
+
+    // Record and submit the draw of the quad (6 vertices, two triangles).
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::RenderPassEncoder renderPass = encoder.BeginRenderPass(&passDesc);
+    renderPass.SetPipeline(stencilPipeline);
+    renderPass.Draw(6);
+    renderPass.EndPass();
+
+    wgpu::CommandBuffer commandBuffer = encoder.Finish();
+    queue.Submit(1, &commandBuffer);
+
+    // Stencil 1 is expected only in the bottom left quad; everywhere else stays at 0.
+    std::vector<uint8_t> expectedStencil = {
+        0u, 0u, 0u, 0u,  //
+        0u, 0u, 0u, 0u,  //
+        1u, 1u, 0u, 0u,  //
+        1u, 1u, 0u, 0u,  //
+    };
+
+    // The expectation itself issues the CopyTextureToBuffer, so it is the actual test.
+    EXPECT_TEXTURE_EQ(expectedStencil.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test copying from a buffer into the stencil aspect of a texture.
+TEST_P(DepthStencilCopyTests, ToStencilAspect) {
+    // TODO(enga): Figure out why this fails on Vulkan Intel
+    // Results are shifted by 1 byte on Windows, and crash/hang on Linux.
+    DAWN_SKIP_TEST_IF(IsVulkan() && IsIntel());
+
+    // TODO(enga): Figure out why this fails on MacOS Intel Iris.
+    // It passes on AMD Radeon Pro and Intel HD Graphics 630.
+    DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
+
+    // Create a depth-stencil texture that can be rendered to and copied in both directions.
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+    wgpu::TextureDescriptor texDescriptor = {};
+    texDescriptor.size = {kWidth, kHeight, 1};
+    texDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc |
+                          wgpu::TextureUsage::CopyDst;
+    wgpu::Texture depthStencilTexture = device.CreateTexture(&texDescriptor);
+
+    // Bytes per row for the stencil data we will upload.
+    // TODO(enga): Use WriteTexture when implemented everywhere.
+    uint32_t bytesPerRow = Align(kWidth * sizeof(uint8_t), kTextureBytesPerRowAlignment);
+
+    wgpu::BufferDescriptor bufferDesc = {};
+    bufferDesc.usage = wgpu::BufferUsage::CopySrc;
+    bufferDesc.size = kHeight * bytesPerRow;
+    bufferDesc.mappedAtCreation = true;
+
+    std::vector<uint8_t> stencilData = {
+        1u,  2u,  3u,  4u,   //
+        5u,  6u,  7u,  8u,   //
+        9u,  10u, 11u, 12u,  //
+        13u, 14u, 15u, 16u,  //
+    };
+
+    // After copying stencil data in, we will decrement stencil values in the bottom left
+    // of the screen. This is the expected result.
+    std::vector<uint8_t> expectedStencilData = {
+        1u,  2u,  3u,  4u,   //
+        5u,  6u,  7u,  8u,   //
+        8u,  9u,  11u, 12u,  //
+        12u, 13u, 15u, 16u,  //
+    };
+
+    // Copy the stencil data into the buffer one row at a time, since consecutive rows in the
+    // buffer start bytesPerRow apart while the source rows are tightly packed.
+    wgpu::Buffer buffer = device.CreateBuffer(&bufferDesc);
+    uint8_t* mappedData = static_cast<uint8_t*>(buffer.GetMappedRange());
+    for (uint32_t r = 0; r < kHeight; ++r) {
+        memcpy(mappedData + r * bytesPerRow, &stencilData[r * kWidth], kWidth);
+    }
+    buffer.Unmap();
+
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+        // Clear depth to 0.7, so we can check that the stencil copy doesn't mutate the depth.
+        utils::ComboRenderPassDescriptor passDescriptor({}, depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.clearDepth = 0.7;
+
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.EndPass();
+
+        // Copy from the buffer into the stencil aspect of the texture.
+        wgpu::BufferCopyView bufferCopy = utils::CreateBufferCopyView(buffer, 0, bytesPerRow, 0);
+        wgpu::TextureCopyView textureCopy = utils::CreateTextureCopyView(
+            depthStencilTexture, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+
+        commandEncoder.CopyBufferToTexture(&bufferCopy, &textureCopy, &texDescriptor.size);
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+        // Create a render pipeline which decrements the stencil value for passing fragments.
+        // A quad is drawn in the bottom left.
+        utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
+        renderPipelineDesc.vertexStage.module = mVertexModule;
+        renderPipelineDesc.cFragmentStage.module = mFragmentModule;
+        renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
+        renderPipelineDesc.cDepthStencilState.stencilFront.passOp =
+            wgpu::StencilOperation::DecrementClamp;
+        renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
+        renderPipelineDesc.colorStateCount = 0;
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+
+        // Create a render pass which loads the stencil. We want to load the values we
+        // copied in. Also load the canary depth values so they're not lost.
+        utils::ComboRenderPassDescriptor passDescriptor({}, depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.stencilLoadOp = wgpu::LoadOp::Load;
+        passDescriptor.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Load;
+
+        // Draw the quad in the bottom left (two triangles).
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.SetPipeline(pipeline);
+        pass.Draw(6);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
+
+    // Copy back the stencil data and check it matches the expected (decremented) values.
+    EXPECT_TEXTURE_EQ(expectedStencilData.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+
+    // Check that the depth buffer isn't changed.
+    // We do this by executing a draw call that only passes the depth test if
+    // the depth is equal to the current depth buffer.
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+        // Make the color attachment that we'll use to read back.
+        wgpu::TextureDescriptor colorTexDesc = {};
+        colorTexDesc.size = {kWidth, kHeight, 1};
+        colorTexDesc.format = wgpu::TextureFormat::R32Uint;
+        colorTexDesc.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+        wgpu::Texture colorTexture = device.CreateTexture(&colorTexDesc);
+
+        // Pipeline for a full screen quad.
+        utils::ComboRenderPipelineDescriptor pipelineDescriptor(device);
+
+        pipelineDescriptor.vertexStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
+    #version 450
+    void main() {
+        const vec2 pos[3] = vec2[3](vec2(-1.f, -1.f), vec2(3.f, -1.f), vec2(-1.f, 3.f));
+        gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+    })");
+
+        // Write out 0.7 for depth. This is the same canary value we wrote previously.
+        pipelineDescriptor.cFragmentStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
+    #version 450
+
+    layout(location = 0) out uint result;
+    void main() {
+        result = 1u;
+        gl_FragDepth = 0.7;
+    })");
+
+        // Pass the depth test only if the depth is equal.
+        pipelineDescriptor.primitiveTopology = wgpu::PrimitiveTopology::TriangleList;
+        pipelineDescriptor.depthStencilState = &pipelineDescriptor.cDepthStencilState;
+        pipelineDescriptor.cDepthStencilState.format = texDescriptor.format;
+        pipelineDescriptor.cDepthStencilState.depthCompare = wgpu::CompareFunction::Equal;
+        pipelineDescriptor.cColorStates[0].format = colorTexDesc.format;
+
+        utils::ComboRenderPassDescriptor passDescriptor({colorTexture.CreateView()},
+                                                        depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Load;
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDescriptor);
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.SetPipeline(pipeline);
+        pass.Draw(3);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+
+        std::vector<uint32_t> colorData(kWidth * kHeight, 1u);
+        EXPECT_TEXTURE_EQ(colorData.data(), colorTexture, 0, 0, kWidth, kHeight, 0, 0);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(DepthStencilCopyTests, MetalBackend(), VulkanBackend());