From 0a4342793e0e31ed36e59d2eef36817c82ede555 Mon Sep 17 00:00:00 2001
From: Austin Eng <enga@chromium.org>
Date: Tue, 4 Aug 2020 19:46:37 +0000
Subject: [PATCH] Implement depth-only/stencil-only copies on Vulkan and Metal

Bug: dawn:439
Change-Id: I07ab014f4f13b73c09b2eecc48cd38b06d88166a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24684
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jiawei Shao <jiawei.shao@intel.com>
Commit-Queue: Austin Eng <enga@chromium.org>
---
 src/dawn_native/CommandEncoder.cpp          |   4 +
 src/dawn_native/Commands.h                  |   1 +
 src/dawn_native/metal/CommandBufferMTL.mm   |  12 +-
 src/dawn_native/metal/DeviceMTL.mm          |  12 +-
 src/dawn_native/metal/QueueMTL.mm           |  26 +-
 src/dawn_native/metal/UtilsMetal.h          |   2 +
 src/dawn_native/metal/UtilsMetal.mm         |  17 ++
 src/dawn_native/vulkan/CommandBufferVk.cpp  |   4 +-
 src/dawn_native/vulkan/QueueVk.cpp          |  30 +-
 src/dawn_native/vulkan/TextureVk.cpp        |  21 +-
 src/dawn_native/vulkan/TextureVk.h          |   2 +-
 src/dawn_native/vulkan/UtilsVulkan.cpp      |  12 +-
 src/tests/BUILD.gn                          |   1 +
 src/tests/DawnTest.cpp                      |  42 ++-
 src/tests/DawnTest.h                        |  76 +++--
 src/tests/end2end/DepthStencilCopyTests.cpp | 323 ++++++++++++++++++++
 16 files changed, 506 insertions(+), 79 deletions(-)
 create mode 100644 src/tests/end2end/DepthStencilCopyTests.cpp
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index 1d049b5bb7..2450edc919 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -723,6 +723,7 @@ namespace dawn_native {
             copy->destination.texture = destination->texture;
             copy->destination.origin = destination->origin;
             copy->destination.mipLevel = destination->mipLevel;
+            copy->destination.aspect = destination->aspect;
             copy->copySize = *copySize;
 
             return {};
@@ -778,6 +779,7 @@ namespace dawn_native {
             copy->source.texture = source->texture;
             copy->source.origin = source->origin;
             copy->source.mipLevel = source->mipLevel;
+            copy->source.aspect = source->aspect;
             copy->destination.buffer = destination->buffer;
             copy->destination.offset = destination->layout.offset;
             copy->destination.bytesPerRow = destination->layout.bytesPerRow;
@@ -826,9 +828,11 @@ namespace dawn_native {
             copy->source.texture = source->texture;
             copy->source.origin = source->origin;
             copy->source.mipLevel = source->mipLevel;
+            copy->source.aspect = source->aspect;
             copy->destination.texture = destination->texture;
             copy->destination.origin = destination->origin;
             copy->destination.mipLevel = destination->mipLevel;
+            copy->destination.aspect = destination->aspect;
             copy->copySize = *copySize;
 
             return {};
diff --git a/src/dawn_native/Commands.h b/src/dawn_native/Commands.h
index 85e462bc6a..32ffcc3bf3 100644
--- a/src/dawn_native/Commands.h
+++ b/src/dawn_native/Commands.h
@@ -105,6 +105,7 @@ namespace dawn_native {
         Ref<TextureBase> texture;
         uint32_t mipLevel;
         Origin3D origin;  // Texels / array layer
+        wgpu::TextureAspect aspect;
     };
 
     struct CopyBufferToBufferCmd {
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 9300f3806b..f35448a851 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -622,6 +622,9 @@ namespace dawn_native { namespace metal {
                         const MTLSize copyExtent =
                             MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);
 
+                        MTLBlitOption blitOption =
+                            ComputeMTLBlitOption(texture->GetFormat(), dst.aspect);
+
                         uint64_t bufferOffset = copyInfo.bufferOffset;
                         for (uint32_t copyLayer = copyBaseLayer;
                              copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
@@ -633,7 +636,8 @@ namespace dawn_native { namespace metal {
                                                                toTexture:texture->GetMTLTexture()
                                                         destinationSlice:copyLayer
                                                         destinationLevel:dst.mipLevel
-                                                       destinationOrigin:textureOrigin];
+                                                       destinationOrigin:textureOrigin
+                                                                 options:blitOption];
                             bufferOffset += copyInfo.bytesPerImage;
                         }
                     }
@@ -668,6 +672,9 @@ namespace dawn_native { namespace metal {
                         const MTLSize copyExtent =
                             MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);
 
+                        MTLBlitOption blitOption =
+                            ComputeMTLBlitOption(texture->GetFormat(), src.aspect);
+
                         uint64_t bufferOffset = copyInfo.bufferOffset;
                         for (uint32_t copyLayer = copyBaseLayer;
                              copyLayer < copyBaseLayer + copyLayerCount; ++copyLayer) {
@@ -679,7 +686,8 @@ namespace dawn_native { namespace metal {
                                                                  toBuffer:buffer->GetMTLBuffer()
                                                         destinationOffset:bufferOffset
                                                    destinationBytesPerRow:copyInfo.bytesPerRow
-                                                 destinationBytesPerImage:copyInfo.bytesPerImage];
+                                                 destinationBytesPerImage:copyInfo.bytesPerImage
+                                                                  options:blitOption];
                             bufferOffset += copyInfo.bytesPerImage;
                         }
                     }
diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm
index 13cb7dfd08..fb7fdc389c 100644
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@@ -276,9 +276,10 @@ namespace dawn_native { namespace metal {
 
         // This function assumes data is perfectly aligned. Otherwise, it might be necessary
         // to split copying to several stages: see ComputeTextureBufferCopySplit.
-        uint32_t blockSize = dst->texture->GetFormat().blockByteSize;
-        uint32_t blockWidth = dst->texture->GetFormat().blockWidth;
-        uint32_t blockHeight = dst->texture->GetFormat().blockHeight;
+        const TexelBlockInfo& blockInfo = texture->GetFormat().GetTexelBlockInfo(dst->aspect);
+        uint32_t blockSize = blockInfo.blockByteSize;
+        uint32_t blockWidth = blockInfo.blockWidth;
+        uint32_t blockHeight = blockInfo.blockHeight;
         ASSERT(dataLayout.rowsPerImage == (copySize.height));
         ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);
 
@@ -295,6 +296,8 @@ namespace dawn_native { namespace metal {
         const uint64_t bytesPerImage =
             dataLayout.rowsPerImage * dataLayout.bytesPerRow / blockHeight;
 
+        MTLBlitOption blitOption = ComputeMTLBlitOption(texture->GetFormat(), dst->aspect);
+
         uint64_t bufferOffset = dataLayout.offset;
         for (uint32_t copyLayer = copyBaseLayer; copyLayer < copyBaseLayer + copyLayerCount;
              ++copyLayer) {
@@ -307,7 +310,8 @@ namespace dawn_native { namespace metal {
                           toTexture:texture->GetMTLTexture()
                    destinationSlice:copyLayer
                    destinationLevel:dst->mipLevel
-                  destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)];
+                  destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)
+                            options:blitOption];
 
             bufferOffset += bytesPerImage;
         }
diff --git a/src/dawn_native/metal/QueueMTL.mm b/src/dawn_native/metal/QueueMTL.mm
index 016e0cd69a..a4c9caaf59 100644
--- a/src/dawn_native/metal/QueueMTL.mm
+++ b/src/dawn_native/metal/QueueMTL.mm
@@ -33,10 +33,10 @@ namespace dawn_native { namespace metal {
             uint32_t alignedBytesPerRow,
             uint32_t alignedRowsPerImage,
             const TextureDataLayout* dataLayout,
-            const Format& textureFormat,
+            const TexelBlockInfo& blockInfo,
             const Extent3D* writeSize) {
             uint32_t newDataSize = ComputeRequiredBytesInCopy(
-                textureFormat, *writeSize, alignedBytesPerRow, alignedRowsPerImage);
+                blockInfo, *writeSize, alignedBytesPerRow, alignedRowsPerImage);
 
             UploadHandle uploadHandle;
             DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
@@ -47,10 +47,10 @@ namespace dawn_native { namespace metal {
             const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
             srcPointer += dataLayout->offset;
 
-            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / textureFormat.blockHeight;
-            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / textureFormat.blockHeight;
+            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / blockInfo.blockHeight;
+            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / blockInfo.blockHeight;
             if (dataRowsPerImageInBlock == 0) {
-                dataRowsPerImageInBlock = writeSize->height / textureFormat.blockHeight;
+                dataRowsPerImageInBlock = writeSize->height / blockInfo.blockHeight;
             }
 
             ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
@@ -91,19 +91,20 @@ namespace dawn_native { namespace metal {
                                        size_t dataSize,
                                        const TextureDataLayout* dataLayout,
                                        const Extent3D* writeSize) {
-        uint32_t blockSize = destination->texture->GetFormat().blockByteSize;
-        uint32_t blockWidth = destination->texture->GetFormat().blockWidth;
+        const TexelBlockInfo& blockInfo =
+            destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect);
+
         // We are only copying the part of the data that will appear in the texture.
         // Note that validating texture copy range ensures that writeSize->width and
         // writeSize->height are multiples of blockWidth and blockHeight respectively.
-        uint32_t alignedBytesPerRow = (writeSize->width) / blockWidth * blockSize;
+        uint32_t alignedBytesPerRow =
+            (writeSize->width) / blockInfo.blockWidth * blockInfo.blockByteSize;
         uint32_t alignedRowsPerImage = writeSize->height;
 
         UploadHandle uploadHandle;
-        DAWN_TRY_ASSIGN(uploadHandle,
-                        UploadTextureDataAligningBytesPerRow(
-                            GetDevice(), data, dataSize, alignedBytesPerRow, alignedRowsPerImage,
-                            dataLayout, destination->texture->GetFormat(), writeSize));
+        DAWN_TRY_ASSIGN(uploadHandle, UploadTextureDataAligningBytesPerRow(
+                                          GetDevice(), data, dataSize, alignedBytesPerRow,
+                                          alignedRowsPerImage, dataLayout, blockInfo, writeSize));
 
         TextureDataLayout passDataLayout = *dataLayout;
         passDataLayout.offset = uploadHandle.startOffset;
@@ -114,6 +115,7 @@ namespace dawn_native { namespace metal {
         textureCopy.texture = destination->texture;
         textureCopy.mipLevel = destination->mipLevel;
         textureCopy.origin = destination->origin;
+        textureCopy.aspect = destination->aspect;
 
         return ToBackend(GetDevice())
             ->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, &textureCopy,
diff --git a/src/dawn_native/metal/UtilsMetal.h b/src/dawn_native/metal/UtilsMetal.h
index fe0e2283d2..f7d514d1ba 100644
--- a/src/dawn_native/metal/UtilsMetal.h
+++ b/src/dawn_native/metal/UtilsMetal.h
@@ -53,6 +53,8 @@ namespace dawn_native { namespace metal {
                                              const TextureCopy& dst,
                                              const Extent3D& size);
 
+    MTLBlitOption ComputeMTLBlitOption(const Format& format, wgpu::TextureAspect aspect);
+
 }}  // namespace dawn_native::metal
 
 #endif  // DAWNNATIVE_METAL_UTILSMETAL_H_
diff --git a/src/dawn_native/metal/UtilsMetal.mm b/src/dawn_native/metal/UtilsMetal.mm
index 13b4668818..5f50a0fb7f 100644
--- a/src/dawn_native/metal/UtilsMetal.mm
+++ b/src/dawn_native/metal/UtilsMetal.mm
@@ -164,4 +164,21 @@ namespace dawn_native { namespace metal {
         }
     }
 
+    MTLBlitOption ComputeMTLBlitOption(const Format& format, wgpu::TextureAspect aspect) {
+        constexpr Aspect kDepthStencil = Aspect::Depth | Aspect::Stencil;
+        if ((format.aspects & kDepthStencil) == kDepthStencil) {
+            // We only provide a blit option if the format has both depth and stencil.
+            // It is invalid to provide a blit option otherwise.
+            switch (aspect) {
+                case wgpu::TextureAspect::DepthOnly:
+                    return MTLBlitOptionDepthFromDepthStencil;
+                case wgpu::TextureAspect::StencilOnly:
+                    return MTLBlitOptionStencilFromDepthStencil;
+                default:
+                    UNREACHABLE();
+            }
+        }
+        return MTLBlitOptionNone;
+    }
+
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index 8315b1b787..3b7d016e4d 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -68,7 +68,7 @@ namespace dawn_native { namespace vulkan {
             // TODO(jiawei.shao@intel.com): support 1D and 3D textures
             ASSERT(srcTexture->GetDimension() == wgpu::TextureDimension::e2D &&
                    dstTexture->GetDimension() == wgpu::TextureDimension::e2D);
-            region.srcSubresource.aspectMask = srcTexture->GetVkAspectMask();
+            region.srcSubresource.aspectMask = srcTexture->GetVkAspectMask(srcCopy.aspect);
             region.srcSubresource.mipLevel = srcCopy.mipLevel;
             region.srcSubresource.baseArrayLayer = srcCopy.origin.z;
             region.srcSubresource.layerCount = copySize.depth;
@@ -77,7 +77,7 @@ namespace dawn_native { namespace vulkan {
             region.srcOffset.y = srcCopy.origin.y;
             region.srcOffset.z = 0;
 
-            region.dstSubresource.aspectMask = dstTexture->GetVkAspectMask();
+            region.dstSubresource.aspectMask = dstTexture->GetVkAspectMask(dstCopy.aspect);
             region.dstSubresource.mipLevel = dstCopy.mipLevel;
             region.dstSubresource.baseArrayLayer = dstCopy.origin.z;
             region.dstSubresource.layerCount = copySize.depth;
diff --git a/src/dawn_native/vulkan/QueueVk.cpp b/src/dawn_native/vulkan/QueueVk.cpp
index b19b2368c4..c7c4ad7c6d 100644
--- a/src/dawn_native/vulkan/QueueVk.cpp
+++ b/src/dawn_native/vulkan/QueueVk.cpp
@@ -36,10 +36,10 @@ namespace dawn_native { namespace vulkan {
             uint32_t optimallyAlignedBytesPerRow,
             uint32_t alignedRowsPerImage,
             const TextureDataLayout* dataLayout,
-            const Format& textureFormat,
+            const TexelBlockInfo& blockInfo,
             const Extent3D* writeSize) {
             uint32_t newDataSize = ComputeRequiredBytesInCopy(
-                textureFormat, *writeSize, optimallyAlignedBytesPerRow, alignedRowsPerImage);
+                blockInfo, *writeSize, optimallyAlignedBytesPerRow, alignedRowsPerImage);
 
             uint64_t optimalOffsetAlignment =
                 ToBackend(device)
@@ -56,10 +56,10 @@ namespace dawn_native { namespace vulkan {
             const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
             srcPointer += dataLayout->offset;
 
-            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / textureFormat.blockHeight;
-            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / textureFormat.blockHeight;
+            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / blockInfo.blockHeight;
+            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / blockInfo.blockHeight;
             if (dataRowsPerImageInBlock == 0) {
-                dataRowsPerImageInBlock = writeSize->height / textureFormat.blockHeight;
+                dataRowsPerImageInBlock = writeSize->height / blockInfo.blockHeight;
             }
 
             uint64_t additionalOffset =
@@ -110,12 +110,14 @@ namespace dawn_native { namespace vulkan {
                                        size_t dataSize,
                                        const TextureDataLayout* dataLayout,
                                        const Extent3D* writeSize) {
-        uint32_t blockSize = destination->texture->GetFormat().blockByteSize;
-        uint32_t blockWidth = destination->texture->GetFormat().blockWidth;
+        const TexelBlockInfo& blockInfo =
+            destination->texture->GetFormat().GetTexelBlockInfo(destination->aspect);
+
         // We are only copying the part of the data that will appear in the texture.
         // Note that validating texture copy range ensures that writeSize->width and
         // writeSize->height are multiples of blockWidth and blockHeight respectively.
-        uint32_t alignedBytesPerRow = (writeSize->width) / blockWidth * blockSize;
+        uint32_t alignedBytesPerRow =
+            (writeSize->width) / blockInfo.blockWidth * blockInfo.blockByteSize;
         uint32_t alignedRowsPerImage = writeSize->height;
 
         uint32_t optimalBytesPerRowAlignment =
@@ -126,11 +128,10 @@ namespace dawn_native { namespace vulkan {
             Align(alignedBytesPerRow, optimalBytesPerRowAlignment);
 
         UploadHandle uploadHandle;
-        DAWN_TRY_ASSIGN(
-            uploadHandle,
-            UploadTextureDataAligningBytesPerRow(
-                GetDevice(), data, dataSize, alignedBytesPerRow, optimallyAlignedBytesPerRow,
-                alignedRowsPerImage, dataLayout, destination->texture->GetFormat(), writeSize));
+        DAWN_TRY_ASSIGN(uploadHandle, UploadTextureDataAligningBytesPerRow(
+                                          GetDevice(), data, dataSize, alignedBytesPerRow,
+                                          optimallyAlignedBytesPerRow, alignedRowsPerImage,
+                                          dataLayout, blockInfo, writeSize));
 
         TextureDataLayout passDataLayout = *dataLayout;
         passDataLayout.offset = uploadHandle.startOffset;
@@ -141,9 +142,10 @@ namespace dawn_native { namespace vulkan {
         textureCopy.texture = destination->texture;
         textureCopy.mipLevel = destination->mipLevel;
         textureCopy.origin = destination->origin;
+        textureCopy.aspect = destination->aspect;
 
         return ToBackend(GetDevice())
             ->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, &textureCopy,
                                        *writeSize);
     }
-}}  // namespace dawn_native::vulkan
\ No newline at end of file
+}}  // namespace dawn_native::vulkan
diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp
index 3c7ffe832c..6d1291bfb8 100644
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@@ -669,8 +669,21 @@ namespace dawn_native { namespace vulkan {
         return mHandle;
     }
 
-    VkImageAspectFlags Texture::GetVkAspectMask() const {
-        return VulkanAspectMask(GetFormat().aspects);
+    VkImageAspectFlags Texture::GetVkAspectMask(wgpu::TextureAspect aspect) const {
+        // TODO(enga): These masks could be precomputed.
+        switch (aspect) {
+            case wgpu::TextureAspect::All:
+                return VulkanAspectMask(GetFormat().aspects);
+            case wgpu::TextureAspect::DepthOnly:
+                ASSERT(GetFormat().aspects & Aspect::Depth);
+                return VulkanAspectMask(Aspect::Depth);
+            case wgpu::TextureAspect::StencilOnly:
+                ASSERT(GetFormat().aspects & Aspect::Stencil);
+                return VulkanAspectMask(Aspect::Stencil);
+            default:
+                UNREACHABLE();
+                return 0;
+        }
     }
 
     void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
@@ -872,7 +885,7 @@ namespace dawn_native { namespace vulkan {
         TransitionUsageNow(recordingContext, wgpu::TextureUsage::CopyDst, range);
         if (GetFormat().isRenderable) {
             VkImageSubresourceRange imageRange = {};
-            imageRange.aspectMask = GetVkAspectMask();
+            imageRange.aspectMask = GetVkAspectMask(wgpu::TextureAspect::All);
             imageRange.levelCount = 1;
             imageRange.layerCount = 1;
 
@@ -943,10 +956,12 @@ namespace dawn_native { namespace vulkan {
                         continue;
                     }
 
+                    ASSERT(GetFormat().aspects == Aspect::Color);
                     dawn_native::TextureCopy textureCopy;
                     textureCopy.texture = this;
                     textureCopy.origin = {0, 0, layer};
                     textureCopy.mipLevel = level;
+                    textureCopy.aspect = wgpu::TextureAspect::All;
 
                     VkBufferImageCopy region =
                         ComputeBufferImageCopyRegion(bufferCopy, textureCopy, copySize);
diff --git a/src/dawn_native/vulkan/TextureVk.h b/src/dawn_native/vulkan/TextureVk.h
index 8a1564af72..6748ebd0d4 100644
--- a/src/dawn_native/vulkan/TextureVk.h
+++ b/src/dawn_native/vulkan/TextureVk.h
@@ -59,7 +59,7 @@ namespace dawn_native { namespace vulkan {
                                                VkImage nativeImage);
 
         VkImage GetHandle() const;
-        VkImageAspectFlags GetVkAspectMask() const;
+        VkImageAspectFlags GetVkAspectMask(wgpu::TextureAspect aspect) const;
 
         // Transitions the texture to be used as `usage`, recording any necessary barrier in
         // `commands`.
diff --git a/src/dawn_native/vulkan/UtilsVulkan.cpp b/src/dawn_native/vulkan/UtilsVulkan.cpp
index 15011ce527..b7116da568 100644
--- a/src/dawn_native/vulkan/UtilsVulkan.cpp
+++ b/src/dawn_native/vulkan/UtilsVulkan.cpp
@@ -84,12 +84,14 @@ namespace dawn_native { namespace vulkan {
 
         region.bufferOffset = dataLayout.offset;
         // In Vulkan the row length is in texels while it is in bytes for Dawn
-        const Format& format = texture->GetFormat();
-        ASSERT(dataLayout.bytesPerRow % format.blockByteSize == 0);
-        region.bufferRowLength = dataLayout.bytesPerRow / format.blockByteSize * format.blockWidth;
+        const TexelBlockInfo& blockInfo =
+            texture->GetFormat().GetTexelBlockInfo(textureCopy.aspect);
+        ASSERT(dataLayout.bytesPerRow % blockInfo.blockByteSize == 0);
+        region.bufferRowLength =
+            dataLayout.bytesPerRow / blockInfo.blockByteSize * blockInfo.blockWidth;
         region.bufferImageHeight = dataLayout.rowsPerImage;
 
-        region.imageSubresource.aspectMask = texture->GetVkAspectMask();
+        region.imageSubresource.aspectMask = texture->GetVkAspectMask(textureCopy.aspect);
         region.imageSubresource.mipLevel = textureCopy.mipLevel;
 
         switch (textureCopy.texture->GetDimension()) {
@@ -115,4 +117,4 @@ namespace dawn_native { namespace vulkan {
 
         return region;
     }
-}}  // namespace dawn_native::vulkan
\ No newline at end of file
+}}  // namespace dawn_native::vulkan
diff --git a/src/tests/BUILD.gn b/src/tests/BUILD.gn
index b69dcd095a..956f3d4588 100644
--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@@ -276,6 +276,7 @@ source_set("dawn_end2end_tests_sources") {
     "end2end/DebugMarkerTests.cpp",
     "end2end/DeprecatedAPITests.cpp",
     "end2end/DepthSamplingTests.cpp",
+    "end2end/DepthStencilCopyTests.cpp",
     "end2end/DepthStencilStateTests.cpp",
     "end2end/DestroyTests.cpp",
     "end2end/DeviceLostTests.cpp",
diff --git a/src/tests/DawnTest.cpp b/src/tests/DawnTest.cpp
index 8b515424e8..fd5b7006f6 100644
--- a/src/tests/DawnTest.cpp
+++ b/src/tests/DawnTest.cpp
@@ -926,26 +926,36 @@ std::ostringstream& DawnTestBase::AddBufferExpectation(const char* file,
     return *(mDeferredExpectations.back().message.get());
 }
 
-std::ostringstream& DawnTestBase::AddTextureExpectation(const char* file,
-                                                        int line,
-                                                        const wgpu::Texture& texture,
-                                                        uint32_t x,
-                                                        uint32_t y,
-                                                        uint32_t width,
-                                                        uint32_t height,
-                                                        uint32_t level,
-                                                        uint32_t slice,
-                                                        uint32_t pixelSize,
-                                                        detail::Expectation* expectation) {
-    uint32_t bytesPerRow = Align(width * pixelSize, kTextureBytesPerRowAlignment);
-    uint32_t size = bytesPerRow * (height - 1) + width * pixelSize;
+std::ostringstream& DawnTestBase::AddTextureExpectationImpl(const char* file,
+                                                            int line,
+                                                            detail::Expectation* expectation,
+                                                            const wgpu::Texture& texture,
+                                                            uint32_t x,
+                                                            uint32_t y,
+                                                            uint32_t width,
+                                                            uint32_t height,
+                                                            uint32_t level,
+                                                            uint32_t slice,
+                                                            wgpu::TextureAspect aspect,
+                                                            uint32_t dataSize,
+                                                            uint32_t bytesPerRow) {
+    if (bytesPerRow == 0) {
+        bytesPerRow = Align(width * dataSize, kTextureBytesPerRowAlignment);
+    } else {
+        ASSERT(bytesPerRow >= width * dataSize);
+        ASSERT(bytesPerRow == Align(bytesPerRow, kTextureBytesPerRowAlignment));
+    }
 
-    auto readback = ReserveReadback(size);
+    uint32_t size = bytesPerRow * (height - 1) + width * dataSize;
+
+    // TODO(enga): We should have the map async alignment in Constants.h. Also, it should change
+    // to 8 for Float64Array.
+    auto readback = ReserveReadback(Align(size, 4));
 
     // We need to enqueue the copy immediately because by the time we resolve the expectation,
     // the texture might have been modified.
     wgpu::TextureCopyView textureCopyView =
-        utils::CreateTextureCopyView(texture, level, {x, y, slice});
+        utils::CreateTextureCopyView(texture, level, {x, y, slice}, aspect);
     wgpu::BufferCopyView bufferCopyView =
         utils::CreateBufferCopyView(readback.buffer, readback.offset, bytesPerRow, 0);
     wgpu::Extent3D copySize = {width, height, 1};
@@ -962,7 +972,7 @@ std::ostringstream& DawnTestBase::AddTextureExpectation(const char* file,
     deferred.readbackSlot = readback.slot;
     deferred.readbackOffset = readback.offset;
     deferred.size = size;
-    deferred.rowBytes = width * pixelSize;
+    deferred.rowBytes = width * dataSize;
     deferred.bytesPerRow = bytesPerRow;
     deferred.expectation.reset(expectation);
 
diff --git a/src/tests/DawnTest.h b/src/tests/DawnTest.h
index 40f0fe2c42..903ab60d81 100644
--- a/src/tests/DawnTest.h
+++ b/src/tests/DawnTest.h
@@ -60,23 +60,20 @@
                   new ::detail::ExpectEq<float>(expected, count))
 
 // Test a pixel of the mip level 0 of a 2D texture.
-#define EXPECT_PIXEL_RGBA8_EQ(expected, texture, x, y)                                  \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, 1, 1, 0, 0, sizeof(RGBA8), \
-                          new ::detail::ExpectEq<RGBA8>(expected))
+#define EXPECT_PIXEL_RGBA8_EQ(expected, texture, x, y) \
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y)
 
-#define EXPECT_TEXTURE_RGBA8_EQ(expected, texture, x, y, width, height, level, slice)     \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, width, height, level, slice, \
-                          sizeof(RGBA8),                                                  \
-                          new ::detail::ExpectEq<RGBA8>(expected, (width) * (height)))
+#define EXPECT_TEXTURE_RGBA8_EQ(expected, texture, x, y, width, height, level, slice) \
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y, width, height, level, slice)
 
-#define EXPECT_PIXEL_FLOAT_EQ(expected, texture, x, y)                                  \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, 1, 1, 0, 0, sizeof(float), \
-                          new ::detail::ExpectEq<float>(expected))
+#define EXPECT_PIXEL_FLOAT_EQ(expected, texture, x, y) \
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y)
 
-#define EXPECT_TEXTURE_FLOAT_EQ(expected, texture, x, y, width, height, level, slice)     \
-    AddTextureExpectation(__FILE__, __LINE__, texture, x, y, width, height, level, slice, \
-                          sizeof(float),                                                  \
-                          new ::detail::ExpectEq<float>(expected, (width) * (height)))
+#define EXPECT_TEXTURE_FLOAT_EQ(expected, texture, x, y, width, height, level, slice) \
+    AddTextureExpectation(__FILE__, __LINE__, expected, texture, x, y, width, height, level, slice)
+
+// TODO(enga): Migrate other texture expectation helpers to this common one.
+#define EXPECT_TEXTURE_EQ(...) AddTextureExpectation(__FILE__, __LINE__, __VA_ARGS__)
 
 // Should only be used to test validation of function that can't be tested by regular validation
 // tests;
@@ -163,6 +160,9 @@ namespace utils {
 
 namespace detail {
     class Expectation;
+
+    template <typename T>
+    class ExpectEq;
 }  // namespace detail
 
 namespace dawn_wire {
@@ -281,17 +281,39 @@ class DawnTestBase {
                                              uint64_t offset,
                                              uint64_t size,
                                              detail::Expectation* expectation);
+
+    template <typename T>
     std::ostringstream& AddTextureExpectation(const char* file,
                                               int line,
+                                              const T* expectedData,
                                               const wgpu::Texture& texture,
                                               uint32_t x,
                                               uint32_t y,
-                                              uint32_t width,
-                                              uint32_t height,
-                                              uint32_t level,
-                                              uint32_t slice,
-                                              uint32_t pixelSize,
-                                              detail::Expectation* expectation);
+                                              uint32_t width = 1,
+                                              uint32_t height = 1,
+                                              uint32_t level = 0,
+                                              uint32_t slice = 0,
+                                              wgpu::TextureAspect aspect = wgpu::TextureAspect::All,
+                                              uint32_t bytesPerRow = 0) {
+        return AddTextureExpectationImpl(
+            file, line, new detail::ExpectEq<T>(expectedData, width * height), texture, x, y, width,
+            height, level, slice, aspect, sizeof(T), bytesPerRow);
+    }
+
+    template <typename T>
+    std::ostringstream& AddTextureExpectation(const char* file,
+                                              int line,
+                                              const T& expectedData,
+                                              const wgpu::Texture& texture,
+                                              uint32_t x,
+                                              uint32_t y,
+                                              uint32_t level = 0,
+                                              uint32_t slice = 0,
+                                              wgpu::TextureAspect aspect = wgpu::TextureAspect::All,
+                                              uint32_t bytesPerRow = 0) {
+        return AddTextureExpectationImpl(file, line, new detail::ExpectEq<T>(expectedData), texture,
+                                         x, y, 1, 1, level, slice, aspect, sizeof(T), bytesPerRow);
+    }
 
     void WaitABit();
     void FlushWire();
@@ -323,6 +345,20 @@ class DawnTestBase {
     bool mExpectError = false;
     bool mError = false;
 
+    std::ostringstream& AddTextureExpectationImpl(const char* file,
+                                                  int line,
+                                                  detail::Expectation* expectation,
+                                                  const wgpu::Texture& texture,
+                                                  uint32_t x,
+                                                  uint32_t y,
+                                                  uint32_t width,
+                                                  uint32_t height,
+                                                  uint32_t level,
+                                                  uint32_t slice,
+                                                  wgpu::TextureAspect aspect,
+                                                  uint32_t dataSize,
+                                                  uint32_t bytesPerRow);
+
     // MapRead buffers used to get data for the expectations
     struct ReadbackSlot {
         wgpu::Buffer buffer;
diff --git a/src/tests/end2end/DepthStencilCopyTests.cpp b/src/tests/end2end/DepthStencilCopyTests.cpp
new file mode 100644
index 0000000000..72c9e66b72
--- /dev/null
+++ b/src/tests/end2end/DepthStencilCopyTests.cpp
@@ -0,0 +1,323 @@
+// Copyright 2020 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tests/DawnTest.h"
+
+#include <array>
+#include "common/Constants.h"
+#include "common/Math.h"
+#include "utils/ComboRenderPipelineDescriptor.h"
+#include "utils/TextureFormatUtils.h"
+#include "utils/WGPUHelpers.h"
+
+class DepthStencilCopyTests : public DawnTest {
+  protected:
+    void SetUp() override {
+        DawnTest::SetUp();
+
+        // Draw a square in the bottom left quarter of the screen.
+        mVertexModule = utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
+    #version 450
+    void main() {
+        const vec2 pos[6] = vec2[6](vec2(-1.f, -1.f), vec2(0.f, -1.f), vec2(-1.f,  0.f),
+                                    vec2(-1.f,  0.f), vec2(0.f, -1.f), vec2( 0.f,  0.f));
+        gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+    })");
+
+        mFragmentModule = utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
+    #version 450
+    void main() {
+        gl_FragDepth = 0.3;
+    })");
+    }
+
+    static constexpr float kWrittenDepthValue = 0.3;
+
+    wgpu::ShaderModule mVertexModule;
+    wgpu::ShaderModule mFragmentModule;
+};
+
+// Test copying the depth-only aspect into a buffer.
+TEST_P(DepthStencilCopyTests, FromDepthAspect) {
+    // Create a depth texture
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+    wgpu::TextureDescriptor texDescriptor = {};
+    texDescriptor.size = {kWidth, kHeight, 1};
+    texDescriptor.format = wgpu::TextureFormat::Depth32Float;
+    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+    wgpu::Texture depthTexture = device.CreateTexture(&texDescriptor);
+
+    // Create a render pass which clears depth to 0
+    utils::ComboRenderPassDescriptor renderPassDesc({}, depthTexture.CreateView());
+    renderPassDesc.cDepthStencilAttachmentInfo.clearDepth = 0.f;
+
+    // Create a render pipeline to render a bottom-left quad with depth 0.3.
+    utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
+    renderPipelineDesc.vertexStage.module = mVertexModule;
+    renderPipelineDesc.cFragmentStage.module = mFragmentModule;
+    renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
+    renderPipelineDesc.cDepthStencilState.depthWriteEnabled = true;
+    renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
+    renderPipelineDesc.colorStateCount = 0;
+
+    wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+
+    // Draw the quad (two triangles)
+    wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+    wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
+    pass.SetPipeline(pipeline);
+    pass.Draw(6);
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = commandEncoder.Finish();
+    queue.Submit(1, &commands);
+
+    // Only the bottom left quad has depth values
+    std::vector<float> expected = {
+        0.0, 0.0, 0.0, 0.0,  //
+        0.0, 0.0, 0.0, 0.0,  //
+        0.3, 0.3, 0.0, 0.0,  //
+        0.3, 0.3, 0.0, 0.0,  //
+    };
+
+    // This expectation is the test as it performs the CopyTextureToBuffer.
+    EXPECT_TEXTURE_EQ(expected.data(), depthTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::DepthOnly);
+}
+
+// Test copying the stencil-only aspect into a buffer.
+TEST_P(DepthStencilCopyTests, FromStencilAspect) {
+    // TODO(enga): Figure out why this fails on Linux Vulkan Intel
+    DAWN_SKIP_TEST_IF(IsLinux() && IsVulkan() && IsIntel());
+
+    // Create a stencil texture
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+    wgpu::TextureDescriptor texDescriptor = {};
+    texDescriptor.size = {kWidth, kHeight, 1};
+    texDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+    wgpu::Texture depthStencilTexture = device.CreateTexture(&texDescriptor);
+
+    // Create a render pass which clears the stencil to 0 on load.
+    utils::ComboRenderPassDescriptor renderPassDesc({}, depthStencilTexture.CreateView());
+    renderPassDesc.cDepthStencilAttachmentInfo.clearStencil = 0;
+
+    // Create a render pipeline which increments the stencil value for passing fragments.
+    // A quad is drawn in the bottom left.
+    utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
+    renderPipelineDesc.vertexStage.module = mVertexModule;
+    renderPipelineDesc.cFragmentStage.module = mFragmentModule;
+    renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
+    renderPipelineDesc.cDepthStencilState.stencilFront.passOp =
+        wgpu::StencilOperation::IncrementClamp;
+    renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
+    renderPipelineDesc.colorStateCount = 0;
+
+    wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+
+    // Draw the quad (two triangles)
+    wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+    wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
+    pass.SetPipeline(pipeline);
+    pass.Draw(6);
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = commandEncoder.Finish();
+    queue.Submit(1, &commands);
+
+    // Only the bottom left quad has stencil values
+    std::vector<uint8_t> expected = {
+        0u, 0u, 0u, 0u,  //
+        0u, 0u, 0u, 0u,  //
+        1u, 1u, 0u, 0u,  //
+        1u, 1u, 0u, 0u,  //
+    };
+
+    // This expectation is the test as it performs the CopyTextureToBuffer.
+    EXPECT_TEXTURE_EQ(expected.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test copying from a buffer to the stencil aspect of a texture.
+TEST_P(DepthStencilCopyTests, ToStencilAspect) {
+    // TODO(enga): Figure out why this fails on Vulkan Intel
+    // Results are shifted by 1 byte on Windows, and crash/hang on Linux.
+    DAWN_SKIP_TEST_IF(IsVulkan() && IsIntel());
+
+    // TODO(enga): Figure out why this fails on MacOS Intel Iris.
+    // It passes on AMD Radeon Pro and Intel HD Graphics 630.
+    DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
+
+    // Create a stencil texture
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+    wgpu::TextureDescriptor texDescriptor = {};
+    texDescriptor.size = {kWidth, kHeight, 1};
+    texDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc |
+                          wgpu::TextureUsage::CopyDst;
+    wgpu::Texture depthStencilTexture = device.CreateTexture(&texDescriptor);
+
+    // Bytes per row for the stencil data we will upload.
+    // TODO(enga): Use WriteTexture when implemented everywhere.
+    uint32_t bytesPerRow = Align(kWidth * sizeof(uint8_t), kTextureBytesPerRowAlignment);
+
+    wgpu::BufferDescriptor bufferDesc = {};
+    bufferDesc.usage = wgpu::BufferUsage::CopySrc;
+    bufferDesc.size = kHeight * bytesPerRow;
+    bufferDesc.mappedAtCreation = true;
+
+    std::vector<uint8_t> stencilData = {
+        1u,  2u,  3u,  4u,   //
+        5u,  6u,  7u,  8u,   //
+        9u,  10u, 11u, 12u,  //
+        13u, 14u, 15u, 16u,  //
+    };
+
+    // After copying stencil data in, we will decrement stencil values in the bottom left
+    // of the screen. This is the expected result.
+    std::vector<uint8_t> expectedStencilData = {
+        1u,  2u,  3u,  4u,   //
+        5u,  6u,  7u,  8u,   //
+        8u,  9u,  11u, 12u,  //
+        12u, 13u, 15u, 16u,  //
+    };
+
+    // Copy the stencil data into the buffer.
+    wgpu::Buffer buffer = device.CreateBuffer(&bufferDesc);
+    uint8_t* mappedData = static_cast<uint8_t*>(buffer.GetMappedRange());
+    for (uint32_t r = 0; r < kHeight; ++r) {
+        memcpy(mappedData + r * bytesPerRow, &stencilData[r * kWidth], kWidth);
+    }
+    buffer.Unmap();
+
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+        // Clear depth to 0.7, so we can check that the stencil copy doesn't mutate the depth.
+        utils::ComboRenderPassDescriptor passDescriptor({}, depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.clearDepth = 0.7;
+
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.EndPass();
+
+        // Copy from the buffer into the stencil aspect of the texture.
+        wgpu::BufferCopyView bufferCopy = utils::CreateBufferCopyView(buffer, 0, bytesPerRow, 0);
+        wgpu::TextureCopyView textureCopy = utils::CreateTextureCopyView(
+            depthStencilTexture, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+
+        commandEncoder.CopyBufferToTexture(&bufferCopy, &textureCopy, &texDescriptor.size);
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+        // Create a render pipeline which decrements the stencil value for passing fragments.
+        // A quad is drawn in the bottom left.
+        utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
+        renderPipelineDesc.vertexStage.module = mVertexModule;
+        renderPipelineDesc.cFragmentStage.module = mFragmentModule;
+        renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
+        renderPipelineDesc.cDepthStencilState.stencilFront.passOp =
+            wgpu::StencilOperation::DecrementClamp;
+        renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
+        renderPipelineDesc.colorStateCount = 0;
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+
+        // Create a render pass which loads the stencil. We want to load the values we
+        // copied in. Also load the canary depth values so they're not lost.
+        utils::ComboRenderPassDescriptor passDescriptor({}, depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.stencilLoadOp = wgpu::LoadOp::Load;
+        passDescriptor.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Load;
+
+        // Draw the quad in the bottom left (two triangles).
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.SetPipeline(pipeline);
+        pass.Draw(6);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
+
+    // Copy back the stencil data and check it matches the expected decremented result.
+    EXPECT_TEXTURE_EQ(expectedStencilData.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+
+    // Check that the depth buffer isn't changed.
+    // We do this by executing a draw call that only passes the depth test if
+    // the depth is equal to the current depth buffer.
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+        // Make the color attachment that we'll use to read back.
+        wgpu::TextureDescriptor colorTexDesc = {};
+        colorTexDesc.size = {kWidth, kHeight, 1};
+        colorTexDesc.format = wgpu::TextureFormat::R32Uint;
+        colorTexDesc.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+        wgpu::Texture colorTexture = device.CreateTexture(&colorTexDesc);
+
+        // Pipeline for a full screen quad.
+        utils::ComboRenderPipelineDescriptor pipelineDescriptor(device);
+
+        pipelineDescriptor.vertexStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
+    #version 450
+    void main() {
+        const vec2 pos[3] = vec2[3](vec2(-1.f, -1.f), vec2(3.f, -1.f), vec2(-1.f, 3.f));
+                    gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+        gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+    })");
+
+        // Write out 0.7 for depth. This is the same canary value we wrote previously.
+        pipelineDescriptor.cFragmentStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
+    #version 450
+
+    layout(location = 0) out uint result;
+    void main() {
+        result = 1u;
+        gl_FragDepth = 0.7;
+    })");
+
+        // Pass the depth test only if the depth is equal.
+        pipelineDescriptor.primitiveTopology = wgpu::PrimitiveTopology::TriangleList;
+        pipelineDescriptor.depthStencilState = &pipelineDescriptor.cDepthStencilState;
+        pipelineDescriptor.cDepthStencilState.format = texDescriptor.format;
+        pipelineDescriptor.cDepthStencilState.depthCompare = wgpu::CompareFunction::Equal;
+        pipelineDescriptor.cColorStates[0].format = colorTexDesc.format;
+
+        utils::ComboRenderPassDescriptor passDescriptor({colorTexture.CreateView()},
+                                                        depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Load;
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDescriptor);
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.SetPipeline(pipeline);
+        pass.Draw(3);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+
+        std::vector<uint32_t> colorData(16, 1u);
+        EXPECT_TEXTURE_EQ(colorData.data(), colorTexture, 0, 0, kWidth, kHeight, 0, 0);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(DepthStencilCopyTests, MetalBackend(), VulkanBackend());