From 00dbf0bf67d58fa064bb24787268733718bea34d Mon Sep 17 00:00:00 2001 From: Austin Eng Date: Mon, 30 Jan 2023 17:13:12 +0000 Subject: [PATCH] Add workarounds to blit a buffer to a depth/stencil texture Works around issues on Metal Intel where CopyB2T and WriteTexture with depth/stencil textures do not work correctly. Fixes test failures with depth16unorm in the CTS. Deletes UseTempTextureInStencilTextureToBufferCopy in favor of the stencil blit. The former supposedly fixes a problem where the stencil data is not flushed into the real stencil texture by performing another T2T copy. This only works because the Metal Intel backend also happens to allocate s8 as d32s8. Copying the depth aspect as well seems to make the driver remember to flush the data. The stencil blit is a better fix for the problem since it entirely avoids getting the driver into a bad state where the stencil data is not in sync. Fixed: dawn:1389 Change-Id: If34b1d58996157036c164a5bc329e38b5e53f67a Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/117910 Reviewed-by: Loko Kung Kokoro: Kokoro Commit-Queue: Austin Eng --- src/dawn/native/BUILD.gn | 2 + src/dawn/native/BlitBufferToDepthStencil.cpp | 573 ++++++++++++++++++ src/dawn/native/BlitBufferToDepthStencil.h | 76 +++ src/dawn/native/CMakeLists.txt | 2 + src/dawn/native/CommandEncoder.cpp | 83 ++- src/dawn/native/CommandEncoder.h | 21 + src/dawn/native/Device.cpp | 21 +- src/dawn/native/Device.h | 6 +- src/dawn/native/InternalPipelineStore.h | 8 + src/dawn/native/Queue.cpp | 4 +- src/dawn/native/Texture.cpp | 13 + src/dawn/native/Toggles.cpp | 16 +- src/dawn/native/Toggles.h | 3 +- src/dawn/native/d3d12/DeviceD3D12.cpp | 16 +- src/dawn/native/d3d12/DeviceD3D12.h | 2 +- src/dawn/native/metal/DeviceMTL.h | 2 +- src/dawn/native/metal/DeviceMTL.mm | 11 +- src/dawn/native/null/DeviceNull.cpp | 2 +- src/dawn/native/null/DeviceNull.h | 2 +- src/dawn/native/opengl/DeviceGL.cpp | 2 +- src/dawn/native/opengl/DeviceGL.h | 2 +- 
src/dawn/native/vulkan/DeviceVk.cpp | 16 +- src/dawn/native/vulkan/DeviceVk.h | 2 +- .../tests/end2end/DepthStencilCopyTests.cpp | 11 +- src/dawn/tests/end2end/QueueTests.cpp | 2 + src/dawn/tests/end2end/RenderPassTests.cpp | 141 ++++- .../tests/end2end/TextureZeroInitTests.cpp | 3 +- .../tests/unittests/native/mocks/DeviceMock.h | 2 +- webgpu-cts/expectations.txt | 8 - 29 files changed, 951 insertions(+), 101 deletions(-) create mode 100644 src/dawn/native/BlitBufferToDepthStencil.cpp create mode 100644 src/dawn/native/BlitBufferToDepthStencil.h diff --git a/src/dawn/native/BUILD.gn b/src/dawn/native/BUILD.gn index 95bf2640bc..9f4cb17bbe 100644 --- a/src/dawn/native/BUILD.gn +++ b/src/dawn/native/BUILD.gn @@ -199,6 +199,8 @@ source_set("sources") { "BindGroupTracker.h", "BindingInfo.cpp", "BindingInfo.h", + "BlitBufferToDepthStencil.cpp", + "BlitBufferToDepthStencil.h", "Blob.cpp", "Blob.h", "BlobCache.cpp", diff --git a/src/dawn/native/BlitBufferToDepthStencil.cpp b/src/dawn/native/BlitBufferToDepthStencil.cpp new file mode 100644 index 0000000000..173919f328 --- /dev/null +++ b/src/dawn/native/BlitBufferToDepthStencil.cpp @@ -0,0 +1,573 @@ +// Copyright 2023 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "dawn/native/BlitBufferToDepthStencil.h" + +#include + +#include "dawn/common/Assert.h" +#include "dawn/native/BindGroup.h" +#include "dawn/native/CommandBuffer.h" +#include "dawn/native/CommandEncoder.h" +#include "dawn/native/Device.h" +#include "dawn/native/InternalPipelineStore.h" +#include "dawn/native/Queue.h" +#include "dawn/native/RenderPassEncoder.h" +#include "dawn/native/RenderPipeline.h" + +namespace dawn::native { + +namespace { + +constexpr char kBlitRG8ToDepthShaders[] = R"( + +@vertex fn vert_fullscreen_quad( + @builtin(vertex_index) vertex_index : u32 +) -> @builtin(position) vec4 { + const pos = array, 3>( + vec2(-1.0, -1.0), + vec2( 3.0, -1.0), + vec2(-1.0, 3.0)); + return vec4(pos[vertex_index], 0.0, 1.0); +} + +struct Params { + origin : vec2 +}; + +@group(0) @binding(0) var src_tex : texture_2d; +@group(0) @binding(1) var params : Params; + +@fragment fn blit_to_depth( + @builtin(position) position : vec4 +) -> @builtin(frag_depth) f32 { + // Load the source texel. + let src_texel = textureLoad( + src_tex, vec2(position.xy) - params.origin, 0u); + + let depth_u16_val = (src_texel.y << 8u) + src_texel.x; + + const one_over_max : f32 = 1.0 / f32(0xFFFFu); + return f32(depth_u16_val) * one_over_max; +} + +)"; + +constexpr char kBlitStencilShaders[] = R"( + +struct VertexOutputs { + @location(0) @interpolate(flat) stencil_val : u32, + @builtin(position) position : vec4, +}; + +// The instance_index here is not used for instancing. +// It represents the current stencil mask we're testing in the +// source. +// This is a cheap way to get the stencil value into the shader +// since WebGPU doesn't have push constants. 
+@vertex fn vert_fullscreen_quad( + @builtin(vertex_index) vertex_index : u32, + @builtin(instance_index) instance_index: u32, +) -> VertexOutputs { + const pos = array, 3>( + vec2(-1.0, -1.0), + vec2( 3.0, -1.0), + vec2(-1.0, 3.0)); + return VertexOutputs( + instance_index, + vec4(pos[vertex_index], 0.0, 1.0), + ); +} + +struct Params { + origin : vec2 +}; + +@group(0) @binding(0) var src_tex : texture_2d; +@group(0) @binding(1) var params : Params; + +// Do nothing (but also don't discard). Used for clearing +// stencil to 0. +@fragment fn frag_noop() {} + +// Discard the fragment if the source texture doesn't +// have the stencil_val. +@fragment fn frag_check_src_stencil(input : VertexOutputs) { + // Load the source stencil value. + let src_val : u32 = textureLoad( + src_tex, vec2(input.position.xy) - params.origin, 0u)[0]; + + // Discard it if it doesn't contain the stencil reference. + if ((src_val & input.stencil_val) == 0u) { + discard; + } +} + +)"; + +ResultOrError> GetOrCreateRG8ToDepth16UnormPipeline(DeviceBase* device) { + InternalPipelineStore* store = device->GetInternalPipelineStore(); + if (store->blitRG8ToDepth16UnormPipeline != nullptr) { + return store->blitRG8ToDepth16UnormPipeline; + } + + ShaderModuleWGSLDescriptor wgslDesc = {}; + ShaderModuleDescriptor shaderModuleDesc = {}; + shaderModuleDesc.nextInChain = &wgslDesc; + wgslDesc.source = kBlitRG8ToDepthShaders; + + Ref shaderModule; + DAWN_TRY_ASSIGN(shaderModule, device->CreateShaderModule(&shaderModuleDesc)); + + FragmentState fragmentState = {}; + fragmentState.module = shaderModule.Get(); + fragmentState.entryPoint = "blit_to_depth"; + + DepthStencilState dsState = {}; + dsState.format = wgpu::TextureFormat::Depth16Unorm; + dsState.depthWriteEnabled = true; + + RenderPipelineDescriptor renderPipelineDesc = {}; + renderPipelineDesc.vertex.module = shaderModule.Get(); + renderPipelineDesc.vertex.entryPoint = "vert_fullscreen_quad"; + renderPipelineDesc.depthStencil = &dsState; + 
renderPipelineDesc.fragment = &fragmentState; + + Ref pipeline; + DAWN_TRY_ASSIGN(pipeline, device->CreateRenderPipeline(&renderPipelineDesc)); + + store->blitRG8ToDepth16UnormPipeline = pipeline; + return pipeline; +} + +ResultOrError GetOrCreateR8ToStencilPipelines( + DeviceBase* device, + wgpu::TextureFormat format, + BindGroupLayoutBase* bgl) { + InternalPipelineStore* store = device->GetInternalPipelineStore(); + { + auto it = store->blitR8ToStencilPipelines.find(format); + if (it != store->blitR8ToStencilPipelines.end()) { + return InternalPipelineStore::BlitR8ToStencilPipelines{it->second}; + } + } + + Ref pipelineLayout; + { + PipelineLayoutDescriptor plDesc = {}; + plDesc.bindGroupLayoutCount = 1; + + plDesc.bindGroupLayouts = &bgl; + DAWN_TRY_ASSIGN(pipelineLayout, device->CreatePipelineLayout(&plDesc)); + } + + ShaderModuleWGSLDescriptor wgslDesc = {}; + ShaderModuleDescriptor shaderModuleDesc = {}; + shaderModuleDesc.nextInChain = &wgslDesc; + wgslDesc.source = kBlitStencilShaders; + + Ref shaderModule; + DAWN_TRY_ASSIGN(shaderModule, device->CreateShaderModule(&shaderModuleDesc)); + + FragmentState fragmentState = {}; + fragmentState.module = shaderModule.Get(); + + DepthStencilState dsState = {}; + dsState.format = format; + dsState.depthWriteEnabled = false; + dsState.stencilFront.passOp = wgpu::StencilOperation::Replace; + + RenderPipelineDescriptor renderPipelineDesc = {}; + renderPipelineDesc.layout = pipelineLayout.Get(); + renderPipelineDesc.vertex.module = shaderModule.Get(); + renderPipelineDesc.vertex.entryPoint = "vert_fullscreen_quad"; + renderPipelineDesc.depthStencil = &dsState; + renderPipelineDesc.fragment = &fragmentState; + + // Build a pipeline to clear stencil to 0. We need a pipeline, and not just a render pass load + // op because the copy region may be a subregion of the stencil texture. 
+ Ref clearPipeline; + fragmentState.entryPoint = "frag_noop"; + DAWN_TRY_ASSIGN(clearPipeline, device->CreateRenderPipeline(&renderPipelineDesc)); + + // Build 8 pipelines masked to replace each bit of the stencil. + std::array, 8> setStencilPipelines; + fragmentState.entryPoint = "frag_check_src_stencil"; + for (uint32_t bit = 0; bit < 8; ++bit) { + dsState.stencilWriteMask = 1u << bit; + DAWN_TRY_ASSIGN(setStencilPipelines[bit], + device->CreateRenderPipeline(&renderPipelineDesc)); + } + + InternalPipelineStore::BlitR8ToStencilPipelines pipelines{std::move(clearPipeline), + std::move(setStencilPipelines)}; + store->blitR8ToStencilPipelines[format] = pipelines; + return pipelines; +} + +MaybeError BlitRG8ToDepth16Unorm(DeviceBase* device, + CommandEncoder* commandEncoder, + TextureBase* dataTexture, + const TextureCopy& dst, + const Extent3D& copyExtent) { + ASSERT(dst.texture->GetFormat().format == wgpu::TextureFormat::Depth16Unorm); + ASSERT(dataTexture->GetFormat().format == wgpu::TextureFormat::RG8Uint); + + // Allow internal usages since we need to use the destination + // as a render attachment. 
+ auto scope = commandEncoder->MakeInternalUsageScope(); + + Ref pipeline; + DAWN_TRY_ASSIGN(pipeline, GetOrCreateRG8ToDepth16UnormPipeline(device)); + + Ref bgl; + DAWN_TRY_ASSIGN(bgl, pipeline->GetBindGroupLayout(0)); + + for (uint32_t z = 0; z < copyExtent.depthOrArrayLayers; ++z) { + Ref srcView; + { + TextureViewDescriptor viewDesc = {}; + viewDesc.dimension = wgpu::TextureViewDimension::e2D; + viewDesc.baseArrayLayer = z; + viewDesc.arrayLayerCount = 1; + viewDesc.mipLevelCount = 1; + DAWN_TRY_ASSIGN(srcView, dataTexture->CreateView(&viewDesc)); + } + + Ref dstView; + { + TextureViewDescriptor viewDesc = {}; + viewDesc.dimension = wgpu::TextureViewDimension::e2D; + viewDesc.baseArrayLayer = dst.origin.z + z; + viewDesc.arrayLayerCount = 1; + viewDesc.baseMipLevel = dst.mipLevel; + viewDesc.mipLevelCount = 1; + DAWN_TRY_ASSIGN(dstView, dst.texture->CreateView(&viewDesc)); + } + + Ref paramsBuffer; + { + BufferDescriptor bufferDesc = {}; + bufferDesc.size = sizeof(uint32_t) * 2; + bufferDesc.usage = wgpu::BufferUsage::Uniform; + bufferDesc.mappedAtCreation = true; + DAWN_TRY_ASSIGN(paramsBuffer, device->CreateBuffer(&bufferDesc)); + + uint32_t* params = + static_cast(paramsBuffer->GetMappedRange(0, bufferDesc.size)); + params[0] = dst.origin.x; + params[1] = dst.origin.y; + paramsBuffer->Unmap(); + } + + Ref bindGroup; + { + std::array bgEntries = {}; + bgEntries[0].binding = 0; + bgEntries[0].textureView = srcView.Get(); + bgEntries[1].binding = 1; + bgEntries[1].buffer = paramsBuffer.Get(); + + BindGroupDescriptor bgDesc = {}; + bgDesc.layout = bgl.Get(); + bgDesc.entryCount = bgEntries.size(); + bgDesc.entries = bgEntries.data(); + DAWN_TRY_ASSIGN(bindGroup, device->CreateBindGroup(&bgDesc)); + } + + RenderPassDepthStencilAttachment dsAttachment; + dsAttachment.view = dstView.Get(); + dsAttachment.depthLoadOp = wgpu::LoadOp::Load; + dsAttachment.depthStoreOp = wgpu::StoreOp::Store; + + RenderPassDescriptor rpDesc = {}; + rpDesc.depthStencilAttachment = 
&dsAttachment; + + Ref pass = AcquireRef(commandEncoder->APIBeginRenderPass(&rpDesc)); + // Bind the resources. + pass->APISetBindGroup(0, bindGroup.Get()); + // Discard all fragments outside the copy region. + pass->APISetScissorRect(dst.origin.x, dst.origin.y, copyExtent.width, copyExtent.height); + + // Draw to perform the blit. + pass->APISetPipeline(pipeline.Get()); + pass->APIDraw(3, 1, 0, 0); + + pass->APIEnd(); + } + return {}; +} + +MaybeError BlitR8ToStencil(DeviceBase* device, + CommandEncoder* commandEncoder, + TextureBase* dataTexture, + const TextureCopy& dst, + const Extent3D& copyExtent) { + const Format& format = dst.texture->GetFormat(); + ASSERT(dst.aspect == Aspect::Stencil); + + // Allow internal usages since we need to use the destination + // as a render attachment. + auto scope = commandEncoder->MakeInternalUsageScope(); + + // This bgl is the same for all the render pipelines. + Ref bgl; + { + std::array bglEntries = {}; + // Binding 0: the r8uint texture. + bglEntries[0].binding = 0; + bglEntries[0].visibility = wgpu::ShaderStage::Fragment; + bglEntries[0].texture.sampleType = wgpu::TextureSampleType::Uint; + // Binding 1: the params buffer. + bglEntries[1].binding = 1; + bglEntries[1].visibility = wgpu::ShaderStage::Fragment; + bglEntries[1].buffer.type = wgpu::BufferBindingType::Uniform; + bglEntries[1].buffer.minBindingSize = 2 * sizeof(uint32_t); + + BindGroupLayoutDescriptor bglDesc = {}; + bglDesc.entryCount = bglEntries.size(); + bglDesc.entries = bglEntries.data(); + + DAWN_TRY_ASSIGN(bgl, device->CreateBindGroupLayout(&bglDesc)); + } + + InternalPipelineStore::BlitR8ToStencilPipelines pipelines; + DAWN_TRY_ASSIGN(pipelines, GetOrCreateR8ToStencilPipelines(device, format.format, bgl.Get())); + + // Build the params buffer, containing the copy dst origin. 
+ Ref paramsBuffer; + { + BufferDescriptor bufferDesc = {}; + bufferDesc.size = sizeof(uint32_t) * 2; + bufferDesc.usage = wgpu::BufferUsage::Uniform; + bufferDesc.mappedAtCreation = true; + DAWN_TRY_ASSIGN(paramsBuffer, device->CreateBuffer(&bufferDesc)); + + uint32_t* params = static_cast(paramsBuffer->GetMappedRange(0, bufferDesc.size)); + params[0] = dst.origin.x; + params[1] = dst.origin.y; + paramsBuffer->Unmap(); + } + + // For each layer, blit the stencil data. + for (uint32_t z = 0; z < copyExtent.depthOrArrayLayers; ++z) { + Ref srcView; + { + TextureViewDescriptor viewDesc = {}; + viewDesc.dimension = wgpu::TextureViewDimension::e2D; + viewDesc.baseArrayLayer = z; + viewDesc.arrayLayerCount = 1; + viewDesc.mipLevelCount = 1; + DAWN_TRY_ASSIGN(srcView, dataTexture->CreateView(&viewDesc)); + } + + Ref dstView; + { + TextureViewDescriptor viewDesc = {}; + viewDesc.dimension = wgpu::TextureViewDimension::e2D; + viewDesc.baseArrayLayer = dst.origin.z + z; + viewDesc.arrayLayerCount = 1; + viewDesc.baseMipLevel = dst.mipLevel; + viewDesc.mipLevelCount = 1; + DAWN_TRY_ASSIGN(dstView, dst.texture->CreateView(&viewDesc)); + } + + Ref bindGroup; + { + std::array bgEntries = {}; + bgEntries[0].binding = 0; + bgEntries[0].textureView = srcView.Get(); + bgEntries[1].binding = 1; + bgEntries[1].buffer = paramsBuffer.Get(); + + BindGroupDescriptor bgDesc = {}; + bgDesc.layout = bgl.Get(); + bgDesc.entryCount = bgEntries.size(); + bgDesc.entries = bgEntries.data(); + DAWN_TRY_ASSIGN(bindGroup, + device->CreateBindGroup(&bgDesc, UsageValidationMode::Internal)); + } + + RenderPassDepthStencilAttachment dsAttachment; + dsAttachment.view = dstView.Get(); + if (format.HasDepth()) { + dsAttachment.depthLoadOp = wgpu::LoadOp::Load; + dsAttachment.depthStoreOp = wgpu::StoreOp::Store; + } + dsAttachment.stencilLoadOp = wgpu::LoadOp::Load; + dsAttachment.stencilStoreOp = wgpu::StoreOp::Store; + + RenderPassDescriptor rpDesc = {}; + rpDesc.depthStencilAttachment = &dsAttachment; + 
+ + Ref pass = AcquireRef(commandEncoder->APIBeginRenderPass(&rpDesc)); + // Bind the resources. + pass->APISetBindGroup(0, bindGroup.Get()); + // Discard all fragments outside the copy region. + pass->APISetScissorRect(dst.origin.x, dst.origin.y, copyExtent.width, copyExtent.height); + + // Clear the copy region to 0. + pass->APISetStencilReference(0); + pass->APISetPipeline(pipelines.clearPipeline.Get()); + pass->APIDraw(3, 1, 0, 0); + + // Perform 8 draws. Each will load the source stencil data, and will + // set the bit index in the destination stencil attachment if the + // source also has that bit using stencil operation `Replace`. + // If it doesn't match, the fragment will be discarded. + pass->APISetStencilReference(255); + for (uint32_t bit = 0; bit < 8; ++bit) { + pass->APISetPipeline(pipelines.setStencilPipelines[bit].Get()); + // Draw one instance, and use the stencil value as firstInstance. + // This is a cheap way to get the stencil value into the shader + // since WebGPU doesn't have push constants. 
+ pass->APIDraw(3, 1, 0, 1u << bit); + } + pass->APIEnd(); + } + return {}; +} + +} // anonymous namespace + +MaybeError BlitStagingBufferToDepth(DeviceBase* device, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent) { + const Format& format = dst.texture->GetFormat(); + ASSERT(format.format == wgpu::TextureFormat::Depth16Unorm); + + TextureDescriptor dataTextureDesc = {}; + dataTextureDesc.format = wgpu::TextureFormat::RG8Uint; + dataTextureDesc.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::TextureBinding; + dataTextureDesc.size = copyExtent; + + Ref dataTexture; + DAWN_TRY_ASSIGN(dataTexture, device->CreateTexture(&dataTextureDesc)); + { + TextureCopy rg8Dst; + rg8Dst.texture = dataTexture.Get(); + rg8Dst.mipLevel = 0; + rg8Dst.origin = {}; + rg8Dst.aspect = Aspect::Color; + DAWN_TRY(device->CopyFromStagingToTexture(buffer, src, rg8Dst, copyExtent)); + } + + Ref commandEncoder; + DAWN_TRY_ASSIGN(commandEncoder, device->CreateCommandEncoder()); + + DAWN_TRY( + BlitRG8ToDepth16Unorm(device, commandEncoder.Get(), dataTexture.Get(), dst, copyExtent)); + + Ref commandBuffer; + DAWN_TRY_ASSIGN(commandBuffer, commandEncoder->Finish()); + + CommandBufferBase* commands = commandBuffer.Get(); + device->GetQueue()->APISubmit(1, &commands); + return {}; +} + +MaybeError BlitBufferToDepth(DeviceBase* device, + CommandEncoder* commandEncoder, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent) { + const Format& format = dst.texture->GetFormat(); + ASSERT(format.format == wgpu::TextureFormat::Depth16Unorm); + + TextureDescriptor dataTextureDesc = {}; + dataTextureDesc.format = wgpu::TextureFormat::RG8Uint; + dataTextureDesc.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::TextureBinding; + dataTextureDesc.size = copyExtent; + + Ref dataTexture; + DAWN_TRY_ASSIGN(dataTexture, device->CreateTexture(&dataTextureDesc)); + { + ImageCopyBuffer 
bufferSrc; + bufferSrc.buffer = buffer; + bufferSrc.layout = src; + + ImageCopyTexture textureDst; + textureDst.texture = dataTexture.Get(); + commandEncoder->APICopyBufferToTexture(&bufferSrc, &textureDst, &copyExtent); + } + + DAWN_TRY(BlitRG8ToDepth16Unorm(device, commandEncoder, dataTexture.Get(), dst, copyExtent)); + return {}; +} + +MaybeError BlitStagingBufferToStencil(DeviceBase* device, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent) { + TextureDescriptor dataTextureDesc = {}; + dataTextureDesc.format = wgpu::TextureFormat::R8Uint; + dataTextureDesc.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::TextureBinding; + dataTextureDesc.size = copyExtent; + + Ref dataTexture; + DAWN_TRY_ASSIGN(dataTexture, device->CreateTexture(&dataTextureDesc)); + { + TextureCopy r8Dst; + r8Dst.texture = dataTexture.Get(); + r8Dst.mipLevel = 0; + r8Dst.origin = {}; + r8Dst.aspect = Aspect::Color; + DAWN_TRY(device->CopyFromStagingToTexture(buffer, src, r8Dst, copyExtent)); + } + + Ref commandEncoder; + DAWN_TRY_ASSIGN(commandEncoder, device->CreateCommandEncoder()); + + DAWN_TRY(BlitR8ToStencil(device, commandEncoder.Get(), dataTexture.Get(), dst, copyExtent)); + + Ref commandBuffer; + DAWN_TRY_ASSIGN(commandBuffer, commandEncoder->Finish()); + + CommandBufferBase* commands = commandBuffer.Get(); + device->GetQueue()->APISubmit(1, &commands); + return {}; +} + +MaybeError BlitBufferToStencil(DeviceBase* device, + CommandEncoder* commandEncoder, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent) { + TextureDescriptor dataTextureDesc = {}; + dataTextureDesc.format = wgpu::TextureFormat::R8Uint; + dataTextureDesc.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::TextureBinding; + dataTextureDesc.size = copyExtent; + + Ref dataTexture; + DAWN_TRY_ASSIGN(dataTexture, device->CreateTexture(&dataTextureDesc)); + { + ImageCopyBuffer bufferSrc; + 
bufferSrc.buffer = buffer; + bufferSrc.layout = src; + + ImageCopyTexture textureDst; + textureDst.texture = dataTexture.Get(); + commandEncoder->APICopyBufferToTexture(&bufferSrc, &textureDst, &copyExtent); + } + + DAWN_TRY(BlitR8ToStencil(device, commandEncoder, dataTexture.Get(), dst, copyExtent)); + return {}; +} + +} // namespace dawn::native diff --git a/src/dawn/native/BlitBufferToDepthStencil.h b/src/dawn/native/BlitBufferToDepthStencil.h new file mode 100644 index 0000000000..ada2d8b78b --- /dev/null +++ b/src/dawn/native/BlitBufferToDepthStencil.h @@ -0,0 +1,76 @@ +// Copyright 2023 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_DAWN_NATIVE_BLITBUFFERTODEPTHSTENCIL_H_ +#define SRC_DAWN_NATIVE_BLITBUFFERTODEPTHSTENCIL_H_ + +#include "dawn/native/Error.h" + +namespace dawn::native { + +struct TextureCopy; + +// BlitBufferToDepth works around issues where copying from a buffer +// to depth does not work on some drivers. +// Currently, only depth16unorm textures can be CopyDst, so only depth16unorm +// is supported. +// It does the following: +// - Copies buffer data to an rg8uint texture. +// - Sets the scissor rect to the copy rect. +// - Uploads the copy origin to a uniform buffer. +// - For each destination layer: +// - Performs a draw to sample the rg8uint data, computes the +// floating point depth value, and writes the frag depth. 
+ +MaybeError BlitStagingBufferToDepth(DeviceBase* device, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent); + +MaybeError BlitBufferToDepth(DeviceBase* device, + CommandEncoder* commandEncoder, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent); + +// BlitBufferToStencil works around issues where copying from a buffer +// to stencil does not work on some drivers. +// It does the following: +// - Copies buffer data to an r8uint texture. +// - Sets the scissor rect to the copy rect. +// - Uploads the copy origin to a uniform buffer. +// - For each destination layer: +// - Performs a draw to clear stencil to 0. +// - Performs 8 draws, one for each bit of stencil, to set the respective +// stencil bit to 1, if the source r8 texture also has that bit set. +// If the source r8 texture does not, the fragment is discarded. + +MaybeError BlitStagingBufferToStencil(DeviceBase* device, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent); + +MaybeError BlitBufferToStencil(DeviceBase* device, + CommandEncoder* commandEncoder, + BufferBase* buffer, + const TextureDataLayout& src, + const TextureCopy& dst, + const Extent3D& copyExtent); + +} // namespace dawn::native + +#endif // SRC_DAWN_NATIVE_BLITBUFFERTODEPTHSTENCIL_H_ diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt index 7f5eb2e30b..0628bfeb92 100644 --- a/src/dawn/native/CMakeLists.txt +++ b/src/dawn/native/CMakeLists.txt @@ -47,6 +47,8 @@ target_sources(dawn_native PRIVATE "BindGroupTracker.h" "BindingInfo.cpp" "BindingInfo.h" + "BlitBufferToDepthStencil.cpp" + "BlitBufferToDepthStencil.h" "Blob.cpp" "Blob.h" "BlobCache.cpp" diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp index df7db6d778..d0a58debaf 100644 --- a/src/dawn/native/CommandEncoder.cpp +++ b/src/dawn/native/CommandEncoder.cpp @@ 
-22,6 +22,7 @@ #include "dawn/common/Math.h" #include "dawn/native/ApplyClearColorValueWithDrawHelper.h" #include "dawn/native/BindGroup.h" +#include "dawn/native/BlitBufferToDepthStencil.h" #include "dawn/native/Buffer.h" #include "dawn/native/ChainUtils_autogen.h" #include "dawn/native/CommandBuffer.h" @@ -1218,17 +1219,35 @@ void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source, TextureDataLayout srcLayout = source->layout; ApplyDefaultTextureDataLayoutOptions(&srcLayout, blockInfo, *copySize); + TextureCopy dst; + dst.texture = destination->texture; + dst.origin = destination->origin; + dst.mipLevel = destination->mipLevel; + dst.aspect = ConvertAspect(destination->texture->GetFormat(), destination->aspect); + + if (dst.aspect == Aspect::Depth && + GetDevice()->IsToggleEnabled(Toggle::UseBlitForBufferToDepthTextureCopy)) { + DAWN_TRY_CONTEXT( + BlitBufferToDepth(GetDevice(), this, source->buffer, srcLayout, dst, *copySize), + "copying from %s to depth aspect of %s using blit workaround.", source->buffer, + dst.texture.Get()); + return {}; + } else if (dst.aspect == Aspect::Stencil && + GetDevice()->IsToggleEnabled(Toggle::UseBlitForBufferToStencilTextureCopy)) { + DAWN_TRY_CONTEXT(BlitBufferToStencil(GetDevice(), this, source->buffer, srcLayout, + dst, *copySize), + "copying from %s to stencil aspect of %s using blit workaround.", + source->buffer, dst.texture.Get()); + return {}; + } + CopyBufferToTextureCmd* copy = allocator->Allocate(Command::CopyBufferToTexture); copy->source.buffer = source->buffer; copy->source.offset = srcLayout.offset; copy->source.bytesPerRow = srcLayout.bytesPerRow; copy->source.rowsPerImage = srcLayout.rowsPerImage; - copy->destination.texture = destination->texture; - copy->destination.origin = destination->origin; - copy->destination.mipLevel = destination->mipLevel; - copy->destination.aspect = - ConvertAspect(destination->texture->GetFormat(), destination->aspect); + copy->destination = dst; copy->copySize = 
*copySize; return {}; @@ -1277,45 +1296,12 @@ void CommandEncoder::APICopyTextureToBuffer(const ImageCopyTexture* source, TextureDataLayout dstLayout = destination->layout; ApplyDefaultTextureDataLayoutOptions(&dstLayout, blockInfo, *copySize); - TextureCopy copySrc; - copySrc.texture = source->texture; - copySrc.origin = source->origin; - copySrc.mipLevel = source->mipLevel; - copySrc.aspect = ConvertAspect(source->texture->GetFormat(), source->aspect); - - if (copySrc.aspect == Aspect::Stencil && - GetDevice()->IsToggleEnabled(Toggle::UseTempTextureInStencilTextureToBufferCopy)) { - // Encode a copy to an intermediate texture. - TextureDescriptor desc = {}; - desc.format = source->texture->GetFormat().format; - desc.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst; - desc.size = *copySize; - - Ref intermediateTexture; - DAWN_TRY_ASSIGN(intermediateTexture, GetDevice()->CreateTexture(&desc)); - - // Allocate the intermediate t2t command. - Aspect aspect = - ConvertAspect(source->texture->GetFormat(), wgpu::TextureAspect::All); - CopyTextureToTextureCmd* t2t = - allocator->Allocate(Command::CopyTextureToTexture); - t2t->source = copySrc; - t2t->source.aspect = aspect; - t2t->destination.texture = intermediateTexture; - t2t->destination.origin = {}; - t2t->destination.mipLevel = 0; - t2t->destination.aspect = aspect; - t2t->copySize = *copySize; - - // Replace the `copySrc` with the intermediate texture. 
- copySrc.texture = intermediateTexture; - copySrc.mipLevel = 0; - copySrc.origin = {}; - } - CopyTextureToBufferCmd* t2b = allocator->Allocate(Command::CopyTextureToBuffer); - t2b->source = copySrc; + t2b->source.texture = source->texture; + t2b->source.origin = source->origin; + t2b->source.mipLevel = source->mipLevel; + t2b->source.aspect = ConvertAspect(source->texture->GetFormat(), source->aspect); t2b->destination.buffer = destination->buffer; t2b->destination.offset = dstLayout.offset; t2b->destination.bytesPerRow = dstLayout.bytesPerRow; @@ -1657,4 +1643,17 @@ MaybeError CommandEncoder::ValidateFinish() const { return {}; } +CommandEncoder::InternalUsageScope CommandEncoder::MakeInternalUsageScope() { + return InternalUsageScope(this); +} + +CommandEncoder::InternalUsageScope::InternalUsageScope(CommandEncoder* encoder) + : mEncoder(encoder), mUsageValidationMode(mEncoder->mUsageValidationMode) { + mEncoder->mUsageValidationMode = UsageValidationMode::Internal; +} + +CommandEncoder::InternalUsageScope::~InternalUsageScope() { + mEncoder->mUsageValidationMode = mUsageValidationMode; +} + } // namespace dawn::native diff --git a/src/dawn/native/CommandEncoder.h b/src/dawn/native/CommandEncoder.h index d1e20f353a..3b8f5fd52e 100644 --- a/src/dawn/native/CommandEncoder.h +++ b/src/dawn/native/CommandEncoder.h @@ -96,6 +96,27 @@ class CommandEncoder final : public ApiObjectBase { ResultOrError> Finish( const CommandBufferDescriptor* descriptor = nullptr); + // `InternalUsageScope` is a scoped class that temporarily changes validation such that the + // command encoder includes internal resource usages. + friend class InternalUsageScope; + class [[nodiscard]] InternalUsageScope : public NonMovable { + public: + ~InternalUsageScope(); + + private: + // Disable heap allocation + void* operator new(size_t) = delete; + + // Only CommandEncoder can make this class. 
+ friend class CommandEncoder; + InternalUsageScope(CommandEncoder* encoder); + + CommandEncoder* mEncoder; + UsageValidationMode mUsageValidationMode; + }; + + InternalUsageScope MakeInternalUsageScope(); + private: CommandEncoder(DeviceBase* device, const CommandEncoderDescriptor* descriptor); CommandEncoder(DeviceBase* device, ObjectBase::ErrorTag tag); diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp index e924d538f2..12bf8db8ba 100644 --- a/src/dawn/native/Device.cpp +++ b/src/dawn/native/Device.cpp @@ -26,6 +26,7 @@ #include "dawn/native/AttachmentState.h" #include "dawn/native/BindGroup.h" #include "dawn/native/BindGroupLayout.h" +#include "dawn/native/BlitBufferToDepthStencil.h" #include "dawn/native/BlobCache.h" #include "dawn/native/Buffer.h" #include "dawn/native/ChainUtils_autogen.h" @@ -1938,11 +1939,25 @@ MaybeError DeviceBase::CopyFromStagingToBuffer(BufferBase* source, return {}; } -MaybeError DeviceBase::CopyFromStagingToTexture(const BufferBase* source, +MaybeError DeviceBase::CopyFromStagingToTexture(BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) { - DAWN_TRY(CopyFromStagingToTextureImpl(source, src, dst, copySizePixels)); + if (dst.aspect == Aspect::Depth && + IsToggleEnabled(Toggle::UseBlitForBufferToDepthTextureCopy)) { + DAWN_TRY_CONTEXT(BlitStagingBufferToDepth(this, source, src, dst, copySizePixels), + "copying from staging buffer to depth aspect of %s using blit workaround.", + dst.texture.Get()); + } else if (dst.aspect == Aspect::Stencil && + IsToggleEnabled(Toggle::UseBlitForBufferToStencilTextureCopy)) { + DAWN_TRY_CONTEXT( + BlitStagingBufferToStencil(this, source, src, dst, copySizePixels), + "copying from staging buffer to stencil aspect of %s using blit workaround.", + dst.texture.Get()); + } else { + DAWN_TRY(CopyFromStagingToTextureImpl(source, src, dst, copySizePixels)); + } + if (GetDynamicUploader()->ShouldFlush()) { 
ForceEventualFlushOfCommands(); } diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h index ac8fd447c8..d04ff48618 100644 --- a/src/dawn/native/Device.h +++ b/src/dawn/native/Device.h @@ -302,9 +302,9 @@ class DeviceBase : public RefCountedWithExternalCount { BufferBase* destination, uint64_t destinationOffset, uint64_t size); - MaybeError CopyFromStagingToTexture(const BufferBase* source, + MaybeError CopyFromStagingToTexture(BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels); DynamicUploader* GetDynamicUploader() const; @@ -532,7 +532,7 @@ class DeviceBase : public RefCountedWithExternalCount { uint64_t size) = 0; virtual MaybeError CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) = 0; wgpu::ErrorCallback mUncapturedErrorCallback = nullptr; diff --git a/src/dawn/native/InternalPipelineStore.h b/src/dawn/native/InternalPipelineStore.h index 6234ec0110..e2560f15a3 100644 --- a/src/dawn/native/InternalPipelineStore.h +++ b/src/dawn/native/InternalPipelineStore.h @@ -57,6 +57,14 @@ struct InternalPipelineStore { Ref renderValidationPipeline; Ref renderValidationShader; Ref dispatchIndirectValidationPipeline; + + Ref blitRG8ToDepth16UnormPipeline; + + struct BlitR8ToStencilPipelines { + Ref clearPipeline; + std::array, 8> setStencilPipelines; + }; + std::unordered_map blitR8ToStencilPipelines; }; } // namespace dawn::native diff --git a/src/dawn/native/Queue.cpp b/src/dawn/native/Queue.cpp index acce952e31..a8d92e304b 100644 --- a/src/dawn/native/Queue.cpp +++ b/src/dawn/native/Queue.cpp @@ -381,8 +381,8 @@ MaybeError QueueBase::WriteTextureImpl(const ImageCopyTexture& destination, DeviceBase* device = GetDevice(); - return device->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, - &textureCopy, writeSizePixel); + return 
device->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, textureCopy, + writeSizePixel); } void QueueBase::APICopyTextureForBrowser(const ImageCopyTexture* source, diff --git a/src/dawn/native/Texture.cpp b/src/dawn/native/Texture.cpp index ad57774100..85b1889304 100644 --- a/src/dawn/native/Texture.cpp +++ b/src/dawn/native/Texture.cpp @@ -562,6 +562,19 @@ TextureBase::TextureBase(DeviceBase* device, if (applyAlwaysResolveIntoZeroLevelAndLayerToggle) { AddInternalUsage(wgpu::TextureUsage::CopyDst); } + + if (mFormat.HasStencil() && (mInternalUsage & wgpu::TextureUsage::CopyDst) && + device->IsToggleEnabled(Toggle::UseBlitForBufferToStencilTextureCopy)) { + // Add render attachment usage so we can blit to the stencil texture + // in a render pass. + AddInternalUsage(wgpu::TextureUsage::RenderAttachment); + } + if (mFormat.HasDepth() && (mInternalUsage & wgpu::TextureUsage::CopyDst) && + device->IsToggleEnabled(Toggle::UseBlitForBufferToDepthTextureCopy)) { + // Add render attachment usage so we can blit to the depth texture + // in a render pass. + AddInternalUsage(wgpu::TextureUsage::RenderAttachment); + } } TextureBase::~TextureBase() = default; diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp index c39ce0554d..4b9d44eed3 100644 --- a/src/dawn/native/Toggles.cpp +++ b/src/dawn/native/Toggles.cpp @@ -342,11 +342,17 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{ "for stencil8 formats if metal_use_combined_depth_stencil_format_for_stencil8 is also " "enabled.", "https://crbug.com/dawn/1389"}}, - {Toggle::UseTempTextureInStencilTextureToBufferCopy, - {"use_temp_texture_in_stencil_texture_to_buffer_copy", - "Use an intermediate temporary texture when copying the stencil aspect of a texture to a " - "buffer. 
Works around an issue where stencil writes from a render pass are not reflected in " - "the destination buffer.", + {Toggle::UseBlitForBufferToDepthTextureCopy, + {"use_blit_for_buffer_to_depth_texture_copy", + "Use a blit instead of a copy command to copy buffer data to the depth aspect of a " + "texture. Works around an issue where depth writes by copy commands are not visible " + "to a render or compute pass.", + "https://crbug.com/dawn/1389"}}, + {Toggle::UseBlitForBufferToStencilTextureCopy, + {"use_blit_for_buffer_to_stencil_texture_copy", + "Use a blit instead of a copy command to copy buffer data to the stencil aspect of a " + "texture. Works around an issue where stencil writes by copy commands are not visible " + "to a render or compute pass.", "https://crbug.com/dawn/1389"}}, {Toggle::DisallowDeprecatedAPIs, {"disallow_deprecated_apis", diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h index b785c5afb1..665c38575b 100644 --- a/src/dawn/native/Toggles.h +++ b/src/dawn/native/Toggles.h @@ -84,7 +84,8 @@ enum class Toggle { D3D12Allocate2DTexturewithCopyDstAsCommittedResource, MetalUseCombinedDepthStencilFormatForStencil8, MetalUseBothDepthAndStencilAttachmentsForCombinedDepthStencilFormats, - UseTempTextureInStencilTextureToBufferCopy, + UseBlitForBufferToDepthTextureCopy, + UseBlitForBufferToStencilTextureCopy, DisallowDeprecatedAPIs, // Unresolved issues. 
diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp index e2752b51d3..2f4e3eb34f 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.cpp +++ b/src/dawn/native/d3d12/DeviceD3D12.cpp @@ -524,15 +524,15 @@ void Device::CopyFromStagingToBufferHelper(CommandRecordingContext* commandConte MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) { CommandRecordingContext* commandContext; DAWN_TRY_ASSIGN(commandContext, GetPendingCommandContext(Device::SubmitMode::Passive)); - Texture* texture = ToBackend(dst->texture.Get()); + Texture* texture = ToBackend(dst.texture.Get()); - SubresourceRange range = GetSubresourcesAffectedByCopy(*dst, copySizePixels); + SubresourceRange range = GetSubresourcesAffectedByCopy(dst, copySizePixels); - if (IsCompleteSubresourceCopiedTo(texture, copySizePixels, dst->mipLevel)) { + if (IsCompleteSubresourceCopiedTo(texture, copySizePixels, dst.mipLevel)) { texture->SetIsSubresourceContentInitialized(true, range); } else { texture->EnsureSubresourceContentInitialized(commandContext, range); @@ -540,10 +540,10 @@ MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, texture->TrackUsageAndTransitionNow(commandContext, wgpu::TextureUsage::CopyDst, range); - RecordBufferTextureCopyWithBufferHandle( - BufferTextureCopyDirection::B2T, commandContext->GetCommandList(), - ToBackend(source)->GetD3D12Resource(), src.offset, src.bytesPerRow, src.rowsPerImage, *dst, - copySizePixels); + RecordBufferTextureCopyWithBufferHandle(BufferTextureCopyDirection::B2T, + commandContext->GetCommandList(), + ToBackend(source)->GetD3D12Resource(), src.offset, + src.bytesPerRow, src.rowsPerImage, dst, copySizePixels); return {}; } diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h index f053ede58d..f39ed406dc 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.h 
+++ b/src/dawn/native/d3d12/DeviceD3D12.h @@ -111,7 +111,7 @@ class Device final : public DeviceBase { MaybeError CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) override; ResultOrError AllocateMemory( diff --git a/src/dawn/native/metal/DeviceMTL.h b/src/dawn/native/metal/DeviceMTL.h index 2bfa3bb886..2b94c3d4f1 100644 --- a/src/dawn/native/metal/DeviceMTL.h +++ b/src/dawn/native/metal/DeviceMTL.h @@ -70,7 +70,7 @@ class Device final : public DeviceBase { uint64_t size) override; MaybeError CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& dataLayout, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) override; uint32_t GetOptimalBytesPerRowAlignment() const override; diff --git a/src/dawn/native/metal/DeviceMTL.mm b/src/dawn/native/metal/DeviceMTL.mm index 77de03e43b..3bc776e91b 100644 --- a/src/dawn/native/metal/DeviceMTL.mm +++ b/src/dawn/native/metal/DeviceMTL.mm @@ -259,9 +259,10 @@ void Device::InitTogglesFromDriver() { #if DAWN_PLATFORM_IS(MACOS) if (gpu_info::IsIntel(vendorId)) { - SetToggle(Toggle::UseTempTextureInStencilTextureToBufferCopy, true); SetToggle(Toggle::MetalUseBothDepthAndStencilAttachmentsForCombinedDepthStencilFormats, true); + SetToggle(Toggle::UseBlitForBufferToStencilTextureCopy, true); + SetToggle(Toggle::UseBlitForBufferToDepthTextureCopy, true); if ([NSProcessInfo.processInfo isOperatingSystemAtLeastVersion:NSOperatingSystemVersion{12, 0, 0}]) { @@ -496,17 +497,17 @@ MaybeError Device::CopyFromStagingToBufferImpl(BufferBase* source, // sets the private storage mode by default for all textures except IOSurfaces on macOS. 
MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& dataLayout, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) { - Texture* texture = ToBackend(dst->texture.Get()); + Texture* texture = ToBackend(dst.texture.Get()); texture->SynchronizeTextureBeforeUse(GetPendingCommandContext()); EnsureDestinationTextureInitialized(GetPendingCommandContext(DeviceBase::SubmitMode::Passive), - texture, *dst, copySizePixels); + texture, dst, copySizePixels); RecordCopyBufferToTexture(GetPendingCommandContext(DeviceBase::SubmitMode::Passive), ToBackend(source)->GetMTLBuffer(), source->GetSize(), dataLayout.offset, dataLayout.bytesPerRow, dataLayout.rowsPerImage, - texture, dst->mipLevel, dst->origin, dst->aspect, copySizePixels); + texture, dst.mipLevel, dst.origin, dst.aspect, copySizePixels); return {}; } diff --git a/src/dawn/native/null/DeviceNull.cpp b/src/dawn/native/null/DeviceNull.cpp index 0d191abd6b..7288de0177 100644 --- a/src/dawn/native/null/DeviceNull.cpp +++ b/src/dawn/native/null/DeviceNull.cpp @@ -234,7 +234,7 @@ MaybeError Device::CopyFromStagingToBufferImpl(BufferBase* source, MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) { return {}; } diff --git a/src/dawn/native/null/DeviceNull.h b/src/dawn/native/null/DeviceNull.h index 6d5fb15548..2a8565a788 100644 --- a/src/dawn/native/null/DeviceNull.h +++ b/src/dawn/native/null/DeviceNull.h @@ -111,7 +111,7 @@ class Device final : public DeviceBase { uint64_t size) override; MaybeError CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) override; MaybeError IncrementMemoryUsage(uint64_t bytes); diff --git a/src/dawn/native/opengl/DeviceGL.cpp b/src/dawn/native/opengl/DeviceGL.cpp index 
fabfd84465..c1cf4fb03f 100644 --- a/src/dawn/native/opengl/DeviceGL.cpp +++ b/src/dawn/native/opengl/DeviceGL.cpp @@ -423,7 +423,7 @@ MaybeError Device::CopyFromStagingToBufferImpl(BufferBase* source, MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) { return DAWN_UNIMPLEMENTED_ERROR("Device unable to copy from staging buffer to texture."); } diff --git a/src/dawn/native/opengl/DeviceGL.h b/src/dawn/native/opengl/DeviceGL.h index 207ffe69c3..8061db3204 100644 --- a/src/dawn/native/opengl/DeviceGL.h +++ b/src/dawn/native/opengl/DeviceGL.h @@ -75,7 +75,7 @@ class Device final : public DeviceBase { MaybeError CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) override; uint32_t GetOptimalBytesPerRowAlignment() const override; diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp index 9375be3e47..120801edb8 100644 --- a/src/dawn/native/vulkan/DeviceVk.cpp +++ b/src/dawn/native/vulkan/DeviceVk.cpp @@ -864,7 +864,7 @@ MaybeError Device::CopyFromStagingToBufferImpl(BufferBase* source, MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) { // There is no need of a barrier to make host writes available and visible to the copy // operation for HOST_COHERENT memory. 
The Vulkan spec for vkQueueSubmit describes that it @@ -873,22 +873,22 @@ MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source, CommandRecordingContext* recordingContext = GetPendingRecordingContext(DeviceBase::SubmitMode::Passive); - VkBufferImageCopy region = ComputeBufferImageCopyRegion(src, *dst, copySizePixels); + VkBufferImageCopy region = ComputeBufferImageCopyRegion(src, dst, copySizePixels); VkImageSubresourceLayers subresource = region.imageSubresource; - SubresourceRange range = GetSubresourcesAffectedByCopy(*dst, copySizePixels); + SubresourceRange range = GetSubresourcesAffectedByCopy(dst, copySizePixels); - if (IsCompleteSubresourceCopiedTo(dst->texture.Get(), copySizePixels, subresource.mipLevel)) { + if (IsCompleteSubresourceCopiedTo(dst.texture.Get(), copySizePixels, subresource.mipLevel)) { // Since texture has been overwritten, it has been "initialized" - dst->texture->SetIsSubresourceContentInitialized(true, range); + dst.texture->SetIsSubresourceContentInitialized(true, range); } else { - ToBackend(dst->texture)->EnsureSubresourceContentInitialized(recordingContext, range); + ToBackend(dst.texture)->EnsureSubresourceContentInitialized(recordingContext, range); } // Insert pipeline barrier to ensure correct ordering with previous memory operations on the // texture. - ToBackend(dst->texture) + ToBackend(dst.texture) ->TransitionUsageNow(recordingContext, wgpu::TextureUsage::CopyDst, range); - VkImage dstImage = ToBackend(dst->texture)->GetHandle(); + VkImage dstImage = ToBackend(dst.texture)->GetHandle(); // Dawn guarantees dstImage be in the TRANSFER_DST_OPTIMAL layout after the // copy command. 
diff --git a/src/dawn/native/vulkan/DeviceVk.h b/src/dawn/native/vulkan/DeviceVk.h index e6011a62ca..c42375ab66 100644 --- a/src/dawn/native/vulkan/DeviceVk.h +++ b/src/dawn/native/vulkan/DeviceVk.h @@ -96,7 +96,7 @@ class Device final : public DeviceBase { uint64_t size) override; MaybeError CopyFromStagingToTextureImpl(const BufferBase* source, const TextureDataLayout& src, - TextureCopy* dst, + const TextureCopy& dst, const Extent3D& copySizePixels) override; // Return the fixed subgroup size to use for compute shaders on this device or 0 if none diff --git a/src/dawn/tests/end2end/DepthStencilCopyTests.cpp b/src/dawn/tests/end2end/DepthStencilCopyTests.cpp index cafb44d0c0..3adb3c7f46 100644 --- a/src/dawn/tests/end2end/DepthStencilCopyTests.cpp +++ b/src/dawn/tests/end2end/DepthStencilCopyTests.cpp @@ -918,7 +918,8 @@ TEST_P(StencilCopyTests, CopyNonzeroMipThenReadWithStencilTest) { DAWN_INSTANTIATE_TEST_P(DepthStencilCopyTests, {D3D12Backend(), MetalBackend(), - MetalBackend({"use_temp_texture_in_stencil_texture_to_buffer_copy"}), + MetalBackend({"use_blit_for_buffer_to_depth_texture_copy", + "use_blit_for_buffer_to_stencil_texture_copy"}), OpenGLBackend(), OpenGLESBackend(), // Test with the vulkan_use_s8 toggle forced on and off. 
VulkanBackend({"vulkan_use_s8"}, {}), @@ -938,7 +939,9 @@ DAWN_INSTANTIATE_TEST_P(DepthCopyFromBufferTests, {D3D12Backend(), D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_" "copy_with_non_zero_buffer_offset"}), - MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()}, + MetalBackend(), + MetalBackend({"use_blit_for_buffer_to_depth_texture_copy"}), + OpenGLBackend(), OpenGLESBackend(), VulkanBackend()}, std::vector(kValidDepthCopyFromBufferFormats.begin(), kValidDepthCopyFromBufferFormats.end())); @@ -948,10 +951,10 @@ DAWN_INSTANTIATE_TEST_P( D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_" "copy_with_non_zero_buffer_offset"}), MetalBackend(), MetalBackend({"metal_use_combined_depth_stencil_format_for_stencil8"}), - MetalBackend({"use_temp_texture_in_stencil_texture_to_buffer_copy"}), MetalBackend( {"metal_use_both_depth_and_stencil_attachments_for_combined_depth_stencil_formats"}), - OpenGLBackend(), OpenGLESBackend(), + MetalBackend({"use_blit_for_buffer_to_stencil_texture_copy"}), OpenGLBackend(), + OpenGLESBackend(), // Test with the vulkan_use_s8 toggle forced on and off. 
VulkanBackend({"vulkan_use_s8"}, {}), VulkanBackend({}, {"vulkan_use_s8"})}, std::vector(utils::kStencilFormats.begin(), utils::kStencilFormats.end())); diff --git a/src/dawn/tests/end2end/QueueTests.cpp b/src/dawn/tests/end2end/QueueTests.cpp index 3e843ceea5..23e6f40ffc 100644 --- a/src/dawn/tests/end2end/QueueTests.cpp +++ b/src/dawn/tests/end2end/QueueTests.cpp @@ -775,6 +775,8 @@ DAWN_INSTANTIATE_TEST(QueueWriteTextureTests, D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_" "copy_with_non_zero_buffer_offset"}), MetalBackend(), + MetalBackend({"use_blit_for_buffer_to_depth_texture_copy", + "use_blit_for_buffer_to_stencil_texture_copy"}), OpenGLBackend(), OpenGLESBackend(), VulkanBackend()); diff --git a/src/dawn/tests/end2end/RenderPassTests.cpp b/src/dawn/tests/end2end/RenderPassTests.cpp index d6606acc54..523ab6f45a 100644 --- a/src/dawn/tests/end2end/RenderPassTests.cpp +++ b/src/dawn/tests/end2end/RenderPassTests.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "dawn/tests/DawnTest.h" +#include +#include +#include "dawn/tests/DawnTest.h" #include "dawn/utils/ComboRenderPipelineDescriptor.h" #include "dawn/utils/WGPUHelpers.h" @@ -163,8 +165,6 @@ TEST_P(RenderPassTest, NoCorrespondingFragmentShaderOutputs) { EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kRed, renderTarget, kRTSize - 1, 1); } -class RenderPassTest_RegressionDawn1071 : public RenderPassTest {}; - DAWN_INSTANTIATE_TEST(RenderPassTest, D3D12Backend(), D3D12Backend({}, {"use_d3d12_render_pass"}), @@ -175,6 +175,7 @@ DAWN_INSTANTIATE_TEST(RenderPassTest, // Test that clearing the lower mips of an R8Unorm texture works. This is a regression test for // dawn:1071 where Intel Metal devices fail to do that correctly, requiring a workaround. 
+class RenderPassTest_RegressionDawn1071 : public RenderPassTest {}; TEST_P(RenderPassTest_RegressionDawn1071, ClearLowestMipOfR8Unorm) { const uint32_t kLastMipLevel = 2; @@ -230,3 +231,137 @@ DAWN_INSTANTIATE_TEST(RenderPassTest_RegressionDawn1071, OpenGLBackend(), OpenGLESBackend(), VulkanBackend()); + +// Test that clearing a depth16unorm texture with multiple subresources works. This is a regression +// test for dawn:1389 where Intel Metal devices fail to do that correctly, requiring a workaround. +class RenderPassTest_RegressionDawn1389 : public RenderPassTest {}; +TEST_P(RenderPassTest_RegressionDawn1389, ClearMultisubresourceAfterWriteDepth16Unorm) { + // TODO(crbug.com/dawn/1492): Support copying to Depth16Unorm on GL. + DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES()); + + // Test all combinations of multi-mip, multi-layer + for (uint32_t mipLevelCount : {1, 5}) { + for (uint32_t arrayLayerCount : {1, 7}) { + // Only clear some of the subresources. + const auto& clearedMips = + mipLevelCount == 1 ? std::vector>{{0, 1}} + : std::vector>{{0, 2}, {3, 4}}; + const auto& clearedLayers = + arrayLayerCount == 1 ? std::vector>{{0, 1}} + : std::vector>{{2, 4}, {6, 7}}; + + // Compute the texture size. + uint32_t width = 1u << (mipLevelCount - 1); + uint32_t height = 1u << (mipLevelCount - 1); + + // Create the texture. + wgpu::TextureDescriptor texDesc; + texDesc.format = wgpu::TextureFormat::Depth16Unorm; + texDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc | + wgpu::TextureUsage::CopyDst; + texDesc.size = {width, height, arrayLayerCount}; + texDesc.mipLevelCount = mipLevelCount; + wgpu::Texture tex = device.CreateTexture(&texDesc); + + // Initialize all subresources with WriteTexture. 
+ for (uint32_t level = 0; level < mipLevelCount; ++level) { + for (uint32_t layer = 0; layer < arrayLayerCount; ++layer) { + wgpu::ImageCopyTexture imageCopyTexture = + utils::CreateImageCopyTexture(tex, level, {0, 0, layer}); + wgpu::Extent3D copySize = {width >> level, height >> level, 1}; + + wgpu::TextureDataLayout textureDataLayout; + textureDataLayout.offset = 0; + textureDataLayout.bytesPerRow = copySize.width * sizeof(uint16_t); + textureDataLayout.rowsPerImage = copySize.height; + + // Use a distinct value for each subresource. + uint16_t value = level * 10 + layer; + std::vector data(copySize.width * copySize.height, value); + queue.WriteTexture(&imageCopyTexture, data.data(), + data.size() * sizeof(uint16_t), &textureDataLayout, + &copySize); + } + } + + // Prep a viewDesc for rendering to depth. The base layer and level + // will be set later. + wgpu::TextureViewDescriptor viewDesc = {}; + viewDesc.mipLevelCount = 1u; + viewDesc.arrayLayerCount = 1u; + + // Overwrite some subresources with a render pass + { + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + for (const auto& clearedMipRange : clearedMips) { + for (const auto& clearedLayerRange : clearedLayers) { + for (uint32_t level = clearedMipRange.first; level < clearedMipRange.second; + ++level) { + for (uint32_t layer = clearedLayerRange.first; + layer < clearedLayerRange.second; ++layer) { + viewDesc.baseMipLevel = level; + viewDesc.baseArrayLayer = layer; + + utils::ComboRenderPassDescriptor renderPass( + {}, tex.CreateView(&viewDesc)); + renderPass.UnsetDepthStencilLoadStoreOpsForFormat(texDesc.format); + renderPass.cDepthStencilAttachmentInfo.depthClearValue = 0.8; + renderPass.cDepthStencilAttachmentInfo.depthLoadOp = + wgpu::LoadOp::Clear; + renderPass.cDepthStencilAttachmentInfo.depthStoreOp = + wgpu::StoreOp::Store; + encoder.BeginRenderPass(&renderPass).End(); + } + } + } + } + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + } + + // Iterate all 
subresources. + for (uint32_t level = 0; level < mipLevelCount; ++level) { + for (uint32_t layer = 0; layer < arrayLayerCount; ++layer) { + bool cleared = false; + for (const auto& clearedMipRange : clearedMips) { + for (const auto& clearedLayerRange : clearedLayers) { + if (level >= clearedMipRange.first && level < clearedMipRange.second && + layer >= clearedLayerRange.first && + layer < clearedLayerRange.second) { + cleared = true; + } + } + } + uint32_t mipWidth = width >> level; + uint32_t mipHeight = height >> level; + if (cleared) { + // Check the subresource is cleared as expected. + std::vector data(mipWidth * mipHeight, 0xCCCC); + EXPECT_TEXTURE_EQ(data.data(), tex, {0, 0, layer}, {mipWidth, mipHeight}, + level) + << "cleared texture data should have been 0xCCCC at:" + << "\nlayer: " << layer << "\nlevel: " << level; + } else { + // Otherwise, check the other subresources have the original contents. + // Without the workaround, they are 0. + uint16_t value = + level * 10 + layer; // Compute the expected value for the subresource. 
+ std::vector data(mipWidth * mipHeight, value); + EXPECT_TEXTURE_EQ(data.data(), tex, {0, 0, layer}, {mipWidth, mipHeight}, + level) + << "written texture data should still be " << value << " at:" + << "\nlayer: " << layer << "\nlevel: " << level; + } + } + } + } + } +} + +DAWN_INSTANTIATE_TEST(RenderPassTest_RegressionDawn1389, + D3D12Backend(), + MetalBackend(), + MetalBackend({"use_blit_for_buffer_to_depth_texture_copy"}), + OpenGLBackend(), + OpenGLESBackend(), + VulkanBackend()); diff --git a/src/dawn/tests/end2end/TextureZeroInitTests.cpp b/src/dawn/tests/end2end/TextureZeroInitTests.cpp index f883ef2d31..0d33cef707 100644 --- a/src/dawn/tests/end2end/TextureZeroInitTests.cpp +++ b/src/dawn/tests/end2end/TextureZeroInitTests.cpp @@ -2319,7 +2319,8 @@ DAWN_INSTANTIATE_TEST(CompressedTextureZeroInitTest, D3D12Backend({"nonzero_clear_resources_on_creation_for_testing"}), MetalBackend({"nonzero_clear_resources_on_creation_for_testing"}), MetalBackend({"nonzero_clear_resources_on_creation_for_testing", - "use_temp_texture_in_stencil_texture_to_buffer_copy"}), + "use_blit_for_buffer_to_depth_texture_copy", + "use_blit_for_buffer_to_stencil_texture_copy"}), OpenGLBackend({"nonzero_clear_resources_on_creation_for_testing"}), OpenGLESBackend({"nonzero_clear_resources_on_creation_for_testing"}), VulkanBackend({"nonzero_clear_resources_on_creation_for_testing"})); diff --git a/src/dawn/tests/unittests/native/mocks/DeviceMock.h b/src/dawn/tests/unittests/native/mocks/DeviceMock.h index 92c4883c7c..4cabb78aa9 100644 --- a/src/dawn/tests/unittests/native/mocks/DeviceMock.h +++ b/src/dawn/tests/unittests/native/mocks/DeviceMock.h @@ -40,7 +40,7 @@ class DeviceMock : public DeviceBase { (override)); MOCK_METHOD(MaybeError, CopyFromStagingToTextureImpl, - (const BufferBase*, const TextureDataLayout&, TextureCopy*, const Extent3D&), + (const BufferBase*, const TextureDataLayout&, const TextureCopy&, const Extent3D&), (override)); MOCK_METHOD(uint32_t, 
GetOptimalBytesPerRowAlignment, (), (const, override)); diff --git a/webgpu-cts/expectations.txt b/webgpu-cts/expectations.txt index e61ae0650e..37f4252a8d 100644 --- a/webgpu-cts/expectations.txt +++ b/webgpu-cts/expectations.txt @@ -314,14 +314,6 @@ crbug.com/dawn/1083 [ monterey ] webgpu:api,operation,command_buffer,copyTexture ################################################################################ crbug.com/dawn/1500 [ intel-gen-9 monterey ] webgpu:api,operation,command_buffer,image_copy:origins_and_extents:initMethod="WriteTexture";checkMethod="PartialCopyT2B";* [ RetryOnFailure ] -################################################################################ -# webgpu:api,operation,resource_init,texture_zero:uninitialized_texture_is_zero -# Failures on Mac Intel, likely also due to crbug.com/dawn/1083 -################################################################################ -crbug.com/dawn/1389 [ monterey ] webgpu:api,operation,resource_init,texture_zero:uninitialized_texture_is_zero:dimension="2d";readMethod="CopyToBuffer";format="depth16unorm" [ Failure ] -crbug.com/dawn/1389 [ monterey ] webgpu:api,operation,resource_init,texture_zero:uninitialized_texture_is_zero:dimension="2d";readMethod="CopyToTexture";format="depth16unorm" [ Failure ] -crbug.com/dawn/1389 [ monterey ] webgpu:api,operation,resource_init,texture_zero:uninitialized_texture_is_zero:dimension="2d";readMethod="DepthTest";format="depth16unorm" [ Failure ] - ################################################################################ # copyToTexture,canvas:color_space_conversion:* fail with swiftshader # The other tests about canvas and image bitmap fail with swiftshader on Linux