Compat GL/GLES: blit a depth texture to a buffer using compute

Add compute blit emulation path for Depth16Unorm/Depth32Float
depth textures in OpenGL/OpenGLES backend.

Extend DepthCopyTests for better test coverage, especially for
testing Depth16Unorm compute emulation path.

Bug: dawn:1782, dawn:1291, dawn:1237
Change-Id: I02f6e10d13e8b0080b412a9f9a6d62b1e470ac9c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/130820
Auto-Submit: Shrek Shao <shrekshao@google.com>
Reviewed-by: Stephen White <senorblanco@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Shrek Shao <shrekshao@google.com>
This commit is contained in:
shrekshao 2023-05-17 22:25:34 +00:00 committed by Dawn LUCI CQ
parent 8c361e560e
commit e121931563
17 changed files with 888 additions and 116 deletions

View File

@ -203,6 +203,8 @@ source_set("sources") {
"BindingInfo.h", "BindingInfo.h",
"BlitBufferToDepthStencil.cpp", "BlitBufferToDepthStencil.cpp",
"BlitBufferToDepthStencil.h", "BlitBufferToDepthStencil.h",
"BlitDepthStencilToBuffer.cpp",
"BlitDepthStencilToBuffer.h",
"BlitDepthToDepth.cpp", "BlitDepthToDepth.cpp",
"BlitDepthToDepth.h", "BlitDepthToDepth.h",
"Blob.cpp", "Blob.cpp",

View File

@ -0,0 +1,361 @@
// Copyright 2023 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn/native/BlitDepthStencilToBuffer.h"
#include <utility>
#include "dawn/common/Assert.h"
#include "dawn/native/BindGroup.h"
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/CommandEncoder.h"
#include "dawn/native/ComputePassEncoder.h"
#include "dawn/native/ComputePipeline.h"
#include "dawn/native/Device.h"
#include "dawn/native/InternalPipelineStore.h"
#include "dawn/native/Queue.h"
#include "dawn/native/utils/WGPUHelpers.h"
namespace dawn::native {
namespace {
constexpr uint32_t kWorkgroupSizeX = 8;
constexpr uint32_t kWorkgroupSizeY = 8;
constexpr char kBlitDepth32FloatToBufferShaders[] = R"(
@group(0) @binding(0) var src_tex : texture_depth_2d_array;
@group(0) @binding(1) var<storage, read_write> dst_buf : array<f32>;
struct Params {
// copyExtent
srcOrigin: vec3u,
pad0: u32,
srcExtent: vec3u,
pad1: u32,
// GPUImageDataLayout
indicesPerRow: u32,
rowsPerImage: u32,
indicesOffset: u32,
};
@group(0) @binding(2) var<uniform> params : Params;
override workgroupSizeX: u32;
override workgroupSizeY: u32;
// Load the depth value and write to storage buffer.
@compute @workgroup_size(workgroupSizeX, workgroupSizeY, 1) fn blit_depth_to_buffer(@builtin(global_invocation_id) id : vec3u) {
let srcBoundary = params.srcOrigin + params.srcExtent;
let coord = id + params.srcOrigin;
if (any(coord >= srcBoundary)) {
return;
}
let dstOffset = params.indicesOffset + id.x + id.y * params.indicesPerRow + id.z * params.indicesPerRow * params.rowsPerImage;
dst_buf[dstOffset] = textureLoad(src_tex, coord.xy, coord.z, 0);
}
)";
// ShaderF16 extension is only enabled by GL_AMD_gpu_shader_half_float for GL
// so we should not use it generally for the emulation.
// As a result we are using f32 and array<u32> to do all the math and byte manipulation.
// If we have 2-byte scalar type (f16, u16) it can be a bit easier when writing to the storage
// buffer.
constexpr char kBlitDepth16UnormToBufferShaders[] = R"(
@group(0) @binding(0) var src_tex : texture_depth_2d_array;
@group(0) @binding(1) var<storage, read_write> dst_buf : array<u32>;
struct Params {
// copyExtent
srcOrigin: vec3u,
pad0: u32,
srcExtent: vec3u,
pad1: u32,
// GPUImageDataLayout
indicesPerRow: u32,
rowsPerImage: u32,
indicesOffset: u32,
};
@group(0) @binding(2) var<uniform> params : Params;
// Range of v is [0.0, 1.0]
fn getUnorm16Bits(v: f32) -> u32 {
var bits: u32 = u32(v * 65535.0);
return bits;
}
override workgroupSizeX: u32;
override workgroupSizeY: u32;
// Load the depth value and write to storage buffer.
// Each thread is responsible for reading 2 u16 values and packing them into 1 u32 value.
@compute @workgroup_size(workgroupSizeX, workgroupSizeY, 1) fn blit_depth_to_buffer(@builtin(global_invocation_id) id : vec3u) {
let srcBoundary = params.srcOrigin + params.srcExtent;
let coord0 = vec3u(id.x * 2, id.y, id.z) + params.srcOrigin;
if (any(coord0 >= srcBoundary)) {
return;
}
let v0: f32 = textureLoad(src_tex, coord0.xy, coord0.z, 0);
let r0: u32 = getUnorm16Bits(v0);
let dstOffset = params.indicesOffset + id.x + id.y * params.indicesPerRow + id.z * params.indicesPerRow * params.rowsPerImage;
var result: u32 = r0;
let coord1 = coord0 + vec3u(1, 0, 0);
if (coord1.x < srcBoundary.x) {
// Make sure coord1 is still within the copy boundary
// then read and write this value.
let v1: f32 = textureLoad(src_tex, coord1.xy, coord1.z, 0);
let r1: u32 = getUnorm16Bits(v1);
result += (r1 << 16);
} else {
// Otherwise, srcExtent.x is an odd number and this thread is at right edge of the texture
// To preserve the original buffer content, we need to read from the buffer and pack it
// together with r0 to avoid it being overwritten.
// TODO(dawn:1782): profiling against making a separate pass for this edge case
// as it require reading from dst_buf.
let original: u32 = dst_buf[dstOffset];
result += original & 0xffff0000;
}
dst_buf[dstOffset] = result;
}
)";
ResultOrError<Ref<ComputePipelineBase>> CreateDepthBlitComputePipeline(DeviceBase* device,
InternalPipelineStore* store,
wgpu::TextureFormat format) {
ShaderModuleWGSLDescriptor wgslDesc = {};
ShaderModuleDescriptor shaderModuleDesc = {};
shaderModuleDesc.nextInChain = &wgslDesc;
switch (format) {
case wgpu::TextureFormat::Depth16Unorm:
wgslDesc.source = kBlitDepth16UnormToBufferShaders;
break;
case wgpu::TextureFormat::Depth32Float:
wgslDesc.source = kBlitDepth32FloatToBufferShaders;
break;
default:
UNREACHABLE();
break;
}
Ref<ShaderModuleBase> shaderModule;
DAWN_TRY_ASSIGN(shaderModule, device->CreateShaderModule(&shaderModuleDesc));
Ref<BindGroupLayoutBase> bindGroupLayout;
DAWN_TRY_ASSIGN(bindGroupLayout,
utils::MakeBindGroupLayout(
device,
{
{0, wgpu::ShaderStage::Compute, wgpu::TextureSampleType::Depth,
wgpu::TextureViewDimension::e2DArray},
{1, wgpu::ShaderStage::Compute, kInternalStorageBufferBinding},
{2, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform},
},
/* allowInternalBinding */ true));
Ref<PipelineLayoutBase> pipelineLayout;
DAWN_TRY_ASSIGN(pipelineLayout, utils::MakeBasicPipelineLayout(device, bindGroupLayout));
ComputePipelineDescriptor computePipelineDescriptor = {};
computePipelineDescriptor.layout = pipelineLayout.Get();
computePipelineDescriptor.compute.module = shaderModule.Get();
computePipelineDescriptor.compute.entryPoint = "blit_depth_to_buffer";
constexpr std::array<ConstantEntry, 2> constants = {{
{nullptr, "workgroupSizeX", kWorkgroupSizeX},
{nullptr, "workgroupSizeY", kWorkgroupSizeY},
}};
computePipelineDescriptor.compute.constantCount = constants.size();
computePipelineDescriptor.compute.constants = constants.data();
Ref<ComputePipelineBase> pipeline;
DAWN_TRY_ASSIGN(pipeline, device->CreateComputePipeline(&computePipelineDescriptor));
switch (format) {
case wgpu::TextureFormat::Depth16Unorm:
store->blitDepth16UnormToBufferComputePipeline = pipeline;
break;
case wgpu::TextureFormat::Depth32Float:
store->blitDepth32FloatToBufferComputePipeline = pipeline;
break;
default:
UNREACHABLE();
break;
}
return pipeline;
}
ResultOrError<Ref<ComputePipelineBase>> GetOrCreateDepth32FloatToBufferPipeline(
DeviceBase* device) {
InternalPipelineStore* store = device->GetInternalPipelineStore();
if (store->blitDepth32FloatToBufferComputePipeline != nullptr) {
return store->blitDepth32FloatToBufferComputePipeline;
}
Ref<ComputePipelineBase> pipeline;
DAWN_TRY_ASSIGN(
pipeline, CreateDepthBlitComputePipeline(device, store, wgpu::TextureFormat::Depth32Float));
return pipeline;
}
ResultOrError<Ref<ComputePipelineBase>> GetOrCreateDepth16UnormToBufferPipeline(
DeviceBase* device) {
InternalPipelineStore* store = device->GetInternalPipelineStore();
if (store->blitDepth16UnormToBufferComputePipeline != nullptr) {
return store->blitDepth16UnormToBufferComputePipeline;
}
Ref<ComputePipelineBase> pipeline;
DAWN_TRY_ASSIGN(
pipeline, CreateDepthBlitComputePipeline(device, store, wgpu::TextureFormat::Depth16Unorm));
return pipeline;
}
} // anonymous namespace
MaybeError BlitDepthToBuffer(DeviceBase* device,
CommandEncoder* commandEncoder,
const TextureCopy& src,
const BufferCopy& dst,
const Extent3D& copyExtent) {
const Format& format = src.texture->GetFormat();
Ref<BufferBase> destinationBuffer = dst.buffer;
bool useIntermediateCopyBuffer = false;
if (format.format == wgpu::TextureFormat::Depth16Unorm && dst.buffer->GetSize() % 4 != 0 &&
copyExtent.width % 2 != 0) {
// This path is made for OpenGL/GLES depth16unorm bliting a texture with an odd width,
// to a compact buffer. When we copy the last texel, we inevitably need to access an
// out of bounds location given by dst.buffer.size as we use array<u32> in the shader for
// the storage buffer. Although the allocated size of dst.buffer is aligned to 4 bytes for
// OpenGL/GLES backend, the size of the storage buffer binding for the shader is not. Thus
// we make an intermediate buffer aligned to 4 bytes for the compute shader to safely
// access, and perform an additional buffer to buffer copy at the end. This path should be
// hit rarely.
useIntermediateCopyBuffer = true;
BufferDescriptor descriptor = {};
descriptor.size = Align(dst.buffer->GetSize(), 4);
// TODO(dawn:1485): adding CopyDst usage to add kInternalStorageBuffer usage internally.
descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
DAWN_TRY_ASSIGN(destinationBuffer, device->CreateBuffer(&descriptor));
}
Ref<ComputePipelineBase> pipeline;
uint32_t workgroupCountX = 1;
uint32_t workgroupCountY = 1;
uint32_t workgroupCountZ = copyExtent.depthOrArrayLayers;
switch (format.format) {
case wgpu::TextureFormat::Depth16Unorm:
// One thread is responsible for writing two texel values (x, y) and (x+1, y).
workgroupCountX = (copyExtent.width + 2 * kWorkgroupSizeX - 1) / (2 * kWorkgroupSizeX);
workgroupCountY = (copyExtent.height + kWorkgroupSizeY - 1) / kWorkgroupSizeY;
DAWN_TRY_ASSIGN(pipeline, GetOrCreateDepth16UnormToBufferPipeline(device));
break;
case wgpu::TextureFormat::Depth32Float:
workgroupCountX = (copyExtent.width + kWorkgroupSizeX - 1) / kWorkgroupSizeX;
workgroupCountY = (copyExtent.height + kWorkgroupSizeY - 1) / kWorkgroupSizeY;
DAWN_TRY_ASSIGN(pipeline, GetOrCreateDepth32FloatToBufferPipeline(device));
break;
default:
// Other formats (e.g. Depth32FloatStencil8) are not supported on OpenGL/OpenGLES where
// we enabled this workaround. They only support Depth24PlusStencil8.
UNREACHABLE();
}
// Allow internal usages since we need to use the source as a texture binding
// and buffer as a storage binding.
auto scope = commandEncoder->MakeInternalUsageScope();
Ref<BindGroupLayoutBase> bindGroupLayout;
DAWN_TRY_ASSIGN(bindGroupLayout, pipeline->GetBindGroupLayout(0));
Ref<BufferBase> uniformBuffer;
{
BufferDescriptor bufferDesc = {};
// Uniform buffer size needs to be multiple of 16 bytes
bufferDesc.size = sizeof(uint32_t) * 12;
bufferDesc.usage = wgpu::BufferUsage::Uniform;
bufferDesc.mappedAtCreation = true;
DAWN_TRY_ASSIGN(uniformBuffer, device->CreateBuffer(&bufferDesc));
uint32_t* params =
static_cast<uint32_t*>(uniformBuffer->GetMappedRange(0, bufferDesc.size));
// srcOrigin: vec3u
params[0] = src.origin.x;
params[1] = src.origin.y;
// src.origin.z is set at textureView.baseArrayLayer
params[2] = 0;
// srcExtent: vec3u
params[4] = copyExtent.width;
params[5] = copyExtent.height;
params[6] = copyExtent.depthOrArrayLayers;
// Turn bytesPerRow, (bytes)offset to use array index as unit
// We use array<u32> for depth16unorm copy and array<f32> for depth32float copy
// Both array element sizes are 4 bytes.
params[8] = dst.bytesPerRow / 4;
params[9] = dst.rowsPerImage;
params[10] = dst.offset / 4;
DAWN_TRY(uniformBuffer->Unmap());
}
TextureViewDescriptor viewDesc = {};
viewDesc.aspect = wgpu::TextureAspect::DepthOnly;
viewDesc.dimension = wgpu::TextureViewDimension::e2DArray;
viewDesc.baseMipLevel = src.mipLevel;
viewDesc.mipLevelCount = 1;
viewDesc.baseArrayLayer = src.origin.z;
viewDesc.arrayLayerCount = copyExtent.depthOrArrayLayers;
Ref<TextureViewBase> srcView;
DAWN_TRY_ASSIGN(srcView, src.texture->CreateView(&viewDesc));
Ref<BindGroupBase> bindGroup;
DAWN_TRY_ASSIGN(bindGroup, utils::MakeBindGroup(device, bindGroupLayout,
{
{0, srcView},
{1, destinationBuffer},
{2, uniformBuffer},
},
UsageValidationMode::Internal));
Ref<ComputePassEncoder> pass = commandEncoder->BeginComputePass();
pass->APISetPipeline(pipeline.Get());
pass->APISetBindGroup(0, bindGroup.Get());
pass->APIDispatchWorkgroups(workgroupCountX, workgroupCountY, workgroupCountZ);
pass->APIEnd();
if (useIntermediateCopyBuffer) {
ASSERT(destinationBuffer->GetSize() <= dst.buffer->GetAllocatedSize());
commandEncoder->InternalCopyBufferToBufferWithAllocatedSize(
destinationBuffer.Get(), 0, dst.buffer.Get(), 0, destinationBuffer->GetSize());
}
return {};
}
} // namespace dawn::native

View File

@ -0,0 +1,38 @@
// Copyright 2023 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SRC_DAWN_NATIVE_BLITDEPTHSTENCILTOBUFFER_H_
#define SRC_DAWN_NATIVE_BLITDEPTHSTENCILTOBUFFER_H_
#include "dawn/native/Error.h"
namespace dawn::native {
struct TextureCopy;
struct BufferCopy;
// BlitDepthToBuffer works around OpenGL/GLES issues of copying depth textures to a buffer.
// Supported depth texture format: depth16unorm, depth32float
// It dispatches a compute shader textureLoad from the depth texture and writes to the buffer as a
// storage buffer.
MaybeError BlitDepthToBuffer(DeviceBase* device,
CommandEncoder* commandEncoder,
const TextureCopy& src,
const BufferCopy& dst,
const Extent3D& copyExtent);
} // namespace dawn::native
#endif // SRC_DAWN_NATIVE_BLITDEPTHSTENCILTOBUFFER_H_

View File

@ -172,6 +172,13 @@ BufferBase::BufferBase(DeviceBase* device, const BufferDescriptor* descriptor)
mUsage |= kInternalStorageBuffer; mUsage |= kInternalStorageBuffer;
} }
if (mUsage & wgpu::BufferUsage::CopyDst) {
if (device->IsToggleEnabled(Toggle::UseBlitForDepth16UnormTextureToBufferCopy) ||
device->IsToggleEnabled(Toggle::UseBlitForDepth32FloatTextureToBufferCopy)) {
mUsage |= kInternalStorageBuffer;
}
}
GetObjectTrackingList()->Track(this); GetObjectTrackingList()->Track(this);
} }

View File

@ -24,6 +24,7 @@
#include "dawn/native/Forward.h" #include "dawn/native/Forward.h"
#include "dawn/native/IntegerTypes.h" #include "dawn/native/IntegerTypes.h"
#include "dawn/native/ObjectBase.h" #include "dawn/native/ObjectBase.h"
#include "dawn/native/UsageValidationMode.h"
#include "dawn/native/dawn_platform.h" #include "dawn/native/dawn_platform.h"

View File

@ -49,6 +49,8 @@ target_sources(dawn_native PRIVATE
"BindingInfo.h" "BindingInfo.h"
"BlitBufferToDepthStencil.cpp" "BlitBufferToDepthStencil.cpp"
"BlitBufferToDepthStencil.h" "BlitBufferToDepthStencil.h"
"BlitDepthStencilToBuffer.cpp"
"BlitDepthStencilToBuffer.h"
"BlitDepthToDepth.cpp" "BlitDepthToDepth.cpp"
"BlitDepthToDepth.h" "BlitDepthToDepth.h"
"Blob.cpp" "Blob.cpp"

View File

@ -23,6 +23,7 @@
#include "dawn/native/ApplyClearColorValueWithDrawHelper.h" #include "dawn/native/ApplyClearColorValueWithDrawHelper.h"
#include "dawn/native/BindGroup.h" #include "dawn/native/BindGroup.h"
#include "dawn/native/BlitBufferToDepthStencil.h" #include "dawn/native/BlitBufferToDepthStencil.h"
#include "dawn/native/BlitDepthStencilToBuffer.h"
#include "dawn/native/BlitDepthToDepth.h" #include "dawn/native/BlitDepthToDepth.h"
#include "dawn/native/Buffer.h" #include "dawn/native/Buffer.h"
#include "dawn/native/ChainUtils_autogen.h" #include "dawn/native/ChainUtils_autogen.h"
@ -1159,6 +1160,55 @@ void CommandEncoder::APICopyBufferToBuffer(BufferBase* source,
destination, destinationOffset, size); destination, destinationOffset, size);
} }
// The internal version of APICopyBufferToBuffer which validates against mAllocatedSize instead of
// mSize of buffers.
void CommandEncoder::InternalCopyBufferToBufferWithAllocatedSize(BufferBase* source,
uint64_t sourceOffset,
BufferBase* destination,
uint64_t destinationOffset,
uint64_t size) {
mEncodingContext.TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
if (GetDevice()->IsValidationEnabled()) {
DAWN_TRY(GetDevice()->ValidateObject(source));
DAWN_TRY(GetDevice()->ValidateObject(destination));
DAWN_INVALID_IF(source == destination,
"Source and destination are the same buffer (%s).", source);
DAWN_TRY_CONTEXT(ValidateCopySizeFitsInBuffer(source, sourceOffset, size,
BufferSizeType::AllocatedSize),
"validating source %s copy size against allocated size.", source);
DAWN_TRY_CONTEXT(ValidateCopySizeFitsInBuffer(destination, destinationOffset, size,
BufferSizeType::AllocatedSize),
"validating destination %s copy size against allocated size.",
destination);
DAWN_TRY(ValidateB2BCopyAlignment(size, sourceOffset, destinationOffset));
DAWN_TRY_CONTEXT(ValidateCanUseAs(source, wgpu::BufferUsage::CopySrc),
"validating source %s usage.", source);
DAWN_TRY_CONTEXT(ValidateCanUseAs(destination, wgpu::BufferUsage::CopyDst),
"validating destination %s usage.", destination);
}
mTopLevelBuffers.insert(source);
mTopLevelBuffers.insert(destination);
CopyBufferToBufferCmd* copy =
allocator->Allocate<CopyBufferToBufferCmd>(Command::CopyBufferToBuffer);
copy->source = source;
copy->sourceOffset = sourceOffset;
copy->destination = destination;
copy->destinationOffset = destinationOffset;
copy->size = size;
return {};
},
"encoding internal %s.CopyBufferToBuffer(%s, %u, %s, %u, %u).", this, source, sourceOffset,
destination, destinationOffset, size);
}
void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source, void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source,
const ImageCopyTexture* destination, const ImageCopyTexture* destination,
const Extent3D* copySize) { const Extent3D* copySize) {
@ -1286,6 +1336,35 @@ void CommandEncoder::APICopyTextureToBuffer(const ImageCopyTexture* source,
TextureDataLayout dstLayout = destination->layout; TextureDataLayout dstLayout = destination->layout;
ApplyDefaultTextureDataLayoutOptions(&dstLayout, blockInfo, *copySize); ApplyDefaultTextureDataLayoutOptions(&dstLayout, blockInfo, *copySize);
auto format = source->texture->GetFormat();
auto aspect = ConvertAspect(format, source->aspect);
if (aspect == Aspect::Depth) {
if ((format.format == wgpu::TextureFormat::Depth16Unorm &&
GetDevice()->IsToggleEnabled(
Toggle::UseBlitForDepth16UnormTextureToBufferCopy)) ||
(format.format == wgpu::TextureFormat::Depth32Float &&
GetDevice()->IsToggleEnabled(
Toggle::UseBlitForDepth32FloatTextureToBufferCopy))) {
TextureCopy src;
src.texture = source->texture;
src.origin = source->origin;
src.mipLevel = source->mipLevel;
src.aspect = aspect;
BufferCopy dst;
dst.buffer = destination->buffer;
dst.bytesPerRow = destination->layout.bytesPerRow;
dst.rowsPerImage = destination->layout.rowsPerImage;
dst.offset = destination->layout.offset;
DAWN_TRY_CONTEXT(BlitDepthToBuffer(GetDevice(), this, src, dst, *copySize),
"copying depth aspect from %s to %s using blit workaround.",
src.texture.Get(), destination->buffer);
return {};
}
}
// TODO(crbug.com/dawn/1782): implement emulation for stencil
CopyTextureToBufferCmd* t2b = CopyTextureToBufferCmd* t2b =
allocator->Allocate<CopyTextureToBufferCmd>(Command::CopyTextureToBuffer); allocator->Allocate<CopyTextureToBufferCmd>(Command::CopyTextureToBuffer);
t2b->source.texture = source->texture; t2b->source.texture = source->texture;

View File

@ -59,6 +59,11 @@ class CommandEncoder final : public ApiObjectBase {
BufferBase* destination, BufferBase* destination,
uint64_t destinationOffset, uint64_t destinationOffset,
uint64_t size); uint64_t size);
void InternalCopyBufferToBufferWithAllocatedSize(BufferBase* source,
uint64_t sourceOffset,
BufferBase* destination,
uint64_t destinationOffset,
uint64_t size);
void APICopyBufferToTexture(const ImageCopyBuffer* source, void APICopyBufferToTexture(const ImageCopyBuffer* source,
const ImageCopyTexture* destination, const ImageCopyTexture* destination,
const Extent3D* copySize); const Extent3D* copySize);

View File

@ -175,8 +175,17 @@ ResultOrError<uint64_t> ComputeRequiredBytesInCopy(const TexelBlockInfo& blockIn
MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer, MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer,
uint64_t offset, uint64_t offset,
uint64_t size) { uint64_t size,
uint64_t bufferSize = buffer->GetSize(); BufferSizeType checkBufferSizeType) {
uint64_t bufferSize = 0;
switch (checkBufferSizeType) {
case BufferSizeType::Size:
bufferSize = buffer->GetSize();
break;
case BufferSizeType::AllocatedSize:
bufferSize = buffer->GetAllocatedSize();
break;
}
bool fitsInBuffer = offset <= bufferSize && (size <= (bufferSize - offset)); bool fitsInBuffer = offset <= bufferSize && (size <= (bufferSize - offset));
DAWN_INVALID_IF(!fitsInBuffer, DAWN_INVALID_IF(!fitsInBuffer,
"Copy range (offset: %u, size: %u) does not fit in %s size (%u).", offset, size, "Copy range (offset: %u, size: %u) does not fit in %s size (%u).", offset, size,

View File

@ -27,6 +27,8 @@
namespace dawn::native { namespace dawn::native {
enum class BufferSizeType { Size, AllocatedSize };
class QuerySetBase; class QuerySetBase;
struct SyncScopeResourceUsage; struct SyncScopeResourceUsage;
struct TexelBlockInfo; struct TexelBlockInfo;
@ -76,7 +78,8 @@ MaybeError ValidateImageCopyTexture(DeviceBase const* device,
MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer, MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer,
uint64_t offset, uint64_t offset,
uint64_t size); uint64_t size,
BufferSizeType checkBufferSizeType = BufferSizeType::Size);
bool IsRangeOverlapped(uint32_t startA, uint32_t startB, uint32_t length); bool IsRangeOverlapped(uint32_t startA, uint32_t startB, uint32_t length);

View File

@ -60,6 +60,9 @@ struct InternalPipelineStore {
Ref<RenderPipelineBase> blitRG8ToDepth16UnormPipeline; Ref<RenderPipelineBase> blitRG8ToDepth16UnormPipeline;
Ref<ComputePipelineBase> blitDepth16UnormToBufferComputePipeline;
Ref<ComputePipelineBase> blitDepth32FloatToBufferComputePipeline;
struct BlitR8ToStencilPipelines { struct BlitR8ToStencilPipelines {
Ref<RenderPipelineBase> clearPipeline; Ref<RenderPipelineBase> clearPipeline;
std::array<Ref<RenderPipelineBase>, 8> setStencilPipelines; std::array<Ref<RenderPipelineBase>, 8> setStencilPipelines;

View File

@ -592,6 +592,13 @@ TextureBase::TextureBase(DeviceBase* device,
AddInternalUsage(wgpu::TextureUsage::RenderAttachment); AddInternalUsage(wgpu::TextureUsage::RenderAttachment);
} }
} }
if (mFormat.HasDepth() &&
(device->IsToggleEnabled(Toggle::UseBlitForDepth16UnormTextureToBufferCopy) ||
device->IsToggleEnabled(Toggle::UseBlitForDepth32FloatTextureToBufferCopy))) {
if (mInternalUsage & wgpu::TextureUsage::CopySrc) {
AddInternalUsage(wgpu::TextureUsage::TextureBinding);
}
}
} }
TextureBase::~TextureBase() = default; TextureBase::~TextureBase() = default;

View File

@ -386,6 +386,16 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"Use a blit to copy from a depth texture to the nonzero subresource of a depth texture. " "Use a blit to copy from a depth texture to the nonzero subresource of a depth texture. "
"Works around an issue where nonzero layers are not written.", "Works around an issue where nonzero layers are not written.",
"https://crbug.com/dawn/1083", ToggleStage::Device}}, "https://crbug.com/dawn/1083", ToggleStage::Device}},
{Toggle::UseBlitForDepth16UnormTextureToBufferCopy,
{"use_blit_for_depth16unorm_texture_to_buffer_copy",
"Use a blit instead of a copy command to copy depth aspect of a texture to a buffer."
"Workaround for OpenGL and OpenGLES.",
"https://crbug.com/dawn/1782", ToggleStage::Device}},
{Toggle::UseBlitForDepth32FloatTextureToBufferCopy,
{"use_blit_for_depth32float_texture_to_buffer_copy",
"Use a blit instead of a copy command to copy depth aspect of a texture to a buffer."
"Workaround for OpenGLES.",
"https://crbug.com/dawn/1782", ToggleStage::Device}},
{Toggle::D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha, {Toggle::D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,
{"d3d12_replace_add_with_minus_when_dst_factor_is_zero_and_src_factor_is_dst_alpha", {"d3d12_replace_add_with_minus_when_dst_factor_is_zero_and_src_factor_is_dst_alpha",
"Replace the blending operation 'Add' with 'Minus' when dstBlendFactor is 'Zero' and " "Replace the blending operation 'Add' with 'Minus' when dstBlendFactor is 'Zero' and "

View File

@ -92,6 +92,8 @@ enum class Toggle {
UseBlitForBufferToDepthTextureCopy, UseBlitForBufferToDepthTextureCopy,
UseBlitForBufferToStencilTextureCopy, UseBlitForBufferToStencilTextureCopy,
UseBlitForDepthTextureToTextureCopyToNonzeroSubresource, UseBlitForDepthTextureToTextureCopyToNonzeroSubresource,
UseBlitForDepth16UnormTextureToBufferCopy,
UseBlitForDepth32FloatTextureToBufferCopy,
D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha, D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,
D3D12PolyfillReflectVec2F32, D3D12PolyfillReflectVec2F32,
VulkanClearGen12TextureWithCCSAmbiguateOnCreation, VulkanClearGen12TextureWithCCSAmbiguateOnCreation,

View File

@ -41,7 +41,9 @@ Buffer::Buffer(Device* device, const BufferDescriptor* descriptor)
: BufferBase(device, descriptor) { : BufferBase(device, descriptor) {
const OpenGLFunctions& gl = device->GetGL(); const OpenGLFunctions& gl = device->GetGL();
// Allocate at least 4 bytes so clamped accesses are always in bounds. // Allocate at least 4 bytes so clamped accesses are always in bounds.
mAllocatedSize = std::max(GetSize(), uint64_t(4u)); // Align with 4 byte to avoid out-of-bounds access issue in compute emulation for 2 byte
// element.
mAllocatedSize = Align(std::max(GetSize(), uint64_t(4u)), uint64_t(4u));
gl.GenBuffers(1, &mBuffer); gl.GenBuffers(1, &mBuffer);
gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer); gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);

View File

@ -213,6 +213,19 @@ void PhysicalDevice::SetupBackendDeviceToggles(TogglesState* deviceToggles) cons
// For OpenGL ES, we must use a placeholder fragment shader for vertex-only render pipeline. // For OpenGL ES, we must use a placeholder fragment shader for vertex-only render pipeline.
deviceToggles->Default(Toggle::UsePlaceholderFragmentInVertexOnlyPipeline, deviceToggles->Default(Toggle::UsePlaceholderFragmentInVertexOnlyPipeline,
gl.GetVersion().IsES()); gl.GetVersion().IsES());
// For OpenGL/OpenGL ES, use compute shader blit to emulate depth16unorm texture to buffer
// copies.
// Disable Angle on windows as it seems to have side-effect.
#if DAWN_PLATFORM_IS(WINDOWS)
const bool kIsAngleOnWindows = mName.find("ANGLE") != std::string::npos;
#else
constexpr bool kIsAngleOnWindows = false;
#endif
deviceToggles->Default(Toggle::UseBlitForDepth16UnormTextureToBufferCopy, !kIsAngleOnWindows);
// For OpenGL ES, use compute shader blit to emulate depth32float texture to buffer copies.
deviceToggles->Default(Toggle::UseBlitForDepth32FloatTextureToBufferCopy,
gl.GetVersion().IsES() && !kIsAngleOnWindows);
} }
ResultOrError<Ref<DeviceBase>> PhysicalDevice::CreateDeviceImpl(AdapterBase* adapter, ResultOrError<Ref<DeviceBase>> PhysicalDevice::CreateDeviceImpl(AdapterBase* adapter,

View File

@ -41,8 +41,63 @@ constexpr std::array<wgpu::TextureFormat, 1> kValidDepthCopyFromBufferFormats =
wgpu::TextureFormat::Depth16Unorm, wgpu::TextureFormat::Depth16Unorm,
}; };
uint32_t GetBytesPerPixel(wgpu::TextureFormat format, wgpu::TextureAspect aspect) {
uint32_t bytesPerPixel = 0;
switch (format) {
case wgpu::TextureFormat::Depth24PlusStencil8: {
ASSERT(aspect == wgpu::TextureAspect::StencilOnly);
bytesPerPixel = 1;
break;
}
case wgpu::TextureFormat::Depth32FloatStencil8: {
switch (aspect) {
case wgpu::TextureAspect::DepthOnly:
bytesPerPixel = 4;
break;
case wgpu::TextureAspect::StencilOnly:
bytesPerPixel = 1;
break;
default:
UNREACHABLE();
break;
}
break;
}
default:
bytesPerPixel = utils::GetTexelBlockSizeInBytes(format);
break;
}
return bytesPerPixel;
}
// Bytes of unorm 16 of 0.23 is 0x3AE1, of which the 2 bytes are different.
// This helps better test unorm 16 compute emulation path.
constexpr float kInitDepth = 0.23f;
// Bytes of unorm 16 of 0.23 is 0xB0A3.
// Use a non-zero clear depth to better test unorm16 compute emulation path.
constexpr float kClearDepth = 0.69f;
class DepthStencilCopyTests : public DawnTestWithParams<DepthStencilCopyTestParams> { class DepthStencilCopyTests : public DawnTestWithParams<DepthStencilCopyTestParams> {
protected: protected:
void MapAsyncAndWait(const wgpu::Buffer& buffer,
wgpu::MapMode mode,
size_t offset,
size_t size) {
bool done = false;
buffer.MapAsync(
mode, offset, size,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
*static_cast<bool*>(userdata) = true;
},
&done);
while (!done) {
WaitABit();
}
}
void SetUp() override { void SetUp() override {
DawnTestWithParams<DepthStencilCopyTestParams>::SetUp(); DawnTestWithParams<DepthStencilCopyTestParams>::SetUp();
@ -91,9 +146,10 @@ class DepthStencilCopyTests : public DawnTestWithParams<DepthStencilCopyTestPara
wgpu::Texture CreateTexture(uint32_t width, wgpu::Texture CreateTexture(uint32_t width,
uint32_t height, uint32_t height,
wgpu::TextureUsage usage, wgpu::TextureUsage usage,
uint32_t mipLevelCount = 1) { uint32_t mipLevelCount = 1,
uint32_t arrayLayerCount = 1) {
wgpu::TextureDescriptor texDescriptor = {}; wgpu::TextureDescriptor texDescriptor = {};
texDescriptor.size = {width, height, 1}; texDescriptor.size = {width, height, arrayLayerCount};
texDescriptor.format = GetParam().mTextureFormat; texDescriptor.format = GetParam().mTextureFormat;
texDescriptor.usage = usage; texDescriptor.usage = usage;
texDescriptor.mipLevelCount = mipLevelCount; texDescriptor.mipLevelCount = mipLevelCount;
@ -133,7 +189,8 @@ class DepthStencilCopyTests : public DawnTestWithParams<DepthStencilCopyTestPara
uint8_t clearStencil, uint8_t clearStencil,
uint8_t regionStencil, uint8_t regionStencil,
uint32_t mipLevel = 0, uint32_t mipLevel = 0,
uint32_t arrayLayer = 0) { uint32_t arrayLayer = 0,
uint32_t arrayLayerCount = 1) {
wgpu::TextureFormat format = GetParam().mTextureFormat; wgpu::TextureFormat format = GetParam().mTextureFormat;
// Create the render pass used for the initialization. // Create the render pass used for the initialization.
utils::ComboRenderPipelineDescriptor renderPipelineDesc; utils::ComboRenderPipelineDescriptor renderPipelineDesc;
@ -160,26 +217,29 @@ class DepthStencilCopyTests : public DawnTestWithParams<DepthStencilCopyTestPara
} }
wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc); wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
for (uint32_t curArrayLayer = 0; curArrayLayer < arrayLayerCount; curArrayLayer++) {
// Build the render pass used for initialization. // Build the render pass used for initialization.
wgpu::TextureViewDescriptor viewDesc = {}; wgpu::TextureViewDescriptor viewDesc = {};
viewDesc.baseMipLevel = mipLevel; viewDesc.baseMipLevel = mipLevel;
viewDesc.mipLevelCount = 1; viewDesc.mipLevelCount = 1;
viewDesc.baseArrayLayer = arrayLayer; viewDesc.baseArrayLayer = arrayLayer + curArrayLayer;
viewDesc.arrayLayerCount = 1; viewDesc.arrayLayerCount = 1;
utils::ComboRenderPassDescriptor renderPassDesc({}, texture.CreateView(&viewDesc)); utils::ComboRenderPassDescriptor renderPassDesc({}, texture.CreateView(&viewDesc));
renderPassDesc.UnsetDepthStencilLoadStoreOpsForFormat(format); renderPassDesc.UnsetDepthStencilLoadStoreOpsForFormat(format);
// TODO(dawn:1782): use different clear values for each array layer.
renderPassDesc.cDepthStencilAttachmentInfo.depthClearValue = clearDepth; renderPassDesc.cDepthStencilAttachmentInfo.depthClearValue = clearDepth;
renderPassDesc.cDepthStencilAttachmentInfo.stencilClearValue = clearStencil; renderPassDesc.cDepthStencilAttachmentInfo.stencilClearValue = clearStencil;
// Draw the quad (two triangles) // Draw the quad (two triangles)
wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc); wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
pass.SetPipeline(pipeline); pass.SetPipeline(pipeline);
pass.SetStencilReference(regionStencil); pass.SetStencilReference(regionStencil);
pass.Draw(6); pass.Draw(6);
pass.End(); pass.End();
}
wgpu::CommandBuffer commands = commandEncoder.Finish(); wgpu::CommandBuffer commands = commandEncoder.Finish();
queue.Submit(1, &commands); queue.Submit(1, &commands);
@ -224,35 +284,19 @@ class DepthStencilCopyTests : public DawnTestWithParams<DepthStencilCopyTestPara
uint32_t height, uint32_t height,
uint32_t depth, uint32_t depth,
wgpu::TextureFormat format = wgpu::TextureFormat::RGBA8Unorm, wgpu::TextureFormat format = wgpu::TextureFormat::RGBA8Unorm,
wgpu::TextureAspect aspect = wgpu::TextureAspect::All) { wgpu::TextureAspect aspect = wgpu::TextureAspect::All,
uint32_t bytesPerPixel = 0; bool alignForMultipleOf4Bytes = true) {
switch (format) { uint32_t bytesPerPixel = GetBytesPerPixel(format, aspect);
case wgpu::TextureFormat::Depth24PlusStencil8: {
ASSERT(aspect == wgpu::TextureAspect::StencilOnly);
bytesPerPixel = 1;
break;
}
case wgpu::TextureFormat::Depth32FloatStencil8: {
switch (aspect) {
case wgpu::TextureAspect::DepthOnly:
bytesPerPixel = 4;
break;
case wgpu::TextureAspect::StencilOnly:
bytesPerPixel = 1;
break;
default:
UNREACHABLE();
break;
}
break;
}
default:
bytesPerPixel = utils::GetTexelBlockSizeInBytes(format);
break;
}
uint32_t bytesPerRow = Align(width * bytesPerPixel, kTextureBytesPerRowAlignment); uint32_t bytesPerRow = Align(width * bytesPerPixel, kTextureBytesPerRowAlignment);
return (bytesPerRow * (height - 1) + width * bytesPerPixel) * depth;
// Bytes per image before last array layer
uint32_t bytesPerImage = bytesPerRow * height;
uint32_t result =
bytesPerImage * (depth - 1) + (bytesPerRow * (height - 1) + width * bytesPerPixel);
return alignForMultipleOf4Bytes ? Align(result, uint64_t(4)) : result;
} }
wgpu::ShaderModule mVertexModule; wgpu::ShaderModule mVertexModule;
@ -447,70 +491,105 @@ class DepthCopyTests : public DepthStencilCopyTests {
float initDepth, float initDepth,
uint32_t textureWidth, uint32_t textureWidth,
uint32_t textureHeight, uint32_t textureHeight,
uint32_t textureArrayLayerCount,
uint32_t testLevel) { uint32_t testLevel) {
// TODO(crbug.com/dawn/1237): Depth16Unorm test failed on OpenGL and OpenGLES which says
// Invalid format and type combination in glReadPixels
DAWN_TEST_UNSUPPORTED_IF(GetParam().mTextureFormat == wgpu::TextureFormat::Depth16Unorm &&
(IsOpenGL() || IsOpenGLES()));
// TODO(crbug.com/dawn/1291): These tests are failing on GLES (both native and ANGLE)
// when using Tint/GLSL.
DAWN_TEST_UNSUPPORTED_IF(IsOpenGLES());
uint32_t mipLevelCount = testLevel + 1;
wgpu::Texture texture = CreateTexture(
textureWidth, textureHeight,
wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc, mipLevelCount);
InitializeDepthStencilTextureRegion(texture, 0.f, initDepth, 0, 0, testLevel);
uint32_t copyWidth = textureWidth >> testLevel; uint32_t copyWidth = textureWidth >> testLevel;
uint32_t copyHeight = textureHeight >> testLevel; uint32_t copyHeight = textureHeight >> testLevel;
wgpu::Extent3D copySize = {copyWidth, copyHeight, 1};
constexpr uint32_t kBytesPerRow = kTextureBytesPerRowAlignment;
wgpu::BufferDescriptor bufferDescriptor = {}; wgpu::BufferDescriptor bufferDescriptor = {};
bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
bufferDescriptor.size = bufferDescriptor.size =
bufferCopyOffset + BufferSizeForTextureCopy(copyWidth, copyHeight, 1, bufferCopyOffset +
GetParam().mTextureFormat, BufferSizeForTextureCopy(copyWidth, copyHeight, textureArrayLayerCount,
wgpu::TextureAspect::DepthOnly); GetParam().mTextureFormat, wgpu::TextureAspect::DepthOnly);
wgpu::Buffer destinationBuffer = device.CreateBuffer(&bufferDescriptor); wgpu::Buffer destinationBuffer = device.CreateBuffer(&bufferDescriptor);
DoCopyFromDepthTestWithBuffer(destinationBuffer, bufferCopyOffset, initDepth, textureWidth,
textureHeight, textureArrayLayerCount, testLevel, true);
}
void DoCopyFromDepthTestWithBuffer(wgpu::Buffer destinationBuffer,
uint32_t bufferCopyOffset,
float initDepth,
uint32_t textureWidth,
uint32_t textureHeight,
uint32_t textureArrayLayerCount,
uint32_t testLevel,
bool checkBufferContent) {
// TODO(crbug.com/dawn/1291): Compute emulation path fails for Angle on Windows.
DAWN_SUPPRESS_TEST_IF(IsANGLE() && IsWindows());
uint32_t mipLevelCount = testLevel + 1;
wgpu::Texture texture =
CreateTexture(textureWidth, textureHeight,
wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc,
mipLevelCount, textureArrayLayerCount);
// Initialize other mip levels with different init values for better testing
constexpr float garbageDepth = 0.123456789f;
ASSERT(initDepth != garbageDepth);
for (uint32_t level = 0; level < mipLevelCount; level++) {
float regionDepth = (level == testLevel) ? initDepth : garbageDepth;
InitializeDepthStencilTextureRegion(texture, kClearDepth, regionDepth, 0, 0, level, 0,
textureArrayLayerCount);
}
uint32_t copyWidth = textureWidth >> testLevel;
uint32_t copyHeight = textureHeight >> testLevel;
wgpu::Extent3D copySize = {copyWidth, copyHeight, textureArrayLayerCount};
wgpu::TextureFormat format = GetParam().mTextureFormat;
constexpr wgpu::TextureAspect aspect = wgpu::TextureAspect::DepthOnly;
uint32_t bytesPerPixel = GetBytesPerPixel(format, aspect);
uint32_t bytesPerRow = Align(copyWidth * bytesPerPixel, kTextureBytesPerRowAlignment);
uint32_t bytesPerImage = bytesPerRow * copyHeight;
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ImageCopyTexture imageCopyTexture = utils::CreateImageCopyTexture( wgpu::ImageCopyTexture imageCopyTexture =
texture, testLevel, {0, 0, 0}, wgpu::TextureAspect::DepthOnly); utils::CreateImageCopyTexture(texture, testLevel, {0, 0, 0}, aspect);
wgpu::ImageCopyBuffer imageCopyBuffer = utils::CreateImageCopyBuffer( wgpu::ImageCopyBuffer imageCopyBuffer = utils::CreateImageCopyBuffer(
destinationBuffer, bufferCopyOffset, kBytesPerRow, copyHeight); destinationBuffer, bufferCopyOffset, bytesPerRow, copyHeight);
encoder.CopyTextureToBuffer(&imageCopyTexture, &imageCopyBuffer, &copySize); encoder.CopyTextureToBuffer(&imageCopyTexture, &imageCopyBuffer, &copySize);
wgpu::CommandBuffer commandBuffer = encoder.Finish(); wgpu::CommandBuffer commandBuffer = encoder.Finish();
queue.Submit(1, &commandBuffer); queue.Submit(1, &commandBuffer);
if (GetParam().mTextureFormat == wgpu::TextureFormat::Depth16Unorm) { if (checkBufferContent) {
// Expected data pattern is that initDepth value at bottom left corner, while other
// region is kClearDepth. Data of each layer is the same.
if (format == wgpu::TextureFormat::Depth16Unorm) {
uint16_t expected = FloatToUnorm<uint16_t>(initDepth); uint16_t expected = FloatToUnorm<uint16_t>(initDepth);
std::vector<uint16_t> expectedData = { uint16_t cleared = FloatToUnorm<uint16_t>(kClearDepth);
0, 0, 0, 0, // std::vector<uint16_t> expectedData(copyWidth * copyHeight, cleared);
0, 0, 0, 0, // for (uint32_t y = copyHeight / 2; y < copyHeight; y++) {
expected, expected, 0, 0, // auto rowStart = expectedData.data() + y * copyWidth;
expected, expected, 0, 0, // std::fill(rowStart, rowStart + copyWidth / 2, expected);
};
for (uint32_t y = 0; y < copyHeight; ++y) {
EXPECT_BUFFER_U16_RANGE_EQ(expectedData.data() + copyWidth * y, destinationBuffer,
bufferCopyOffset + y * kBytesPerRow, copyWidth);
} }
} else { for (uint32_t z = 0; z < textureArrayLayerCount; ++z) {
std::vector<float> expectedData = { uint32_t bufferOffsetPerArrayLayer = bytesPerImage * z;
0.0, 0.0, 0.0, 0.0, //
0.0, 0.0, 0.0, 0.0, //
initDepth, initDepth, 0.0, 0.0, //
initDepth, initDepth, 0.0, 0.0, //
};
for (uint32_t y = 0; y < copyHeight; ++y) { for (uint32_t y = 0; y < copyHeight; ++y) {
EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedData.data() + copyWidth * y, destinationBuffer, EXPECT_BUFFER_U16_RANGE_EQ(
bufferCopyOffset + y * kBytesPerRow, copyWidth); expectedData.data() + copyWidth * y, destinationBuffer,
bufferCopyOffset + bufferOffsetPerArrayLayer + y * bytesPerRow,
copyWidth);
}
}
} else {
std::vector<float> expectedData(copyWidth * copyHeight, kClearDepth);
for (uint32_t y = copyHeight / 2; y < copyHeight; y++) {
auto rowStart = expectedData.data() + y * copyWidth;
std::fill(rowStart, rowStart + copyWidth / 2, initDepth);
}
for (uint32_t z = 0; z < textureArrayLayerCount; ++z) {
uint32_t bufferOffsetPerArrayLayer = bytesPerImage * z;
for (uint32_t y = 0; y < copyHeight; ++y) {
EXPECT_BUFFER_FLOAT_RANGE_EQ(
expectedData.data() + copyWidth * y, destinationBuffer,
bufferCopyOffset + bufferOffsetPerArrayLayer + y * bytesPerRow,
copyWidth);
}
}
} }
} }
} }
@ -518,34 +597,185 @@ class DepthCopyTests : public DepthStencilCopyTests {
// Test copying the depth-only aspect into a buffer. // Test copying the depth-only aspect into a buffer.
TEST_P(DepthCopyTests, FromDepthAspect) { TEST_P(DepthCopyTests, FromDepthAspect) {
constexpr float kInitDepth = 0.2f;
constexpr uint32_t kBufferCopyOffset = 0; constexpr uint32_t kBufferCopyOffset = 0;
constexpr uint32_t kWidth = 4;
constexpr uint32_t kHeight = 4;
constexpr uint32_t kTestLevel = 0; constexpr uint32_t kTestLevel = 0;
DoCopyFromDepthTest(kBufferCopyOffset, kInitDepth, kWidth, kHeight, kTestLevel); constexpr uint32_t kTestTextureSizes[][2] = {
// Original test parameter
{4, 4},
// Only 1 pixel at bottom left has value, test compute emulation path for unorm 16
{2, 2},
// Odd number texture width to test compute emulation path for unorm 16
{3, 3},
// float 32 needs bytesPerRow alignment
{65, 1},
// unorm 16 and float 32 need bytesPerRow alignment
{129, 1},
};
constexpr uint32_t kTestTextureArrayLayerCounts[] = {
1,
2,
};
for (const uint32_t sizeZ : kTestTextureArrayLayerCounts) {
for (const auto& size : kTestTextureSizes) {
DoCopyFromDepthTest(kBufferCopyOffset, kInitDepth, size[0], size[1], sizeZ, kTestLevel);
}
}
} }
// Test copying the depth-only aspect into a buffer at a non-zero offset. // Test copying the depth-only aspect into a buffer at a non-zero offset.
TEST_P(DepthCopyTests, FromDepthAspectToBufferAtNonZeroOffset) { TEST_P(DepthCopyTests, FromDepthAspectToBufferAtNonZeroOffset) {
constexpr float kInitDepth = 0.2f;
constexpr uint32_t kWidth = 4;
constexpr uint32_t kHeight = 4;
constexpr uint32_t kTestLevel = 0; constexpr uint32_t kTestLevel = 0;
constexpr std::array<uint32_t, 2> kBufferCopyOffsets = {4u, 512u}; constexpr uint32_t kBufferCopyOffsets[] = {4u, 512u};
constexpr uint32_t kTestTextureSizes[][2] = {
// Original test parameter
{4, 4},
// Only 1 pixel at bottom left has value, test compute emulation path for unorm 16
{2, 2},
// Odd number texture width to test compute emulation path for unorm 16
{3, 3},
// float 32 needs bytesPerRow alignment
{65, 1},
// unorm 16 and float 32 need bytesPerRow alignment
{129, 1},
};
constexpr uint32_t kTestTextureArrayLayerCounts[] = {
1,
2,
};
for (const uint32_t sizeZ : kTestTextureArrayLayerCounts) {
for (uint32_t offset : kBufferCopyOffsets) { for (uint32_t offset : kBufferCopyOffsets) {
DoCopyFromDepthTest(offset, kInitDepth, kWidth, kHeight, kTestLevel); for (const auto& size : kTestTextureSizes) {
DoCopyFromDepthTest(offset, kInitDepth, size[0], size[1], sizeZ, kTestLevel);
}
}
} }
} }
// Test copying the non-zero mip, depth-only aspect into a buffer. // Test copying the non-zero mip, depth-only aspect into a buffer.
TEST_P(DepthCopyTests, FromNonZeroMipDepthAspect) { TEST_P(DepthCopyTests, FromNonZeroMipDepthAspect) {
constexpr float kInitDepth = 0.2f;
constexpr uint32_t kBufferCopyOffset = 0; constexpr uint32_t kBufferCopyOffset = 0;
constexpr uint32_t kWidth = 9; constexpr uint32_t kWidth = 9;
constexpr uint32_t kHeight = 9; constexpr uint32_t kHeight = 9;
constexpr uint32_t kTestLevel = 1; DoCopyFromDepthTest(kBufferCopyOffset, kInitDepth, kWidth, kHeight, 1, 1);
DoCopyFromDepthTest(kBufferCopyOffset, kInitDepth, kWidth, kHeight, kTestLevel); DoCopyFromDepthTest(kBufferCopyOffset, kInitDepth, kWidth, kHeight, 2, 2);
}
// Test buffer content outside of copy extent is preserved.
// This test is made specifially for compute blit for depth16unorm emulation path.
// The texel size is 2 byte, while in the compute shader we have to write 4 byte at a time.
// When the copy extent width is an odd number, buffer content outside of the copy range is
// inevitably written. So we need to make sure the original content of the buffer that's outside of
// the copy extent is still correctly preserved.
TEST_P(DepthCopyTests, PreserveBufferContent) {
constexpr uint32_t kBufferCopyOffsets[] = {0u, 4u, 512u};
constexpr uint32_t kTestTextureSizes[][2] = {
{1, 1},
{1, 2},
{3, 3},
};
constexpr uint32_t kExtraBufferSize[] = {0u, 4u};
const uint32_t kSizeZ = 1;
constexpr uint32_t kTestLevel = 0;
wgpu::TextureFormat format = GetParam().mTextureFormat;
constexpr wgpu::TextureAspect aspect = wgpu::TextureAspect::DepthOnly;
for (uint32_t extraBufferSize : kExtraBufferSize) {
for (uint32_t offset : kBufferCopyOffsets) {
for (const auto& size : kTestTextureSizes) {
wgpu::BufferDescriptor bufferDescriptor = {};
// Add wgpu::BufferUsage::MapRead to check the buffer content with mapAsync
bufferDescriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
bufferDescriptor.size =
extraBufferSize + offset +
BufferSizeForTextureCopy(size[0], size[1], kSizeZ, format, aspect);
bufferDescriptor.mappedAtCreation = true;
wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor);
constexpr uint8_t kOriginalValue = 0xff;
{
// Fill the buffer with an original value other than 0 to check they are
// incorrectly overwritten outside of the copy range.
uint8_t* ptr = static_cast<uint8_t*>(buffer.GetMappedRange());
std::fill(ptr, ptr + bufferDescriptor.size, kOriginalValue);
buffer.Unmap();
}
// Don't check copy region content because the buffer doesn't have
// wgpu::BufferUsage::CopySrc usage.
DoCopyFromDepthTestWithBuffer(buffer, offset, kInitDepth, size[0], size[1], kSizeZ,
kTestLevel, false);
std::vector<uint8_t> expected(bufferDescriptor.size, kOriginalValue);
// Get the offset of the end of the copy range (without aligning with 4 bytes)
uint32_t bufferEndOffset =
offset +
BufferSizeForTextureCopy(size[0], size[1], kSizeZ, format, aspect, false);
if (bufferDescriptor.size > bufferEndOffset) {
// Cannot use EXPECT_BUFFER_* helper here because it needs to align the copy
// size to a multiple of 4 bytes to call CopyBufferToBuffer. We are checking
// against format like Depth16Unorm so we need offset to be multiple of 2.
MapAsyncAndWait(buffer, wgpu::MapMode::Read, 0, wgpu::kWholeMapSize);
const uint8_t* ptr = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
// Check the content before copy range.
for (uint32_t i = 0; i < offset; i++) {
EXPECT_EQ(ptr[i], kOriginalValue);
}
// Check the content after copy range.
uint32_t checkSize = bufferDescriptor.size - bufferEndOffset;
for (uint32_t i = 0; i < checkSize; i++) {
EXPECT_EQ(ptr[bufferEndOffset + i], kOriginalValue);
}
buffer.Unmap();
}
}
}
}
}
// Test compact buffer size edge case.
// This test is made specifially for compute blit for depth16unorm emulation path.
// When format is depth16unorm and width is an odd number, the size of the most compact buffer copy
// target can be something that's not a multiple of 4. We need to make sure access don't go out of
// bounds in the shader, when still writing to array<u32> in the compute shader.
TEST_P(DepthCopyTests, BufferCopySizeEdgeCase) {
constexpr uint32_t kBufferCopyOffsets[] = {0u, 4u, 512u};
constexpr uint32_t kTestTextureSizes[][2] = {
// Cannot create compact copy buffer for {1, 1} here as storage buffer binding requires size
// of at least 4 bytes.
{3, 1},
{1, 2},
{3, 3},
};
const uint32_t kSizeZ = 1;
constexpr uint32_t kTestLevel = 0;
wgpu::TextureFormat format = GetParam().mTextureFormat;
constexpr wgpu::TextureAspect aspect = wgpu::TextureAspect::DepthOnly;
for (uint32_t offset : kBufferCopyOffsets) {
for (const auto& size : kTestTextureSizes) {
wgpu::BufferDescriptor bufferDescriptor = {};
// Add wgpu::BufferUsage::MapRead to check the buffer content with mapAsync
bufferDescriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
bufferDescriptor.size =
offset +
// Don't align for 4 bytes to get smallest possible buffer for depth16unorm.
BufferSizeForTextureCopy(size[0], size[1], kSizeZ, format, aspect, false);
wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor);
// Don't check copy region content because the buffer doesn't have
// wgpu::BufferUsage::CopySrc usage.
DoCopyFromDepthTestWithBuffer(buffer, offset, kInitDepth, size[0], size[1], kSizeZ,
kTestLevel, false);
// Unable to check the result since either MapAsync and CopyBufferToBuffer requires size
// to be multiple of 4 bytes.
// Just run and don't crash on ASSERT.
}
}
} }
class DepthCopyFromBufferTests : public DepthStencilCopyTests { class DepthCopyFromBufferTests : public DepthStencilCopyTests {
@ -582,8 +812,6 @@ class DepthCopyFromBufferTests : public DepthStencilCopyTests {
destTexture, 0, {0, 0, 0}, wgpu::TextureAspect::DepthOnly); destTexture, 0, {0, 0, 0}, wgpu::TextureAspect::DepthOnly);
wgpu::Extent3D extent = {kWidth, kHeight, 1}; wgpu::Extent3D extent = {kWidth, kHeight, 1};
constexpr float kInitDepth = 0.2f;
// This expectation is the test as it performs the CopyTextureToBuffer. // This expectation is the test as it performs the CopyTextureToBuffer.
if (GetParam().mTextureFormat == wgpu::TextureFormat::Depth16Unorm) { if (GetParam().mTextureFormat == wgpu::TextureFormat::Depth16Unorm) {
uint16_t expected = FloatToUnorm<uint16_t>(kInitDepth); uint16_t expected = FloatToUnorm<uint16_t>(kInitDepth);