diff --git a/src/dawn_native/CommandBuffer.cpp b/src/dawn_native/CommandBuffer.cpp index 19b80103f9..b6ba4f05f3 100644 --- a/src/dawn_native/CommandBuffer.cpp +++ b/src/dawn_native/CommandBuffer.cpp @@ -17,6 +17,7 @@ #include "common/BitSetIterator.h" #include "dawn_native/Buffer.h" #include "dawn_native/CommandEncoder.h" +#include "dawn_native/CommandValidation.h" #include "dawn_native/Commands.h" #include "dawn_native/Format.h" #include "dawn_native/Texture.h" @@ -171,27 +172,37 @@ namespace dawn_native { ASSERT(copy != nullptr); if (copy->destination.offset > 0) { + // The copy doesn't touch the start of the buffer. return false; } const TextureBase* texture = copy->source.texture.Get(); const TexelBlockInfo& blockInfo = texture->GetFormat().GetAspectInfo(copy->source.aspect).block; + const uint64_t widthInBlocks = copy->copySize.width / blockInfo.width; const uint64_t heightInBlocks = copy->copySize.height / blockInfo.height; + const bool multiSlice = copy->copySize.depthOrArrayLayers > 1; + const bool multiRow = multiSlice || heightInBlocks > 1; - if (copy->destination.rowsPerImage > heightInBlocks) { + if (multiSlice && copy->destination.rowsPerImage > heightInBlocks) { + // There are gaps between slices that aren't overwritten return false; } - const uint64_t copyTextureDataSizePerRow = - copy->copySize.width / blockInfo.width * blockInfo.byteSize; - if (copy->destination.bytesPerRow > copyTextureDataSizePerRow) { + const uint64_t copyTextureDataSizePerRow = widthInBlocks * blockInfo.byteSize; + if (multiRow && copy->destination.bytesPerRow > copyTextureDataSizePerRow) { + // There are gaps between rows that aren't overwritten return false; } - const uint64_t overwrittenRangeSize = - copyTextureDataSizePerRow * heightInBlocks * copy->copySize.depthOrArrayLayers; - if (copy->destination.buffer->GetSize() > overwrittenRangeSize) { + // After the above checks, we're sure the copy has no gaps. + // Now, compute the total number of bytes written. 
+ const uint64_t writtenBytes = + ComputeRequiredBytesInCopy(blockInfo, copy->copySize, copy->destination.bytesPerRow, + copy->destination.rowsPerImage) + .AcquireSuccess(); + if (!copy->destination.buffer->IsFullBufferRange(copy->destination.offset, writtenBytes)) { + // The written bytes don't cover the whole buffer. return false; } diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp index 97a90bed19..399a02c39a 100644 --- a/src/dawn_native/d3d12/BufferD3D12.cpp +++ b/src/dawn_native/d3d12/BufferD3D12.cpp @@ -168,6 +168,21 @@ namespace dawn_native { namespace d3d12 { DAWN_TRY(ClearBuffer(commandRecordingContext, uint8_t(1u))); } + // Initialize the padding bytes to zero. + if (GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) && + !mappedAtCreation) { + uint32_t paddingBytes = GetAllocatedSize() - GetSize(); + if (paddingBytes > 0) { + CommandRecordingContext* commandRecordingContext; + DAWN_TRY_ASSIGN(commandRecordingContext, + ToBackend(GetDevice())->GetPendingCommandContext()); + + uint32_t clearSize = paddingBytes; + uint64_t clearOffset = GetSize(); + DAWN_TRY(ClearBuffer(commandRecordingContext, 0, clearOffset, clearSize)); + } + } + return {}; } @@ -444,29 +459,33 @@ namespace dawn_native { namespace d3d12 { return {}; } - MaybeError Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) { + MaybeError Buffer::ClearBuffer(CommandRecordingContext* commandContext, + uint8_t clearValue, + uint64_t offset, + uint64_t size) { Device* device = ToBackend(GetDevice()); + size = size > 0 ? size : GetAllocatedSize(); // The state of the buffers on UPLOAD heap must always be GENERIC_READ and cannot be // changed away, so we can only clear such buffer with buffer mapping. 
if (D3D12HeapType(GetUsage()) == D3D12_HEAP_TYPE_UPLOAD) {
- DAWN_TRY(MapInternal(true, 0, size_t(GetAllocatedSize()), "D3D12 map at clear buffer"));
- memset(mMappedData, clearValue, GetAllocatedSize());
+ DAWN_TRY(MapInternal(true, static_cast<size_t>(offset), static_cast<size_t>(size),
+ "D3D12 map at clear buffer"));
+ memset(mMappedData, clearValue, size);
UnmapImpl();
} else {
// TODO(crbug.com/dawn/852): use ClearUnorderedAccessView*() when the buffer usage
// includes STORAGE.
DynamicUploader* uploader = device->GetDynamicUploader();
UploadHandle uploadHandle;
- DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(GetAllocatedSize(),
- device->GetPendingCommandSerial(),
- kCopyBufferToBufferOffsetAlignment));
+ DAWN_TRY_ASSIGN(uploadHandle,
+ uploader->Allocate(size, device->GetPendingCommandSerial(),
+ kCopyBufferToBufferOffsetAlignment));
- memset(uploadHandle.mappedBuffer, clearValue, GetAllocatedSize());
+ memset(uploadHandle.mappedBuffer, clearValue, size);
device->CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
- uploadHandle.startOffset, this, 0,
- GetAllocatedSize());
+ uploadHandle.startOffset, this, offset, size);
}
return {};
diff --git a/src/dawn_native/d3d12/BufferD3D12.h b/src/dawn_native/d3d12/BufferD3D12.h index 4cd3d512ca..ed19efac67 100644 --- a/src/dawn_native/d3d12/BufferD3D12.h +++ b/src/dawn_native/d3d12/BufferD3D12.h @@ -68,7 +68,10 @@ namespace dawn_native { namespace d3d12 {
wgpu::BufferUsage newUsage);
MaybeError InitializeToZero(CommandRecordingContext* commandContext);
- MaybeError ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue);
+ MaybeError ClearBuffer(CommandRecordingContext* commandContext,
+ uint8_t clearValue,
+ uint64_t offset = 0,
+ uint64_t size = 0);
ResourceHeapAllocation mResourceAllocation;
bool mFixedResourceState = false;
diff --git a/src/dawn_native/metal/BufferMTL.h b/src/dawn_native/metal/BufferMTL.h index 409b89b5ab..0c7c5e28b4 100644 --- a/src/dawn_native/metal/BufferMTL.h +++ 
b/src/dawn_native/metal/BufferMTL.h @@ -52,7 +52,10 @@ namespace dawn_native { namespace metal {
MaybeError MapAtCreationImpl() override;
void InitializeToZero(CommandRecordingContext* commandContext);
- void ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue);
+ void ClearBuffer(CommandRecordingContext* commandContext,
+ uint8_t clearValue,
+ uint64_t offset = 0,
+ uint64_t size = 0);
NSPRef<id<MTLBuffer>> mMtlBuffer;
};
diff --git a/src/dawn_native/metal/BufferMTL.mm b/src/dawn_native/metal/BufferMTL.mm index 237fcd01f6..c2c5a313ad 100644 --- a/src/dawn_native/metal/BufferMTL.mm +++ b/src/dawn_native/metal/BufferMTL.mm @@ -45,10 +45,6 @@ namespace dawn_native { namespace metal {
storageMode = MTLResourceStorageModePrivate;
}
- if (GetSize() > std::numeric_limits<NSUInteger>::max()) {
- return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
- }
-
uint32_t alignment = 1;
#ifdef DAWN_PLATFORM_MACOS
// [MTLBlitCommandEncoder fillBuffer] requires the size to be a multiple of 4 on MacOS.
@@ -64,13 +60,25 @@ namespace dawn_native { namespace metal {
alignment = kMinUniformOrStorageBufferAlignment;
}
- // Allocate at least 4 bytes so clamped accesses are always in bounds.
- NSUInteger currentSize = static_cast<NSUInteger>(std::max(GetSize(), uint64_t(4u)));
+ // The vertex pulling transform requires at least 4 bytes in the buffer.
+ // 0-sized vertex buffer bindings are allowed, so we always need an additional 4 bytes
+ // after the end.
+ NSUInteger extraBytes = 0u;
+ if ((GetUsage() & wgpu::BufferUsage::Vertex) != 0) {
+ extraBytes = 4u;
+ }
+
+ if (GetSize() > std::numeric_limits<NSUInteger>::max() - extraBytes) {
+ return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+ }
+ NSUInteger currentSize =
+ std::max(static_cast<NSUInteger>(GetSize()) + extraBytes, NSUInteger(4));
+
if (currentSize > std::numeric_limits<NSUInteger>::max() - alignment) {
// Alignment would overflow. 
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large"); } - currentSize = Align(currentSize, kMinUniformOrStorageBufferAlignment); + currentSize = Align(currentSize, alignment); if (@available(iOS 12, macOS 10.14, *)) { NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength]; @@ -109,6 +117,19 @@ namespace dawn_native { namespace metal { ClearBuffer(commandContext, uint8_t(1u)); } + // Initialize the padding bytes to zero. + if (GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) && + !mappedAtCreation) { + uint32_t paddingBytes = GetAllocatedSize() - GetSize(); + if (paddingBytes > 0) { + uint32_t clearSize = Align(paddingBytes, 4); + uint64_t clearOffset = GetAllocatedSize() - clearSize; + + CommandRecordingContext* commandContext = + ToBackend(GetDevice())->GetPendingCommandContext(); + ClearBuffer(commandContext, 0, clearOffset, clearSize); + } + } return {}; } @@ -197,11 +218,15 @@ namespace dawn_native { namespace metal { GetDevice()->IncrementLazyClearCountForTesting(); } - void Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) { + void Buffer::ClearBuffer(CommandRecordingContext* commandContext, + uint8_t clearValue, + uint64_t offset, + uint64_t size) { ASSERT(commandContext != nullptr); - ASSERT(GetAllocatedSize() > 0); + size = size > 0 ? 
size : GetAllocatedSize();
+ ASSERT(size > 0);
[commandContext->EnsureBlit() fillBuffer:mMtlBuffer.Get()
- range:NSMakeRange(0, GetAllocatedSize())
+ range:NSMakeRange(offset, size)
value:clearValue];
}
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm index e49ab1052f..f41cff71ee 100644 --- a/src/dawn_native/metal/CommandBufferMTL.mm +++ b/src/dawn_native/metal/CommandBufferMTL.mm @@ -548,7 +548,8 @@ namespace dawn_native { namespace metal {
mVertexBufferOffsets[slot] = offset;
ASSERT(buffer->GetSize() < std::numeric_limits<uint32_t>::max());
- mVertexBufferBindingSizes[slot] = static_cast<uint32_t>(buffer->GetSize() - offset);
+ mVertexBufferBindingSizes[slot] =
+ static_cast<uint32_t>(buffer->GetAllocatedSize() - offset);
mDirtyVertexBuffers.set(slot);
}
diff --git a/src/dawn_native/opengl/BufferGL.cpp b/src/dawn_native/opengl/BufferGL.cpp index dd90fccb08..1e102d63c5 100644 --- a/src/dawn_native/opengl/BufferGL.cpp +++ b/src/dawn_native/opengl/BufferGL.cpp @@ -49,6 +49,7 @@ namespace dawn_native { namespace opengl {
device->gl.BufferData(GL_ARRAY_BUFFER, mAllocatedSize, clearValues.data(), GL_STATIC_DRAW);
} else {
+ // Buffers start zeroed if you pass nullptr to glBufferData.
device->gl.BufferData(GL_ARRAY_BUFFER, mAllocatedSize, nullptr, GL_STATIC_DRAW);
}
}
diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp index 7b796f96a7..8139b29062 100644 --- a/src/dawn_native/vulkan/BufferVk.cpp +++ b/src/dawn_native/vulkan/BufferVk.cpp @@ -137,16 +137,34 @@ namespace dawn_native { namespace vulkan {
}
MaybeError Buffer::Initialize(bool mappedAtCreation) {
+ // vkCmdFillBuffer requires the size to be a multiple of 4.
+ constexpr size_t kAlignment = 4u;
+
+ uint32_t extraBytes = 0u;
+ if (GetUsage() & (wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Index)) {
+ // vkCmdSetIndexBuffer and vkCmdSetVertexBuffer are invalid if the offset
+ // is equal to the whole buffer size. 
Allocate at least one more byte so it
+ // is valid to setVertex/IndexBuffer with a zero-sized range at the end
+ // of the buffer with (offset=buffer.size, size=0).
+ extraBytes = 1u;
+ }
+
+ uint64_t size = GetSize();
+ if (size > std::numeric_limits<uint64_t>::max() - extraBytes) {
+ return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+ }
+
+ size += extraBytes;
+
// Allocate at least 4 bytes so clamped accesses are always in bounds.
// Also, Vulkan requires the size to be non-zero.
- uint64_t size = std::max(GetSize(), uint64_t(4u));
- // vkCmdFillBuffer requires the size to be a multiple of 4.
- size_t alignment = 4u;
- if (size > std::numeric_limits<uint64_t>::max() - alignment) {
+ size = std::max(size, uint64_t(4u));
+
+ if (size > std::numeric_limits<uint64_t>::max() - kAlignment) {
// Alignment would overflow.
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
}
- mAllocatedSize = Align(size, alignment);
+ mAllocatedSize = Align(size, kAlignment);
// Avoid passing ludicrously large sizes to drivers because it causes issues: drivers add
// some constants to the size passed and align it, but for values close to the maximum
@@ -200,6 +218,17 @@ namespace dawn_native { namespace vulkan {
ClearBuffer(device->GetPendingRecordingContext(), 0x01010101);
}
+ // Initialize the padding bytes to zero. 
+ if (device->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) && !mappedAtCreation) { + uint32_t paddingBytes = GetAllocatedSize() - GetSize(); + if (paddingBytes > 0) { + uint32_t clearSize = Align(paddingBytes, 4); + uint64_t clearOffset = GetAllocatedSize() - clearSize; + + CommandRecordingContext* recordingContext = device->GetPendingRecordingContext(); + ClearBuffer(recordingContext, 0, clearOffset, clearSize); + } + } return {}; } @@ -354,17 +383,21 @@ namespace dawn_native { namespace vulkan { SetIsDataInitialized(); } - void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue) { + void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, + uint32_t clearValue, + uint64_t offset, + uint64_t size) { ASSERT(recordingContext != nullptr); - ASSERT(GetAllocatedSize() > 0); + size = size > 0 ? size : GetAllocatedSize(); + ASSERT(size > 0); TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst); Device* device = ToBackend(GetDevice()); // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it. // Note: Allocated size must be a multiple of 4. 
- ASSERT(GetAllocatedSize() % 4 == 0); - device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, 0, GetAllocatedSize(), + ASSERT(size % 4 == 0); + device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, offset, size, clearValue); } }} // namespace dawn_native::vulkan diff --git a/src/dawn_native/vulkan/BufferVk.h b/src/dawn_native/vulkan/BufferVk.h index e9a8951e30..1c40140f12 100644 --- a/src/dawn_native/vulkan/BufferVk.h +++ b/src/dawn_native/vulkan/BufferVk.h @@ -55,7 +55,10 @@ namespace dawn_native { namespace vulkan { MaybeError Initialize(bool mappedAtCreation); void InitializeToZero(CommandRecordingContext* recordingContext); - void ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue); + void ClearBuffer(CommandRecordingContext* recordingContext, + uint32_t clearValue, + uint64_t offset = 0, + uint64_t size = 0); MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override; void UnmapImpl() override; diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp index 57be4d566a..04c961b839 100644 --- a/src/tests/end2end/BufferZeroInitTests.cpp +++ b/src/tests/end2end/BufferZeroInitTests.cpp @@ -14,6 +14,7 @@ #include "tests/DawnTest.h" +#include "common/Math.h" #include "utils/ComboRenderPipelineDescriptor.h" #include "utils/TestUtils.h" #include "utils/WGPUHelpers.h" @@ -201,8 +202,10 @@ class BufferZeroInitTest : public DawnTest { EXPECT_PIXEL_RGBA8_EQ(kExpectedColor, outputTexture, 0u, 0u); } - wgpu::RenderPipeline CreateRenderPipelineForTest(const char* vertexShader, - uint32_t vertexBufferCount = 1u) { + wgpu::RenderPipeline CreateRenderPipelineForTest( + const char* vertexShader, + uint32_t vertexBufferCount = 1u, + wgpu::VertexFormat vertexFormat = wgpu::VertexFormat::Float32x4) { constexpr wgpu::TextureFormat kColorAttachmentFormat = wgpu::TextureFormat::RGBA8Unorm; wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, vertexShader); @@ 
-219,9 +222,9 @@ class BufferZeroInitTest : public DawnTest { descriptor.cFragment.module = fsModule; descriptor.primitive.topology = wgpu::PrimitiveTopology::PointList; descriptor.vertex.bufferCount = vertexBufferCount; - descriptor.cBuffers[0].arrayStride = 4 * sizeof(float); + descriptor.cBuffers[0].arrayStride = Align(utils::VertexFormatSize(vertexFormat), 4); descriptor.cBuffers[0].attributeCount = 1; - descriptor.cAttributes[0].format = wgpu::VertexFormat::Float32x4; + descriptor.cAttributes[0].format = vertexFormat; descriptor.cTargets[0].format = kColorAttachmentFormat; return device.CreateRenderPipeline(&descriptor); } @@ -1128,6 +1131,114 @@ TEST_P(BufferZeroInitTest, SetVertexBuffer) { } } +// Test for crbug.com/dawn/837. +// Test that the padding after a buffer allocation is initialized to 0. +// This test makes an unaligned vertex buffer which should be padded in the backend +// allocation. It then tries to index off the end of the vertex buffer in an indexed +// draw call. A backend which implements robust buffer access via clamping should +// still see zeros at the end of the buffer. +TEST_P(BufferZeroInitTest, PaddingInitialized) { + DAWN_SUPPRESS_TEST_IF(IsANGLE()); // TODO(crbug.com/dawn/1084). + + constexpr wgpu::TextureFormat kColorAttachmentFormat = wgpu::TextureFormat::RGBA8Unorm; + // A small sub-4-byte format means a single vertex can fit entirely within the padded buffer, + // touching some of the padding. Test a small format, as well as larger formats. 
+ for (wgpu::VertexFormat vertexFormat :
+ {wgpu::VertexFormat::Unorm8x2, wgpu::VertexFormat::Float16x2,
+ wgpu::VertexFormat::Float32x2}) {
+ wgpu::RenderPipeline renderPipeline =
+ CreateRenderPipelineForTest(R"(
+ struct VertexOut {
+ [[location(0)]] color : vec4<f32>;
+ [[builtin(position)]] position : vec4<f32>;
+ };
+
+ [[stage(vertex)]] fn main([[location(0)]] pos : vec2<f32>) -> VertexOut {
+ var output : VertexOut;
+ if (all(pos == vec2<f32>(0.0, 0.0))) {
+ output.color = vec4<f32>(0.0, 1.0, 0.0, 1.0);
+ } else {
+ output.color = vec4<f32>(1.0, 0.0, 0.0, 1.0);
+ }
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+ })",
+ /* vertexBufferCount */ 1u, vertexFormat);
+
+ // Create an index buffer that indexes off the end of the vertex buffer.
+ wgpu::Buffer indexBuffer =
+ utils::CreateBufferFromData(device, wgpu::BufferUsage::Index, {1});
+
+ const uint32_t vertexFormatSize = utils::VertexFormatSize(vertexFormat);
+
+ // Create an 8-bit texture to use to initialize buffer contents.
+ wgpu::TextureDescriptor initTextureDesc = {};
+ initTextureDesc.size = {vertexFormatSize + 4, 1, 1};
+ initTextureDesc.format = wgpu::TextureFormat::R8Unorm;
+ initTextureDesc.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+ wgpu::ImageCopyTexture zeroTextureSrc =
+ utils::CreateImageCopyTexture(device.CreateTexture(&initTextureDesc), 0, {0, 0, 0});
+ {
+ wgpu::TextureDataLayout layout =
+ utils::CreateTextureDataLayout(0, wgpu::kCopyStrideUndefined);
+ std::vector<uint8_t> data(initTextureDesc.size.width);
+ queue.WriteTexture(&zeroTextureSrc, data.data(), data.size(), &layout,
+ &initTextureDesc.size);
+ }
+
+ for (uint32_t extraBytes : {0, 1, 2, 3, 4}) {
+ // Create a vertex buffer to hold a single vertex attribute.
+ // Uniform usage is added to force even more padding on D3D12.
+ // The buffer is internally padded and allocated as a larger buffer. 
+ const uint32_t vertexBufferSize = vertexFormatSize + extraBytes; + for (uint32_t vertexBufferOffset = 0; vertexBufferOffset <= vertexBufferSize; + vertexBufferOffset += 4u) { + wgpu::Buffer vertexBuffer = CreateBuffer( + vertexBufferSize, wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Uniform | + wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst); + + // "Fully" initialize the buffer with a copy from an 8-bit texture, touching + // everything except the padding. From the point-of-view of the API, all + // |vertexBufferSize| bytes are initialized. Note: Uses CopyTextureToBuffer because + // it does not require 4-byte alignment. + { + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::ImageCopyBuffer dst = + utils::CreateImageCopyBuffer(vertexBuffer, 0, wgpu::kCopyStrideUndefined); + wgpu::Extent3D extent = {vertexBufferSize, 1, 1}; + encoder.CopyTextureToBuffer(&zeroTextureSrc, &dst, &extent); + + wgpu::CommandBuffer commandBuffer = encoder.Finish(); + EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &commandBuffer)); + } + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + + wgpu::Texture colorAttachment = + CreateAndInitializeTexture({1, 1, 1}, kColorAttachmentFormat); + utils::ComboRenderPassDescriptor renderPassDescriptor( + {colorAttachment.CreateView()}); + + wgpu::RenderPassEncoder renderPass = encoder.BeginRenderPass(&renderPassDescriptor); + + renderPass.SetVertexBuffer(0, vertexBuffer, vertexBufferOffset); + renderPass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32); + + renderPass.SetPipeline(renderPipeline); + renderPass.DrawIndexed(1); + renderPass.EndPass(); + + wgpu::CommandBuffer commandBuffer = encoder.Finish(); + + EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &commandBuffer)); + + constexpr RGBA8 kExpectedPixelValue = {0, 255, 0, 255}; + EXPECT_PIXEL_RGBA8_EQ(kExpectedPixelValue, colorAttachment, 0, 0); + } + } + } +} + // Test the buffer will be lazily initialized correctly when its first use is in SetIndexBuffer. 
TEST_P(BufferZeroInitTest, SetIndexBuffer) {
// Bind the whole buffer as an index buffer.
diff --git a/src/tests/end2end/TextureZeroInitTests.cpp b/src/tests/end2end/TextureZeroInitTests.cpp index 56084a35fd..e43a5305fa 100644 --- a/src/tests/end2end/TextureZeroInitTests.cpp +++ b/src/tests/end2end/TextureZeroInitTests.cpp @@ -2060,6 +2060,84 @@ TEST_P(CompressedTextureZeroInitTest, HalfCopyTextureToTextureMipLevel) {
kViewMipLevel, 0, true);
}
+// Test uploading then reading back from a 2D array compressed texture.
+// This is a regression test for a bug where the final destination buffer
+// was considered fully initialized even though there was a 256-byte
+// stride between images.
+TEST_P(CompressedTextureZeroInitTest, Copy2DArrayCompressedB2T2B) {
+ // TODO(crbug.com/dawn/643): diagnose and fix this failure on OpenGL.
+ DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+ // create srcTexture with data
+ wgpu::TextureDescriptor textureDescriptor = CreateTextureDescriptor(
+ 4, 5, wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst, utils::kBCFormats[0]);
+ textureDescriptor.size = {8, 8, 5};
+ wgpu::Texture srcTexture = device.CreateTexture(&textureDescriptor);
+
+ uint32_t mipLevel = 2;
+ wgpu::Extent3D copyExtent3D = {4, 4, 5};
+
+ uint32_t copyWidthInBlock = copyExtent3D.width / kFormatBlockByteSize;
+ uint32_t copyHeightInBlock = copyExtent3D.height / kFormatBlockByteSize;
+ uint32_t copyRowsPerImage = copyHeightInBlock;
+ uint32_t copyBytesPerRow =
+ Align(copyWidthInBlock * utils::GetTexelBlockSizeInBytes(textureDescriptor.format),
+ kTextureBytesPerRowAlignment);
+
+ // Generate data to upload
+ std::vector<uint8_t> data(utils::RequiredBytesInCopy(copyBytesPerRow, copyRowsPerImage,
+ copyExtent3D, textureDescriptor.format));
+ for (size_t i = 0; i < data.size(); ++i) {
+ data[i] = i % 255;
+ }
+
+ // Copy texture data from a staging buffer to the destination texture. 
+ wgpu::Buffer stagingBuffer =
+ utils::CreateBufferFromData(device, data.data(), data.size(), wgpu::BufferUsage::CopySrc);
+ wgpu::ImageCopyBuffer imageCopyBufferSrc =
+ utils::CreateImageCopyBuffer(stagingBuffer, 0, copyBytesPerRow, copyRowsPerImage);
+
+ wgpu::ImageCopyTexture imageCopyTexture =
+ utils::CreateImageCopyTexture(srcTexture, mipLevel, {0, 0, 0});
+
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ encoder.CopyBufferToTexture(&imageCopyBufferSrc, &imageCopyTexture, &copyExtent3D);
+ wgpu::CommandBuffer copy = encoder.Finish();
+ EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &copy));
+ }
+
+ // Create a buffer to read back the data. It is the same size as the upload buffer.
+ wgpu::BufferDescriptor readbackDesc = {};
+ readbackDesc.size = data.size();
+ readbackDesc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+ wgpu::Buffer readbackBuffer = device.CreateBuffer(&readbackDesc);
+
+ // Copy the texture to the readback buffer.
+ wgpu::ImageCopyBuffer imageCopyBufferDst =
+ utils::CreateImageCopyBuffer(readbackBuffer, 0, copyBytesPerRow, copyRowsPerImage);
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ encoder.CopyTextureToBuffer(&imageCopyTexture, &imageCopyBufferDst, &copyExtent3D);
+ wgpu::CommandBuffer copy = encoder.Finish();
+
+ // Expect a lazy clear because the padding in the copy is not touched.
+ EXPECT_LAZY_CLEAR(1u, queue.Submit(1, &copy));
+ }
+
+ // Generate expected data. It is the same as the upload data, but padding is zero. 
+ std::vector<uint8_t> expected(data.size(), 0);
+ for (uint32_t z = 0; z < copyExtent3D.depthOrArrayLayers; ++z) {
+ for (uint32_t y = 0; y < copyHeightInBlock; ++y) {
+ memcpy(&expected[copyBytesPerRow * y + copyBytesPerRow * copyRowsPerImage * z],
+ &data[copyBytesPerRow * y + copyBytesPerRow * copyRowsPerImage * z],
+ copyWidthInBlock * utils::GetTexelBlockSizeInBytes(textureDescriptor.format));
+ }
+ }
+ // Check final contents
+ EXPECT_BUFFER_U8_RANGE_EQ(expected.data(), readbackBuffer, 0, expected.size());
+}
+
 DAWN_INSTANTIATE_TEST(CompressedTextureZeroInitTest,
D3D12Backend({"nonzero_clear_resources_on_creation_for_testing"}),
MetalBackend({"nonzero_clear_resources_on_creation_for_testing"}),
diff --git a/src/utils/TestUtils.cpp b/src/utils/TestUtils.cpp index adbdd28be1..8f359abe57 100644 --- a/src/utils/TestUtils.cpp +++ b/src/utils/TestUtils.cpp @@ -134,4 +134,47 @@ namespace utils {
device.GetQueue().WriteTexture(&imageCopyTexture, data.data(), 1, &textureDataLayout,
&copyExtent);
}
+
+ uint32_t VertexFormatSize(wgpu::VertexFormat format) {
+ switch (format) {
+ case wgpu::VertexFormat::Uint8x2:
+ case wgpu::VertexFormat::Sint8x2:
+ case wgpu::VertexFormat::Unorm8x2:
+ case wgpu::VertexFormat::Snorm8x2:
+ return 2;
+ case wgpu::VertexFormat::Uint8x4:
+ case wgpu::VertexFormat::Sint8x4:
+ case wgpu::VertexFormat::Unorm8x4:
+ case wgpu::VertexFormat::Snorm8x4:
+ case wgpu::VertexFormat::Uint16x2:
+ case wgpu::VertexFormat::Sint16x2:
+ case wgpu::VertexFormat::Unorm16x2:
+ case wgpu::VertexFormat::Snorm16x2:
+ case wgpu::VertexFormat::Float16x2:
+ case wgpu::VertexFormat::Float32:
+ case wgpu::VertexFormat::Uint32:
+ case wgpu::VertexFormat::Sint32:
+ return 4;
+ case wgpu::VertexFormat::Uint16x4:
+ case wgpu::VertexFormat::Sint16x4:
+ case wgpu::VertexFormat::Unorm16x4:
+ case wgpu::VertexFormat::Snorm16x4:
+ case wgpu::VertexFormat::Float16x4:
+ case wgpu::VertexFormat::Float32x2:
+ case wgpu::VertexFormat::Uint32x2:
+ case wgpu::VertexFormat::Sint32x2:
+ return 8; 
+ case wgpu::VertexFormat::Float32x3: + case wgpu::VertexFormat::Uint32x3: + case wgpu::VertexFormat::Sint32x3: + return 12; + case wgpu::VertexFormat::Float32x4: + case wgpu::VertexFormat::Uint32x4: + case wgpu::VertexFormat::Sint32x4: + return 16; + case wgpu::VertexFormat::Undefined: + UNREACHABLE(); + } + } + } // namespace utils diff --git a/src/utils/TestUtils.h b/src/utils/TestUtils.h index d611124175..02b0dafb31 100644 --- a/src/utils/TestUtils.h +++ b/src/utils/TestUtils.h @@ -60,6 +60,8 @@ namespace utils { // in it will contain 1 byte of data. void UnalignDynamicUploader(wgpu::Device device); + uint32_t VertexFormatSize(wgpu::VertexFormat format); + } // namespace utils #endif // UTILS_TESTHELPERS_H_