diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp
index 44adbcf31f..0fb7ab23de 100644
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@@ -80,6 +80,7 @@ namespace dawn_native {
             uint32_t optimallyAlignedBytesPerRow,
             uint32_t alignedRowsPerImage,
             const TextureDataLayout& dataLayout,
+            bool hasDepthOrStencil,
             const TexelBlockInfo& blockInfo,
             const Extent3D& writeSizePixel) {
             uint64_t newDataSizeBytes;
@@ -97,6 +98,13 @@ namespace dawn_native {
             uint64_t offsetAlignment =
                 std::max(optimalOffsetAlignment, uint64_t(blockInfo.byteSize));
 
+            // For depth-stencil texture, buffer offset must be a multiple of 4, which is required
+            // by WebGPU and Vulkan SPEC.
+            if (hasDepthOrStencil) {
+                constexpr uint64_t kOffsetAlignmentForDepthStencil = 4;
+                offsetAlignment = std::max(offsetAlignment, kOffsetAlignmentForDepthStencil);
+            }
+
             UploadHandle uploadHandle;
             DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
                                               newDataSizeBytes, device->GetPendingCommandSerial(),
@@ -315,8 +323,8 @@ namespace dawn_native {
                                            const void* data,
                                            const TextureDataLayout& dataLayout,
                                            const Extent3D& writeSizePixel) {
-        const TexelBlockInfo& blockInfo =
-            destination.texture->GetFormat().GetAspectInfo(destination.aspect).block;
+        const Format& format = destination.texture->GetFormat();
+        const TexelBlockInfo& blockInfo = format.GetAspectInfo(destination.aspect).block;
 
         // We are only copying the part of the data that will appear in the texture.
         // Note that validating texture copy range ensures that writeSizePixel->width and
@@ -334,7 +342,8 @@ namespace dawn_native {
         DAWN_TRY_ASSIGN(uploadHandle,
                         UploadTextureDataAligningBytesPerRowAndOffset(
                             GetDevice(), data, alignedBytesPerRow, optimallyAlignedBytesPerRow,
-                            alignedRowsPerImage, dataLayout, blockInfo, writeSizePixel));
+                            alignedRowsPerImage, dataLayout, format.HasDepthOrStencil(), blockInfo,
+                            writeSizePixel));
 
         TextureDataLayout passDataLayout = dataLayout;
         passDataLayout.offset = uploadHandle.startOffset;
@@ -345,7 +354,7 @@ namespace dawn_native {
         textureCopy.texture = destination.texture;
         textureCopy.mipLevel = destination.mipLevel;
         textureCopy.origin = destination.origin;
-        textureCopy.aspect = ConvertAspect(destination.texture->GetFormat(), destination.aspect);
+        textureCopy.aspect = ConvertAspect(format, destination.aspect);
 
         DeviceBase* device = GetDevice();
 
diff --git a/src/tests/end2end/QueueTests.cpp b/src/tests/end2end/QueueTests.cpp
index d2288e03b9..014eb529c5 100644
--- a/src/tests/end2end/QueueTests.cpp
+++ b/src/tests/end2end/QueueTests.cpp
@@ -632,6 +632,77 @@ TEST_P(QueueWriteTextureTests, WriteTo64x1TextureFromUnalignedDynamicUploader) {
     DoSimpleWriteTextureTest(64, 1);
 }
 
+// This tests for a bug in the allocation of internal staging buffer, which incorrectly copied depth
+// stencil data to the internal offset that is not a multiple of 4.
+TEST_P(QueueWriteTextureTests, WriteStencilAspectWithSourceOffsetUnalignedTo4) {
+    // Copies to a single aspect are unsupported on OpenGL.
+    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    wgpu::TextureDescriptor textureDescriptor;
+    textureDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+    textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+    textureDescriptor.size = {1, 1, 1};
+    wgpu::Texture dstTexture1 = device.CreateTexture(&textureDescriptor);
+    wgpu::Texture dstTexture2 = device.CreateTexture(&textureDescriptor);
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = 8u;
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer outputBuffer = device.CreateBuffer(&bufferDescriptor);
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+    constexpr wgpu::Extent3D kWriteSize = {1, 1, 1};
+    constexpr uint8_t kData[] = {1, 2};
+    constexpr uint32_t kBytesPerRowForWriteTexture = 1u;
+
+    std::vector<uint8_t> expectedData(8, 0);
+
+    // In the first call of queue.writeTexture(), Dawn will allocate a new staging buffer in its
+    // internal ring buffer and write the user data into it at the offset 0.
+    {
+        constexpr uint32_t kDataOffset1 = 0u;
+        wgpu::TextureDataLayout textureDataLayout =
+            utils::CreateTextureDataLayout(kDataOffset1, kBytesPerRowForWriteTexture);
+        wgpu::ImageCopyTexture imageCopyTexture = utils::CreateImageCopyTexture(
+            dstTexture1, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+        queue.WriteTexture(&imageCopyTexture, kData, sizeof(kData), &textureDataLayout,
+                           &kWriteSize);
+
+        constexpr uint32_t kOutputBufferOffset1 = 0u;
+        wgpu::ImageCopyBuffer imageCopyBuffer = utils::CreateImageCopyBuffer(
+            outputBuffer, kOutputBufferOffset1, kTextureBytesPerRowAlignment);
+        encoder.CopyTextureToBuffer(&imageCopyTexture, &imageCopyBuffer, &kWriteSize);
+
+        expectedData[kOutputBufferOffset1] = kData[kDataOffset1];
+    }
+
+    // In the second call of queue.writeTexture(), Dawn will still use the same staging buffer
+    // allocated in the first call, whose first 2 bytes have been used in the first call of
+    // queue.writeTexture(). Dawn should write the user data at the offset 4 bytes since the
+    // destination texture aspect is stencil.
+    {
+        constexpr uint32_t kDataOffset2 = 1u;
+        wgpu::TextureDataLayout textureDataLayout =
+            utils::CreateTextureDataLayout(kDataOffset2, kBytesPerRowForWriteTexture);
+        wgpu::ImageCopyTexture imageCopyTexture = utils::CreateImageCopyTexture(
+            dstTexture2, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+        queue.WriteTexture(&imageCopyTexture, kData, sizeof(kData), &textureDataLayout,
+                           &kWriteSize);
+
+        constexpr uint32_t kOutputBufferOffset2 = 4u;
+        wgpu::ImageCopyBuffer imageCopyBuffer = utils::CreateImageCopyBuffer(
+            outputBuffer, kOutputBufferOffset2, kTextureBytesPerRowAlignment);
+        encoder.CopyTextureToBuffer(&imageCopyTexture, &imageCopyBuffer, &kWriteSize);
+
+        expectedData[kOutputBufferOffset2] = kData[kDataOffset2];
+    }
+
+    wgpu::CommandBuffer commandBuffer = encoder.Finish();
+    queue.Submit(1, &commandBuffer);
+
+    EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data(), outputBuffer, 0, 8);
+}
+
 DAWN_INSTANTIATE_TEST(QueueWriteTextureTests,
                       D3D12Backend(),
                       MetalBackend(),