From 9d66c5353f9aa3d52798cdc0edb604d3c63e1753 Mon Sep 17 00:00:00 2001 From: Tomek Ponitka Date: Wed, 15 Jul 2020 18:06:07 +0000 Subject: [PATCH] Implementing Queue::WriteTexture in Metal Added implementation of writeTexture in Metal. It's using a staging buffer instead of writing directly from the CPU to the texture, because Dawn uses the private storage mode for most of the Metal textures. Bug: dawn:483 Change-Id: I6b85ee8bbe343881337bdb203a122dc1f1523177 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24581 Commit-Queue: Tomek Ponitka Reviewed-by: Austin Eng --- dawn_wire.json | 2 +- src/dawn_native/CommandValidation.cpp | 59 ++-- src/dawn_native/CommandValidation.h | 5 + src/dawn_native/Queue.cpp | 5 + src/dawn_native/Texture.cpp | 13 + src/dawn_native/Texture.h | 3 + src/dawn_native/metal/CommandBufferMTL.mm | 158 +-------- src/dawn_native/metal/DeviceMTL.h | 5 + src/dawn_native/metal/DeviceMTL.mm | 50 +++ src/dawn_native/metal/QueueMTL.h | 5 + src/dawn_native/metal/QueueMTL.mm | 87 +++++ src/dawn_native/metal/UtilsMetal.h | 30 ++ src/dawn_native/metal/UtilsMetal.mm | 123 +++++++ src/dawn_wire/client/Queue.cpp | 1 + .../end2end/CompressedTextureFormatTests.cpp | 169 +++++++-- src/tests/end2end/CopyTests.cpp | 14 +- src/tests/end2end/QueueTests.cpp | 320 ++++++++++++++++++ src/utils/WGPUHelpers.cpp | 19 +- src/utils/WGPUHelpers.h | 7 +- 19 files changed, 842 insertions(+), 233 deletions(-) diff --git a/dawn_wire.json b/dawn_wire.json index 8bbb473ceb..21e6988ccd 100644 --- a/dawn_wire.json +++ b/dawn_wire.json @@ -58,7 +58,7 @@ "queue write texture internal": [ {"name": "queue id", "type": "ObjectId" }, {"name": "destination", "type": "texture copy view", "annotation": "const*"}, - {"name": "data", "type": "uint8_t", "annotation": "const*"}, + {"name": "data", "type": "uint8_t", "annotation": "const*", "length": "data size"}, {"name": "data size", "type": "size_t"}, {"name": "data layout", "type": "texture data layout", "annotation": "const*"}, 
{"name": "writeSize", "type": "extent 3D", "annotation": "const*"} diff --git a/src/dawn_native/CommandValidation.cpp b/src/dawn_native/CommandValidation.cpp index 8444655b30..db8714c53f 100644 --- a/src/dawn_native/CommandValidation.cpp +++ b/src/dawn_native/CommandValidation.cpp @@ -120,36 +120,6 @@ namespace dawn_native { return {}; } - void ComputeRequiredBytesInCopy(const Format& textureFormat, - const Extent3D& copySize, - uint32_t bytesPerRow, - uint32_t rowsPerImage, - uint32_t* result) { - // Default value for rowsPerImage - if (rowsPerImage == 0) { - rowsPerImage = copySize.height; - } - ASSERT(rowsPerImage >= copySize.height); - if (copySize.width == 0 || copySize.height == 0 || copySize.depth == 0) { - *result = 0; - return; - } - - uint32_t blockByteSize = textureFormat.blockByteSize; - uint32_t blockWidth = textureFormat.blockWidth; - uint32_t blockHeight = textureFormat.blockHeight; - - // TODO(cwallez@chromium.org): check for overflows - uint32_t slicePitch = bytesPerRow * rowsPerImage / blockWidth; - - ASSERT(copySize.height >= 1); - uint32_t sliceSize = bytesPerRow * (copySize.height / blockHeight - 1) + - (copySize.width / blockWidth) * blockByteSize; - - ASSERT(copySize.depth >= 1); - *result = (slicePitch * (copySize.depth - 1)) + sliceSize; - } - } // namespace MaybeError ValidateCanPopDebugGroup(uint64_t debugGroupStackSize) { @@ -400,6 +370,30 @@ namespace dawn_native { static_cast(maxStart); } + uint32_t ComputeRequiredBytesInCopy(const Format& textureFormat, + const Extent3D& copySize, + uint32_t bytesPerRow, + uint32_t rowsPerImage) { + // Default value for rowsPerImage + if (rowsPerImage == 0) { + rowsPerImage = copySize.height; + } + ASSERT(rowsPerImage >= copySize.height); + if (copySize.width == 0 || copySize.height == 0 || copySize.depth == 0) { + return 0; + } + + ASSERT(copySize.height >= 1); + ASSERT(copySize.depth >= 1); + + uint64_t texelBlockRowsPerImage = rowsPerImage / textureFormat.blockHeight; + uint64_t bytesPerImage = 
bytesPerRow * texelBlockRowsPerImage; + uint64_t bytesInLastSlice = + bytesPerRow * (copySize.height / textureFormat.blockHeight - 1) + + (copySize.width / textureFormat.blockWidth * textureFormat.blockByteSize); + return bytesPerImage * (copySize.depth - 1) + bytesInLastSlice; + } + MaybeError ValidateCopySizeFitsInBuffer(const Ref& buffer, uint64_t offset, uint64_t size) { @@ -423,9 +417,8 @@ namespace dawn_native { // TODO(tommek@google.com): to match the spec this should only be checked when // copyExtent.depth > 1. - uint32_t requiredBytesInCopy = 0; - ComputeRequiredBytesInCopy(format, copyExtent, layout.bytesPerRow, layout.rowsPerImage, - &requiredBytesInCopy); + uint32_t requiredBytesInCopy = + ComputeRequiredBytesInCopy(format, copyExtent, layout.bytesPerRow, layout.rowsPerImage); bool fitsInData = layout.offset <= byteSize && (requiredBytesInCopy <= (byteSize - layout.offset)); diff --git a/src/dawn_native/CommandValidation.h b/src/dawn_native/CommandValidation.h index cee2b135a9..72d876d164 100644 --- a/src/dawn_native/CommandValidation.h +++ b/src/dawn_native/CommandValidation.h @@ -40,6 +40,11 @@ namespace dawn_native { MaybeError ValidateTimestampQuery(QuerySetBase* querySet, uint32_t queryIndex); + uint32_t ComputeRequiredBytesInCopy(const Format& textureFormat, + const Extent3D& copySize, + uint32_t bytesPerRow, + uint32_t rowsPerImage); + MaybeError ValidateLinearTextureData(const TextureDataLayout& layout, uint64_t byteSize, const Format& format, diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp index e70a3787fa..9ac92f3e5d 100644 --- a/src/dawn_native/Queue.cpp +++ b/src/dawn_native/Queue.cpp @@ -147,6 +147,11 @@ namespace dawn_native { const TextureDataLayout* dataLayout, const Extent3D* writeSize) { DAWN_TRY(ValidateWriteTexture(destination, dataSize, dataLayout, writeSize)); + + if (writeSize->width == 0 || writeSize->height == 0 || writeSize->depth == 0) { + return {}; + } + return WriteTextureImpl(destination, data, 
dataSize, dataLayout, writeSize); } diff --git a/src/dawn_native/Texture.cpp b/src/dawn_native/Texture.cpp index 948c06d50a..9cf2d689f4 100644 --- a/src/dawn_native/Texture.cpp +++ b/src/dawn_native/Texture.cpp @@ -541,6 +541,19 @@ namespace dawn_native { return extent; } + Extent3D TextureBase::ClampToMipLevelVirtualSize(uint32_t level, + const Origin3D& origin, + const Extent3D& extent) const { + const Extent3D virtualSizeAtLevel = GetMipLevelVirtualSize(level); + uint32_t clampedCopyExtentWidth = (origin.x + extent.width > virtualSizeAtLevel.width) + ? (virtualSizeAtLevel.width - origin.x) + : extent.width; + uint32_t clampedCopyExtentHeight = (origin.y + extent.height > virtualSizeAtLevel.height) + ? (virtualSizeAtLevel.height - origin.y) + : extent.height; + return {clampedCopyExtentWidth, clampedCopyExtentHeight, extent.depth}; + } + TextureViewBase* TextureBase::CreateView(const TextureViewDescriptor* descriptor) { return GetDevice()->CreateTextureView(this, descriptor); } diff --git a/src/dawn_native/Texture.h b/src/dawn_native/Texture.h index ce1bc9058d..7c476ba252 100644 --- a/src/dawn_native/Texture.h +++ b/src/dawn_native/Texture.h @@ -91,6 +91,9 @@ namespace dawn_native { // required to be a multiple of the block size and used in texture sampling. 
Extent3D GetMipLevelPhysicalSize(uint32_t level) const; Extent3D GetMipLevelVirtualSize(uint32_t level) const; + Extent3D ClampToMipLevelVirtualSize(uint32_t level, + const Origin3D& origin, + const Extent3D& extent) const; // Dawn API TextureViewBase* CreateView(const TextureViewDescriptor* descriptor); diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm index 0f923f4116..97c0ad1b5d 100644 --- a/src/dawn_native/metal/CommandBufferMTL.mm +++ b/src/dawn_native/metal/CommandBufferMTL.mm @@ -26,6 +26,7 @@ #include "dawn_native/metal/RenderPipelineMTL.h" #include "dawn_native/metal/SamplerMTL.h" #include "dawn_native/metal/TextureMTL.h" +#include "dawn_native/metal/UtilsMetal.h" namespace dawn_native { namespace metal { @@ -309,149 +310,6 @@ namespace dawn_native { namespace metal { } }; - struct TextureBufferCopySplit { - static constexpr uint32_t kMaxTextureBufferCopyRegions = 3; - - struct CopyInfo { - NSUInteger bufferOffset; - NSUInteger bytesPerRow; - NSUInteger bytesPerImage; - Origin3D textureOrigin; - Extent3D copyExtent; - }; - - uint32_t count = 0; - std::array copies; - }; - - TextureBufferCopySplit ComputeTextureBufferCopySplit(wgpu::TextureDimension dimension, - Origin3D origin, - Extent3D copyExtent, - Format textureFormat, - Extent3D virtualSizeAtLevel, - uint64_t bufferSize, - uint64_t bufferOffset, - uint32_t bytesPerRow, - uint32_t rowsPerImage) { - TextureBufferCopySplit copy; - - // When copying textures from/to an unpacked buffer, the Metal validation layer doesn't - // compute the correct range when checking if the buffer is big enough to contain the - // data for the whole copy. Instead of looking at the position of the last texel in the - // buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage / - // format.blockHeight) * copySize.depth. 
For example considering the pixel buffer below - // where in memory, each row data (D) of the texture is followed by some padding data - // (P): - // |DDDDDDD|PP| - // |DDDDDDD|PP| - // |DDDDDDD|PP| - // |DDDDDDD|PP| - // |DDDDDDA|PP| - // The last pixel read will be A, but the driver will think it is the whole last padding - // row, causing it to generate an error when the pixel buffer is just big enough. - - // We work around this limitation by detecting when Metal would complain and copy the - // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage. - uint32_t dataRowsPerImage = rowsPerImage / textureFormat.blockHeight; - uint32_t bytesPerImage = bytesPerRow * dataRowsPerImage; - - // Metal validation layer requires that if the texture's pixel format is a compressed - // format, the sourceSize must be a multiple of the pixel format's block size or be - // clamped to the edge of the texture if the block extends outside the bounds of a - // texture. - uint32_t clampedCopyExtentWidth = - (origin.x + copyExtent.width > virtualSizeAtLevel.width) - ? (virtualSizeAtLevel.width - origin.x) - : copyExtent.width; - uint32_t clampedCopyExtentHeight = - (origin.y + copyExtent.height > virtualSizeAtLevel.height) - ? (virtualSizeAtLevel.height - origin.y) - : copyExtent.height; - - ASSERT(dimension == wgpu::TextureDimension::e2D); - - // Check whether buffer size is big enough. - bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth; - if (!needWorkaround) { - copy.count = 1; - copy.copies[0].bufferOffset = bufferOffset; - copy.copies[0].bytesPerRow = bytesPerRow; - copy.copies[0].bytesPerImage = bytesPerImage; - copy.copies[0].textureOrigin = origin; - copy.copies[0].copyExtent = {clampedCopyExtentWidth, clampedCopyExtentHeight, - copyExtent.depth}; - return copy; - } - - uint64_t currentOffset = bufferOffset; - - // Doing all the copy except the last image. 
- if (copyExtent.depth > 1) { - copy.copies[copy.count].bufferOffset = currentOffset; - copy.copies[copy.count].bytesPerRow = bytesPerRow; - copy.copies[copy.count].bytesPerImage = bytesPerImage; - copy.copies[copy.count].textureOrigin = origin; - copy.copies[copy.count].copyExtent = { - clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1}; - - ++copy.count; - - // Update offset to copy to the last image. - currentOffset += (copyExtent.depth - 1) * bytesPerImage; - } - - // Doing all the copy in last image except the last row. - uint32_t copyBlockRowCount = copyExtent.height / textureFormat.blockHeight; - if (copyBlockRowCount > 1) { - copy.copies[copy.count].bufferOffset = currentOffset; - copy.copies[copy.count].bytesPerRow = bytesPerRow; - copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1); - copy.copies[copy.count].textureOrigin = {origin.x, origin.y, - origin.z + copyExtent.depth - 1}; - - ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height); - copy.copies[copy.count].copyExtent = { - clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1}; - - ++copy.count; - - // Update offset to copy to the last row. - currentOffset += (copyBlockRowCount - 1) * bytesPerRow; - } - - // Doing the last row copy with the exact number of bytes in last row. - // Workaround this issue in a way just like the copy to a 1D texture. 
- uint32_t lastRowDataSize = - (copyExtent.width / textureFormat.blockWidth) * textureFormat.blockByteSize; - uint32_t lastRowCopyExtentHeight = - textureFormat.blockHeight + clampedCopyExtentHeight - copyExtent.height; - ASSERT(lastRowCopyExtentHeight <= textureFormat.blockHeight); - - copy.copies[copy.count].bufferOffset = currentOffset; - copy.copies[copy.count].bytesPerRow = lastRowDataSize; - copy.copies[copy.count].bytesPerImage = lastRowDataSize; - copy.copies[copy.count].textureOrigin = { - origin.x, origin.y + copyExtent.height - textureFormat.blockHeight, - origin.z + copyExtent.depth - 1}; - copy.copies[copy.count].copyExtent = {clampedCopyExtentWidth, lastRowCopyExtentHeight, - 1}; - ++copy.count; - - return copy; - } - - void EnsureDestinationTextureInitialized(Texture* texture, - const TextureCopy& dst, - const Extent3D& size) { - ASSERT(texture == dst.texture.Get()); - SubresourceRange range = GetSubresourcesAffectedByCopy(dst, size); - if (IsCompleteSubresourceCopiedTo(dst.texture.Get(), size, dst.mipLevel)) { - texture->SetIsSubresourceContentInitialized(true, range); - } else { - texture->EnsureSubresourceContentInitialized(range); - } - } - // Keeps track of the dirty bind groups so they can be lazily applied when we know the // pipeline state. 
// Bind groups may be inherited because bind groups are packed in the buffer / @@ -745,13 +603,9 @@ namespace dawn_native { namespace metal { EnsureDestinationTextureInitialized(texture, copy->destination, copy->copySize); - const Extent3D virtualSizeAtLevel = - texture->GetMipLevelVirtualSize(dst.mipLevel); - TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit( - texture->GetDimension(), dst.origin, copySize, texture->GetFormat(), - virtualSizeAtLevel, buffer->GetSize(), src.offset, src.bytesPerRow, - src.rowsPerImage); + texture, dst.mipLevel, dst.origin, copySize, buffer->GetSize(), src.offset, + src.bytesPerRow, src.rowsPerImage); for (uint32_t i = 0; i < splitCopies.count; ++i) { const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i]; @@ -793,11 +647,9 @@ namespace dawn_native { namespace metal { texture->EnsureSubresourceContentInitialized( GetSubresourcesAffectedByCopy(src, copySize)); - Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel); TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit( - texture->GetDimension(), src.origin, copySize, texture->GetFormat(), - virtualSizeAtLevel, buffer->GetSize(), dst.offset, dst.bytesPerRow, - dst.rowsPerImage); + texture, src.mipLevel, src.origin, copySize, buffer->GetSize(), dst.offset, + dst.bytesPerRow, dst.rowsPerImage); for (uint32_t i = 0; i < splitCopies.count; ++i) { const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i]; diff --git a/src/dawn_native/metal/DeviceMTL.h b/src/dawn_native/metal/DeviceMTL.h index 6baa728931..87a697bce5 100644 --- a/src/dawn_native/metal/DeviceMTL.h +++ b/src/dawn_native/metal/DeviceMTL.h @@ -18,6 +18,7 @@ #include "dawn_native/dawn_platform.h" #include "common/Serial.h" +#include "dawn_native/Commands.h" #include "dawn_native/Device.h" #include "dawn_native/metal/CommandRecordingContext.h" #include "dawn_native/metal/Forward.h" @@ -63,6 +64,10 @@ namespace dawn_native { namespace metal { 
BufferBase* destination, uint64_t destinationOffset, uint64_t size) override; + MaybeError CopyFromStagingToTexture(StagingBufferBase* source, + const TextureDataLayout& dataLayout, + TextureCopy* dst, + const Extent3D copySize); private: Device(AdapterBase* adapter, id mtlDevice, const DeviceDescriptor* descriptor); diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm index 31ae40b046..d5dbbc5ea5 100644 --- a/src/dawn_native/metal/DeviceMTL.mm +++ b/src/dawn_native/metal/DeviceMTL.mm @@ -16,6 +16,7 @@ #include "dawn_native/BackendConnection.h" #include "dawn_native/BindGroupLayout.h" +#include "dawn_native/Commands.h" #include "dawn_native/ErrorData.h" #include "dawn_native/metal/BindGroupLayoutMTL.h" #include "dawn_native/metal/BindGroupMTL.h" @@ -30,6 +31,7 @@ #include "dawn_native/metal/StagingBufferMTL.h" #include "dawn_native/metal/SwapChainMTL.h" #include "dawn_native/metal/TextureMTL.h" +#include "dawn_native/metal/UtilsMetal.h" #include "dawn_platform/DawnPlatform.h" #include "dawn_platform/tracing/TraceEvent.h" @@ -266,6 +268,54 @@ namespace dawn_native { namespace metal { return {}; } + MaybeError Device::CopyFromStagingToTexture(StagingBufferBase* source, + const TextureDataLayout& dataLayout, + TextureCopy* dst, + const Extent3D copySize) { + Texture* texture = ToBackend(dst->texture.Get()); + + // This function assumes data is perfectly aligned. Otherwise, it might be necessary + // to split copying to several stages: see ComputeTextureBufferCopySplit. + uint32_t blockSize = dst->texture->GetFormat().blockByteSize; + uint32_t blockWidth = dst->texture->GetFormat().blockWidth; + uint32_t blockHeight = dst->texture->GetFormat().blockHeight; + ASSERT(dataLayout.rowsPerImage == (copySize.height)); + ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize); + + // TODO(tommek@google.com): Add tests for this in TextureZeroInitTests. 
+ EnsureDestinationTextureInitialized(texture, *dst, copySize); + + // Metal validation layer requires that if the texture's pixel format is a compressed + // format, the sourceSize must be a multiple of the pixel format's block size or be + // clamped to the edge of the texture if the block extends outside the bounds of a + // texture. + const Extent3D clampedSize = + texture->ClampToMipLevelVirtualSize(dst->mipLevel, dst->origin, copySize); + const uint32_t copyBaseLayer = dst->origin.z; + const uint32_t copyLayerCount = copySize.depth; + const uint64_t bytesPerImage = + dataLayout.rowsPerImage * dataLayout.bytesPerRow / blockHeight; + + uint64_t bufferOffset = dataLayout.offset; + for (uint32_t copyLayer = copyBaseLayer; copyLayer < copyBaseLayer + copyLayerCount; + ++copyLayer) { + [GetPendingCommandContext()->EnsureBlit() + copyFromBuffer:ToBackend(source)->GetBufferHandle() + sourceOffset:bufferOffset + sourceBytesPerRow:dataLayout.bytesPerRow + sourceBytesPerImage:bytesPerImage + sourceSize:MTLSizeMake(clampedSize.width, clampedSize.height, 1) + toTexture:texture->GetMTLTexture() + destinationSlice:copyLayer + destinationLevel:dst->mipLevel + destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)]; + + bufferOffset += bytesPerImage; + } + + return {}; + } + TextureBase* Device::CreateTextureWrappingIOSurface(const ExternalImageDescriptor* descriptor, IOSurfaceRef ioSurface, uint32_t plane) { diff --git a/src/dawn_native/metal/QueueMTL.h b/src/dawn_native/metal/QueueMTL.h index 2dd718e5c0..bda47eb853 100644 --- a/src/dawn_native/metal/QueueMTL.h +++ b/src/dawn_native/metal/QueueMTL.h @@ -28,6 +28,11 @@ namespace dawn_native { namespace metal { private: MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands) override; + MaybeError WriteTextureImpl(const TextureCopyView* destination, + const void* data, + size_t dataSize, + const TextureDataLayout* dataLayout, + const Extent3D* writeSize) override; }; }} // namespace 
dawn_native::metal diff --git a/src/dawn_native/metal/QueueMTL.mm b/src/dawn_native/metal/QueueMTL.mm index ffe6ca181c..c0245b1b63 100644 --- a/src/dawn_native/metal/QueueMTL.mm +++ b/src/dawn_native/metal/QueueMTL.mm @@ -14,12 +14,62 @@ #include "dawn_native/metal/QueueMTL.h" +#include "common/Math.h" +#include "dawn_native/Buffer.h" +#include "dawn_native/CommandValidation.h" +#include "dawn_native/Commands.h" +#include "dawn_native/DynamicUploader.h" #include "dawn_native/metal/CommandBufferMTL.h" #include "dawn_native/metal/DeviceMTL.h" #include "dawn_platform/DawnPlatform.h" #include "dawn_platform/tracing/TraceEvent.h" namespace dawn_native { namespace metal { + namespace { + ResultOrError UploadTextureDataAligningBytesPerRow( + DeviceBase* device, + const void* data, + size_t dataSize, + uint32_t alignedBytesPerRow, + uint32_t alignedRowsPerImage, + const TextureDataLayout* dataLayout, + const Format& textureFormat, + const Extent3D* writeSize) { + uint32_t newDataSize = ComputeRequiredBytesInCopy( + textureFormat, *writeSize, alignedBytesPerRow, alignedRowsPerImage); + + UploadHandle uploadHandle; + DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate( + newDataSize, device->GetPendingCommandSerial())); + ASSERT(uploadHandle.mappedBuffer != nullptr); + + // TODO(tommek@google.com): Add an optimization to do a single memcpy if the data + // is already correctly packed. 
+ uint8_t* dstPointer = static_cast(uploadHandle.mappedBuffer); + const uint8_t* srcPointer = static_cast(data); + srcPointer += dataLayout->offset; + + uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / textureFormat.blockHeight; + uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / textureFormat.blockHeight; + if (dataRowsPerImageInBlock == 0) { + dataRowsPerImageInBlock = writeSize->height / textureFormat.blockHeight; + } + + ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock); + uint64_t imageAdditionalStride = + dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock); + for (uint32_t d = 0; d < writeSize->depth; ++d) { + for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) { + memcpy(dstPointer, srcPointer, alignedBytesPerRow); + dstPointer += alignedBytesPerRow; + srcPointer += dataLayout->bytesPerRow; + } + srcPointer += imageAdditionalStride; + } + + return uploadHandle; + } + } Queue::Queue(Device* device) : QueueBase(device) { } @@ -39,4 +89,41 @@ namespace dawn_native { namespace metal { return {}; } + // We don't write from the CPU to the texture directly which can be done in Metal using the + // replaceRegion function, because the function requires a non-private storage mode and Dawn + // sets the private storage mode by default for all textures except IOSurfaces on macOS. + MaybeError Queue::WriteTextureImpl(const TextureCopyView* destination, + const void* data, + size_t dataSize, + const TextureDataLayout* dataLayout, + const Extent3D* writeSize) { + uint32_t blockSize = destination->texture->GetFormat().blockByteSize; + uint32_t blockWidth = destination->texture->GetFormat().blockWidth; + // We are only copying the part of the data that will appear in the texture. + // Note that validating texture copy range ensures that writeSize->width and + // writeSize->height are multiples of blockWidth and blockHeight respectively. 
+ uint32_t alignedBytesPerRow = (writeSize->width) / blockWidth * blockSize; + uint32_t alignedRowsPerImage = writeSize->height; + + UploadHandle uploadHandle; + DAWN_TRY_ASSIGN(uploadHandle, + UploadTextureDataAligningBytesPerRow( + GetDevice(), data, dataSize, alignedBytesPerRow, alignedRowsPerImage, + dataLayout, destination->texture->GetFormat(), writeSize)); + + TextureDataLayout passDataLayout = *dataLayout; + passDataLayout.offset = uploadHandle.startOffset; + passDataLayout.bytesPerRow = alignedBytesPerRow; + passDataLayout.rowsPerImage = alignedRowsPerImage; + + TextureCopy textureCopy; + textureCopy.texture = destination->texture; + textureCopy.mipLevel = destination->mipLevel; + textureCopy.origin = destination->origin; + + return ToBackend(GetDevice()) + ->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, &textureCopy, + *writeSize); + } + }} // namespace dawn_native::metal diff --git a/src/dawn_native/metal/UtilsMetal.h b/src/dawn_native/metal/UtilsMetal.h index 091d8284f0..fe0e2283d2 100644 --- a/src/dawn_native/metal/UtilsMetal.h +++ b/src/dawn_native/metal/UtilsMetal.h @@ -16,6 +16,8 @@ #define DAWNNATIVE_METAL_UTILSMETAL_H_ #include "dawn_native/dawn_platform.h" +#include "dawn_native/metal/DeviceMTL.h" +#include "dawn_native/metal/TextureMTL.h" #import @@ -23,6 +25,34 @@ namespace dawn_native { namespace metal { MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction); + struct TextureBufferCopySplit { + static constexpr uint32_t kMaxTextureBufferCopyRegions = 3; + + struct CopyInfo { + NSUInteger bufferOffset; + NSUInteger bytesPerRow; + NSUInteger bytesPerImage; + Origin3D textureOrigin; + Extent3D copyExtent; + }; + + uint32_t count = 0; + std::array copies; + }; + + TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture, + uint32_t mipLevel, + Origin3D origin, + Extent3D copyExtent, + uint64_t bufferSize, + uint64_t bufferOffset, + uint32_t bytesPerRow, + uint32_t rowsPerImage); + 
+ void EnsureDestinationTextureInitialized(Texture* texture, + const TextureCopy& dst, + const Extent3D& size); + }} // namespace dawn_native::metal #endif // DAWNNATIVE_METAL_UTILSMETAL_H_ diff --git a/src/dawn_native/metal/UtilsMetal.mm b/src/dawn_native/metal/UtilsMetal.mm index aeb4f7aae8..13b4668818 100644 --- a/src/dawn_native/metal/UtilsMetal.mm +++ b/src/dawn_native/metal/UtilsMetal.mm @@ -13,6 +13,7 @@ // limitations under the License. #include "dawn_native/metal/UtilsMetal.h" +#include "dawn_native/CommandBuffer.h" #include "common/Assert.h" @@ -41,4 +42,126 @@ namespace dawn_native { namespace metal { } } + TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture, + uint32_t mipLevel, + Origin3D origin, + Extent3D copyExtent, + uint64_t bufferSize, + uint64_t bufferOffset, + uint32_t bytesPerRow, + uint32_t rowsPerImage) { + TextureBufferCopySplit copy; + const Format textureFormat = texture->GetFormat(); + + // When copying textures from/to an unpacked buffer, the Metal validation layer doesn't + // compute the correct range when checking if the buffer is big enough to contain the + // data for the whole copy. Instead of looking at the position of the last texel in the + // buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage / + // format.blockHeight) * copySize.depth. For example considering the pixel buffer below + // where in memory, each row data (D) of the texture is followed by some padding data + // (P): + // |DDDDDDD|PP| + // |DDDDDDD|PP| + // |DDDDDDD|PP| + // |DDDDDDD|PP| + // |DDDDDDA|PP| + // The last pixel read will be A, but the driver will think it is the whole last padding + // row, causing it to generate an error when the pixel buffer is just big enough. + + // We work around this limitation by detecting when Metal would complain and copy the + // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage. 
+ uint32_t dataRowsPerImage = rowsPerImage / textureFormat.blockHeight; + uint32_t bytesPerImage = bytesPerRow * dataRowsPerImage; + + // Metal validation layer requires that if the texture's pixel format is a compressed + // format, the sourceSize must be a multiple of the pixel format's block size or be + // clamped to the edge of the texture if the block extends outside the bounds of a + // texture. + const Extent3D clampedCopyExtent = + texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent); + + ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D); + + // Check whether buffer size is big enough. + bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth; + if (!needWorkaround) { + copy.count = 1; + copy.copies[0].bufferOffset = bufferOffset; + copy.copies[0].bytesPerRow = bytesPerRow; + copy.copies[0].bytesPerImage = bytesPerImage; + copy.copies[0].textureOrigin = origin; + copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height, + copyExtent.depth}; + return copy; + } + + uint64_t currentOffset = bufferOffset; + + // Doing all the copy except the last image. + if (copyExtent.depth > 1) { + copy.copies[copy.count].bufferOffset = currentOffset; + copy.copies[copy.count].bytesPerRow = bytesPerRow; + copy.copies[copy.count].bytesPerImage = bytesPerImage; + copy.copies[copy.count].textureOrigin = origin; + copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height, + copyExtent.depth - 1}; + + ++copy.count; + + // Update offset to copy to the last image. + currentOffset += (copyExtent.depth - 1) * bytesPerImage; + } + + // Doing all the copy in last image except the last row. 
+ uint32_t copyBlockRowCount = copyExtent.height / textureFormat.blockHeight; + if (copyBlockRowCount > 1) { + copy.copies[copy.count].bufferOffset = currentOffset; + copy.copies[copy.count].bytesPerRow = bytesPerRow; + copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1); + copy.copies[copy.count].textureOrigin = {origin.x, origin.y, + origin.z + copyExtent.depth - 1}; + + ASSERT(copyExtent.height - textureFormat.blockHeight < + texture->GetMipLevelVirtualSize(mipLevel).height); + copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, + copyExtent.height - textureFormat.blockHeight, 1}; + + ++copy.count; + + // Update offset to copy to the last row. + currentOffset += (copyBlockRowCount - 1) * bytesPerRow; + } + + // Doing the last row copy with the exact number of bytes in last row. + // Workaround this issue in a way just like the copy to a 1D texture. + uint32_t lastRowDataSize = + (copyExtent.width / textureFormat.blockWidth) * textureFormat.blockByteSize; + uint32_t lastRowCopyExtentHeight = + textureFormat.blockHeight + clampedCopyExtent.height - copyExtent.height; + ASSERT(lastRowCopyExtentHeight <= textureFormat.blockHeight); + + copy.copies[copy.count].bufferOffset = currentOffset; + copy.copies[copy.count].bytesPerRow = lastRowDataSize; + copy.copies[copy.count].bytesPerImage = lastRowDataSize; + copy.copies[copy.count].textureOrigin = { + origin.x, origin.y + copyExtent.height - textureFormat.blockHeight, + origin.z + copyExtent.depth - 1}; + copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1}; + ++copy.count; + + return copy; + } + + void EnsureDestinationTextureInitialized(Texture* texture, + const TextureCopy& dst, + const Extent3D& size) { + ASSERT(texture == dst.texture.Get()); + SubresourceRange range = GetSubresourcesAffectedByCopy(dst, size); + if (IsCompleteSubresourceCopiedTo(dst.texture.Get(), size, dst.mipLevel)) { + texture->SetIsSubresourceContentInitialized(true, 
range); + } else { + texture->EnsureSubresourceContentInitialized(range); + } + } + }} // namespace dawn_native::metal diff --git a/src/dawn_wire/client/Queue.cpp b/src/dawn_wire/client/Queue.cpp index f9ee04efad..ad116732ff 100644 --- a/src/dawn_wire/client/Queue.cpp +++ b/src/dawn_wire/client/Queue.cpp @@ -80,6 +80,7 @@ namespace dawn_wire { namespace client { QueueWriteTextureInternalCmd cmd; cmd.queueId = id; cmd.destination = destination; + cmd.data = static_cast(data); cmd.dataSize = dataSize; cmd.dataLayout = dataLayout; cmd.writeSize = writeSize; diff --git a/src/tests/end2end/CompressedTextureFormatTests.cpp b/src/tests/end2end/CompressedTextureFormatTests.cpp index dd0302e17b..d1a6cd0814 100644 --- a/src/tests/end2end/CompressedTextureFormatTests.cpp +++ b/src/tests/end2end/CompressedTextureFormatTests.cpp @@ -47,51 +47,54 @@ class CompressedTextureBCFormatTest : public DawnTest { return mIsBCFormatSupported; } - // Copy the compressed texture data into the destination texture as is specified in copyConfig. - void InitializeDataInCompressedTexture(wgpu::Texture bcCompressedTexture, - const CopyConfig& copyConfig) { - ASSERT(IsBCFormatSupported()); - - // Compute the upload buffer size with bytesPerRowAlignment and the copy region. - const wgpu::Extent3D textureSize = copyConfig.textureDescriptor.size; - uint32_t actualWidthAtLevel = textureSize.width >> copyConfig.viewMipmapLevel; - uint32_t actualHeightAtLevel = textureSize.height >> copyConfig.viewMipmapLevel; - uint32_t copyWidthInBlockAtLevel = - (actualWidthAtLevel + kBCBlockWidthInTexels - 1) / kBCBlockWidthInTexels; - uint32_t copyHeightInBlockAtLevel = - (actualHeightAtLevel + kBCBlockHeightInTexels - 1) / kBCBlockHeightInTexels; - uint32_t bufferRowPitchInBytes = 0; + // Compute the upload data for the copyConfig. 
+ std::vector<uint8_t> UploadData(const CopyConfig& copyConfig) { + uint32_t copyWidthInBlock = copyConfig.copyExtent3D.width / kBCBlockWidthInTexels; + uint32_t copyHeightInBlock = copyConfig.copyExtent3D.height / kBCBlockHeightInTexels; + uint32_t rowPitchInBytes = 0; if (copyConfig.bytesPerRowAlignment != 0) { - bufferRowPitchInBytes = copyConfig.bytesPerRowAlignment; + rowPitchInBytes = copyConfig.bytesPerRowAlignment; } else { - bufferRowPitchInBytes = - copyWidthInBlockAtLevel * - utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format); + rowPitchInBytes = copyWidthInBlock * + utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format); } - uint32_t copyBytesPerImage = bufferRowPitchInBytes * copyHeightInBlockAtLevel; + uint32_t copyRowsPerImageInBlock = copyConfig.rowsPerImage / kBCBlockHeightInTexels; + if (copyRowsPerImageInBlock == 0) { + copyRowsPerImageInBlock = copyHeightInBlock; + } + uint32_t copyBytesPerImage = rowPitchInBytes * copyRowsPerImageInBlock; + uint32_t uploadBufferSize = copyConfig.bufferOffset + copyBytesPerImage * copyConfig.copyExtent3D.depth; - // Fill uploadData with the pre-prepared one-block compressed texture data. - std::vector<uint8_t> uploadData(uploadBufferSize, 0); + // Fill data with the pre-prepared one-block compressed texture data. 
+ std::vector data(uploadBufferSize, 0); std::vector oneBlockCompressedTextureData = GetOneBlockBCFormatTextureData(copyConfig.textureDescriptor.format); for (uint32_t layer = 0; layer < copyConfig.copyExtent3D.depth; ++layer) { - for (uint32_t h = 0; h < copyHeightInBlockAtLevel; ++h) { - for (uint32_t w = 0; w < copyWidthInBlockAtLevel; ++w) { - uint32_t uploadBufferOffset = - copyConfig.bufferOffset + copyBytesPerImage * layer + - bufferRowPitchInBytes * h + oneBlockCompressedTextureData.size() * w; - std::memcpy(&uploadData[uploadBufferOffset], - oneBlockCompressedTextureData.data(), + for (uint32_t h = 0; h < copyHeightInBlock; ++h) { + for (uint32_t w = 0; w < copyWidthInBlock; ++w) { + uint32_t uploadBufferOffset = copyConfig.bufferOffset + + copyBytesPerImage * layer + rowPitchInBytes * h + + oneBlockCompressedTextureData.size() * w; + std::memcpy(&data[uploadBufferOffset], oneBlockCompressedTextureData.data(), oneBlockCompressedTextureData.size() * sizeof(uint8_t)); } } } + return data; + } + + // Copy the compressed texture data into the destination texture as is specified in copyConfig. + void InitializeDataInCompressedTexture(wgpu::Texture bcCompressedTexture, + const CopyConfig& copyConfig) { + ASSERT(IsBCFormatSupported()); + + std::vector data = UploadData(copyConfig); + // Copy texture data from a staging buffer to the destination texture. 
- wgpu::Buffer stagingBuffer = utils::CreateBufferFromData( - device, uploadData.data(), uploadBufferSize, wgpu::BufferUsage::CopySrc); + wgpu::Buffer stagingBuffer = utils::CreateBufferFromData(device, data.data(), data.size(), + wgpu::BufferUsage::CopySrc); wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(stagingBuffer, copyConfig.bufferOffset, copyConfig.bytesPerRowAlignment, copyConfig.rowsPerImage); @@ -176,7 +179,6 @@ class CompressedTextureBCFormatTest : public DawnTest { const std::vector& expected) { ASSERT(IsBCFormatSupported()); - ASSERT(expected.size() == renderTargetSize.width * renderTargetSize.height); utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, renderTargetSize.width, renderTargetSize.height); @@ -204,6 +206,10 @@ class CompressedTextureBCFormatTest : public DawnTest { wgpu::Texture bcTexture = CreateTextureWithCompressedData(config); + VerifyBCTexture(config, bcTexture); + } + + void VerifyBCTexture(const CopyConfig& config, wgpu::Texture bcTexture) { wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(); wgpu::Extent3D virtualSizeAtLevel = GetVirtualSizeAtLevel(config); @@ -221,7 +227,7 @@ class CompressedTextureBCFormatTest : public DawnTest { noPaddingExtent3D.depth = 1u; std::vector expectedData = - GetExpectedData(config.textureDescriptor.format, virtualSizeAtLevel); + GetExpectedData(config.textureDescriptor.format, noPaddingExtent3D); wgpu::Origin3D firstLayerCopyOrigin = {config.copyOrigin3D.x, config.copyOrigin3D.y, 0}; for (uint32_t layer = config.copyOrigin3D.z; @@ -1067,3 +1073,102 @@ DAWN_INSTANTIATE_TEST(CompressedTextureBCFormatTest, OpenGLBackend(), VulkanBackend(), VulkanBackend({"use_temporary_buffer_in_texture_to_texture_copy"})); + +class CompressedTextureWriteTextureTest : public CompressedTextureBCFormatTest { + protected: + void SetUp() override { + CompressedTextureBCFormatTest::SetUp(); + DAWN_SKIP_TEST_IF(!IsBCFormatSupported()); + } + + // Write the compressed 
texture data into the destination texture as is specified in copyConfig. + void WriteToCompressedTexture(wgpu::Texture bcCompressedTexture, const CopyConfig& copyConfig) { + ASSERT(IsBCFormatSupported()); + + std::vector<uint8_t> data = UploadData(copyConfig); + + wgpu::TextureDataLayout textureDataLayout = utils::CreateTextureDataLayout( + copyConfig.bufferOffset, copyConfig.bytesPerRowAlignment, copyConfig.rowsPerImage); + + wgpu::TextureCopyView textureCopyView = utils::CreateTextureCopyView( + bcCompressedTexture, copyConfig.viewMipmapLevel, copyConfig.copyOrigin3D); + + queue.WriteTexture(&textureCopyView, data.data(), data.size(), &textureDataLayout, + &copyConfig.copyExtent3D); + } + + // Run the tests that write pre-prepared BC format data into a BC texture and verifies if we + // can render correctly with the pixel values sampled from the BC texture. + void TestWriteRegionIntoBCFormatTextures(const CopyConfig& config) { + ASSERT(IsBCFormatSupported()); + + wgpu::Texture bcTexture = device.CreateTexture(&config.textureDescriptor); + WriteToCompressedTexture(bcTexture, config); + + VerifyBCTexture(config, bcTexture); + } +}; + +// Test WriteTexture to a 2D texture with all parameters non-default +// with BC formats. +TEST_P(CompressedTextureWriteTextureTest, Basic) { + CopyConfig config; + config.textureDescriptor.usage = kDefaultBCFormatTextureUsage; + config.textureDescriptor.size = {20, 24, 1}; + + config.copyOrigin3D = {4, 8, 0}; + config.copyExtent3D = {12, 16, 1}; + config.bytesPerRowAlignment = 511; + config.rowsPerImage = 20; + + for (wgpu::TextureFormat format : kBCFormats) { + config.textureDescriptor.format = format; + TestWriteRegionIntoBCFormatTextures(config); + } +} + +// Test writing to multiple 2D texture array layers with BC formats. 
+TEST_P(CompressedTextureWriteTextureTest, WriteMultiple2DArrayLayers) { + CopyConfig config; + config.textureDescriptor.usage = kDefaultBCFormatTextureUsage; + config.textureDescriptor.size = {20, 24, 9}; + + config.copyOrigin3D = {4, 8, 3}; + config.copyExtent3D = {12, 16, 6}; + config.bytesPerRowAlignment = 511; + config.rowsPerImage = 20; + + for (wgpu::TextureFormat format : kBCFormats) { + config.textureDescriptor.format = format; + TestWriteRegionIntoBCFormatTextures(config); + } +} + +// Test BC format write textures where the physical size of the destination +// subresource is different from its virtual size. +TEST_P(CompressedTextureWriteTextureTest, + WriteIntoSubresourceWithPhysicalSizeNotEqualToVirtualSize) { + // Texture virtual size at mipLevel 2 will be {15, 15, 1} while the physical + // size will be {16, 16, 1}. + // Setting copyExtent.width or copyExtent.height to 16 fits in + // the texture physical size, but doesn't fit in the virtual size. + for (unsigned int w : {12, 16}) { + for (unsigned int h : {12, 16}) { + for (wgpu::TextureFormat format : kBCFormats) { + CopyConfig config; + config.textureDescriptor.usage = kDefaultBCFormatTextureUsage; + config.textureDescriptor.size = {60, 60, 1}; + config.textureDescriptor.mipLevelCount = 4; + config.viewMipmapLevel = 2; + + config.copyOrigin3D = {0, 0, 0}; + config.copyExtent3D = {w, h, 1}; + config.bytesPerRowAlignment = 256; + config.textureDescriptor.format = format; + TestWriteRegionIntoBCFormatTextures(config); + } + } + } +} + +DAWN_INSTANTIATE_TEST(CompressedTextureWriteTextureTest, MetalBackend()); diff --git a/src/tests/end2end/CopyTests.cpp b/src/tests/end2end/CopyTests.cpp index 4b9eac7579..78ebf83f3e 100644 --- a/src/tests/end2end/CopyTests.cpp +++ b/src/tests/end2end/CopyTests.cpp @@ -37,7 +37,7 @@ class CopyTests : public DawnTest { uint32_t rowsPerImage; }; - static std::vector GetExpectedTextureData(const utils::BufferTextureCopyLayout& layout) { + static std::vector 
GetExpectedTextureData(const utils::TextureDataCopyLayout& layout) { std::vector textureData(layout.texelBlockCount); for (uint32_t layer = 0; layer < layout.mipSize.depth; ++layer) { const uint32_t texelIndexOffsetPerSlice = layout.texelBlocksPerImage * layer; @@ -97,8 +97,8 @@ class CopyTests_T2B : public CopyTests { descriptor.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc; wgpu::Texture texture = device.CreateTexture(&descriptor); - const utils::BufferTextureCopyLayout copyLayout = - utils::GetBufferTextureCopyLayoutForTexture2DAtLevel( + const utils::TextureDataCopyLayout copyLayout = + utils::GetTextureDataCopyLayoutForTexture2DAtLevel( kTextureFormat, textureSpec.textureSize, textureSpec.level, bufferSpec.rowsPerImage); @@ -205,8 +205,8 @@ class CopyTests_B2T : public CopyTests { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - const utils::BufferTextureCopyLayout copyLayout = - utils::GetBufferTextureCopyLayoutForTexture2DAtLevel( + const utils::TextureDataCopyLayout copyLayout = + utils::GetTextureDataCopyLayoutForTexture2DAtLevel( kTextureFormat, textureSpec.textureSize, textureSpec.level, bufferSpec.rowsPerImage); @@ -281,8 +281,8 @@ class CopyTests_T2T : public CopyTests { // Create an upload buffer and use it to populate the current slice of the texture in // `level` mip level - const utils::BufferTextureCopyLayout copyLayout = - utils::GetBufferTextureCopyLayoutForTexture2DAtLevel( + const utils::TextureDataCopyLayout copyLayout = + utils::GetTextureDataCopyLayoutForTexture2DAtLevel( kTextureFormat, {srcSpec.textureSize.width, srcSpec.textureSize.height, copySize.depth}, srcSpec.level, 0); diff --git a/src/tests/end2end/QueueTests.cpp b/src/tests/end2end/QueueTests.cpp index b3f57a41b1..a1458078e1 100644 --- a/src/tests/end2end/QueueTests.cpp +++ b/src/tests/end2end/QueueTests.cpp @@ -19,6 +19,10 @@ #include "tests/DawnTest.h" +#include "common/Math.h" +#include "utils/TextureFormatUtils.h" +#include 
"utils/WGPUHelpers.h" + class QueueTests : public DawnTest {}; // Test that GetDefaultQueue always returns the same object. @@ -171,3 +175,319 @@ DAWN_INSTANTIATE_TEST(QueueWriteBufferTests, MetalBackend(), OpenGLBackend(), VulkanBackend()); + +class QueueWriteTextureTests : public DawnTest { + protected: + static constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::RGBA8Unorm; + + struct TextureSpec { + wgpu::Origin3D copyOrigin; + wgpu::Extent3D textureSize; + uint32_t level; + }; + + struct DataSpec { + uint64_t size; + uint64_t offset; + uint32_t bytesPerRow; + uint32_t rowsPerImage; + }; + + static DataSpec MinimumDataSpec(wgpu::Extent3D writeSize, + uint32_t bytesPerRow = 0, + uint32_t rowsPerImage = 0) { + if (bytesPerRow == 0) { + bytesPerRow = writeSize.width * utils::GetTexelBlockSizeInBytes(kTextureFormat); + } + if (rowsPerImage == 0) { + rowsPerImage = writeSize.height; + } + uint32_t totalDataSize = + utils::RequiredBytesInCopy(bytesPerRow, rowsPerImage, writeSize, kTextureFormat); + return {totalDataSize, 0, bytesPerRow, rowsPerImage}; + } + + static void PackTextureData(const uint8_t* srcData, + uint32_t width, + uint32_t height, + uint32_t srcBytesPerRow, + RGBA8* dstData, + uint32_t dstTexelPerRow, + uint32_t texelBlockSize) { + for (uint64_t y = 0; y < height; ++y) { + for (uint64_t x = 0; x < width; ++x) { + uint64_t src = x * texelBlockSize + y * srcBytesPerRow; + uint64_t dst = x + y * dstTexelPerRow; + + dstData[dst] = {srcData[src], srcData[src + 1], srcData[src + 2], srcData[src + 3]}; + } + } + } + + static void FillData(uint8_t* data, size_t count) { + for (size_t i = 0; i < count; ++i) { + data[i] = static_cast(i % 253); + } + } + + void DoTest(const TextureSpec& textureSpec, + const DataSpec& dataSpec, + const wgpu::Extent3D& copySize) { + // Create data of size `size` and populate it + std::vector data(dataSpec.size); + FillData(data.data(), data.size()); + + // Create a texture that is `width` x `height` with (`level` + 
1) mip levels. + wgpu::TextureDescriptor descriptor = {}; + descriptor.dimension = wgpu::TextureDimension::e2D; + descriptor.size = textureSpec.textureSize; + descriptor.format = kTextureFormat; + descriptor.mipLevelCount = textureSpec.level + 1; + descriptor.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc; + wgpu::Texture texture = device.CreateTexture(&descriptor); + + wgpu::TextureDataLayout textureDataLayout = utils::CreateTextureDataLayout( + dataSpec.offset, dataSpec.bytesPerRow, dataSpec.rowsPerImage); + + wgpu::TextureCopyView textureCopyView = + utils::CreateTextureCopyView(texture, textureSpec.level, textureSpec.copyOrigin); + + queue.WriteTexture(&textureCopyView, data.data(), dataSpec.size, &textureDataLayout, + &copySize); + + const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(kTextureFormat); + wgpu::Extent3D mipSize = {textureSpec.textureSize.width >> textureSpec.level, + textureSpec.textureSize.height >> textureSpec.level, + textureSpec.textureSize.depth}; + uint32_t alignedBytesPerRow = Align(dataSpec.bytesPerRow, bytesPerTexel); + uint32_t appliedRowsPerImage = + dataSpec.rowsPerImage > 0 ? dataSpec.rowsPerImage : mipSize.height; + uint32_t bytesPerImage = dataSpec.bytesPerRow * appliedRowsPerImage; + + const uint32_t maxArrayLayer = textureSpec.copyOrigin.z + copySize.depth; + + uint64_t dataOffset = dataSpec.offset; + const uint32_t texelCountLastLayer = + (alignedBytesPerRow / bytesPerTexel) * (mipSize.height - 1) + mipSize.width; + for (uint32_t slice = textureSpec.copyOrigin.z; slice < maxArrayLayer; ++slice) { + // Pack the data in the specified copy region to have the same + // format as the expected texture data. 
+ std::vector expected(texelCountLastLayer); + PackTextureData(&data[dataOffset], copySize.width, copySize.height, + dataSpec.bytesPerRow, expected.data(), copySize.width, bytesPerTexel); + + EXPECT_TEXTURE_RGBA8_EQ(expected.data(), texture, textureSpec.copyOrigin.x, + textureSpec.copyOrigin.y, copySize.width, copySize.height, + textureSpec.level, slice) + << "Write to texture failed copying " << dataSpec.size << "-byte data with offset " + << dataSpec.offset << " and bytes per row " << dataSpec.bytesPerRow << " to [(" + << textureSpec.copyOrigin.x << ", " << textureSpec.copyOrigin.y << "), (" + << textureSpec.copyOrigin.x + copySize.width << ", " + << textureSpec.copyOrigin.y + copySize.height << ")) region of " + << textureSpec.textureSize.width << " x " << textureSpec.textureSize.height + << " texture at mip level " << textureSpec.level << " layer " << slice << std::endl; + + dataOffset += bytesPerImage; + } + } +}; + +// Test writing the whole texture for varying texture sizes. +TEST_P(QueueWriteTextureTests, VaryingTextureSize) { + for (unsigned int w : {127, 128}) { + for (unsigned int h : {63, 64}) { + for (unsigned int d : {1, 3, 4}) { + TextureSpec textureSpec; + textureSpec.textureSize = {w, h, d}; + textureSpec.copyOrigin = {0, 0, 0}; + textureSpec.level = 0; + + DoTest(textureSpec, MinimumDataSpec({w, h, d}), {w, h, d}); + } + } + } +} + +// Test writing a pixel with an offset. 
+TEST_P(QueueWriteTextureTests, VaryingTextureOffset) { + constexpr uint32_t kWidth = 259; + constexpr uint32_t kHeight = 127; + DataSpec pixelData = MinimumDataSpec({1, 1, 1}); + + constexpr wgpu::Extent3D kCopySize = {1, 1, 1}; + constexpr wgpu::Extent3D kTextureSize = {kWidth, kHeight, 1}; + TextureSpec defaultTextureSpec; + defaultTextureSpec.textureSize = kTextureSize; + defaultTextureSpec.level = 0; + + for (unsigned int w : {0u, kWidth / 7, kWidth / 3, kWidth - 1}) { + for (unsigned int h : {0u, kHeight / 7, kHeight / 3, kHeight - 1}) { + TextureSpec textureSpec = defaultTextureSpec; + textureSpec.copyOrigin = {w, h, 0}; + DoTest(textureSpec, pixelData, kCopySize); + } + } +} + +// Test writing a pixel with an offset to a texture array +TEST_P(QueueWriteTextureTests, VaryingTextureArrayOffset) { + constexpr uint32_t kWidth = 259; + constexpr uint32_t kHeight = 127; + constexpr uint32_t kDepth = 62; + DataSpec pixelData = MinimumDataSpec({1, 1, 1}); + + constexpr wgpu::Extent3D kCopySize = {1, 1, 1}; + constexpr wgpu::Extent3D kTextureSize = {kWidth, kHeight, kDepth}; + TextureSpec defaultTextureSpec; + defaultTextureSpec.textureSize = kTextureSize; + defaultTextureSpec.level = 0; + + for (unsigned int w : {0u, kWidth / 7, kWidth / 3, kWidth - 1}) { + for (unsigned int h : {0u, kHeight / 7, kHeight / 3, kHeight - 1}) { + for (unsigned int d : {0u, kDepth / 7, kDepth / 3, kDepth - 1}) { + TextureSpec textureSpec = defaultTextureSpec; + textureSpec.copyOrigin = {w, h, d}; + DoTest(textureSpec, pixelData, kCopySize); + } + } + } +} + +// Test writing with varying write sizes. 
+TEST_P(QueueWriteTextureTests, VaryingWriteSize) { + constexpr uint32_t kWidth = 257; + constexpr uint32_t kHeight = 127; + for (unsigned int w : {13, 63, 128, 256}) { + for (unsigned int h : {16, 19, 32, 63}) { + TextureSpec textureSpec; + textureSpec.copyOrigin = {0, 0, 0}; + textureSpec.level = 0; + textureSpec.textureSize = {kWidth, kHeight, 1}; + DoTest(textureSpec, MinimumDataSpec({w, h, 1}), {w, h, 1}); + } + } +} + +// Test writing with varying write sizes to texture arrays. +TEST_P(QueueWriteTextureTests, VaryingArrayWriteSize) { + constexpr uint32_t kWidth = 257; + constexpr uint32_t kHeight = 127; + constexpr uint32_t kDepth = 65; + for (unsigned int w : {13, 63, 128, 256}) { + for (unsigned int h : {16, 19, 32, 63}) { + for (unsigned int d : {3, 6}) { + TextureSpec textureSpec; + textureSpec.copyOrigin = {0, 0, 0}; + textureSpec.level = 0; + textureSpec.textureSize = {kWidth, kHeight, kDepth}; + DoTest(textureSpec, MinimumDataSpec({w, h, d}), {w, h, d}); + } + } + } +} + +// Test writing to varying mips +TEST_P(QueueWriteTextureTests, TextureWriteToMip) { + constexpr uint32_t kWidth = 259; + constexpr uint32_t kHeight = 127; + + TextureSpec defaultTextureSpec; + defaultTextureSpec.copyOrigin = {0, 0, 0}; + defaultTextureSpec.textureSize = {kWidth, kHeight, 1}; + + for (unsigned int i = 1; i < 4; ++i) { + TextureSpec textureSpec = defaultTextureSpec; + textureSpec.level = i; + DoTest(textureSpec, MinimumDataSpec({kWidth >> i, kHeight >> i, 1}), + {kWidth >> i, kHeight >> i, 1}); + } +} + +// Test writing with different multiples of texel block size as data offset +TEST_P(QueueWriteTextureTests, VaryingDataOffset) { + constexpr uint32_t kWidth = 259; + constexpr uint32_t kHeight = 127; + + TextureSpec textureSpec; + textureSpec.copyOrigin = {0, 0, 0}; + textureSpec.textureSize = {kWidth, kHeight, 1}; + textureSpec.level = 0; + + for (unsigned int i : {1, 2, 4, 17, 64, 128, 300}) { + DataSpec dataSpec = MinimumDataSpec({kWidth, kHeight, 1}); + uint64_t 
offset = i * utils::GetTexelBlockSizeInBytes(kTextureFormat); + dataSpec.size += offset; + dataSpec.offset += offset; + DoTest(textureSpec, dataSpec, {kWidth, kHeight, 1}); + } +} + +// Test writing with rowsPerImage greater than needed. +TEST_P(QueueWriteTextureTests, VaryingRowsPerImage) { + constexpr uint32_t kWidth = 65; + constexpr uint32_t kHeight = 31; + constexpr uint32_t kDepth = 17; + + constexpr wgpu::Extent3D copySize = {kWidth - 1, kHeight - 1, kDepth - 1}; + + for (unsigned int r : {1, 2, 3, 64, 200}) { + TextureSpec textureSpec; + textureSpec.copyOrigin = {1, 1, 1}; + textureSpec.textureSize = {kWidth, kHeight, kDepth}; + textureSpec.level = 0; + + DataSpec dataSpec = MinimumDataSpec(copySize, 0, copySize.height + r); + DoTest(textureSpec, dataSpec, copySize); + } +} + +// Test with bytesPerRow greater than needed +TEST_P(QueueWriteTextureTests, VaryingBytesPerRow) { + constexpr uint32_t kWidth = 257; + constexpr uint32_t kHeight = 129; + + TextureSpec textureSpec; + textureSpec.textureSize = {kWidth, kHeight, 1}; + textureSpec.copyOrigin = {1, 2, 0}; + textureSpec.level = 0; + + constexpr wgpu::Extent3D copyExtent = {17, 19, 1}; + + for (unsigned int b : {1, 2, 3, 4}) { + uint32_t bytesPerRow = + copyExtent.width * utils::GetTexelBlockSizeInBytes(kTextureFormat) + b; + DoTest(textureSpec, MinimumDataSpec(copyExtent, bytesPerRow, 0), copyExtent); + } +} + +// Test with bytesPerRow greater than needed in a write to a texture array. 
+TEST_P(QueueWriteTextureTests, VaryingArrayBytesPerRow) { + constexpr uint32_t kWidth = 257; + constexpr uint32_t kHeight = 129; + constexpr uint32_t kLayers = 65; + + TextureSpec textureSpec; + textureSpec.textureSize = {kWidth, kHeight, kLayers}; + textureSpec.copyOrigin = {1, 2, 3}; + textureSpec.level = 0; + + constexpr wgpu::Extent3D copyExtent = {17, 19, 21}; + + // Test with bytesPerRow divisible by blockWidth + for (unsigned int b : {1, 2, 3, 65, 300}) { + uint32_t bytesPerRow = + (copyExtent.width + b) * utils::GetTexelBlockSizeInBytes(kTextureFormat); + uint32_t rowsPerImage = 23; + DoTest(textureSpec, MinimumDataSpec(copyExtent, bytesPerRow, rowsPerImage), copyExtent); + } + + // Test with bytesPerRow not divisible by blockWidth + for (unsigned int b : {1, 2, 3, 19, 301}) { + uint32_t bytesPerRow = + copyExtent.width * utils::GetTexelBlockSizeInBytes(kTextureFormat) + b; + uint32_t rowsPerImage = 23; + DoTest(textureSpec, MinimumDataSpec(copyExtent, bytesPerRow, rowsPerImage), copyExtent); + } +} + +DAWN_INSTANTIATE_TEST(QueueWriteTextureTests, MetalBackend()); diff --git a/src/utils/WGPUHelpers.cpp b/src/utils/WGPUHelpers.cpp index f728549307..0a47b8d709 100644 --- a/src/utils/WGPUHelpers.cpp +++ b/src/utils/WGPUHelpers.cpp @@ -269,9 +269,7 @@ namespace utils { uint32_t rowsPerImage) { wgpu::BufferCopyView bufferCopyView = {}; bufferCopyView.buffer = buffer; - bufferCopyView.layout.offset = offset; - bufferCopyView.layout.bytesPerRow = bytesPerRow; - bufferCopyView.layout.rowsPerImage = rowsPerImage; + bufferCopyView.layout = CreateTextureDataLayout(offset, bytesPerRow, rowsPerImage); return bufferCopyView; } @@ -287,6 +285,17 @@ namespace utils { return textureCopyView; } + wgpu::TextureDataLayout CreateTextureDataLayout(uint64_t offset, + uint32_t bytesPerRow, + uint32_t rowsPerImage) { + wgpu::TextureDataLayout textureDataLayout; + textureDataLayout.offset = offset; + textureDataLayout.bytesPerRow = bytesPerRow; + textureDataLayout.rowsPerImage = 
rowsPerImage; + + return textureDataLayout; + } + wgpu::SamplerDescriptor GetDefaultSamplerDescriptor() { wgpu::SamplerDescriptor desc = {}; @@ -391,12 +400,12 @@ namespace utils { } // TODO(jiawei.shao@intel.com): support compressed texture formats - BufferTextureCopyLayout GetBufferTextureCopyLayoutForTexture2DAtLevel( + TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel( wgpu::TextureFormat format, wgpu::Extent3D textureSizeAtLevel0, uint32_t mipmapLevel, uint32_t rowsPerImage) { - BufferTextureCopyLayout layout; + TextureDataCopyLayout layout; layout.mipSize = {textureSizeAtLevel0.width >> mipmapLevel, textureSizeAtLevel0.height >> mipmapLevel, textureSizeAtLevel0.depth}; diff --git a/src/utils/WGPUHelpers.h b/src/utils/WGPUHelpers.h index 5c5f9f2f28..0c63c4de97 100644 --- a/src/utils/WGPUHelpers.h +++ b/src/utils/WGPUHelpers.h @@ -55,6 +55,9 @@ namespace utils { wgpu::TextureCopyView CreateTextureCopyView(wgpu::Texture texture, uint32_t level, wgpu::Origin3D origin); + wgpu::TextureDataLayout CreateTextureDataLayout(uint64_t offset, + uint32_t bytesPerRow, + uint32_t rowsPerImage); struct ComboRenderPassDescriptor : public wgpu::RenderPassDescriptor { public: @@ -130,7 +133,7 @@ namespace utils { const wgpu::BindGroupLayout& layout, std::initializer_list entriesInitializer); - struct BufferTextureCopyLayout { + struct TextureDataCopyLayout { uint64_t byteLength; uint64_t texelBlockCount; uint32_t bytesPerRow; @@ -146,7 +149,7 @@ namespace utils { uint32_t bytesPerRow, uint32_t rowsPerImage, uint32_t copyArrayLayerCount); - BufferTextureCopyLayout GetBufferTextureCopyLayoutForTexture2DAtLevel( + TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel( wgpu::TextureFormat format, wgpu::Extent3D textureSizeAtLevel0, uint32_t mipmapLevel,