Add Buffer::GetAllocatedSize()

Buffer allocations may need padding based on backend requirements. GetAllocatedSize returns the size of the buffer actually allocated. This CL also updates the backends to use GetAllocatedSize for buffer clearing and barrier operations which should operate over the entire allocated resource. Bug: dawn:1011 Change-Id: Ic5233214414fa090725a4f50fff70d6e178494c3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/60701 Reviewed-by: Stephen White <senorblanco@chromium.org> Commit-Queue: Austin Eng <enga@chromium.org>
2025-08-06 04:05:40 +00:00 · 2021-08-05 15:26:58 +00:00 · 2021-08-05 15:26:58 +00:00 · 6c6707021a
commit 6c6707021a
parent fef90b8a4a
8 changed files with 106 additions and 57 deletions
--- a/src/dawn_native/Buffer.cpp
+++ b/src/dawn_native/Buffer.cpp
@ -177,6 +177,13 @@ namespace dawn_native {
        return mSize;
    }

+    uint64_t BufferBase::GetAllocatedSize() const {
+        ASSERT(!IsError());
+        // The backend must initialize this value.
+        ASSERT(mAllocatedSize != 0);
+        return mAllocatedSize;
+    }
+
    wgpu::BufferUsage BufferBase::GetUsage() const {
        ASSERT(!IsError());
        return mUsage;
@ -185,13 +192,29 @@ namespace dawn_native {
    MaybeError BufferBase::MapAtCreation() {
        DAWN_TRY(MapAtCreationInternal());

+        void* ptr;
+        size_t size;
+        if (mSize == 0) {
+            return {};
+        } else if (mStagingBuffer) {
+            // If there is a staging buffer for initialization, clear its contents directly.
+            // It should be exactly as large as the buffer allocation.
+            ptr = mStagingBuffer->GetMappedPointer();
+            size = mStagingBuffer->GetSize();
+            ASSERT(size == GetAllocatedSize());
+        } else {
+            // Otherwise, the buffer is directly mappable on the CPU.
+            ptr = GetMappedPointerImpl();
+            size = GetAllocatedSize();
+        }
+
        DeviceBase* device = GetDevice();
        if (device->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse)) {
-            memset(GetMappedRange(0, mSize), uint8_t(0u), mSize);
+            memset(ptr, uint8_t(0u), size);
            SetIsDataInitialized();
            device->IncrementLazyClearCountForTesting();
        } else if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
-            memset(GetMappedRange(0, mSize), uint8_t(1u), mSize);
+            memset(ptr, uint8_t(1u), size);
        }

        return {};
@ -215,9 +238,12 @@ namespace dawn_native {
        } else {
            // If any of these fail, the buffer will be deleted and replaced with an
            // error buffer.
+            // The staging buffer is used to return mappable data to inititalize the buffer
+            // contents. Allocate one as large as the real buffer size so that every byte is
+            // initialized.
            // TODO(crbug.com/dawn/828): Suballocate and reuse memory from a larger staging buffer
            // so we don't create many small buffers.
-            DAWN_TRY_ASSIGN(mStagingBuffer, GetDevice()->CreateStagingBuffer(GetSize()));
+            DAWN_TRY_ASSIGN(mStagingBuffer, GetDevice()->CreateStagingBuffer(GetAllocatedSize()));
        }

        return {};
@ -343,11 +369,14 @@ namespace dawn_native {

    MaybeError BufferBase::CopyFromStagingBuffer() {
        ASSERT(mStagingBuffer);
-        if (GetSize() == 0) {
+        if (mSize == 0) {
+            // Staging buffer is not created if zero size.
+            ASSERT(mStagingBuffer == nullptr);
            return {};
        }

-        DAWN_TRY(GetDevice()->CopyFromStagingToBuffer(mStagingBuffer.get(), 0, this, 0, GetSize()));
+        DAWN_TRY(GetDevice()->CopyFromStagingToBuffer(mStagingBuffer.get(), 0, this, 0,
+                                                      GetAllocatedSize()));

        DynamicUploader* uploader = GetDevice()->GetDynamicUploader();
        uploader->ReleaseStagingBuffer(std::move(mStagingBuffer));
--- a/src/dawn_native/Buffer.h
+++ b/src/dawn_native/Buffer.h
@ -54,6 +54,7 @@ namespace dawn_native {
        static BufferBase* MakeError(DeviceBase* device, const BufferDescriptor* descriptor);

        uint64_t GetSize() const;
+        uint64_t GetAllocatedSize() const;
        wgpu::BufferUsage GetUsage() const;

        MaybeError MapAtCreation();
@ -89,6 +90,8 @@ namespace dawn_native {

        MaybeError MapAtCreationInternal();

+        uint64_t mAllocatedSize = 0;
+
      private:
        virtual MaybeError MapAtCreationImpl() = 0;
        virtual MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) = 0;
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@ -102,17 +102,25 @@ namespace dawn_native { namespace d3d12 {
    }

    MaybeError Buffer::Initialize(bool mappedAtCreation) {
-        D3D12_RESOURCE_DESC resourceDescriptor;
-        resourceDescriptor.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
-        resourceDescriptor.Alignment = 0;
        // D3D buffers are always resource size aligned to 64KB. However, D3D12's validation forbids
        // binding a CBV to an unaligned size. To prevent, one can always safely align the buffer
        // desc size to the CBV data alignment as other buffer usages ignore it (no size check).
        // The validation will still enforce bound checks with the unaligned size returned by
        // GetSize().
        // https://docs.microsoft.com/en-us/windows/win32/direct3d12/uploading-resources#buffer-alignment
-        resourceDescriptor.Width =
-            Align(std::max(GetSize(), uint64_t(4u)), D3D12BufferSizeAlignment(GetUsage()));
+        // Allocate at least 4 bytes so clamped accesses are always in bounds.
+        uint64_t size = std::max(GetSize(), uint64_t(4u));
+        size_t alignment = D3D12BufferSizeAlignment(GetUsage());
+        if (size > std::numeric_limits<uint64_t>::max() - alignment) {
+            // Alignment would overlow.
+            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+        }
+        mAllocatedSize = Align(size, alignment);
+
+        D3D12_RESOURCE_DESC resourceDescriptor;
+        resourceDescriptor.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+        resourceDescriptor.Alignment = 0;
+        resourceDescriptor.Width = mAllocatedSize;
        resourceDescriptor.Height = 1;
        resourceDescriptor.DepthOrArraySize = 1;
        resourceDescriptor.MipLevels = 1;
@ -328,7 +336,7 @@ namespace dawn_native { namespace d3d12 {

        // The buffers with mappedAtCreation == true will be initialized in
        // BufferBase::MapAtCreation().
-        DAWN_TRY(MapInternal(true, 0, size_t(GetSize()), "D3D12 map at creation"));
+        DAWN_TRY(MapInternal(true, 0, size_t(GetAllocatedSize()), "D3D12 map at creation"));

        return {};
    }
@ -442,22 +450,23 @@ namespace dawn_native { namespace d3d12 {
        // The state of the buffers on UPLOAD heap must always be GENERIC_READ and cannot be
        // changed away, so we can only clear such buffer with buffer mapping.
        if (D3D12HeapType(GetUsage()) == D3D12_HEAP_TYPE_UPLOAD) {
-            DAWN_TRY(MapInternal(true, 0, size_t(GetSize()), "D3D12 map at clear buffer"));
-            memset(mMappedData, clearValue, GetSize());
+            DAWN_TRY(MapInternal(true, 0, size_t(GetAllocatedSize()), "D3D12 map at clear buffer"));
+            memset(mMappedData, clearValue, GetAllocatedSize());
            UnmapImpl();
        } else {
            // TODO(crbug.com/dawn/852): use ClearUnorderedAccessView*() when the buffer usage
            // includes STORAGE.
            DynamicUploader* uploader = device->GetDynamicUploader();
            UploadHandle uploadHandle;
-            DAWN_TRY_ASSIGN(uploadHandle,
-                            uploader->Allocate(GetSize(), device->GetPendingCommandSerial(),
-                                               kCopyBufferToBufferOffsetAlignment));
+            DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(GetAllocatedSize(),
+                                                             device->GetPendingCommandSerial(),
+                                                             kCopyBufferToBufferOffsetAlignment));

-            memset(uploadHandle.mappedBuffer, clearValue, GetSize());
+            memset(uploadHandle.mappedBuffer, clearValue, GetAllocatedSize());

            device->CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
-                                                uploadHandle.startOffset, this, 0, GetSize());
+                                                uploadHandle.startOffset, this, 0,
+                                                GetAllocatedSize());
        }

        return {};
--- a/src/dawn_native/metal/BufferMTL.mm
+++ b/src/dawn_native/metal/BufferMTL.mm
@ -48,21 +48,30 @@ namespace dawn_native { namespace metal {
        if (GetSize() > std::numeric_limits<NSUInteger>::max()) {
            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
        }
-        NSUInteger currentSize = static_cast<NSUInteger>(std::max(GetSize(), uint64_t(4u)));
+
+        uint32_t alignment = 1;
+#ifdef DAWN_PLATFORM_MACOS
+        // [MTLBlitCommandEncoder fillBuffer] requires the size to be a multiple of 4 on MacOS.
+        alignment = 4;
+#endif

        // Metal validation layer requires the size of uniform buffer and storage buffer to be no
        // less than the size of the buffer block defined in shader, and the overall size of the
        // buffer must be aligned to the largest alignment of its members.
        if (GetUsage() &
            (wgpu::BufferUsage::Uniform | wgpu::BufferUsage::Storage | kInternalStorageBuffer)) {
-            if (currentSize >
-                std::numeric_limits<NSUInteger>::max() - kMinUniformOrStorageBufferAlignment) {
-                // Alignment would overlow.
-                return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
-            }
-            currentSize = Align(currentSize, kMinUniformOrStorageBufferAlignment);
+            ASSERT(IsAligned(kMinUniformOrStorageBufferAlignment, alignment));
+            alignment = kMinUniformOrStorageBufferAlignment;
        }

+        // Allocate at least 4 bytes so clamped accesses are always in bounds.
+        NSUInteger currentSize = static_cast<NSUInteger>(std::max(GetSize(), uint64_t(4u)));
+        if (currentSize > std::numeric_limits<NSUInteger>::max() - alignment) {
+            // Alignment would overlow.
+            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+        }
+        currentSize = Align(currentSize, kMinUniformOrStorageBufferAlignment);
+
        if (@available(iOS 12, macOS 10.14, *)) {
            NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];
            if (currentSize > maxBufferSize) {
@ -83,6 +92,7 @@ namespace dawn_native { namespace metal {
            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
        }

+        mAllocatedSize = currentSize;
        mMtlBuffer.Acquire([ToBackend(GetDevice())->GetMTLDevice()
            newBufferWithLength:currentSize
                        options:storageMode]);
@ -189,14 +199,9 @@ namespace dawn_native { namespace metal {

    void Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) {
        ASSERT(commandContext != nullptr);
-
-        // Metal validation layer doesn't allow the length of the range in fillBuffer() to be 0.
-        if (GetSize() == 0u) {
-            return;
-        }
-
+        ASSERT(GetAllocatedSize() > 0);
        [commandContext->EnsureBlit() fillBuffer:mMtlBuffer.Get()
-                                           range:NSMakeRange(0, GetSize())
+                                           range:NSMakeRange(0, GetAllocatedSize())
                                           value:clearValue];
    }

--- a/src/dawn_native/null/DeviceNull.cpp
+++ b/src/dawn_native/null/DeviceNull.cpp
@ -265,6 +265,7 @@ namespace dawn_native { namespace null {
    Buffer::Buffer(Device* device, const BufferDescriptor* descriptor)
        : BufferBase(device, descriptor) {
        mBackingData = std::unique_ptr<uint8_t[]>(new uint8_t[GetSize()]);
+        mAllocatedSize = GetSize();
    }

    Buffer::~Buffer() {
--- a/src/dawn_native/opengl/BufferGL.cpp
+++ b/src/dawn_native/opengl/BufferGL.cpp
@ -35,7 +35,8 @@ namespace dawn_native { namespace opengl {

    Buffer::Buffer(Device* device, const BufferDescriptor* descriptor)
        : BufferBase(device, descriptor) {
-        uint64_t size = GetAppliedSize();
+        // Allocate at least 4 bytes so clamped accesses are always in bounds.
+        mAllocatedSize = std::max(GetSize(), uint64_t(4u));

        device->gl.GenBuffers(1, &mBuffer);
        device->gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);
@ -44,10 +45,11 @@ namespace dawn_native { namespace opengl {
        // BufferBase::MapAtCreation().
        if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) &&
            !descriptor->mappedAtCreation) {
-            std::vector<uint8_t> clearValues(size, 1u);
-            device->gl.BufferData(GL_ARRAY_BUFFER, size, clearValues.data(), GL_STATIC_DRAW);
+            std::vector<uint8_t> clearValues(mAllocatedSize, 1u);
+            device->gl.BufferData(GL_ARRAY_BUFFER, mAllocatedSize, clearValues.data(),
+                                  GL_STATIC_DRAW);
        } else {
-            device->gl.BufferData(GL_ARRAY_BUFFER, size, nullptr, GL_STATIC_DRAW);
+            device->gl.BufferData(GL_ARRAY_BUFFER, mAllocatedSize, nullptr, GL_STATIC_DRAW);
        }
    }

@ -66,10 +68,6 @@ namespace dawn_native { namespace opengl {
        return mBuffer;
    }

-    uint64_t Buffer::GetAppliedSize() const {
-        return std::max(GetSize(), uint64_t(4u));
-    }
-
    void Buffer::EnsureDataInitialized() {
        if (IsDataInitialized() ||
            !GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse)) {
@ -109,7 +107,7 @@ namespace dawn_native { namespace opengl {
        ASSERT(GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse));
        ASSERT(!IsDataInitialized());

-        const uint64_t size = GetAppliedSize();
+        const uint64_t size = GetAllocatedSize();
        Device* device = ToBackend(GetDevice());

        const std::vector<uint8_t> clearValues(size, 0u);
--- a/src/dawn_native/opengl/BufferGL.h
+++ b/src/dawn_native/opengl/BufferGL.h
@ -47,8 +47,6 @@ namespace dawn_native { namespace opengl {
        MaybeError MapAtCreationImpl() override;
        void* GetMappedPointerImpl() override;

-        uint64_t GetAppliedSize() const;
-
        void InitializeToZero();

        GLuint mBuffer = 0;
--- a/src/dawn_native/vulkan/BufferVk.cpp
+++ b/src/dawn_native/vulkan/BufferVk.cpp
@ -137,13 +137,24 @@ namespace dawn_native { namespace vulkan {
    }

    MaybeError Buffer::Initialize(bool mappedAtCreation) {
+        // Allocate at least 4 bytes so clamped accesses are always in bounds.
+        // Also, Vulkan requires the size to be non-zero.
+        uint64_t size = std::max(GetSize(), uint64_t(4u));
+        // vkCmdFillBuffer requires the size to be a multiple of 4.
+        size_t alignment = 4u;
+        if (size > std::numeric_limits<uint64_t>::max() - alignment) {
+            // Alignment would overlow.
+            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+        }
+        mAllocatedSize = Align(size, alignment);
+
        // Avoid passing ludicrously large sizes to drivers because it causes issues: drivers add
        // some constants to the size passed and align it, but for values close to the maximum
        // VkDeviceSize this can cause overflows and makes drivers crash or return bad sizes in the
        // VkmemoryRequirements. See https://gitlab.khronos.org/vulkan/vulkan/issues/1904
        // Any size with one of two top bits of VkDeviceSize set is a HUGE allocation and we can
        // safely return an OOM error.
-        if (GetSize() & (uint64_t(3) << uint64_t(62))) {
+        if (mAllocatedSize & (uint64_t(3) << uint64_t(62))) {
            return DAWN_OUT_OF_MEMORY_ERROR("Buffer size is HUGE and could cause overflows");
        }

@ -151,7 +162,7 @@ namespace dawn_native { namespace vulkan {
        createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        createInfo.pNext = nullptr;
        createInfo.flags = 0;
-        createInfo.size = std::max(GetSize(), uint64_t(4u));
+        createInfo.size = mAllocatedSize;
        // Add CopyDst for non-mappable buffer initialization with mappedAtCreation
        // and robust resource initialization.
        createInfo.usage = VulkanBufferUsage(GetUsage() | wgpu::BufferUsage::CopyDst);
@ -243,10 +254,8 @@ namespace dawn_native { namespace vulkan {
        barrier->dstQueueFamilyIndex = 0;
        barrier->buffer = mHandle;
        barrier->offset = 0;
-        // Size must be non-zero or VK_WHOLE_SIZE. Use WHOLE_SIZE
-        // instead of GetSize() because the buffer allocation may
-        // be padded.
-        barrier->size = VK_WHOLE_SIZE;
+        // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it.
+        barrier->size = GetAllocatedSize();

        mLastUsage = usage;

@ -347,18 +356,15 @@ namespace dawn_native { namespace vulkan {

    void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue) {
        ASSERT(recordingContext != nullptr);
-
-        // Vulkan validation layer doesn't allow the `size` in vkCmdFillBuffer() to be 0.
-        if (GetSize() == 0u) {
-            return;
-        }
+        ASSERT(GetAllocatedSize() > 0);

        TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst);

        Device* device = ToBackend(GetDevice());
-        // This code is fine. According to jiawei.shao@intel.com, VK_WHOLE_SIZE doesn't work
-        // on old Windows Intel Vulkan drivers, so we don't use it.
-        device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, 0, GetSize(),
+        // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it.
+        // Note: Allocated size must be a multiple of 4.
+        ASSERT(GetAllocatedSize() % 4 == 0);
+        device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, 0, GetAllocatedSize(),
                                 clearValue);
    }
 }}  // namespace dawn_native::vulkan