From 61d6d21091713d89299195b938898d290738e166 Mon Sep 17 00:00:00 2001 From: jchen10 Date: Thu, 3 Nov 2022 09:58:59 +0000 Subject: [PATCH] Byte counting WriteBuffer/Texture to submit Only tag to submit when the total size is larger than the threshold, so that we can make as few submits as possible meanwhile avoiding OOM. Bug: chromium:1258986 Change-Id: I7190e1bb942bfaffc5cd424ce4743173735b25e3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106418 Reviewed-by: Corentin Wallez Kokoro: Kokoro Reviewed-by: Austin Eng Commit-Queue: Jie A Chen --- src/dawn/native/Device.cpp | 24 +++++++ src/dawn/native/Device.h | 37 ++++++++--- src/dawn/native/DynamicUploader.cpp | 21 +++++++ src/dawn/native/DynamicUploader.h | 3 + src/dawn/native/Queue.cpp | 1 + src/dawn/native/d3d12/BufferD3D12.cpp | 4 +- .../native/d3d12/CommandRecordingContext.cpp | 11 ++++ .../native/d3d12/CommandRecordingContext.h | 3 + src/dawn/native/d3d12/DeviceD3D12.cpp | 60 ++++++++++-------- src/dawn/native/d3d12/DeviceD3D12.h | 31 +++++----- .../native/metal/CommandRecordingContext.h | 3 + .../native/metal/CommandRecordingContext.mm | 9 +++ src/dawn/native/metal/DeviceMTL.h | 23 ++++--- src/dawn/native/metal/DeviceMTL.mm | 62 ++++++++++++------- src/dawn/native/null/DeviceNull.cpp | 20 +++--- src/dawn/native/null/DeviceNull.h | 20 +++--- src/dawn/native/opengl/DeviceGL.cpp | 20 +++--- src/dawn/native/opengl/DeviceGL.h | 19 +++--- .../native/vulkan/CommandRecordingContext.h | 1 + src/dawn/native/vulkan/DeviceVk.cpp | 41 +++++++----- src/dawn/native/vulkan/DeviceVk.h | 23 ++++--- .../tests/unittests/native/mocks/DeviceMock.h | 5 +- 22 files changed, 292 insertions(+), 149 deletions(-) diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp index 3ec0328a88..a4e2e1746c 100644 --- a/src/dawn/native/Device.cpp +++ b/src/dawn/native/Device.cpp @@ -1918,4 +1918,28 @@ ExecutionSerial DeviceBase::GetScheduledWorkDoneSerial() const { return HasPendingCommands() ? GetPendingCommandSerial() : GetLastSubmittedCommandSerial(); } +MaybeError DeviceBase::CopyFromStagingToBuffer(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { + DAWN_TRY( + CopyFromStagingToBufferImpl(source, sourceOffset, destination, destinationOffset, size)); + if (GetDynamicUploader()->ShouldFlush()) { + ForceEventualFlushOfCommands(); + } + return {}; +} + +MaybeError DeviceBase::CopyFromStagingToTexture(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) { + DAWN_TRY(CopyFromStagingToTextureImpl(source, src, dst, copySizePixels)); + if (GetDynamicUploader()->ShouldFlush()) { + ForceEventualFlushOfCommands(); + } + return {}; +} + } // namespace dawn::native diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h index 28ea5b7f81..6fd0d5a221 100644 --- a/src/dawn/native/Device.h +++ b/src/dawn/native/Device.h @@ -298,15 +298,15 @@ class DeviceBase : public RefCountedWithExternalCount { void StoreCachedBlob(const CacheKey& key, const Blob& blob); virtual ResultOrError> CreateStagingBuffer(size_t size) = 0; - virtual MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) = 0; - virtual MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) = 0; + MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size); + MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels); DynamicUploader* GetDynamicUploader() const; @@ -405,6 +405,15 @@ class DeviceBase : public RefCountedWithExternalCount { // The serial by which time all currently submitted or pending operations will be completed. ExecutionSerial GetScheduledWorkDoneSerial() const; + // For the commands being internally recorded in backend, that were not urgent to submit, this + // method makes them to be submitted as soon as possbile in next ticks. + virtual void ForceEventualFlushOfCommands() = 0; + + // In the 'Normal' mode, currently recorded commands in the backend normally will be actually + // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed + // as late as possible, for example, until the client has explictly issued a submission. + enum class SubmitMode { Normal, Passive }; + protected: // Constructor used only for mocking and testing. DeviceBase(); @@ -515,6 +524,16 @@ class DeviceBase : public RefCountedWithExternalCount { // Indicates whether the backend has pending commands to be submitted as soon as possible. virtual bool HasPendingCommands() const = 0; + virtual MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) = 0; + virtual MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) = 0; + wgpu::ErrorCallback mUncapturedErrorCallback = nullptr; void* mUncapturedErrorUserdata = nullptr; diff --git a/src/dawn/native/DynamicUploader.cpp b/src/dawn/native/DynamicUploader.cpp index bae374f2cb..9dca0577c5 100644 --- a/src/dawn/native/DynamicUploader.cpp +++ b/src/dawn/native/DynamicUploader.cpp @@ -126,4 +126,25 @@ ResultOrError DynamicUploader::Allocate(uint64_t allocationSize, uploadHandle.startOffset += additionalOffset; return uploadHandle; } + +bool DynamicUploader::ShouldFlush() { + uint64_t kTotalAllocatedSizeThreshold = 64 * 1024 * 1024; + // We use total allocated size instead of pending-upload size to prevent Dawn from allocating + // too much GPU memory so that the risk of OOM can be minimized. + return GetTotalAllocatedSize() > kTotalAllocatedSizeThreshold; +} + +uint64_t DynamicUploader::GetTotalAllocatedSize() { + uint64_t size = 0; + for (const auto& buffer : mReleasedStagingBuffers.IterateAll()) { + size += buffer->GetSize(); + } + for (const auto& buffer : mRingBuffers) { + if (buffer->mStagingBuffer != nullptr) { + size += buffer->mStagingBuffer->GetSize(); + } + } + return size; +} + } // namespace dawn::native diff --git a/src/dawn/native/DynamicUploader.h b/src/dawn/native/DynamicUploader.h index 0317e8de5e..9cc6a831c1 100644 --- a/src/dawn/native/DynamicUploader.h +++ b/src/dawn/native/DynamicUploader.h @@ -49,8 +49,11 @@ class DynamicUploader { uint64_t offsetAlignment); void Deallocate(ExecutionSerial lastCompletedSerial); + bool ShouldFlush(); + private: static constexpr uint64_t kRingBufferSize = 4 * 1024 * 1024; + uint64_t GetTotalAllocatedSize(); struct RingBuffer { std::unique_ptr mStagingBuffer; diff --git a/src/dawn/native/Queue.cpp b/src/dawn/native/Queue.cpp index 801e8edf13..d1d6025d43 100644 --- a/src/dawn/native/Queue.cpp +++ b/src/dawn/native/Queue.cpp @@ -225,6 +225,7 @@ void QueueBase::APIOnSubmittedWorkDone(uint64_t signalValue, } void QueueBase::TrackTask(std::unique_ptr task) { + GetDevice()->ForceEventualFlushOfCommands(); // we can move the task to the callback task manager, as it's ready to be called if there are no // scheduled commands. if (!GetDevice()->HasScheduledCommands()) { diff --git a/src/dawn/native/d3d12/BufferD3D12.cpp b/src/dawn/native/d3d12/BufferD3D12.cpp index f880760839..eeb19c351f 100644 --- a/src/dawn/native/d3d12/BufferD3D12.cpp +++ b/src/dawn/native/d3d12/BufferD3D12.cpp @@ -488,8 +488,8 @@ MaybeError Buffer::ClearBuffer(CommandRecordingContext* commandContext, memset(uploadHandle.mappedBuffer, clearValue, size); - device->CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer, - uploadHandle.startOffset, this, offset, size); + device->CopyFromStagingToBufferHelper(commandContext, uploadHandle.stagingBuffer, + uploadHandle.startOffset, this, offset, size); } return {}; diff --git a/src/dawn/native/d3d12/CommandRecordingContext.cpp b/src/dawn/native/d3d12/CommandRecordingContext.cpp index cd44e4aed1..6a368a07b8 100644 --- a/src/dawn/native/d3d12/CommandRecordingContext.cpp +++ b/src/dawn/native/d3d12/CommandRecordingContext.cpp @@ -60,6 +60,7 @@ MaybeError CommandRecordingContext::Open(ID3D12Device* d3d12Device, } mIsOpen = true; + mNeedsSubmit = false; return {}; } @@ -128,6 +129,7 @@ MaybeError CommandRecordingContext::ExecuteCommandList(Device* device) { } mIsOpen = false; + mNeedsSubmit = false; mSharedTextures.clear(); mHeapsPendingUsage.clear(); mTempBuffers.clear(); @@ -162,6 +164,7 @@ void CommandRecordingContext::Release() { mD3d12CommandList.Reset(); mD3d12CommandList4.Reset(); mIsOpen = false; + mNeedsSubmit = false; mSharedTextures.clear(); mHeapsPendingUsage.clear(); mTempBuffers.clear(); @@ -171,6 +174,14 @@ bool CommandRecordingContext::IsOpen() const { return mIsOpen; } +bool CommandRecordingContext::NeedsSubmit() const { + return mNeedsSubmit; +} + +void CommandRecordingContext::SetNeedsSubmit() { + mNeedsSubmit = true; +} + void CommandRecordingContext::AddToTempBuffers(Ref tempBuffer) { mTempBuffers.emplace_back(tempBuffer); } diff --git a/src/dawn/native/d3d12/CommandRecordingContext.h b/src/dawn/native/d3d12/CommandRecordingContext.h index 80b6204e1b..49a41c1c95 100644 --- a/src/dawn/native/d3d12/CommandRecordingContext.h +++ b/src/dawn/native/d3d12/CommandRecordingContext.h @@ -37,6 +37,8 @@ class CommandRecordingContext { ID3D12GraphicsCommandList4* GetCommandList4() const; void Release(); bool IsOpen() const; + bool NeedsSubmit() const; + void SetNeedsSubmit(); MaybeError ExecuteCommandList(Device* device); @@ -48,6 +50,7 @@ class CommandRecordingContext { ComPtr mD3d12CommandList; ComPtr mD3d12CommandList4; bool mIsOpen = false; + bool mNeedsSubmit = false; std::set mSharedTextures; std::vector mHeapsPendingUsage; diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp index 73bb13bd35..d26fefd049 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.cpp +++ b/src/dawn/native/d3d12/DeviceD3D12.cpp @@ -274,12 +274,16 @@ ResidencyManager* Device::GetResidencyManager() const { return mResidencyManager.get(); } -ResultOrError Device::GetPendingCommandContext() { +ResultOrError Device::GetPendingCommandContext( + Device::SubmitMode submitMode) { // Callers of GetPendingCommandList do so to record commands. Only reserve a command // allocator when it is needed so we don't submit empty command lists if (!mPendingCommands.IsOpen()) { DAWN_TRY(mPendingCommands.Open(mD3d12Device.Get(), mCommandAllocatorManager.get())); } + if (submitMode == Device::SubmitMode::Normal) { + mPendingCommands.SetNeedsSubmit(); + } return &mPendingCommands; } @@ -309,9 +313,9 @@ MaybeError Device::ClearBufferToZero(CommandRecordingContext* commandContext, memset(uploadHandle.mappedBuffer, 0u, kZeroBufferSize); - CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer, - uploadHandle.startOffset, mZeroBuffer.Get(), 0, - kZeroBufferSize); + CopyFromStagingToBufferHelper(commandContext, uploadHandle.stagingBuffer, + uploadHandle.startOffset, mZeroBuffer.Get(), 0, + kZeroBufferSize); mZeroBuffer->SetIsDataInitialized(); } @@ -346,7 +350,7 @@ MaybeError Device::TickImpl() { mDepthStencilViewAllocator->Tick(completedSerial); mUsedComObjectRefs.ClearUpTo(completedSerial); - if (mPendingCommands.IsOpen()) { + if (mPendingCommands.IsOpen() && mPendingCommands.NeedsSubmit()) { DAWN_TRY(ExecutePendingCommandContext()); DAWN_TRY(NextSerial()); } @@ -401,7 +405,13 @@ void Device::ReferenceUntilUnused(ComPtr object) { } bool Device::HasPendingCommands() const { - return mPendingCommands.IsOpen(); + return mPendingCommands.NeedsSubmit(); +} + +void Device::ForceEventualFlushOfCommands() { + if (mPendingCommands.IsOpen()) { + mPendingCommands.SetNeedsSubmit(); + } } MaybeError Device::ExecutePendingCommandContext() { @@ -484,13 +494,13 @@ ResultOrError> Device::CreateStagingBuffer(si return std::move(stagingBuffer); } -MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) { +MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { CommandRecordingContext* commandRecordingContext; - DAWN_TRY_ASSIGN(commandRecordingContext, GetPendingCommandContext()); + DAWN_TRY_ASSIGN(commandRecordingContext, GetPendingCommandContext(Device::SubmitMode::Passive)); Buffer* dstBuffer = ToBackend(destination); @@ -499,18 +509,18 @@ MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, commandRecordingContext, destinationOffset, size)); DAWN_UNUSED(cleared); - CopyFromStagingToBufferImpl(commandRecordingContext, source, sourceOffset, destination, - destinationOffset, size); + CopyFromStagingToBufferHelper(commandRecordingContext, source, sourceOffset, destination, + destinationOffset, size); return {}; } -void Device::CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext, - StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) { +void Device::CopyFromStagingToBufferHelper(CommandRecordingContext* commandContext, + StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { ASSERT(commandContext != nullptr); Buffer* dstBuffer = ToBackend(destination); StagingBuffer* srcBuffer = ToBackend(source); @@ -521,12 +531,12 @@ void Device::CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext sourceOffset, size); } -MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) { +MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) { CommandRecordingContext* commandContext; - DAWN_TRY_ASSIGN(commandContext, GetPendingCommandContext()); + DAWN_TRY_ASSIGN(commandContext, GetPendingCommandContext(Device::SubmitMode::Passive)); Texture* texture = ToBackend(dst->texture.Get()); SubresourceRange range = GetSubresourcesAffectedByCopy(*dst, copySizePixels); diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h index 70f6fab308..ccd5cb01d6 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.h +++ b/src/dawn/native/d3d12/DeviceD3D12.h @@ -77,7 +77,8 @@ class Device final : public DeviceBase { ComPtr GetDxcCompiler() const; ComPtr GetDxcValidator() const; - ResultOrError GetPendingCommandContext(); + ResultOrError GetPendingCommandContext( + Device::SubmitMode submitMode = Device::SubmitMode::Normal); MaybeError ClearBufferToZero(CommandRecordingContext* commandContext, BufferBase* destination, @@ -91,26 +92,28 @@ class Device final : public DeviceBase { void ReferenceUntilUnused(ComPtr object); + void ForceEventualFlushOfCommands() override; + MaybeError ExecutePendingCommandContext(); ResultOrError> CreateStagingBuffer(size_t size) override; - MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, + MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) override; + + void CopyFromStagingToBufferHelper(CommandRecordingContext* commandContext, + StagingBufferBase* source, uint64_t sourceOffset, BufferBase* destination, uint64_t destinationOffset, - uint64_t size) override; + uint64_t size); - void CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext, - StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size); - - MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) override; + MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) override; ResultOrError AllocateMemory( D3D12_HEAP_TYPE heapType, diff --git a/src/dawn/native/metal/CommandRecordingContext.h b/src/dawn/native/metal/CommandRecordingContext.h index 025c88bbe0..ca096bb535 100644 --- a/src/dawn/native/metal/CommandRecordingContext.h +++ b/src/dawn/native/metal/CommandRecordingContext.h @@ -30,6 +30,8 @@ class CommandRecordingContext : NonMovable { ~CommandRecordingContext(); id GetCommands(); + void SetNeedsSubmit(); + bool NeedsSubmit() const; void MarkUsed(); bool WasUsed() const; @@ -59,6 +61,7 @@ class CommandRecordingContext : NonMovable { NSPRef> mCompute; NSPRef> mRender; bool mInEncoder = false; + bool mNeedsSubmit = false; bool mUsed = false; }; diff --git a/src/dawn/native/metal/CommandRecordingContext.mm b/src/dawn/native/metal/CommandRecordingContext.mm index 294e53f8ea..ad5ab82d3f 100644 --- a/src/dawn/native/metal/CommandRecordingContext.mm +++ b/src/dawn/native/metal/CommandRecordingContext.mm @@ -29,6 +29,13 @@ id CommandRecordingContext::GetCommands() { return mCommands.Get(); } +void CommandRecordingContext::SetNeedsSubmit() { + mNeedsSubmit = true; +} +bool CommandRecordingContext::NeedsSubmit() const { + return mNeedsSubmit; +} + void CommandRecordingContext::MarkUsed() { mUsed = true; } @@ -38,6 +45,7 @@ bool CommandRecordingContext::WasUsed() const { MaybeError CommandRecordingContext::PrepareNextCommandBuffer(id queue) { ASSERT(mCommands == nil); + ASSERT(!mNeedsSubmit); ASSERT(!mUsed); // The MTLCommandBuffer will be autoreleased by default. @@ -58,6 +66,7 @@ NSPRef> CommandRecordingContext::AcquireCommands() { } ASSERT(!mInEncoder); + mNeedsSubmit = false; mUsed = false; return std::move(mCommands); } diff --git a/src/dawn/native/metal/DeviceMTL.h b/src/dawn/native/metal/DeviceMTL.h index e6c14eae99..fef04bf7a6 100644 --- a/src/dawn/native/metal/DeviceMTL.h +++ b/src/dawn/native/metal/DeviceMTL.h @@ -49,7 +49,8 @@ class Device final : public DeviceBase { id GetMTLDevice(); id GetMTLQueue(); - CommandRecordingContext* GetPendingCommandContext(); + CommandRecordingContext* GetPendingCommandContext( + Device::SubmitMode submitMode = Device::SubmitMode::Normal); MaybeError SubmitPendingCommandBuffer(); Ref CreateTextureWrappingIOSurface(const ExternalImageDescriptor* descriptor, @@ -57,15 +58,15 @@ class Device final : public DeviceBase { void WaitForCommandsToBeScheduled(); ResultOrError> CreateStagingBuffer(size_t size) override; - MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) override; - MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& dataLayout, - TextureCopy* dst, - const Extent3D& copySizePixels) override; + MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) override; + MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& dataLayout, + TextureCopy* dst, + const Extent3D& copySizePixels) override; uint32_t GetOptimalBytesPerRowAlignment() const override; uint64_t GetOptimalBufferToTextureCopyOffsetAlignment() const override; @@ -79,6 +80,8 @@ class Device final : public DeviceBase { // single-byte buffer id GetDummyBlitMtlBuffer(); + void ForceEventualFlushOfCommands() override; + private: Device(AdapterBase* adapter, NSPRef> mtlDevice, diff --git a/src/dawn/native/metal/DeviceMTL.mm b/src/dawn/native/metal/DeviceMTL.mm index 5adfefbdf5..ee4952ff85 100644 --- a/src/dawn/native/metal/DeviceMTL.mm +++ b/src/dawn/native/metal/DeviceMTL.mm @@ -343,7 +343,9 @@ ResultOrError Device::CheckAndUpdateCompletedSerials() { } MaybeError Device::TickImpl() { - DAWN_TRY(SubmitPendingCommandBuffer()); + if (mCommandContext.NeedsSubmit()) { + DAWN_TRY(SubmitPendingCommandBuffer()); + } // Just run timestamp period calculation when timestamp feature is enabled and timestamp // conversion is not disabled. @@ -366,17 +368,26 @@ id Device::GetMTLQueue() { return mCommandQueue.Get(); } -CommandRecordingContext* Device::GetPendingCommandContext() { +CommandRecordingContext* Device::GetPendingCommandContext(Device::SubmitMode submitMode) { + if (submitMode == DeviceBase::SubmitMode::Normal) { + mCommandContext.SetNeedsSubmit(); + } mCommandContext.MarkUsed(); return &mCommandContext; } bool Device::HasPendingCommands() const { - return mCommandContext.WasUsed(); + return mCommandContext.NeedsSubmit(); +} + +void Device::ForceEventualFlushOfCommands() { + if (mCommandContext.WasUsed()) { + mCommandContext.SetNeedsSubmit(); + } } MaybeError Device::SubmitPendingCommandBuffer() { - if (!mCommandContext.WasUsed()) { + if (!mCommandContext.NeedsSubmit()) { return {}; } @@ -428,42 +439,45 @@ ResultOrError> Device::CreateStagingBuffer(si return std::move(stagingBuffer); } -MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) { +MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { // Metal validation layers forbid 0-sized copies, assert it is skipped prior to calling // this function. ASSERT(size != 0); ToBackend(destination) - ->EnsureDataInitializedAsDestination(GetPendingCommandContext(), destinationOffset, size); + ->EnsureDataInitializedAsDestination( + GetPendingCommandContext(DeviceBase::SubmitMode::Passive), destinationOffset, size); id uploadBuffer = ToBackend(source)->GetBufferHandle(); id buffer = ToBackend(destination)->GetMTLBuffer(); - [GetPendingCommandContext()->EnsureBlit() copyFromBuffer:uploadBuffer - sourceOffset:sourceOffset - toBuffer:buffer - destinationOffset:destinationOffset - size:size]; + [GetPendingCommandContext(DeviceBase::SubmitMode::Passive)->EnsureBlit() + copyFromBuffer:uploadBuffer + sourceOffset:sourceOffset + toBuffer:buffer + destinationOffset:destinationOffset + size:size]; return {}; } // In Metal we don't write from the CPU to the texture directly which can be done using the // replaceRegion function, because the function requires a non-private storage mode and Dawn // sets the private storage mode by default for all textures except IOSurfaces on macOS. -MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& dataLayout, - TextureCopy* dst, - const Extent3D& copySizePixels) { +MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& dataLayout, + TextureCopy* dst, + const Extent3D& copySizePixels) { Texture* texture = ToBackend(dst->texture.Get()); - EnsureDestinationTextureInitialized(GetPendingCommandContext(), texture, *dst, copySizePixels); + EnsureDestinationTextureInitialized(GetPendingCommandContext(DeviceBase::SubmitMode::Passive), + texture, *dst, copySizePixels); - RecordCopyBufferToTexture(GetPendingCommandContext(), ToBackend(source)->GetBufferHandle(), - source->GetSize(), dataLayout.offset, dataLayout.bytesPerRow, - dataLayout.rowsPerImage, texture, dst->mipLevel, dst->origin, - dst->aspect, copySizePixels); + RecordCopyBufferToTexture(GetPendingCommandContext(DeviceBase::SubmitMode::Passive), + ToBackend(source)->GetBufferHandle(), source->GetSize(), + dataLayout.offset, dataLayout.bytesPerRow, dataLayout.rowsPerImage, + texture, dst->mipLevel, dst->origin, dst->aspect, copySizePixels); return {}; } diff --git a/src/dawn/native/null/DeviceNull.cpp b/src/dawn/native/null/DeviceNull.cpp index 90cc9626cc..e5b84d8faa 100644 --- a/src/dawn/native/null/DeviceNull.cpp +++ b/src/dawn/native/null/DeviceNull.cpp @@ -217,11 +217,11 @@ bool Device::HasPendingCommands() const { return false; } -MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) { +MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { if (IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse)) { destination->SetIsDataInitialized(); } @@ -238,10 +238,10 @@ MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, return {}; } -MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) { +MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) { return {}; } @@ -556,6 +556,8 @@ float Device::GetTimestampPeriodInNS() const { return 1.0f; } +void Device::ForceEventualFlushOfCommands() {} + Texture::Texture(DeviceBase* device, const TextureDescriptor* descriptor, TextureState state) : TextureBase(device, descriptor, state) {} diff --git a/src/dawn/native/null/DeviceNull.h b/src/dawn/native/null/DeviceNull.h index e2d6036666..51274ab79c 100644 --- a/src/dawn/native/null/DeviceNull.h +++ b/src/dawn/native/null/DeviceNull.h @@ -106,15 +106,15 @@ class Device final : public DeviceBase { MaybeError SubmitPendingOperations(); ResultOrError> CreateStagingBuffer(size_t size) override; - MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) override; - MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) override; + MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) override; + MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) override; MaybeError IncrementMemoryUsage(uint64_t bytes); void DecrementMemoryUsage(uint64_t bytes); @@ -124,6 +124,8 @@ class Device final : public DeviceBase { float GetTimestampPeriodInNS() const override; + void ForceEventualFlushOfCommands() override; + private: using DeviceBase::DeviceBase; diff --git a/src/dawn/native/opengl/DeviceGL.cpp b/src/dawn/native/opengl/DeviceGL.cpp index 7ca79ab08c..b663841076 100644 --- a/src/dawn/native/opengl/DeviceGL.cpp +++ b/src/dawn/native/opengl/DeviceGL.cpp @@ -418,18 +418,18 @@ ResultOrError> Device::CreateStagingBuffer(si return DAWN_UNIMPLEMENTED_ERROR("Device unable to create staging buffer."); } -MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) { +MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { return DAWN_UNIMPLEMENTED_ERROR("Device unable to copy from staging buffer."); } -MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) { +MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) { return DAWN_UNIMPLEMENTED_ERROR("Device unable to copy from staging buffer to texture."); } @@ -464,6 +464,8 @@ float Device::GetTimestampPeriodInNS() const { return 1.0f; } +void Device::ForceEventualFlushOfCommands() {} + const OpenGLFunctions& Device::GetGL() const { if (mContext) { mContext->MakeCurrent(); diff --git a/src/dawn/native/opengl/DeviceGL.h b/src/dawn/native/opengl/DeviceGL.h index 1831774a44..8c544f5d10 100644 --- a/src/dawn/native/opengl/DeviceGL.h +++ b/src/dawn/native/opengl/DeviceGL.h @@ -68,21 +68,22 @@ class Device final : public DeviceBase { MaybeError TickImpl() override; ResultOrError> CreateStagingBuffer(size_t size) override; - MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) override; + MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) override; - MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) override; + MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) override; uint32_t GetOptimalBytesPerRowAlignment() const override; uint64_t GetOptimalBufferToTextureCopyOffsetAlignment() const override; float GetTimestampPeriodInNS() const override; + void ForceEventualFlushOfCommands() override; class Context { public: diff --git a/src/dawn/native/vulkan/CommandRecordingContext.h b/src/dawn/native/vulkan/CommandRecordingContext.h index c8fb5445c8..7948e3b19d 100644 --- a/src/dawn/native/vulkan/CommandRecordingContext.h +++ b/src/dawn/native/vulkan/CommandRecordingContext.h @@ -40,6 +40,7 @@ struct CommandRecordingContext { // For Device state tracking only. VkCommandPool commandPool = VK_NULL_HANDLE; + bool needsSubmit = false; bool used = false; // In some cases command buffer will need to be split to accomodate driver bug workarounds. diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp index be8dc51719..ffb058d4df 100644 --- a/src/dawn/native/vulkan/DeviceVk.cpp +++ b/src/dawn/native/vulkan/DeviceVk.cpp @@ -230,7 +230,7 @@ MaybeError Device::TickImpl() { mDeleter->Tick(completedSerial); mDescriptorAllocatorsPendingDeallocation.ClearUpTo(completedSerial); - if (mRecordingContext.used) { + if (mRecordingContext.needsSubmit) { DAWN_TRY(SubmitPendingCommands()); } @@ -282,18 +282,23 @@ void Device::EnqueueDeferredDeallocation(DescriptorSetAllocator* allocator) { mDescriptorAllocatorsPendingDeallocation.Enqueue(allocator, GetPendingCommandSerial()); } -CommandRecordingContext* Device::GetPendingRecordingContext() { +CommandRecordingContext* Device::GetPendingRecordingContext(Device::SubmitMode submitMode) { ASSERT(mRecordingContext.commandBuffer != VK_NULL_HANDLE); + mRecordingContext.needsSubmit |= (submitMode == DeviceBase::SubmitMode::Normal); mRecordingContext.used = true; return &mRecordingContext; } bool Device::HasPendingCommands() const { - return mRecordingContext.used; + return mRecordingContext.needsSubmit; +} + +void Device::ForceEventualFlushOfCommands() { + mRecordingContext.needsSubmit |= mRecordingContext.used; } MaybeError Device::SubmitPendingCommands() { - if (!mRecordingContext.used) { + if (!mRecordingContext.needsSubmit) { return {}; } @@ -705,7 +710,7 @@ ResultOrError Device::CheckAndUpdateCompletedSerials() { } MaybeError Device::PrepareRecordingContext() { - ASSERT(!mRecordingContext.used); + ASSERT(!mRecordingContext.needsSubmit); ASSERT(mRecordingContext.commandBuffer == VK_NULL_HANDLE); ASSERT(mRecordingContext.commandPool == VK_NULL_HANDLE); @@ -812,16 +817,17 @@ ResultOrError> Device::CreateStagingBuffer(si return std::move(stagingBuffer); } -MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) { +MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { // It is a validation error to do a 0-sized copy in Vulkan, check it is skipped prior to // calling this function. ASSERT(size != 0); - CommandRecordingContext* recordingContext = GetPendingRecordingContext(); + CommandRecordingContext* recordingContext = + GetPendingRecordingContext(DeviceBase::SubmitMode::Passive); ToBackend(destination) ->EnsureDataInitializedAsDestination(recordingContext, destinationOffset, size); @@ -845,15 +851,16 @@ MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, return {}; } -MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) { +MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) { // There is no need of a barrier to make host writes available and visible to the copy // operation for HOST_COHERENT memory. The Vulkan spec for vkQueueSubmit describes that it // does an implicit availability, visibility and domain operation. - CommandRecordingContext* recordingContext = GetPendingRecordingContext(); + CommandRecordingContext* recordingContext = + GetPendingRecordingContext(DeviceBase::SubmitMode::Passive); VkBufferImageCopy region = ComputeBufferImageCopyRegion(src, *dst, copySizePixels); VkImageSubresourceLayers subresource = region.imageSubresource; @@ -1118,7 +1125,7 @@ void Device::DestroyImpl() { ToBackend(GetAdapter())->GetVulkanInstance()->StopListeningForDeviceMessages(this); // Immediately tag the recording context as unused so we don't try to submit it in Tick. - mRecordingContext.used = false; + mRecordingContext.needsSubmit = false; if (mRecordingContext.commandPool != VK_NULL_HANDLE) { // The VkCommandBuffer memory should be wholly owned by the pool and freed when it is // destroyed, but that's not the case in some drivers and the leak memory. diff --git a/src/dawn/native/vulkan/DeviceVk.h b/src/dawn/native/vulkan/DeviceVk.h index 963fcaa15a..d9c01db213 100644 --- a/src/dawn/native/vulkan/DeviceVk.h +++ b/src/dawn/native/vulkan/DeviceVk.h @@ -65,7 +65,8 @@ class Device final : public DeviceBase { ResourceMemoryAllocator* GetResourceMemoryAllocator() const; external_semaphore::Service* GetExternalSemaphoreService() const; - CommandRecordingContext* GetPendingRecordingContext(); + CommandRecordingContext* GetPendingRecordingContext( + Device::SubmitMode submitMode = Device::SubmitMode::Normal); MaybeError SplitRecordingContext(CommandRecordingContext* recordingContext); MaybeError SubmitPendingCommands(); @@ -89,15 +90,15 @@ class Device final : public DeviceBase { MaybeError TickImpl() override; ResultOrError> CreateStagingBuffer(size_t size) override; - MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, - uint64_t sourceOffset, - BufferBase* destination, - uint64_t destinationOffset, - uint64_t size) override; - MaybeError CopyFromStagingToTexture(const StagingBufferBase* source, - const TextureDataLayout& src, - TextureCopy* dst, - const Extent3D& copySizePixels) override; + MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) override; + MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source, + const TextureDataLayout& src, + TextureCopy* dst, + const Extent3D& copySizePixels) override; // Return the fixed subgroup size to use for compute shaders on this device or 0 if none // needs to be set. @@ -115,6 +116,8 @@ class Device final : public DeviceBase { // Used to associate this device with validation layer messages. const char* GetDebugPrefix() { return mDebugPrefix.c_str(); } + void ForceEventualFlushOfCommands() override; + private: Device(Adapter* adapter, const DeviceDescriptor* descriptor, diff --git a/src/dawn/tests/unittests/native/mocks/DeviceMock.h b/src/dawn/tests/unittests/native/mocks/DeviceMock.h index 309f2d9c0d..d3a3a84d51 100644 --- a/src/dawn/tests/unittests/native/mocks/DeviceMock.h +++ b/src/dawn/tests/unittests/native/mocks/DeviceMock.h @@ -39,11 +39,11 @@ class DeviceMock : public DeviceBase { (size_t), (override)); MOCK_METHOD(MaybeError, - CopyFromStagingToBuffer, + CopyFromStagingToBufferImpl, (StagingBufferBase*, uint64_t, BufferBase*, uint64_t, uint64_t), (override)); MOCK_METHOD(MaybeError, - CopyFromStagingToTexture, + CopyFromStagingToTextureImpl, (const StagingBufferBase*, const TextureDataLayout&, TextureCopy*, const Extent3D&), (override)); @@ -51,6 +51,7 @@ class DeviceMock : public DeviceBase { MOCK_METHOD(uint64_t, GetOptimalBufferToTextureCopyOffsetAlignment, (), (const, override)); MOCK_METHOD(float, GetTimestampPeriodInNS, (), (const, override)); + MOCK_METHOD(void, ForceEventualFlushOfCommands, (), (override)); MOCK_METHOD(ResultOrError>, CreateBindGroupImpl,