From e3eb03f8a3133f1b1c2ddc28e48caf29c8d2edaa Mon Sep 17 00:00:00 2001
From: Peng Huang
Date: Thu, 19 Jan 2023 20:33:32 +0000
Subject: [PATCH] Add the last usage serial in Buffer

Track the serial of the last submitted command that uses a buffer, and
use it to optimize MapAsync(): the MapAsync() callback now fires as soon
as the buffer's last usage serial completes, instead of waiting on the
queue's pending command serial at the time MapAsync() is called.

Bug: b/265151060
Change-Id: Ibc95d4e41d41896f0a49b0fd1068912b46ea14e1
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/116693
Commit-Queue: Peng Huang
Kokoro: Kokoro
Reviewed-by: Austin Eng
---
 src/dawn/native/Buffer.cpp                 |  13 +-
 src/dawn/native/Buffer.h                   |   3 +
 src/dawn/native/CommandEncoder.cpp         |  30 ++--
 src/dawn/native/Queue.cpp                  |  41 ++++--
 src/dawn/native/Queue.h                    |   7 +-
 src/dawn/native/d3d12/BufferD3D12.cpp      |  28 ++--
 src/dawn/native/d3d12/BufferD3D12.h        |   4 -
 src/dawn/native/metal/BufferMTL.h          |   1 +
 src/dawn/native/metal/BufferMTL.mm         |   5 +
 src/dawn/native/metal/CommandBufferMTL.mm  |  23 ++-
 src/dawn/native/metal/DeviceMTL.mm         |   5 +-
 src/dawn/native/opengl/BufferGL.cpp        |   2 +
 src/dawn/native/vulkan/BufferVk.cpp        |  12 +-
 src/dawn/native/vulkan/BufferVk.h          |   8 +-
 src/dawn/native/vulkan/CommandBufferVk.cpp |   4 +-
 src/dawn/tests/end2end/BufferTests.cpp     | 158 +++++++++++++++++++++
 16 files changed, 272 insertions(+), 72 deletions(-)

diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index cd796560c0..0e8828bdbc 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp
@@ -455,8 +455,12 @@ void BufferBase::APIMapAsync(wgpu::MapMode mode,
     std::unique_ptr<MapRequestTask> request =
         std::make_unique<MapRequestTask>(GetDevice()->GetPlatform(), this, mLastMapID);
     TRACE_EVENT1(GetDevice()->GetPlatform(), General, "Buffer::APIMapAsync", "serial",
-                 uint64_t(GetDevice()->GetPendingCommandSerial()));
-    GetDevice()->GetQueue()->TrackTask(std::move(request));
+                 uint64_t(mLastUsageSerial));
+    if (mLastUsageSerial != kMaxExecutionSerial) {
+        GetDevice()->GetQueue()->TrackTask(std::move(request), mLastUsageSerial);
+    } else {
+        GetDevice()->GetQueue()->TrackTaskAfterEventualFlush(std::move(request));
+    }
 }
 
 void* BufferBase::APIGetMappedRange(size_t offset, size_t size) {
@@ -661,6 +665,11 @@ void BufferBase::SetIsDataInitialized() {
     mIsDataInitialized = true;
 }
 
+void BufferBase::SetLastUsageSerial(ExecutionSerial serial) {
+    ASSERT(serial >= mLastUsageSerial);
+    mLastUsageSerial = serial;
+}
+
 bool BufferBase::IsFullBufferRange(uint64_t offset, uint64_t size) const {
     return offset == 0 && size == GetSize();
 }
diff --git a/src/dawn/native/Buffer.h b/src/dawn/native/Buffer.h
index 729767f343..a120517c39 100644
--- a/src/dawn/native/Buffer.h
+++ b/src/dawn/native/Buffer.h
@@ -73,6 +73,7 @@ class BufferBase : public ApiObjectBase {
     bool NeedsInitialization() const;
     bool IsDataInitialized() const;
     void SetIsDataInitialized();
+    void SetLastUsageSerial(ExecutionSerial serial);
 
     virtual void* GetMappedPointer() = 0;
     void* GetMappedRange(size_t offset, size_t size, bool writable = true);
@@ -106,6 +107,8 @@ class BufferBase : public ApiObjectBase {
 
     uint64_t mAllocatedSize = 0;
 
+    ExecutionSerial mLastUsageSerial = ExecutionSerial(0);
+
   private:
     // A helper structure to enforce that the mapAsync callback is called only at the very end of
     // methods that might trigger callbacks. Non-copyable but movable for the assertion in the
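Two pieces above carry the whole optimization: SetLastUsageSerial() records the serial of the most recent command that touches the buffer, and APIMapAsync() schedules the map callback against that serial. The control flow, reduced to a self-contained sketch (simplified stand-ins, not Dawn's real types: ExecutionSerial is a bare uint64_t and the queue runs tasks inline rather than deferring them):

    #include <cstdint>
    #include <functional>
    #include <limits>
    #include <utility>

    using ExecutionSerial = uint64_t;
    // Sentinel meaning "usage is not tracked"; Dawn's kMaxExecutionSerial plays this role.
    constexpr ExecutionSerial kMaxExecutionSerial = std::numeric_limits<uint64_t>::max();

    struct Queue {
        // Fire `task` once the GPU passes `serial` (sketch: run inline).
        void TrackTask(std::function<void()> task, ExecutionSerial /*serial*/) { task(); }
        // Legacy path: flush all pending work, fire when everything scheduled completes.
        void TrackTaskAfterEventualFlush(std::function<void()> task) { task(); }
    };

    struct Buffer {
        Queue* queue = nullptr;
        ExecutionSerial mLastUsageSerial = ExecutionSerial(0);

        // Backends call this whenever they record a command that uses the buffer.
        void SetLastUsageSerial(ExecutionSerial serial) { mLastUsageSerial = serial; }

        void MapAsync(std::function<void()> callback) {
            if (mLastUsageSerial != kMaxExecutionSerial) {
                // Precise path: wait only for the last command that used this buffer.
                queue->TrackTask(std::move(callback), mLastUsageSerial);
            } else {
                // Backend opted out of tracking (see the GL change below):
                // conservatively flush and wait for all scheduled work.
                queue->TrackTaskAfterEventualFlush(std::move(callback));
            }
        }
    };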
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index 119aece7cb..df7db6d778 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -1159,11 +1159,11 @@ void CommandEncoder::APICopyBufferToBuffer(BufferBase* source,
                          "validating source %s usage.", source);
         DAWN_TRY_CONTEXT(ValidateCanUseAs(destination, wgpu::BufferUsage::CopyDst),
                          "validating destination %s usage.", destination);
-
-        mTopLevelBuffers.insert(source);
-        mTopLevelBuffers.insert(destination);
     }
 
+    mTopLevelBuffers.insert(source);
+    mTopLevelBuffers.insert(destination);
+
     CopyBufferToBufferCmd* copy =
         allocator->Allocate<CopyBufferToBufferCmd>(Command::CopyBufferToBuffer);
     copy->source = source;
@@ -1210,11 +1210,11 @@ void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source,
                               destination->texture->GetFormat().HasDepthOrStencil()));
         DAWN_TRY(ValidateLinearTextureData(source->layout, source->buffer->GetSize(),
                                            blockInfo, *copySize));
-
-        mTopLevelBuffers.insert(source->buffer);
-        mTopLevelTextures.insert(destination->texture);
     }
 
+    mTopLevelBuffers.insert(source->buffer);
+    mTopLevelTextures.insert(destination->texture);
+
     TextureDataLayout srcLayout = source->layout;
     ApplyDefaultTextureDataLayoutOptions(&srcLayout, blockInfo, *copySize);
@@ -1269,11 +1269,11 @@ void CommandEncoder::APICopyTextureToBuffer(const ImageCopyTexture* source,
                               source->texture->GetFormat().HasDepthOrStencil()));
         DAWN_TRY(ValidateLinearTextureData(
             destination->layout, destination->buffer->GetSize(), blockInfo, *copySize));
-
-        mTopLevelTextures.insert(source->texture);
-        mTopLevelBuffers.insert(destination->buffer);
     }
 
+    mTopLevelTextures.insert(source->texture);
+    mTopLevelBuffers.insert(destination->buffer);
+
     TextureDataLayout dstLayout = destination->layout;
     ApplyDefaultTextureDataLayoutOptions(&dstLayout, blockInfo, *copySize);
@@ -1377,11 +1377,11 @@ void CommandEncoder::APICopyTextureToTextureHelper(const ImageCopyTexture* sourc
             DAWN_TRY(ValidateCanUseAs(destination->texture, wgpu::TextureUsage::CopyDst,
                                       mUsageValidationMode));
         }
-
-        mTopLevelTextures.insert(source->texture);
-        mTopLevelTextures.insert(destination->texture);
     }
 
+    mTopLevelTextures.insert(source->texture);
+    mTopLevelTextures.insert(destination->texture);
+
     CopyTextureToTextureCmd* copy =
         allocator->Allocate<CopyTextureToTextureCmd>(Command::CopyTextureToTexture);
     copy->source.texture = source->texture;
@@ -1434,7 +1434,6 @@ void CommandEncoder::APIClearBuffer(BufferBase* buffer, uint64_t offset, uint64_
 
             DAWN_INVALID_IF(offset % 4 != 0, "Offset (%u) is not a multiple of 4 bytes,", offset);
 
-            mTopLevelBuffers.insert(buffer);
         } else {
             if (size == wgpu::kWholeSize) {
                 DAWN_ASSERT(buffer->GetSize() >= offset);
@@ -1442,6 +1441,8 @@ void CommandEncoder::APIClearBuffer(BufferBase* buffer, uint64_t offset, uint64_
             }
         }
 
+        mTopLevelBuffers.insert(buffer);
+
         ClearBufferCmd* cmd = allocator->Allocate<ClearBufferCmd>(Command::ClearBuffer);
         cmd->buffer = buffer;
         cmd->offset = offset;
@@ -1528,9 +1529,10 @@ void CommandEncoder::APIResolveQuerySet(QuerySetBase* querySet,
         DAWN_TRY(ValidateCanUseAs(destination, wgpu::BufferUsage::QueryResolve));
 
         TrackUsedQuerySet(querySet);
-        mTopLevelBuffers.insert(destination);
     }
 
+    mTopLevelBuffers.insert(destination);
+
     ResolveQuerySetCmd* cmd =
         allocator->Allocate<ResolveQuerySetCmd>(Command::ResolveQuerySet);
     cmd->querySet = querySet;
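A detail that is easy to miss in the CommandEncoder.cpp hunks: the mTopLevelBuffers/mTopLevelTextures insertions move out of the validation-only scope. Last-usage tracking at submit time needs the top-level resource sets populated even when the device skips validation, so the recording is now unconditional. The shape of the change, as a schematic (ValidationEnabled() and Validate() are declaration-only stand-ins for the DAWN_TRY validation block, not Dawn's API):

    #include <set>

    struct Buffer {};
    bool ValidationEnabled();  // stand-in for device->IsValidationEnabled()
    void Validate(Buffer*);    // stand-in for the DAWN_TRY(...) checks

    struct CommandEncoder {
        std::set<Buffer*> mTopLevelBuffers;

        void RecordCopyOld(Buffer* source) {
            if (ValidationEnabled()) {
                Validate(source);
                mTopLevelBuffers.insert(source);  // skipped when validation is off!
            }
        }

        void RecordCopyNew(Buffer* source) {
            if (ValidationEnabled()) {
                Validate(source);
            }
            mTopLevelBuffers.insert(source);  // always recorded for serial tracking
        }
    };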
diff --git a/src/dawn/native/Queue.cpp b/src/dawn/native/Queue.cpp
index d1d6025d43..acce952e31 100644
--- a/src/dawn/native/Queue.cpp
+++ b/src/dawn/native/Queue.cpp
@@ -218,24 +218,35 @@ void QueueBase::APIOnSubmittedWorkDone(uint64_t signalValue,
     // also used to make sure ALL queue work is finished in tests, so we also wait for pending
     // commands (this is non-observable outside of tests so it's ok to do deviate a bit from the
     // spec).
-    TrackTask(std::move(task));
+    TrackTaskAfterEventualFlush(std::move(task));
 
     TRACE_EVENT1(GetDevice()->GetPlatform(), General, "Queue::APIOnSubmittedWorkDone", "serial",
                  uint64_t(GetDevice()->GetPendingCommandSerial()));
 }
 
-void QueueBase::TrackTask(std::unique_ptr<TrackTaskCallback> task) {
-    GetDevice()->ForceEventualFlushOfCommands();
-    // we can move the task to the callback task manager, as it's ready to be called if there are no
-    // scheduled commands.
-    if (!GetDevice()->HasScheduledCommands()) {
+void QueueBase::TrackTask(std::unique_ptr<TrackTaskCallback> task, ExecutionSerial serial) {
+    // If the task depends on a serial which is not submitted yet, force a flush.
+    if (serial > GetDevice()->GetLastSubmittedCommandSerial()) {
+        GetDevice()->ForceEventualFlushOfCommands();
+    }
+
+    ASSERT(serial <= GetDevice()->GetScheduledWorkDoneSerial());
+
+    // If the command indicated by the serial has already completed, move the task straight to
+    // the callback task manager.
+    if (serial <= GetDevice()->GetCompletedCommandSerial()) {
         task->SetFinishedSerial(GetDevice()->GetCompletedCommandSerial());
         GetDevice()->GetCallbackTaskManager()->AddCallbackTask(std::move(task));
     } else {
-        mTasksInFlight.Enqueue(std::move(task), GetDevice()->GetScheduledWorkDoneSerial());
+        mTasksInFlight.Enqueue(std::move(task), serial);
     }
 }
 
+void QueueBase::TrackTaskAfterEventualFlush(std::unique_ptr<TrackTaskCallback> task) {
+    GetDevice()->ForceEventualFlushOfCommands();
+    TrackTask(std::move(task), GetDevice()->GetScheduledWorkDoneSerial());
+}
+
 void QueueBase::Tick(ExecutionSerial finishedSerial) {
     // If a user calls Queue::Submit inside a task, for example in a Buffer::MapAsync callback,
     // then the device will be ticked, which in turns ticks the queue, causing reentrance here.
@@ -427,9 +438,13 @@ MaybeError QueueBase::ValidateSubmit(uint32_t commandCount,
     for (uint32_t i = 0; i < commandCount; ++i) {
         DAWN_TRY(GetDevice()->ValidateObject(commands[i]));
         DAWN_TRY(commands[i]->ValidateCanUseInSubmitNow());
-
         const CommandBufferResourceUsage& usages = commands[i]->GetResourceUsages();
+        for (const BufferBase* buffer : usages.topLevelBuffers) {
+            DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
+        }
+
+        // Maybe track last usage for other resources, and use it to release resources earlier?
         for (const SyncScopeResourceUsage& scope : usages.renderPasses) {
             for (const BufferBase* buffer : scope.buffers) {
                 DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
             }
@@ -456,9 +471,6 @@ MaybeError QueueBase::ValidateSubmit(uint32_t commandCount,
             }
         }
 
-        for (const BufferBase* buffer : usages.topLevelBuffers) {
-            DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
-        }
         for (const TextureBase* texture : usages.topLevelTextures) {
             DAWN_TRY(texture->ValidateCanUseInSubmitNow());
         }
@@ -529,9 +541,10 @@ void QueueBase::SubmitInternal(uint32_t commandCount, CommandBufferBase* const*
     }
     TRACE_EVENT0(device->GetPlatform(), General, "Queue::Submit");
 
-    if (device->IsValidationEnabled() &&
-        device->ConsumedError(ValidateSubmit(commandCount, commands))) {
-        return;
+    if (device->IsValidationEnabled()) {
+        if (device->ConsumedError(ValidateSubmit(commandCount, commands))) {
+            return;
+        }
     }
     ASSERT(!IsError());
 
diff --git a/src/dawn/native/Queue.h b/src/dawn/native/Queue.h
index 4b4e2ff5bf..61227a6a34 100644
--- a/src/dawn/native/Queue.h
+++ b/src/dawn/native/Queue.h
@@ -17,7 +17,7 @@
 
 #include <memory>
 
-#include "dawn/common/SerialQueue.h"
+#include "dawn/common/SerialMap.h"
 #include "dawn/native/CallbackTaskManager.h"
 #include "dawn/native/Error.h"
 #include "dawn/native/Forward.h"
@@ -77,7 +77,8 @@ class QueueBase : public ApiObjectBase {
                            uint64_t bufferOffset,
                            const void* data,
                            size_t size);
-    void TrackTask(std::unique_ptr<TrackTaskCallback> task);
+    void TrackTask(std::unique_ptr<TrackTaskCallback> task, ExecutionSerial serial);
+    void TrackTaskAfterEventualFlush(std::unique_ptr<TrackTaskCallback> task);
     void Tick(ExecutionSerial finishedSerial);
     void HandleDeviceLoss();
 
@@ -121,7 +122,7 @@ class QueueBase : public ApiObjectBase {
 
     void SubmitInternal(uint32_t commandCount, CommandBufferBase* const* commands);
 
-    SerialQueue<ExecutionSerial, std::unique_ptr<TrackTaskCallback>> mTasksInFlight;
+    SerialMap<ExecutionSerial, std::unique_ptr<TrackTaskCallback>> mTasksInFlight;
 };
 
 }  // namespace dawn::native
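TrackTask() now distinguishes three cases: the target serial is not yet submitted (force a flush so it eventually completes), already completed (hand the callback straight to the callback task manager), or in flight (park it keyed by its own serial). The container switch from SerialQueue to SerialMap is forced by the last case: map tasks arrive keyed by each buffer's last-usage serial, and two MapAsync() calls can enqueue serials out of order, which a monotonic queue cannot accept. A compact model of the dispatch, with a hypothetical Device holding the three serial watermarks (example values, not Dawn's interface):

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <utility>
    #include <vector>

    using ExecutionSerial = uint64_t;
    using Task = std::function<void()>;

    struct Device {
        ExecutionSerial lastSubmitted = 10;  // highest serial handed to the GPU
        ExecutionSerial scheduled = 12;      // highest serial that exists after a flush
        ExecutionSerial completed = 8;       // GPU progress so far
        void ForceEventualFlushOfCommands() {}
    };

    struct Queue {
        Device* device;
        // Buckets keyed by the serial each task waits on; out-of-order insertion
        // is fine here, which is what SerialMap provides over SerialQueue.
        std::map<ExecutionSerial, std::vector<Task>> tasksInFlight;

        void TrackTask(Task task, ExecutionSerial serial) {
            if (serial > device->lastSubmitted) {
                // The work this task waits on is recorded but not submitted yet.
                device->ForceEventualFlushOfCommands();
            }
            if (serial <= device->completed) {
                task();  // already done (Dawn defers this via the callback task manager)
            } else {
                tasksInFlight[serial].push_back(std::move(task));
            }
        }

        // Tick(finishedSerial) would drain every bucket with key <= finishedSerial.
    };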
diff --git a/src/dawn/native/d3d12/BufferD3D12.cpp b/src/dawn/native/d3d12/BufferD3D12.cpp
index 7ee1103f9d..c8ce893c1d 100644
--- a/src/dawn/native/d3d12/BufferD3D12.cpp
+++ b/src/dawn/native/d3d12/BufferD3D12.cpp
@@ -203,25 +203,8 @@ bool Buffer::TrackUsageAndGetResourceBarrier(CommandRecordingContext* commandCon
     Heap* heap = ToBackend(mResourceAllocation.GetResourceHeap());
     commandContext->TrackHeapUsage(heap, GetDevice()->GetPendingCommandSerial());
 
-    // Return the resource barrier.
-    return TransitionUsageAndGetResourceBarrier(commandContext, barrier, newUsage);
-}
+    SetLastUsageSerial(GetDevice()->GetPendingCommandSerial());
 
-void Buffer::TrackUsageAndTransitionNow(CommandRecordingContext* commandContext,
-                                        wgpu::BufferUsage newUsage) {
-    D3D12_RESOURCE_BARRIER barrier;
-
-    if (TrackUsageAndGetResourceBarrier(commandContext, &barrier, newUsage)) {
-        commandContext->GetCommandList()->ResourceBarrier(1, &barrier);
-    }
-}
-
-// When true is returned, a D3D12_RESOURCE_BARRIER has been created and must be used in a
-// ResourceBarrier call. Failing to do so will cause the tracked state to become invalid and can
-// cause subsequent errors.
-bool Buffer::TransitionUsageAndGetResourceBarrier(CommandRecordingContext* commandContext,
-                                                  D3D12_RESOURCE_BARRIER* barrier,
-                                                  wgpu::BufferUsage newUsage) {
     // Resources in upload and readback heaps must be kept in the COPY_SOURCE/DEST state
     if (mFixedResourceState) {
         ASSERT(mLastUsage == newUsage);
@@ -298,6 +281,15 @@ bool Buffer::TransitionUsageAndGetResourceBarrier(CommandRecordingContext* comma
     return true;
 }
 
+void Buffer::TrackUsageAndTransitionNow(CommandRecordingContext* commandContext,
+                                        wgpu::BufferUsage newUsage) {
+    D3D12_RESOURCE_BARRIER barrier;
+
+    if (TrackUsageAndGetResourceBarrier(commandContext, &barrier, newUsage)) {
+        commandContext->GetCommandList()->ResourceBarrier(1, &barrier);
+    }
+}
+
 D3D12_GPU_VIRTUAL_ADDRESS Buffer::GetVA() const {
     return mResourceAllocation.GetGPUPointer();
 }
diff --git a/src/dawn/native/d3d12/BufferD3D12.h b/src/dawn/native/d3d12/BufferD3D12.h
index 85c4227e8b..4913fbdeeb 100644
--- a/src/dawn/native/d3d12/BufferD3D12.h
+++ b/src/dawn/native/d3d12/BufferD3D12.h
@@ -67,10 +67,6 @@ class Buffer final : public BufferBase {
 
     MaybeError MapInternal(bool isWrite, size_t start, size_t end, const char* contextInfo);
 
-    bool TransitionUsageAndGetResourceBarrier(CommandRecordingContext* commandContext,
-                                              D3D12_RESOURCE_BARRIER* barrier,
-                                              wgpu::BufferUsage newUsage);
-
     MaybeError InitializeToZero(CommandRecordingContext* commandContext);
     MaybeError ClearBuffer(CommandRecordingContext* commandContext,
                            uint8_t clearValue,
diff --git a/src/dawn/native/metal/BufferMTL.h b/src/dawn/native/metal/BufferMTL.h
index 3c6333d3ee..eeeb82d5f6 100644
--- a/src/dawn/native/metal/BufferMTL.h
+++ b/src/dawn/native/metal/BufferMTL.h
@@ -34,6 +34,7 @@ class Buffer final : public BufferBase {
 
     id<MTLBuffer> GetMTLBuffer() const;
 
+    void TrackUsage();
     bool EnsureDataInitialized(CommandRecordingContext* commandContext);
     bool EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
                                             uint64_t offset,
diff --git a/src/dawn/native/metal/BufferMTL.mm b/src/dawn/native/metal/BufferMTL.mm
index 7800969830..8b0eb96324 100644
--- a/src/dawn/native/metal/BufferMTL.mm
+++ b/src/dawn/native/metal/BufferMTL.mm
@@ -178,6 +178,10 @@ void Buffer::DestroyImpl() {
     mMtlBuffer = nullptr;
 }
 
+void Buffer::TrackUsage() {
+    SetLastUsageSerial(GetDevice()->GetPendingCommandSerial());
+}
+
 bool Buffer::EnsureDataInitialized(CommandRecordingContext* commandContext) {
     if (!NeedsInitialization()) {
         return false;
@@ -234,6 +238,7 @@ void Buffer::ClearBuffer(CommandRecordingContext* commandContext,
     ASSERT(commandContext != nullptr);
     size = size > 0 ? size : GetAllocatedSize();
     ASSERT(size > 0);
+    TrackUsage();
     [commandContext->EnsureBlit() fillBuffer:mMtlBuffer.Get()
                                        range:NSMakeRange(offset, size)
                                        value:clearValue];
diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm
index 214ef2c03b..17ee0fdec7 100644
--- a/src/dawn/native/metal/CommandBufferMTL.mm
+++ b/src/dawn/native/metal/CommandBufferMTL.mm
@@ -496,6 +496,7 @@ class BindGroupTracker : public BindGroupTrackerBase {
             switch (bindingInfo.bindingType) {
                 case BindingInfoType::Buffer: {
                     const BufferBinding& binding = group->GetBindingAsBufferBinding(bindingIndex);
+                    ToBackend(binding.buffer)->TrackUsage();
                     const id<MTLBuffer> buffer = ToBackend(binding.buffer)->GetMTLBuffer();
                     NSUInteger offset = binding.offset;
 
@@ -591,6 +592,7 @@ class VertexBufferTracker {
         : mLengthTracker(lengthTracker) {}
 
     void OnSetVertexBuffer(VertexBufferSlot slot, Buffer* buffer, uint64_t offset) {
+        buffer->TrackUsage();
        mVertexBuffers[slot] = buffer->GetMTLBuffer();
        mVertexBufferOffsets[slot] = offset;
 
@@ -819,10 +821,13 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
                     break;
                 }
 
-                ToBackend(copy->source)->EnsureDataInitialized(commandContext);
-                ToBackend(copy->destination)
-                    ->EnsureDataInitializedAsDestination(commandContext, copy->destinationOffset,
-                                                         copy->size);
+                auto srcBuffer = ToBackend(copy->source);
+                srcBuffer->EnsureDataInitialized(commandContext);
+                srcBuffer->TrackUsage();
+                auto dstBuffer = ToBackend(copy->destination);
+                dstBuffer->EnsureDataInitializedAsDestination(commandContext,
+                                                              copy->destinationOffset, copy->size);
+                dstBuffer->TrackUsage();
 
                 [commandContext->EnsureBlit()
                       copyFromBuffer:ToBackend(copy->source)->GetMTLBuffer()
@@ -849,6 +854,7 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
                 buffer->EnsureDataInitialized(commandContext);
                 EnsureDestinationTextureInitialized(commandContext, texture, dst, copySize);
 
+                buffer->TrackUsage();
                 texture->SynchronizeTextureBeforeUse(commandContext);
                 RecordCopyBufferToTexture(commandContext, buffer->GetMTLBuffer(), buffer->GetSize(),
                                           src.offset, src.bytesPerRow, src.rowsPerImage, texture,
@@ -874,6 +880,7 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
                 texture->SynchronizeTextureBeforeUse(commandContext);
                 texture->EnsureSubresourceContentInitialized(
                     commandContext, GetSubresourcesAffectedByCopy(src, copySize));
+                buffer->TrackUsage();
 
                 TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
                     texture, src.mipLevel, src.origin, copySize, buffer->GetSize(), dst.offset,
@@ -1028,6 +1035,7 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
                     commandContext, cmd->offset, cmd->size);
 
                 if (!clearedToZero) {
+                    dstBuffer->TrackUsage();
                     [commandContext->EnsureBlit() fillBuffer:dstBuffer->GetMTLBuffer()
                                                        range:NSMakeRange(cmd->offset, cmd->size)
                                                        value:0u];
@@ -1045,6 +1053,7 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
                     commandContext, cmd->destinationOffset, cmd->queryCount * sizeof(uint64_t));
 
                 if (querySet->GetQueryType() == wgpu::QueryType::Occlusion) {
+                    destination->TrackUsage();
                     [commandContext->EnsureBlit()
                           copyFromBuffer:querySet->GetVisibilityBuffer()
                             sourceOffset:NSUInteger(cmd->firstQuery * sizeof(uint64_t))
                                 toBuffer:destination->GetMTLBuffer()
                        destinationOffset:NSUInteger(cmd->destinationOffset)
                                     size:NSUInteger(cmd->queryCount * sizeof(uint64_t))];
                 } else {
                     if (@available(macos 10.15, iOS 14.0, *)) {
+                        destination->TrackUsage();
                         [commandContext->EnsureBlit()
                             resolveCounters:querySet->GetCounterSampleBuffer()
                                     inRange:NSMakeRange(cmd->firstQuery, cmd->queryCount)
@@ -1142,6 +1152,7 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
 
                 dstBuffer->EnsureDataInitializedAsDestination(commandContext, offset, size);
 
+                dstBuffer->TrackUsage();
                 [commandContext->EnsureBlit()
                       copyFromBuffer:ToBackend(uploadHandle.stagingBuffer)->GetMTLBuffer()
                         sourceOffset:uploadHandle.startOffset
@@ -1245,6 +1256,7 @@ MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandCont
                     storageBufferLengths.Apply(encoder, lastPipeline);
 
                     Buffer* buffer = ToBackend(dispatch->indirectBuffer.Get());
+                    buffer->TrackUsage();
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
                     [encoder dispatchThreadgroupsWithIndirectBuffer:indirectBuffer
@@ -1422,6 +1434,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
                     storageBufferLengths.Apply(encoder, lastPipeline, enableVertexPulling);
 
                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
+                    buffer->TrackUsage();
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
                     [encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                              indirectBuffer:indirectBuffer
@@ -1439,6 +1452,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                     ASSERT(buffer != nullptr);
 
+                    buffer->TrackUsage();
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
                     [encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                                          indexType:indexBufferType
@@ -1518,6 +1532,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
                 case Command::SetIndexBuffer: {
                     SetIndexBufferCmd* cmd = iter->NextCommand<SetIndexBufferCmd>();
                     auto b = ToBackend(cmd->buffer.Get());
+                    b->TrackUsage();
                     indexBuffer = b->GetMTLBuffer();
                     indexBufferBaseOffset = cmd->offset;
                     indexBufferType = MTLIndexFormat(cmd->format);
diff --git a/src/dawn/native/metal/DeviceMTL.mm b/src/dawn/native/metal/DeviceMTL.mm
index 7fd6f0c11d..f599245ba5 100644
--- a/src/dawn/native/metal/DeviceMTL.mm
+++ b/src/dawn/native/metal/DeviceMTL.mm
@@ -478,11 +478,12 @@ MaybeError Device::CopyFromStagingToBufferImpl(BufferBase* source,
         GetPendingCommandContext(DeviceBase::SubmitMode::Passive), destinationOffset, size);
 
     id<MTLBuffer> uploadBuffer = ToBackend(source)->GetMTLBuffer();
-    id<MTLBuffer> buffer = ToBackend(destination)->GetMTLBuffer();
+    Buffer* buffer = ToBackend(destination);
+    buffer->TrackUsage();
     [GetPendingCommandContext(DeviceBase::SubmitMode::Passive)->EnsureBlit()
           copyFromBuffer:uploadBuffer
             sourceOffset:sourceOffset
-                toBuffer:buffer
+                toBuffer:buffer->GetMTLBuffer()
        destinationOffset:destinationOffset
                     size:size];
     return {};
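Metal is the opposite of D3D12 and Vulkan here: there is no barrier-computation choke point to piggyback on, so the backend gains an explicit Buffer::TrackUsage() that stamps the pending command serial, and every encode site that touches a buffer (bind groups, vertex/index buffers, copies, indirect draws and dispatches, query resolves, staging uploads) must remember to call it. The discipline, modeled in a small sketch (MetalBuffer and EncodeBlit() are made-up stand-ins, not Dawn's classes; in Dawn the stamp is SetLastUsageSerial(GetDevice()->GetPendingCommandSerial())):

    #include <cstdint>

    using ExecutionSerial = uint64_t;

    struct MetalBuffer {
        ExecutionSerial lastUsageSerial = 0;
        void TrackUsage(ExecutionSerial pendingSerial) { lastUsageSerial = pendingSerial; }
    };

    void EncodeBlit(MetalBuffer&);  // stand-in for the [commandContext->EnsureBlit() ...] sends

    void RecordCopy(MetalBuffer& buffer, ExecutionSerial pendingSerial) {
        buffer.TrackUsage(pendingSerial);  // stamp first, so MapAsync waits long enough
        EncodeBlit(buffer);                // then encode the actual GPU work
    }

A missed TrackUsage() call is not a crash: it leaves the last-usage serial stale, so a later MapAsync() could signal before the GPU is actually done with the buffer. That is why the call appears at every encode site in the hunks above.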
diff --git a/src/dawn/native/opengl/BufferGL.cpp b/src/dawn/native/opengl/BufferGL.cpp
index 1b6046c66c..80bf283edd 100644
--- a/src/dawn/native/opengl/BufferGL.cpp
+++ b/src/dawn/native/opengl/BufferGL.cpp
@@ -39,6 +39,8 @@ ResultOrError<Ref<Buffer>> Buffer::CreateInternalBuffer(Device* device,
 
 Buffer::Buffer(Device* device, const BufferDescriptor* descriptor)
     : BufferBase(device, descriptor) {
+    // TODO(penghuang): track usage for GL.
+    mLastUsageSerial = kMaxExecutionSerial;
     const OpenGLFunctions& gl = device->GetGL();
 
     // Allocate at least 4 bytes so clamped accesses are always in bounds.
     mAllocatedSize = std::max(GetSize(), uint64_t(4u));
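The GL backend does not stamp per-use serials yet, so it opts out wholesale: parking kMaxExecutionSerial in mLastUsageSerial at construction makes every APIMapAsync() take the TrackTaskAfterEventualFlush() branch shown in Buffer.cpp. The sentinel errs on the safe side, since the fallback waits for all scheduled work, an untracked backend can only fire callbacks late, never early. The opt-out pattern, restated minimally (state struct is a stand-in, mirroring the BufferGL.cpp constructor above):

    #include <cstdint>
    #include <limits>

    using ExecutionSerial = uint64_t;
    constexpr ExecutionSerial kMaxExecutionSerial = std::numeric_limits<uint64_t>::max();

    struct GLBufferState {
        // Constructor-time sentinel: APIMapAsync() sees it and routes the callback
        // through TrackTaskAfterEventualFlush() (flush everything, then signal).
        ExecutionSerial lastUsageSerial = kMaxExecutionSerial;
    };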
diff --git a/src/dawn/native/vulkan/BufferVk.cpp b/src/dawn/native/vulkan/BufferVk.cpp
index 5029928708..2242ccf38a 100644
--- a/src/dawn/native/vulkan/BufferVk.cpp
+++ b/src/dawn/native/vulkan/BufferVk.cpp
@@ -249,7 +249,7 @@ void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
     VkPipelineStageFlags srcStages = 0;
     VkPipelineStageFlags dstStages = 0;
 
-    if (TransitionUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) {
+    if (TrackUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) {
         ASSERT(srcStages != 0 && dstStages != 0);
         ToBackend(GetDevice())
             ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
@@ -257,10 +257,12 @@ void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
     }
 }
 
-bool Buffer::TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
-                                                  VkBufferMemoryBarrier* barrier,
-                                                  VkPipelineStageFlags* srcStages,
-                                                  VkPipelineStageFlags* dstStages) {
+bool Buffer::TrackUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
+                                             VkBufferMemoryBarrier* barrier,
+                                             VkPipelineStageFlags* srcStages,
+                                             VkPipelineStageFlags* dstStages) {
+    SetLastUsageSerial(GetDevice()->GetPendingCommandSerial());
+
     bool lastIncludesTarget = IsSubset(usage, mLastUsage);
     constexpr wgpu::BufferUsage kReuseNoBarrierBufferUsages =
         kReadOnlyBufferUsages | wgpu::BufferUsage::MapWrite;
diff --git a/src/dawn/native/vulkan/BufferVk.h b/src/dawn/native/vulkan/BufferVk.h
index 6e8ef6405f..dddeea8bb6 100644
--- a/src/dawn/native/vulkan/BufferVk.h
+++ b/src/dawn/native/vulkan/BufferVk.h
@@ -36,10 +36,10 @@ class Buffer final : public BufferBase {
     // `commands`.
     // TODO(crbug.com/dawn/851): coalesce barriers and do them early when possible.
     void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
-    bool TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
-                                              VkBufferMemoryBarrier* barrier,
-                                              VkPipelineStageFlags* srcStages,
-                                              VkPipelineStageFlags* dstStages);
+    bool TrackUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
+                                         VkBufferMemoryBarrier* barrier,
+                                         VkPipelineStageFlags* srcStages,
+                                         VkPipelineStageFlags* dstStages);
 
     // All the Ensure methods return true if the buffer was initialized to zero.
     bool EnsureDataInitialized(CommandRecordingContext* recordingContext);
diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp
index ddd92059bc..b0f8cee14e 100644
--- a/src/dawn/native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn/native/vulkan/CommandBufferVk.cpp
@@ -167,8 +167,8 @@ void TransitionAndClearForSyncScope(Device* device,
             buffer->EnsureDataInitialized(recordingContext);
 
             VkBufferMemoryBarrier bufferBarrier;
-            if (buffer->TransitionUsageAndGetResourceBarrier(scope.bufferUsages[i], &bufferBarrier,
-                                                             &srcStages, &dstStages)) {
+            if (buffer->TrackUsageAndGetResourceBarrier(scope.bufferUsages[i], &bufferBarrier,
+                                                        &srcStages, &dstStages)) {
                 bufferBarriers.push_back(bufferBarrier);
             }
         }
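Vulkan takes the same route as D3D12: the serial stamp is folded into the existing barrier computation, which every encoded use of a buffer (sync scopes, copies, bind groups) already funnels through. Renaming TransitionUsageAndGetResourceBarrier() to TrackUsageAndGetResourceBarrier() and adding one line therefore covers all call sites at once, in contrast to Metal's per-site calls. Schematically (simplified types and signature, not the real ones):

    #include <cstdint>

    using ExecutionSerial = uint64_t;
    enum class BufferUsage : uint32_t {};
    struct Barrier {};

    struct VulkanBuffer {
        ExecutionSerial lastUsageSerial = 0;
        BufferUsage lastUsage{};

        bool ComputeBarrierIfNeeded(BufferUsage newUsage, Barrier* out);  // pre-existing logic

        // One choke point instead of many call sites:
        bool TrackUsageAndGetResourceBarrier(ExecutionSerial pendingSerial,
                                             BufferUsage newUsage,
                                             Barrier* out) {
            lastUsageSerial = pendingSerial;  // the one new line in the real patch
            return ComputeBarrierIfNeeded(newUsage, out);
        }
    };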
diff --git a/src/dawn/tests/end2end/BufferTests.cpp b/src/dawn/tests/end2end/BufferTests.cpp
index 03ba28ad78..f889e24e79 100644
--- a/src/dawn/tests/end2end/BufferTests.cpp
+++ b/src/dawn/tests/end2end/BufferTests.cpp
@@ -541,6 +541,164 @@ DAWN_INSTANTIATE_TEST(BufferMappingTests,
                       OpenGLESBackend(),
                       VulkanBackend());
 
+class BufferMappingCallbackTests : public BufferMappingTests {
+  protected:
+    void SubmitCommandBuffer(wgpu::Buffer buffer) {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+        {
+            // Record enough commands to make sure the submission cannot be completed by the GPU
+            // too quickly.
+            constexpr int kRepeatCount = 50;
+            constexpr int kBufferSize = 1024 * 1024 * 10;
+            wgpu::Buffer tempWriteBuffer = CreateMapWriteBuffer(kBufferSize);
+            wgpu::Buffer tempReadBuffer = CreateMapReadBuffer(kBufferSize);
+            for (int i = 0; i < kRepeatCount; ++i) {
+                encoder.CopyBufferToBuffer(tempWriteBuffer, 0, tempReadBuffer, 0, kBufferSize);
+            }
+        }
+
+        if (buffer) {
+            if (buffer.GetUsage() & wgpu::BufferUsage::CopyDst) {
+                encoder.ClearBuffer(buffer);
+            } else {
+                wgpu::Buffer tempBuffer = CreateMapReadBuffer(buffer.GetSize());
+                encoder.CopyBufferToBuffer(buffer, 0, tempBuffer, 0, buffer.GetSize());
+            }
+        }
+
+        wgpu::CommandBuffer commandBuffer = encoder.Finish();
+        queue.Submit(1, &commandBuffer);
+    }
+
+    void Wait(std::vector<bool>& done) {
+        do {
+            WaitABit();
+        } while (std::any_of(done.begin(), done.end(), [](bool done) { return !done; }));
+    }
+};
+
+TEST_P(BufferMappingCallbackTests, EmptySubmissionAndThenMap) {
+    wgpu::Buffer buffer = CreateMapWriteBuffer(4);
+    MapAsyncAndWait(buffer, wgpu::MapMode::Write, 0, wgpu::kWholeMapSize);
+    buffer.Unmap();
+
+    std::vector<bool> done = {false, false};
+
+    // 1. Submission without using the buffer.
+    SubmitCommandBuffer({});
+    queue.OnSubmittedWorkDone(
+        0,
+        [](WGPUQueueWorkDoneStatus status, void* userdata) {
+            EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
+            auto& done = *static_cast<std::vector<bool>*>(userdata);
+            done[0] = true;
+            // Step 2's callback should be called first; this one is second.
+            const std::vector<bool> kExpected = {true, true};
+            EXPECT_EQ(done, kExpected);
+        },
+        &done);
+
+    // 2.
+    buffer.MapAsync(
+        wgpu::MapMode::Write, 0, 4,
+        [](WGPUBufferMapAsyncStatus status, void* userdata) {
+            EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
+            auto& done = *static_cast<std::vector<bool>*>(userdata);
+            done[1] = true;
+            // The buffer is not used by step 1, so this callback is called first.
+            const std::vector<bool> kExpected = {false, true};
+            EXPECT_EQ(done, kExpected);
+        },
+        &done);
+
+    Wait(done);
+}
+
+TEST_P(BufferMappingCallbackTests, UseTheBufferAndThenMap) {
+    wgpu::Buffer buffer = CreateMapWriteBuffer(4);
+    MapAsyncAndWait(buffer, wgpu::MapMode::Write, 0, wgpu::kWholeMapSize);
+    buffer.Unmap();
+
+    std::vector<bool> done = {false, false};
+
+    // 1. Submit a command buffer which uses the buffer.
+    SubmitCommandBuffer(buffer);
+    queue.OnSubmittedWorkDone(
+        0,
+        [](WGPUQueueWorkDoneStatus status, void* userdata) {
+            EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
+            auto& done = *static_cast<std::vector<bool>*>(userdata);
+            done[0] = true;
+            // This callback should be called first.
+            const std::vector<bool> kExpected = {true, false};
+            EXPECT_EQ(done, kExpected);
+        },
+        &done);
+
+    // 2.
+    buffer.MapAsync(
+        wgpu::MapMode::Write, 0, 4,
+        [](WGPUBufferMapAsyncStatus status, void* userdata) {
+            EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
+            auto& done = *static_cast<std::vector<bool>*>(userdata);
+            done[1] = true;
+            // The buffer is used by step 1, so this callback is called second.
+            const std::vector<bool> kExpected = {true, true};
+            EXPECT_EQ(done, kExpected);
+        },
+        &done);
+
+    Wait(done);
+
+    buffer.Unmap();
+}
+
+TEST_P(BufferMappingCallbackTests, EmptySubmissionWriteAndThenMap) {
+    wgpu::Buffer buffer = CreateMapReadBuffer(4);
+    MapAsyncAndWait(buffer, wgpu::MapMode::Read, 0, wgpu::kWholeMapSize);
+    buffer.Unmap();
+
+    std::vector<bool> done = {false, false};
+
+    // 1. Submission without using the buffer.
+    SubmitCommandBuffer({});
+    queue.OnSubmittedWorkDone(
+        0,
+        [](WGPUQueueWorkDoneStatus status, void* userdata) {
+            EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
+            auto& done = *static_cast<std::vector<bool>*>(userdata);
+            done[0] = true;
+            // This callback is called first; the WriteBuffer() below makes the map
+            // callback wait for a later serial.
+            const std::vector<bool> kExpected = {true, false};
+            EXPECT_EQ(done, kExpected);
+        },
+        &done);
+
+    int32_t data = 0x12345678;
+    queue.WriteBuffer(buffer, 0, &data, sizeof(data));
+
+    // 2.
+    buffer.MapAsync(
+        wgpu::MapMode::Read, 0, 4,
+        [](WGPUBufferMapAsyncStatus status, void* userdata) {
+            EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
+            auto& done = *static_cast<std::vector<bool>*>(userdata);
+            done[1] = true;
+            // The WriteBuffer() in between uses the buffer, so this callback is
+            // called second.
+            const std::vector<bool> kExpected = {true, true};
+            EXPECT_EQ(done, kExpected);
+        },
+        &done);
+
+    Wait(done);
+
+    buffer.Unmap();
+}
+
+// MapAsync() records a pipeline barrier in the pending command buffer on Vulkan.
+// TODO(penghuang): enable this test for Vulkan.
+DAWN_INSTANTIATE_TEST(BufferMappingCallbackTests, D3D12Backend(), MetalBackend());
+
 class BufferMappedAtCreationTests : public DawnTest {
   protected:
     static void MapCallback(WGPUBufferMapAsyncStatus status, void* userdata) {
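The three tests pin down the observable contract: a submission that never touches the buffer must not delay its map callback (map fires before OnSubmittedWorkDone), a submission that does touch it must delay it (map fires after), and a queue.WriteBuffer() counts as a use. Replaying the first scenario against the Device/Queue model sketched in the Queue.cpp notes above (hypothetical serial values, reusing those types):

    #include <cstdio>

    int main() {
        Device device;        // completed = 8, lastSubmitted = 10, scheduled = 12
        Queue queue{&device};

        // The padding submission (serial 10) never used the buffer; its last usage
        // was an already-completed serial, say 5, so the map callback runs at once:
        queue.TrackTask([] { std::puts("map callback (first)"); }, /*serial=*/5);

        // The queue-level task waits for everything scheduled (serial 12) and only
        // runs once Tick(12) eventually drains its bucket:
        queue.TrackTask([] { std::puts("workdone callback (second)"); }, /*serial=*/12);
    }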