From 8fc1758a3d42bd8691cb3e35bc0a61bd74f66cc9 Mon Sep 17 00:00:00 2001 From: Peng Huang <penghuang@chromium.org> Date: Wed, 25 Jan 2023 19:55:52 +0000 Subject: [PATCH] Append pipeline barrier for mappable buffers for submissions So the next MapAsync() call, doesn't need to record pipeline barrier command into the pending command buffer, submit it, and wait for it completion. Bug: b/265152896 Change-Id: Ied7cbca24a7b1bc81c0a6402179452599aabfe9b Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/116920 Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Peng Huang <penghuang@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> --- src/dawn/native/Buffer.cpp | 3 +- src/dawn/native/Buffer.h | 2 +- src/dawn/native/d3d12/BufferD3D12.cpp | 2 +- src/dawn/native/metal/BufferMTL.mm | 2 +- src/dawn/native/vulkan/BufferVk.cpp | 92 ++++++++++++++++--- src/dawn/native/vulkan/BufferVk.h | 10 +- src/dawn/native/vulkan/CommandBufferVk.cpp | 4 +- .../native/vulkan/CommandRecordingContext.h | 4 + src/dawn/native/vulkan/DeviceVk.cpp | 6 ++ src/dawn/tests/end2end/BufferTests.cpp | 10 +- 10 files changed, 108 insertions(+), 27 deletions(-) diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp index 0e8828bdbc..67aa123c51 100644 --- a/src/dawn/native/Buffer.cpp +++ b/src/dawn/native/Buffer.cpp @@ -665,7 +665,8 @@ void BufferBase::SetIsDataInitialized() { mIsDataInitialized = true; } -void BufferBase::SetLastUsageSerial(ExecutionSerial serial) { +void BufferBase::MarkUsedInPendingCommands() { + ExecutionSerial serial = GetDevice()->GetPendingCommandSerial(); ASSERT(serial >= mLastUsageSerial); mLastUsageSerial = serial; } diff --git a/src/dawn/native/Buffer.h b/src/dawn/native/Buffer.h index a120517c39..d152d899fc 100644 --- a/src/dawn/native/Buffer.h +++ b/src/dawn/native/Buffer.h @@ -73,7 +73,7 @@ class BufferBase : public ApiObjectBase { bool NeedsInitialization() const; bool IsDataInitialized() const; void SetIsDataInitialized(); - void 
SetLastUsageSerial(ExecutionSerial serial); + void MarkUsedInPendingCommands(); virtual void* GetMappedPointer() = 0; void* GetMappedRange(size_t offset, size_t size, bool writable = true); diff --git a/src/dawn/native/d3d12/BufferD3D12.cpp b/src/dawn/native/d3d12/BufferD3D12.cpp index c8ce893c1d..28a0ea9cc3 100644 --- a/src/dawn/native/d3d12/BufferD3D12.cpp +++ b/src/dawn/native/d3d12/BufferD3D12.cpp @@ -203,7 +203,7 @@ bool Buffer::TrackUsageAndGetResourceBarrier(CommandRecordingContext* commandCon Heap* heap = ToBackend(mResourceAllocation.GetResourceHeap()); commandContext->TrackHeapUsage(heap, GetDevice()->GetPendingCommandSerial()); - SetLastUsageSerial(GetDevice()->GetPendingCommandSerial()); + MarkUsedInPendingCommands(); // Resources in upload and readback heaps must be kept in the COPY_SOURCE/DEST state if (mFixedResourceState) { diff --git a/src/dawn/native/metal/BufferMTL.mm b/src/dawn/native/metal/BufferMTL.mm index 8b0eb96324..07349d1a4f 100644 --- a/src/dawn/native/metal/BufferMTL.mm +++ b/src/dawn/native/metal/BufferMTL.mm @@ -179,7 +179,7 @@ void Buffer::DestroyImpl() { } void Buffer::TrackUsage() { - SetLastUsageSerial(GetDevice()->GetPendingCommandSerial()); + MarkUsedInPendingCommands(); } bool Buffer::EnsureDataInitialized(CommandRecordingContext* commandContext) { diff --git a/src/dawn/native/vulkan/BufferVk.cpp b/src/dawn/native/vulkan/BufferVk.cpp index 2242ccf38a..eeafa2273e 100644 --- a/src/dawn/native/vulkan/BufferVk.cpp +++ b/src/dawn/native/vulkan/BufferVk.cpp @@ -18,6 +18,7 @@ #include <cstring> #include <limits> #include <utility> +#include <vector> #include "dawn/native/CommandBuffer.h" #include "dawn/native/vulkan/DeviceVk.h" @@ -31,6 +32,8 @@ namespace dawn::native::vulkan { namespace { +constexpr wgpu::BufferUsage kMapUsages = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite; + VkBufferUsageFlags VulkanBufferUsage(wgpu::BufferUsage usage) { VkBufferUsageFlags flags = 0; @@ -249,7 +252,8 @@ void 
Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext, VkPipelineStageFlags srcStages = 0; VkPipelineStageFlags dstStages = 0; - if (TrackUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) { + if (TrackUsageAndGetResourceBarrier(recordingContext, usage, &barrier, &srcStages, + &dstStages)) { ASSERT(srcStages != 0 && dstStages != 0); ToBackend(GetDevice()) ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, @@ -257,25 +261,50 @@ void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext, } } -bool Buffer::TrackUsageAndGetResourceBarrier(wgpu::BufferUsage usage, +bool Buffer::TrackUsageAndGetResourceBarrier(CommandRecordingContext* recordingContext, + wgpu::BufferUsage usage, VkBufferMemoryBarrier* barrier, VkPipelineStageFlags* srcStages, VkPipelineStageFlags* dstStages) { - SetLastUsageSerial(GetDevice()->GetPendingCommandSerial()); + if (usage & kMapUsages) { + // The pipeline barrier isn't needed, the buffer can be mapped immediately. + if (mLastUsage == usage) { + return false; + } - bool lastIncludesTarget = IsSubset(usage, mLastUsage); - constexpr wgpu::BufferUsage kReuseNoBarrierBufferUsages = - kReadOnlyBufferUsages | wgpu::BufferUsage::MapWrite; - bool lastCanBeReusedWithoutBarrier = IsSubset(mLastUsage, kReuseNoBarrierBufferUsages); + // Special-case for the initial transition: the pipeline barrier isn't needed. + if (mLastUsage == wgpu::BufferUsage::None) { + mLastUsage = usage; + return false; + } - if (lastIncludesTarget && lastCanBeReusedWithoutBarrier) { - return false; - } + // For other cases, a pipeline barrier is needed, so mark the buffer as used within the + // pending commands. + MarkUsedInPendingCommands(); + } else { + // A non-map usage is requested, so assume the buffer will be used in pending commands. + MarkUsedInPendingCommands(); - // Special-case for the initial transition: Vulkan doesn't allow access flags to be 0. 
- if (mLastUsage == wgpu::BufferUsage::None) { - mLastUsage = usage; - return false; + // If the buffer is mappable and the requested usage is not a map usage, we need to add it + // to mappableBuffersForEagerTransition, so the buffer can be transitioned back to map + // usages at the end of the submit. + if (GetUsage() & kMapUsages) { + recordingContext->mappableBuffersForEagerTransition.insert(this); + } + + // Special-case for the initial transition: Vulkan doesn't allow access flags to be 0. + if (mLastUsage == wgpu::BufferUsage::None) { + mLastUsage = usage; + return false; + } + + bool lastIncludesTarget = IsSubset(usage, mLastUsage); + bool lastReadOnly = IsSubset(mLastUsage, kReadOnlyBufferUsages); + + // We can skip transitions to already current read-only usages. + if (lastIncludesTarget && lastReadOnly) { + return false; + } } *srcStages |= VulkanPipelineStage(mLastUsage); @@ -384,6 +413,41 @@ bool Buffer::EnsureDataInitializedAsDestination(CommandRecordingContext* recordi return true; } +// static +void Buffer::TransitionMappableBuffersEagerly(const VulkanFunctions& fn, + CommandRecordingContext* recordingContext, + std::set<Buffer*> buffers) { + ASSERT(!buffers.empty()); + ASSERT(recordingContext->mappableBuffersForEagerTransition.empty()); + + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + + std::vector<VkBufferMemoryBarrier> barriers; + barriers.reserve(buffers.size()); + + for (Buffer* buffer : buffers) { + wgpu::BufferUsage mapUsage = buffer->GetUsage() & kMapUsages; + ASSERT(mapUsage == wgpu::BufferUsage::MapRead || mapUsage == wgpu::BufferUsage::MapWrite); + VkBufferMemoryBarrier barrier; + + if (buffer->TrackUsageAndGetResourceBarrier(recordingContext, mapUsage, &barrier, + &srcStages, &dstStages)) { + barriers.push_back(barrier); + } + // TrackUsageAndGetResourceBarrier() should not modify recordingContext for map usages. 
+ ASSERT(recordingContext->mappableBuffersForEagerTransition.empty()); + } + + if (barriers.empty()) { + return; + } + + ASSERT(srcStages != 0 && dstStages != 0); + fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, nullptr, + barriers.size(), barriers.data(), 0, nullptr); +} + void Buffer::SetLabelImpl() { SetDebugName(ToBackend(GetDevice()), mHandle, "Dawn_Buffer", GetLabel()); } diff --git a/src/dawn/native/vulkan/BufferVk.h b/src/dawn/native/vulkan/BufferVk.h index dddeea8bb6..c806b001f6 100644 --- a/src/dawn/native/vulkan/BufferVk.h +++ b/src/dawn/native/vulkan/BufferVk.h @@ -15,6 +15,8 @@ #ifndef SRC_DAWN_NATIVE_VULKAN_BUFFERVK_H_ #define SRC_DAWN_NATIVE_VULKAN_BUFFERVK_H_ +#include <set> + #include "dawn/native/Buffer.h" #include "dawn/common/SerialQueue.h" @@ -25,6 +27,7 @@ namespace dawn::native::vulkan { struct CommandRecordingContext; class Device; +struct VulkanFunctions; class Buffer final : public BufferBase { public: @@ -36,7 +39,8 @@ class Buffer final : public BufferBase { // `commands`. // TODO(crbug.com/dawn/851): coalesce barriers and do them early when possible. 
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage); - bool TrackUsageAndGetResourceBarrier(wgpu::BufferUsage usage, + bool TrackUsageAndGetResourceBarrier(CommandRecordingContext* recordingContext, + wgpu::BufferUsage usage, VkBufferMemoryBarrier* barrier, VkPipelineStageFlags* srcStages, VkPipelineStageFlags* dstStages); @@ -52,6 +56,10 @@ class Buffer final : public BufferBase { // Dawn API void SetLabelImpl() override; + static void TransitionMappableBuffersEagerly(const VulkanFunctions& fn, + CommandRecordingContext* recordingContext, + std::set<Buffer*> buffers); + private: ~Buffer() override; using BufferBase::BufferBase; diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp index b0f8cee14e..6121e9c9d2 100644 --- a/src/dawn/native/vulkan/CommandBufferVk.cpp +++ b/src/dawn/native/vulkan/CommandBufferVk.cpp @@ -167,8 +167,8 @@ void TransitionAndClearForSyncScope(Device* device, buffer->EnsureDataInitialized(recordingContext); VkBufferMemoryBarrier bufferBarrier; - if (buffer->TrackUsageAndGetResourceBarrier(scope.bufferUsages[i], &bufferBarrier, - &srcStages, &dstStages)) { + if (buffer->TrackUsageAndGetResourceBarrier(recordingContext, scope.bufferUsages[i], + &bufferBarrier, &srcStages, &dstStages)) { bufferBarriers.push_back(bufferBarrier); } } diff --git a/src/dawn/native/vulkan/CommandRecordingContext.h b/src/dawn/native/vulkan/CommandRecordingContext.h index 7bd762f69c..a20db63c42 100644 --- a/src/dawn/native/vulkan/CommandRecordingContext.h +++ b/src/dawn/native/vulkan/CommandRecordingContext.h @@ -48,6 +48,10 @@ struct CommandRecordingContext { // kept alive by the CommandBuffer so they don't need to be Ref-ed. std::set<Texture*> externalTexturesForEagerTransition; + // Mappable buffers which will be eagerly transitioned to usage MapRead or MapWrite after + // VkSubmit. + std::set<Buffer*> mappableBuffersForEagerTransition; + // For Device state tracking only. 
VkCommandPool commandPool = VK_NULL_HANDLE; bool needsSubmit = false; diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp index 9bc942bd0a..9375be3e47 100644 --- a/src/dawn/native/vulkan/DeviceVk.cpp +++ b/src/dawn/native/vulkan/DeviceVk.cpp @@ -318,6 +318,12 @@ MaybeError Device::SubmitPendingCommands() { return {}; } + if (!mRecordingContext.mappableBuffersForEagerTransition.empty()) { + // Transition mappable buffers back to map usages with the submit. + Buffer::TransitionMappableBuffersEagerly( + fn, &mRecordingContext, std::move(mRecordingContext.mappableBuffersForEagerTransition)); + } + ScopedSignalSemaphore scopedSignalSemaphore(this, VK_NULL_HANDLE); if (mRecordingContext.externalTexturesForEagerTransition.size() > 0) { // Create an external semaphore for all external textures that have been used in the pending diff --git a/src/dawn/tests/end2end/BufferTests.cpp b/src/dawn/tests/end2end/BufferTests.cpp index f889e24e79..8e3f2a5e8c 100644 --- a/src/dawn/tests/end2end/BufferTests.cpp +++ b/src/dawn/tests/end2end/BufferTests.cpp @@ -600,7 +600,7 @@ TEST_P(BufferMappingCallbackTests, EmptySubmissionAndThenMap) { // 2. buffer.MapAsync( - wgpu::MapMode::Write, 0, 4, + wgpu::MapMode::Write, 0, wgpu::kWholeMapSize, [](WGPUBufferMapAsyncStatus status, void* userdata) { EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success); auto& done = *static_cast<std::vector<bool>*>(userdata); @@ -637,7 +637,7 @@ TEST_P(BufferMappingCallbackTests, UseTheBufferAndThenMap) { // 2. buffer.MapAsync( - wgpu::MapMode::Write, 0, 4, + wgpu::MapMode::Write, 0, wgpu::kWholeMapSize, [](WGPUBufferMapAsyncStatus status, void* userdata) { EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success); auto& done = *static_cast<std::vector<bool>*>(userdata); @@ -679,7 +679,7 @@ TEST_P(BufferMappingCallbackTests, EmptySubmissionWriteAndThenMap) { // 2. 
buffer.MapAsync( - wgpu::MapMode::Read, 0, 4, + wgpu::MapMode::Read, 0, wgpu::kWholeMapSize, [](WGPUBufferMapAsyncStatus status, void* userdata) { EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success); auto& done = *static_cast<std::vector<bool>*>(userdata); @@ -695,9 +695,7 @@ TEST_P(BufferMappingCallbackTests, EmptySubmissionWriteAndThenMap) { buffer.Unmap(); } -// MapAsync() will record pipeline barrier in pending command buffer with Vulkan. -// TODO(penghuang): enable this test for Vulkan. -DAWN_INSTANTIATE_TEST(BufferMappingCallbackTests, D3D12Backend(), MetalBackend()); +DAWN_INSTANTIATE_TEST(BufferMappingCallbackTests, D3D12Backend(), MetalBackend(), VulkanBackend()); class BufferMappedAtCreationTests : public DawnTest { protected: