Append pipeline barrier for mappable buffers for submissions

This way, the next MapAsync() call doesn't need to record a pipeline barrier
command into the pending command buffer, submit it, and wait for its
completion.

Bug: b/265152896
Change-Id: Ied7cbca24a7b1bc81c0a6402179452599aabfe9b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/116920
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Peng Huang <penghuang@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
Peng Huang 2023-01-25 19:55:52 +00:00 committed by Dawn LUCI CQ
parent e8870efb1b
commit 8fc1758a3d
10 changed files with 108 additions and 27 deletions

View File

@ -665,7 +665,8 @@ void BufferBase::SetIsDataInitialized() {
mIsDataInitialized = true;
}
void BufferBase::SetLastUsageSerial(ExecutionSerial serial) {
void BufferBase::MarkUsedInPendingCommands() {
ExecutionSerial serial = GetDevice()->GetPendingCommandSerial();
ASSERT(serial >= mLastUsageSerial);
mLastUsageSerial = serial;
}

View File

@ -73,7 +73,7 @@ class BufferBase : public ApiObjectBase {
bool NeedsInitialization() const;
bool IsDataInitialized() const;
void SetIsDataInitialized();
void SetLastUsageSerial(ExecutionSerial serial);
void MarkUsedInPendingCommands();
virtual void* GetMappedPointer() = 0;
void* GetMappedRange(size_t offset, size_t size, bool writable = true);

View File

@ -203,7 +203,7 @@ bool Buffer::TrackUsageAndGetResourceBarrier(CommandRecordingContext* commandCon
Heap* heap = ToBackend(mResourceAllocation.GetResourceHeap());
commandContext->TrackHeapUsage(heap, GetDevice()->GetPendingCommandSerial());
SetLastUsageSerial(GetDevice()->GetPendingCommandSerial());
MarkUsedInPendingCommands();
// Resources in upload and readback heaps must be kept in the COPY_SOURCE/DEST state
if (mFixedResourceState) {

View File

@ -179,7 +179,7 @@ void Buffer::DestroyImpl() {
}
void Buffer::TrackUsage() {
SetLastUsageSerial(GetDevice()->GetPendingCommandSerial());
MarkUsedInPendingCommands();
}
bool Buffer::EnsureDataInitialized(CommandRecordingContext* commandContext) {

View File

@ -18,6 +18,7 @@
#include <cstring>
#include <limits>
#include <utility>
#include <vector>
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/vulkan/DeviceVk.h"
@ -31,6 +32,8 @@ namespace dawn::native::vulkan {
namespace {
constexpr wgpu::BufferUsage kMapUsages = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite;
VkBufferUsageFlags VulkanBufferUsage(wgpu::BufferUsage usage) {
VkBufferUsageFlags flags = 0;
@ -249,7 +252,8 @@ void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
if (TrackUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) {
if (TrackUsageAndGetResourceBarrier(recordingContext, usage, &barrier, &srcStages,
&dstStages)) {
ASSERT(srcStages != 0 && dstStages != 0);
ToBackend(GetDevice())
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
@ -257,25 +261,50 @@ void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
}
}
bool Buffer::TrackUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
bool Buffer::TrackUsageAndGetResourceBarrier(CommandRecordingContext* recordingContext,
wgpu::BufferUsage usage,
VkBufferMemoryBarrier* barrier,
VkPipelineStageFlags* srcStages,
VkPipelineStageFlags* dstStages) {
SetLastUsageSerial(GetDevice()->GetPendingCommandSerial());
if (usage & kMapUsages) {
// The pipeline barrier isn't needed, the buffer can be mapped immediately.
if (mLastUsage == usage) {
return false;
}
bool lastIncludesTarget = IsSubset(usage, mLastUsage);
constexpr wgpu::BufferUsage kReuseNoBarrierBufferUsages =
kReadOnlyBufferUsages | wgpu::BufferUsage::MapWrite;
bool lastCanBeReusedWithoutBarrier = IsSubset(mLastUsage, kReuseNoBarrierBufferUsages);
// Special-case for the initial transition: the pipeline barrier isn't needed.
if (mLastUsage == wgpu::BufferUsage::None) {
mLastUsage = usage;
return false;
}
if (lastIncludesTarget && lastCanBeReusedWithoutBarrier) {
return false;
}
// For other cases, a pipeline barrier is needed, so mark the buffer as used within the
// pending commands.
MarkUsedInPendingCommands();
} else {
// Request non CPU usage, so assume the buffer will be used in pending commands.
MarkUsedInPendingCommands();
// Special-case for the initial transition: Vulkan doesn't allow access flags to be 0.
if (mLastUsage == wgpu::BufferUsage::None) {
mLastUsage = usage;
return false;
// If the buffer is mappable and the requested usage is not a map usage, we need to add
// it into mappableBuffersForEagerTransition, so the buffer can be transitioned back to
// map usages at the end of the submit.
if (GetUsage() & kMapUsages) {
recordingContext->mappableBuffersForEagerTransition.insert(this);
}
// Special-case for the initial transition: Vulkan doesn't allow access flags to be 0.
if (mLastUsage == wgpu::BufferUsage::None) {
mLastUsage = usage;
return false;
}
bool lastIncludesTarget = IsSubset(usage, mLastUsage);
bool lastReadOnly = IsSubset(mLastUsage, kReadOnlyBufferUsages);
// We can skip transitions to already current read-only usages.
if (lastIncludesTarget && lastReadOnly) {
return false;
}
}
*srcStages |= VulkanPipelineStage(mLastUsage);
@ -384,6 +413,41 @@ bool Buffer::EnsureDataInitializedAsDestination(CommandRecordingContext* recordi
return true;
}
// static
// Records one batched pipeline barrier that transitions every buffer in `buffers` back to
// its map usage (MapRead or MapWrite) inside the pending command buffer, so that a later
// MapAsync() call doesn't have to record, submit, and wait on its own barrier.
// `buffers` is taken by value: the caller moves the recording context's
// mappableBuffersForEagerTransition set in, which leaves that member empty and lets the
// asserts below hold.
void Buffer::TransitionMappableBuffersEagerly(const VulkanFunctions& fn,
CommandRecordingContext* recordingContext,
std::set<Buffer*> buffers) {
ASSERT(!buffers.empty());
// The set must have been moved out of the recording context before this call.
ASSERT(recordingContext->mappableBuffersForEagerTransition.empty());
// Accumulate source/destination stages across all buffers so a single
// vkCmdPipelineBarrier covers every transition.
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
std::vector<VkBufferMemoryBarrier> barriers;
barriers.reserve(buffers.size());
for (Buffer* buffer : buffers) {
// Only buffers created with exactly one of MapRead/MapWrite are tracked here.
wgpu::BufferUsage mapUsage = buffer->GetUsage() & kMapUsages;
ASSERT(mapUsage == wgpu::BufferUsage::MapRead || mapUsage == wgpu::BufferUsage::MapWrite);
VkBufferMemoryBarrier barrier;
// Returns false when no barrier is needed (e.g. the buffer is already in a
// compatible usage); only real transitions are appended.
if (buffer->TrackUsageAndGetResourceBarrier(recordingContext, mapUsage, &barrier,
&srcStages, &dstStages)) {
barriers.push_back(barrier);
}
// TrackUsageAndGetResourceBarrier() should not modify recordingContext for map usages.
ASSERT(recordingContext->mappableBuffersForEagerTransition.empty());
}
// Every buffer may already be in its map usage; nothing to record.
if (barriers.empty()) {
return;
}
ASSERT(srcStages != 0 && dstStages != 0);
// Emit all buffer memory barriers in one call (no memory or image barriers).
fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, nullptr,
barriers.size(), barriers.data(), 0, nullptr);
}
void Buffer::SetLabelImpl() {
SetDebugName(ToBackend(GetDevice()), mHandle, "Dawn_Buffer", GetLabel());
}

View File

@ -15,6 +15,8 @@
#ifndef SRC_DAWN_NATIVE_VULKAN_BUFFERVK_H_
#define SRC_DAWN_NATIVE_VULKAN_BUFFERVK_H_
#include <set>
#include "dawn/native/Buffer.h"
#include "dawn/common/SerialQueue.h"
@ -25,6 +27,7 @@ namespace dawn::native::vulkan {
struct CommandRecordingContext;
class Device;
struct VulkanFunctions;
class Buffer final : public BufferBase {
public:
@ -36,7 +39,8 @@ class Buffer final : public BufferBase {
// `commands`.
// TODO(crbug.com/dawn/851): coalesce barriers and do them early when possible.
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
bool TrackUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
bool TrackUsageAndGetResourceBarrier(CommandRecordingContext* recordingContext,
wgpu::BufferUsage usage,
VkBufferMemoryBarrier* barrier,
VkPipelineStageFlags* srcStages,
VkPipelineStageFlags* dstStages);
@ -52,6 +56,10 @@ class Buffer final : public BufferBase {
// Dawn API
void SetLabelImpl() override;
static void TransitionMappableBuffersEagerly(const VulkanFunctions& fn,
CommandRecordingContext* recordingContext,
std::set<Buffer*> buffers);
private:
~Buffer() override;
using BufferBase::BufferBase;

View File

@ -167,8 +167,8 @@ void TransitionAndClearForSyncScope(Device* device,
buffer->EnsureDataInitialized(recordingContext);
VkBufferMemoryBarrier bufferBarrier;
if (buffer->TrackUsageAndGetResourceBarrier(scope.bufferUsages[i], &bufferBarrier,
&srcStages, &dstStages)) {
if (buffer->TrackUsageAndGetResourceBarrier(recordingContext, scope.bufferUsages[i],
&bufferBarrier, &srcStages, &dstStages)) {
bufferBarriers.push_back(bufferBarrier);
}
}

View File

@ -48,6 +48,10 @@ struct CommandRecordingContext {
// kept alive by the CommandBuffer so they don't need to be Ref-ed.
std::set<Texture*> externalTexturesForEagerTransition;
// Mappable buffers which will be eagerly transitioned to usage MapRead or MapWrite after
// VkSubmit.
std::set<Buffer*> mappableBuffersForEagerTransition;
// For Device state tracking only.
VkCommandPool commandPool = VK_NULL_HANDLE;
bool needsSubmit = false;

View File

@ -318,6 +318,12 @@ MaybeError Device::SubmitPendingCommands() {
return {};
}
if (!mRecordingContext.mappableBuffersForEagerTransition.empty()) {
// Transition mappable buffers back to map usages with the submit.
Buffer::TransitionMappableBuffersEagerly(
fn, &mRecordingContext, std::move(mRecordingContext.mappableBuffersForEagerTransition));
}
ScopedSignalSemaphore scopedSignalSemaphore(this, VK_NULL_HANDLE);
if (mRecordingContext.externalTexturesForEagerTransition.size() > 0) {
// Create an external semaphore for all external textures that have been used in the pending

View File

@ -600,7 +600,7 @@ TEST_P(BufferMappingCallbackTests, EmptySubmissionAndThenMap) {
// 2.
buffer.MapAsync(
wgpu::MapMode::Write, 0, 4,
wgpu::MapMode::Write, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
auto& done = *static_cast<std::vector<bool>*>(userdata);
@ -637,7 +637,7 @@ TEST_P(BufferMappingCallbackTests, UseTheBufferAndThenMap) {
// 2.
buffer.MapAsync(
wgpu::MapMode::Write, 0, 4,
wgpu::MapMode::Write, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
auto& done = *static_cast<std::vector<bool>*>(userdata);
@ -679,7 +679,7 @@ TEST_P(BufferMappingCallbackTests, EmptySubmissionWriteAndThenMap) {
// 2.
buffer.MapAsync(
wgpu::MapMode::Read, 0, 4,
wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
auto& done = *static_cast<std::vector<bool>*>(userdata);
@ -695,9 +695,7 @@ TEST_P(BufferMappingCallbackTests, EmptySubmissionWriteAndThenMap) {
buffer.Unmap();
}
// MapAsync() will record pipeline barrier in pending command buffer with Vulkan.
// TODO(penghuang): enable this test for Vulkan.
DAWN_INSTANTIATE_TEST(BufferMappingCallbackTests, D3D12Backend(), MetalBackend());
DAWN_INSTANTIATE_TEST(BufferMappingCallbackTests, D3D12Backend(), MetalBackend(), VulkanBackend());
class BufferMappedAtCreationTests : public DawnTest {
protected: