Vulkan: Combine all the barriers before dispatch() in one call

This patch combines all the resource barriers added before each
dispatch() into a single vkCmdPipelineBarrier() call, reducing the
number of barrier commands recorded into the Vulkan command buffer.

BUG=dawn:522

Change-Id: I1b5943e62eb0a09db96de12196fcabb3448e9e4d
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/28283
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Author: Jiawei Shao, 2020-09-10 00:26:07 +00:00 (committed by Commit Bot service account)
parent 39d1cc9e9c
commit 87f25134a8
5 changed files with 109 additions and 61 deletions
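
The pattern throughout the change is accumulate-then-flush: each resource transition appends a VkBufferMemoryBarrier or VkImageMemoryBarrier to a shared list and ORs its pipeline stages into shared srcStages/dstStages masks, and a single vkCmdPipelineBarrier() then flushes everything. A minimal standalone sketch of the flush step, not Dawn code (the FlushBarriers name and parameters are illustrative):

    #include <vector>
    #include <vulkan/vulkan.h>

    // Flush all accumulated barriers with one command. The stage masks are the
    // OR of every contributing resource's source/destination stages.
    void FlushBarriers(VkCommandBuffer cmdBuf,
                       VkPipelineStageFlags srcStages,
                       VkPipelineStageFlags dstStages,
                       const std::vector<VkBufferMemoryBarrier>& bufferBarriers,
                       const std::vector<VkImageMemoryBarrier>& imageBarriers) {
        if (bufferBarriers.empty() && imageBarriers.empty()) {
            return;  // No transition was needed; record no command at all.
        }
        // Vulkan requires non-zero stage masks here, hence the ASSERTs in the
        // diff below whenever any barrier was produced.
        vkCmdPipelineBarrier(cmdBuf, srcStages, dstStages, 0,
                             0, nullptr,
                             static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
                             static_cast<uint32_t>(imageBarriers.size()), imageBarriers.data());
    }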

src/dawn_native/vulkan/BufferVk.cpp

@@ -186,23 +186,20 @@ namespace dawn_native { namespace vulkan {
     void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
                                     wgpu::BufferUsage usage) {
-        std::vector<VkBufferMemoryBarrier> barriers;
+        VkBufferMemoryBarrier barrier;
         VkPipelineStageFlags srcStages = 0;
         VkPipelineStageFlags dstStages = 0;
-        TransitionUsageNow(recordingContext, usage, &barriers, &srcStages, &dstStages);
-        if (barriers.size() > 0) {
-            ASSERT(barriers.size() == 1);
+        if (TransitionUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) {
+            ASSERT(srcStages != 0 && dstStages != 0);
             ToBackend(GetDevice())
                 ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
-                                        nullptr, barriers.size(), barriers.data(), 0, nullptr);
+                                        nullptr, 1u, &barrier, 0, nullptr);
         }
     }

-    void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
-                                    wgpu::BufferUsage usage,
-                                    std::vector<VkBufferMemoryBarrier>* bufferBarriers,
-                                    VkPipelineStageFlags* srcStages,
-                                    VkPipelineStageFlags* dstStages) {
+    bool Buffer::TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
+                                                      VkBufferMemoryBarrier* barrier,
+                                                      VkPipelineStageFlags* srcStages,
+                                                      VkPipelineStageFlags* dstStages) {
         bool lastIncludesTarget = (mLastUsage & usage) == usage;
@@ -210,32 +207,31 @@ namespace dawn_native { namespace vulkan {
         // We can skip transitions to already current read-only usages.
         if (lastIncludesTarget && lastReadOnly) {
-            return;
+            return false;
         }

         // Special-case for the initial transition: Vulkan doesn't allow access flags to be 0.
         if (mLastUsage == wgpu::BufferUsage::None) {
             mLastUsage = usage;
-            return;
+            return false;
         }

         *srcStages |= VulkanPipelineStage(mLastUsage);
         *dstStages |= VulkanPipelineStage(usage);

-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VulkanAccessFlags(mLastUsage);
-        barrier.dstAccessMask = VulkanAccessFlags(usage);
-        barrier.srcQueueFamilyIndex = 0;
-        barrier.dstQueueFamilyIndex = 0;
-        barrier.buffer = mHandle;
-        barrier.offset = 0;
-        barrier.size = GetSize();
-        bufferBarriers->push_back(barrier);
+        barrier->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barrier->pNext = nullptr;
+        barrier->srcAccessMask = VulkanAccessFlags(mLastUsage);
+        barrier->dstAccessMask = VulkanAccessFlags(usage);
+        barrier->srcQueueFamilyIndex = 0;
+        barrier->dstQueueFamilyIndex = 0;
+        barrier->buffer = mHandle;
+        barrier->offset = 0;
+        barrier->size = GetSize();

         mLastUsage = usage;
+        return true;
     }

     bool Buffer::IsCPUWritableAtCreation() const {
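
The method now returns true only when it actually wrote a barrier into the caller-provided struct. For reference, the whole-buffer barrier it fills in corresponds to a struct built like the following standalone sketch (MakeBufferBarrier is a hypothetical helper; Dawn derives the access masks from wgpu usages via VulkanAccessFlags()):

    #include <vulkan/vulkan.h>

    // Sketch of a whole-buffer memory barrier, mirroring the fields set in the
    // diff above (including queue family indices of 0, i.e. no ownership transfer).
    VkBufferMemoryBarrier MakeBufferBarrier(VkBuffer buffer, VkDeviceSize size,
                                            VkAccessFlags srcAccess, VkAccessFlags dstAccess) {
        VkBufferMemoryBarrier barrier = {};
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
        barrier.pNext = nullptr;
        barrier.srcAccessMask = srcAccess;
        barrier.dstAccessMask = dstAccess;
        barrier.srcQueueFamilyIndex = 0;
        barrier.dstQueueFamilyIndex = 0;
        barrier.buffer = buffer;
        barrier.offset = 0;     // Barrier covers the whole buffer...
        barrier.size = size;    // ...from offset 0 to its full size.
        return barrier;
    }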

src/dawn_native/vulkan/BufferVk.h

@@ -37,9 +37,8 @@ namespace dawn_native { namespace vulkan {
         // `commands`.
         // TODO(cwallez@chromium.org): coalesce barriers and do them early when possible.
         void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
-        void TransitionUsageNow(CommandRecordingContext* recordingContext,
-                                wgpu::BufferUsage usage,
-                                std::vector<VkBufferMemoryBarrier>* bufferBarriers,
-                                VkPipelineStageFlags* srcStages,
-                                VkPipelineStageFlags* dstStages);
+        bool TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
+                                                  VkBufferMemoryBarrier* barrier,
+                                                  VkPipelineStageFlags* srcStages,
+                                                  VkPipelineStageFlags* dstStages);

src/dawn_native/vulkan/CommandBufferVk.cpp

@@ -146,43 +146,59 @@ namespace dawn_native { namespace vulkan {
                                 mDirtyBindGroupsObjectChangedOrIsDynamic, mBindGroups,
                                 mDynamicOffsetCounts, mDynamicOffsets);

-            // TODO(jiawei.shao@intel.com): combine the following barriers in one
-            // vkCmdPipelineBarrier() call.
+            std::vector<VkBufferMemoryBarrier> bufferBarriers;
+            std::vector<VkImageMemoryBarrier> imageBarriers;
+            VkPipelineStageFlags srcStages = 0;
+            VkPipelineStageFlags dstStages = 0;
+
             for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
                 BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
                 for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
                     switch (layout->GetBindingInfo(binding).type) {
                         case wgpu::BindingType::StorageBuffer:
-                        case wgpu::BindingType::ReadonlyStorageBuffer:
-                            ToBackend(
-                                mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
-                                ->TransitionUsageNow(recordingContext,
-                                                     wgpu::BufferUsage::Storage);
+                        case wgpu::BindingType::ReadonlyStorageBuffer: {
+                            VkBufferMemoryBarrier bufferBarrier;
+                            if (ToBackend(mBindGroups[index]
+                                              ->GetBindingAsBufferBinding(binding)
+                                              .buffer)
+                                    ->TransitionUsageAndGetResourceBarrier(
+                                        wgpu::BufferUsage::Storage, &bufferBarrier, &srcStages,
+                                        &dstStages)) {
+                                bufferBarriers.push_back(bufferBarrier);
+                            }
                             break;
+                        }

                         case wgpu::BindingType::ReadonlyStorageTexture:
                         case wgpu::BindingType::WriteonlyStorageTexture: {
                             TextureViewBase* view =
                                 mBindGroups[index]->GetBindingAsTextureView(binding);
                             ToBackend(view->GetTexture())
-                                ->TransitionUsageNow(recordingContext,
-                                                     wgpu::TextureUsage::Storage,
-                                                     view->GetSubresourceRange());
+                                ->TransitionUsageAndGetResourceBarrier(
+                                    wgpu::TextureUsage::Storage, view->GetSubresourceRange(),
+                                    &imageBarriers, &srcStages, &dstStages);
                             break;
                         }

-                        case wgpu::BindingType::UniformBuffer:
-                            ToBackend(
-                                mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
-                                ->TransitionUsageNow(recordingContext,
-                                                     wgpu::BufferUsage::Uniform);
+                        case wgpu::BindingType::UniformBuffer: {
+                            VkBufferMemoryBarrier bufferBarrier;
+                            if (ToBackend(mBindGroups[index]
+                                              ->GetBindingAsBufferBinding(binding)
+                                              .buffer)
+                                    ->TransitionUsageAndGetResourceBarrier(
+                                        wgpu::BufferUsage::Uniform, &bufferBarrier, &srcStages,
+                                        &dstStages)) {
+                                bufferBarriers.push_back(bufferBarrier);
+                            }
                             break;
+                        }

                         case wgpu::BindingType::SampledTexture: {
                             TextureViewBase* view =
                                 mBindGroups[index]->GetBindingAsTextureView(binding);
                             ToBackend(view->GetTexture())
-                                ->TransitionUsageNow(recordingContext,
-                                                     wgpu::TextureUsage::Sampled,
-                                                     view->GetSubresourceRange());
+                                ->TransitionUsageAndGetResourceBarrier(
+                                    wgpu::TextureUsage::Sampled, view->GetSubresourceRange(),
+                                    &imageBarriers, &srcStages, &dstStages);
                             break;
                         }
@@ -200,6 +216,15 @@ namespace dawn_native { namespace vulkan {
                     }
                 }
             }
+
+            if (!bufferBarriers.empty() || !imageBarriers.empty()) {
+                ASSERT(srcStages != 0 && dstStages != 0);
+                device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages,
+                                              dstStages, 0, 0, nullptr, bufferBarriers.size(),
+                                              bufferBarriers.data(), imageBarriers.size(),
+                                              imageBarriers.data());
+            }
+
             DidApply();
         }
     };
@@ -459,8 +484,12 @@ namespace dawn_native { namespace vulkan {
         for (size_t i = 0; i < usages.buffers.size(); ++i) {
             Buffer* buffer = ToBackend(usages.buffers[i]);
             buffer->EnsureDataInitialized(recordingContext);
-            buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i],
-                                       &bufferBarriers, &srcStages, &dstStages);
+            VkBufferMemoryBarrier bufferBarrier;
+            if (buffer->TransitionUsageAndGetResourceBarrier(
+                    usages.bufferUsages[i], &bufferBarrier, &srcStages, &dstStages)) {
+                bufferBarriers.push_back(bufferBarrier);
+            }
         }

         for (size_t i = 0; i < usages.textures.size(); ++i) {
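
One consequence of batching is that the stage masks become unions: the single barrier waits for every accumulated source stage before every accumulated destination stage, which is slightly coarser synchronization than per-resource barriers but saves N-1 commands for N resources. An illustrative standalone sketch of that merging (MergedStages is a hypothetical type, not part of Dawn):

    #include <vulkan/vulkan.h>

    // Each transitioned resource ORs its stages in; the one flush call then
    // synchronizes the union of all source stages against the union of all
    // destination stages.
    struct MergedStages {
        VkPipelineStageFlags src = 0;
        VkPipelineStageFlags dst = 0;

        void Add(VkPipelineStageFlags resourceSrc, VkPipelineStageFlags resourceDst) {
            src |= resourceSrc;  // e.g. COMPUTE_SHADER | TRANSFER
            dst |= resourceDst;  // e.g. COMPUTE_SHADER
        }
    };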

src/dawn_native/vulkan/TextureVk.cpp

@@ -818,6 +818,32 @@ namespace dawn_native { namespace vulkan {
                                      wgpu::TextureUsage usage,
                                      const SubresourceRange& range) {
+        std::vector<VkImageMemoryBarrier> barriers;
+
+        VkPipelineStageFlags srcStages = 0;
+        VkPipelineStageFlags dstStages = 0;
+
+        TransitionUsageAndGetResourceBarrier(usage, range, &barriers, &srcStages, &dstStages);
+
+        if (mExternalState != ExternalState::InternalOnly) {
+            TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
+        }
+
+        if (!barriers.empty()) {
+            ASSERT(srcStages != 0 && dstStages != 0);
+            ToBackend(GetDevice())
+                ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
+                                        nullptr, 0, nullptr, barriers.size(), barriers.data());
+        }
+    }
+
+    void Texture::TransitionUsageAndGetResourceBarrier(
+        wgpu::TextureUsage usage,
+        const SubresourceRange& range,
+        std::vector<VkImageMemoryBarrier>* imageBarriers,
+        VkPipelineStageFlags* srcStages,
+        VkPipelineStageFlags* dstStages) {
+        ASSERT(imageBarriers != nullptr);
         const Format& format = GetFormat();

         wgpu::TextureUsage allLastUsages = wgpu::TextureUsage::None;
@@ -837,7 +863,7 @@ namespace dawn_native { namespace vulkan {
             if (CanReuseWithoutBarrier(mSubresourceLastUsages[0], usage)) {
                 return;
             }
-            barriers.push_back(
+            imageBarriers->push_back(
                 BuildMemoryBarrier(format, mHandle, mSubresourceLastUsages[0], usage, range));
             allLastUsages = mSubresourceLastUsages[0];
             for (uint32_t i = 0; i < GetSubresourceCount(); ++i) {
@@ -868,22 +894,15 @@ namespace dawn_native { namespace vulkan {
                         mSubresourceLastUsages[index] = usage;
                     }

-                    barriers.push_back(BuildMemoryBarrier(
+                    imageBarriers->push_back(BuildMemoryBarrier(
                         format, mHandle, lastUsage, usage,
                         SubresourceRange::SingleMipAndLayer(level, layer, format.aspects)));
                 }
             }
         }

-        if (mExternalState != ExternalState::InternalOnly) {
-            TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
-        }
-
-        VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
-        VkPipelineStageFlags dstStages = VulkanPipelineStage(usage, format);
-        ToBackend(GetDevice())
-            ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
-                                    nullptr, 0, nullptr, barriers.size(), barriers.data());
+        *srcStages |= VulkanPipelineStage(allLastUsages, format);
+        *dstStages |= VulkanPipelineStage(usage, format);

         mSameLastUsagesAcrossSubresources = areAllSubresourcesCovered;
     }
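
In the per-subresource path above, each barrier covers a single mip level and array layer. A standalone sketch of such a barrier (MakeImageBarrier is a hypothetical helper; Dawn's BuildMemoryBarrier additionally derives layouts, access masks, and aspect flags from usages and the texture format):

    #include <vulkan/vulkan.h>

    // Sketch of an image memory barrier scoped to one mip level / array layer,
    // analogous to SubresourceRange::SingleMipAndLayer() in the diff above.
    VkImageMemoryBarrier MakeImageBarrier(VkImage image,
                                          VkImageLayout oldLayout, VkImageLayout newLayout,
                                          VkAccessFlags srcAccess, VkAccessFlags dstAccess,
                                          uint32_t mipLevel, uint32_t arrayLayer) {
        VkImageMemoryBarrier barrier = {};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.pNext = nullptr;
        barrier.srcAccessMask = srcAccess;
        barrier.dstAccessMask = dstAccess;
        barrier.oldLayout = oldLayout;
        barrier.newLayout = newLayout;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;  // no ownership transfer
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        barrier.subresourceRange.baseMipLevel = mipLevel;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.baseArrayLayer = arrayLayer;
        barrier.subresourceRange.layerCount = 1;
        return barrier;
    }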

src/dawn_native/vulkan/TextureVk.h

@@ -70,6 +70,11 @@ namespace dawn_native { namespace vulkan {
         void TransitionUsageNow(CommandRecordingContext* recordingContext,
                                 wgpu::TextureUsage usage,
                                 const SubresourceRange& range);
+        void TransitionUsageAndGetResourceBarrier(wgpu::TextureUsage usage,
+                                                  const SubresourceRange& range,
+                                                  std::vector<VkImageMemoryBarrier>* imageBarriers,
+                                                  VkPipelineStageFlags* srcStages,
+                                                  VkPipelineStageFlags* dstStages);
         void TransitionUsageForPass(CommandRecordingContext* recordingContext,
                                     const PassTextureUsage& textureUsages,
                                     std::vector<VkImageMemoryBarrier>* imageBarriers,