Vulkan: Combine all pass barriers in a single call

This patch combines barriers in a render/compute pass into one
function call.

Previously, we needed to dispatch barrier(s) separately for each
buffer/texture in a pass, so real web applications could require quite
a lot of function calls to deliver the barriers for a single pass. For
example, we saw that the large number of barrier calls in Aquarium
(WebGPU port) contributed to CPU usage and became a bottleneck.

Bug: dawn:441

Change-Id: Ibe44967fefd2e1e6e64df4587146c4fb7fbe8e73
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/22700
Commit-Queue: Yunchao He <yunchao.he@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
This commit is contained in:
Yunchao He 2020-06-08 22:27:18 +00:00 committed by Commit Bot service account
parent f5657af110
commit 0a79bee135
5 changed files with 81 additions and 31 deletions

View File

@ -186,6 +186,25 @@ namespace dawn_native { namespace vulkan {
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext, void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
wgpu::BufferUsage usage) { wgpu::BufferUsage usage) {
std::vector<VkBufferMemoryBarrier> barriers;
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
TransitionUsageNow(recordingContext, usage, &barriers, &srcStages, &dstStages);
if (barriers.size() > 0) {
ASSERT(barriers.size() == 1);
ToBackend(GetDevice())
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
nullptr, barriers.size(), barriers.data(), 0, nullptr);
}
}
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
wgpu::BufferUsage usage,
std::vector<VkBufferMemoryBarrier>* bufferBarriers,
VkPipelineStageFlags* srcStages,
VkPipelineStageFlags* dstStages) {
bool lastIncludesTarget = (mLastUsage & usage) == usage; bool lastIncludesTarget = (mLastUsage & usage) == usage;
bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage; bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage;
@ -200,8 +219,8 @@ namespace dawn_native { namespace vulkan {
return; return;
} }
VkPipelineStageFlags srcStages = VulkanPipelineStage(mLastUsage); *srcStages |= VulkanPipelineStage(mLastUsage);
VkPipelineStageFlags dstStages = VulkanPipelineStage(usage); *dstStages |= VulkanPipelineStage(usage);
VkBufferMemoryBarrier barrier; VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@ -214,9 +233,7 @@ namespace dawn_native { namespace vulkan {
barrier.offset = 0; barrier.offset = 0;
barrier.size = GetSize(); barrier.size = GetSize();
ToBackend(GetDevice()) bufferBarriers->push_back(barrier);
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
nullptr, 1, &barrier, 0, nullptr);
mLastUsage = usage; mLastUsage = usage;
} }

View File

@ -39,6 +39,11 @@ namespace dawn_native { namespace vulkan {
// `commands`. // `commands`.
// TODO(cwallez@chromium.org): coalesce barriers and do them early when possible. // TODO(cwallez@chromium.org): coalesce barriers and do them early when possible.
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage); void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
void TransitionUsageNow(CommandRecordingContext* recordingContext,
wgpu::BufferUsage usage,
std::vector<VkBufferMemoryBarrier>* bufferBarriers,
VkPipelineStageFlags* srcStages,
VkPipelineStageFlags* dstStages);
private: private:
~Buffer() override; ~Buffer() override;

View File

@ -376,12 +376,19 @@ namespace dawn_native { namespace vulkan {
VkCommandBuffer commands = recordingContext->commandBuffer; VkCommandBuffer commands = recordingContext->commandBuffer;
// Records the necessary barriers for the resource usage pre-computed by the frontend // Records the necessary barriers for the resource usage pre-computed by the frontend
auto TransitionForPass = [](CommandRecordingContext* recordingContext, auto TransitionForPass = [](Device* device, CommandRecordingContext* recordingContext,
const PassResourceUsage& usages) { const PassResourceUsage& usages) {
std::vector<VkBufferMemoryBarrier> bufferBarriers;
std::vector<VkImageMemoryBarrier> imageBarriers;
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
for (size_t i = 0; i < usages.buffers.size(); ++i) { for (size_t i = 0; i < usages.buffers.size(); ++i) {
Buffer* buffer = ToBackend(usages.buffers[i]); Buffer* buffer = ToBackend(usages.buffers[i]);
buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i]); buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i],
&bufferBarriers, &srcStages, &dstStages);
} }
for (size_t i = 0; i < usages.textures.size(); ++i) { for (size_t i = 0; i < usages.textures.size(); ++i) {
Texture* texture = ToBackend(usages.textures[i]); Texture* texture = ToBackend(usages.textures[i]);
// Clear textures that are not output attachments. Output attachments will be // Clear textures that are not output attachments. Output attachments will be
@ -393,9 +400,18 @@ namespace dawn_native { namespace vulkan {
texture->GetArrayLayers()); texture->GetArrayLayers());
} }
texture->TransitionUsageForPass(recordingContext, texture->TransitionUsageForPass(recordingContext,
usages.textureUsages[i].subresourceUsages); usages.textureUsages[i].subresourceUsages,
&imageBarriers, &srcStages, &dstStages);
}
if (bufferBarriers.size() || imageBarriers.size()) {
device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages,
0, 0, nullptr, bufferBarriers.size(),
bufferBarriers.data(), imageBarriers.size(),
imageBarriers.data());
} }
}; };
const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass; const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
size_t nextPassNumber = 0; size_t nextPassNumber = 0;
@ -562,7 +578,7 @@ namespace dawn_native { namespace vulkan {
case Command::BeginRenderPass: { case Command::BeginRenderPass: {
BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>(); BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]); TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
LazyClearRenderPassAttachments(cmd); LazyClearRenderPassAttachments(cmd);
DAWN_TRY(RecordRenderPass(recordingContext, cmd)); DAWN_TRY(RecordRenderPass(recordingContext, cmd));
@ -574,7 +590,7 @@ namespace dawn_native { namespace vulkan {
case Command::BeginComputePass: { case Command::BeginComputePass: {
mCommands.NextCommand<BeginComputePassCmd>(); mCommands.NextCommand<BeginComputePassCmd>();
TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]); TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
RecordComputePass(recordingContext); RecordComputePass(recordingContext);
nextPassNumber++; nextPassNumber++;

View File

@ -670,34 +670,41 @@ namespace dawn_native { namespace vulkan {
} }
void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext, void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
std::vector<VkImageMemoryBarrier>* barriers) { std::vector<VkImageMemoryBarrier>* barriers,
size_t transitionBarrierStart) {
ASSERT(GetNumMipLevels() == 1 && GetArrayLayers() == 1); ASSERT(GetNumMipLevels() == 1 && GetArrayLayers() == 1);
ASSERT(barriers->size() <= 1);
// transitionBarrierStart specifies the index in the vector where the barriers for the
// current transition start. barriers->size() - transitionBarrierStart is the number of
// barriers that have already been added to the vector during the current transition.
ASSERT(barriers->size() - transitionBarrierStart <= 1);
if (mExternalState == ExternalState::PendingAcquire) { if (mExternalState == ExternalState::PendingAcquire) {
if (!barriers->size()) { if (barriers->size() == transitionBarrierStart) {
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle, barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
wgpu::TextureUsage::None, wgpu::TextureUsage::None,
wgpu::TextureUsage::None, 0, 0)); wgpu::TextureUsage::None, 0, 0));
} }
// Transfer texture from external queue to graphics queue // Transfer texture from external queue to graphics queue
(*barriers)[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; (*barriers)[transitionBarrierStart].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
(*barriers)[0].dstQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily(); (*barriers)[transitionBarrierStart].dstQueueFamilyIndex =
ToBackend(GetDevice())->GetGraphicsQueueFamily();
// Don't override oldLayout to leave it as VK_IMAGE_LAYOUT_UNDEFINED // Don't override oldLayout to leave it as VK_IMAGE_LAYOUT_UNDEFINED
// TODO(http://crbug.com/dawn/200) // TODO(http://crbug.com/dawn/200)
mExternalState = ExternalState::Acquired; mExternalState = ExternalState::Acquired;
} else if (mExternalState == ExternalState::PendingRelease) { } else if (mExternalState == ExternalState::PendingRelease) {
if (!barriers->size()) { if (barriers->size() == transitionBarrierStart) {
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle, barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
wgpu::TextureUsage::None, wgpu::TextureUsage::None,
wgpu::TextureUsage::None, 0, 0)); wgpu::TextureUsage::None, 0, 0));
} }
// Transfer texture from graphics queue to external queue // Transfer texture from graphics queue to external queue
(*barriers)[0].srcQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily(); (*barriers)[transitionBarrierStart].srcQueueFamilyIndex =
(*barriers)[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; ToBackend(GetDevice())->GetGraphicsQueueFamily();
(*barriers)[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; (*barriers)[transitionBarrierStart].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
(*barriers)[transitionBarrierStart].newLayout = VK_IMAGE_LAYOUT_GENERAL;
mExternalState = ExternalState::Released; mExternalState = ExternalState::Released;
} }
@ -714,8 +721,11 @@ namespace dawn_native { namespace vulkan {
} }
void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext, void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext,
const std::vector<wgpu::TextureUsage>& subresourceUsages) { const std::vector<wgpu::TextureUsage>& subresourceUsages,
std::vector<VkImageMemoryBarrier> barriers; std::vector<VkImageMemoryBarrier>* imageBarriers,
VkPipelineStageFlags* srcStages,
VkPipelineStageFlags* dstStages) {
size_t transitionBarrierStart = imageBarriers->size();
const Format& format = GetFormat(); const Format& format = GetFormat();
wgpu::TextureUsage allUsages = wgpu::TextureUsage::None; wgpu::TextureUsage allUsages = wgpu::TextureUsage::None;
@ -740,7 +750,7 @@ namespace dawn_native { namespace vulkan {
continue; continue;
} }
barriers.push_back( imageBarriers->push_back(
BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index], BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index],
subresourceUsages[index], mipLevel, arrayLayer)); subresourceUsages[index], mipLevel, arrayLayer));
@ -751,14 +761,12 @@ namespace dawn_native { namespace vulkan {
} }
if (mExternalState != ExternalState::InternalOnly) { if (mExternalState != ExternalState::InternalOnly) {
TweakTransitionForExternalUsage(recordingContext, &barriers); TweakTransitionForExternalUsage(recordingContext, imageBarriers,
transitionBarrierStart);
} }
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format); *srcStages |= VulkanPipelineStage(allLastUsages, format);
VkPipelineStageFlags dstStages = VulkanPipelineStage(allUsages, format); *dstStages |= VulkanPipelineStage(allUsages, format);
ToBackend(GetDevice())
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
nullptr, 0, nullptr, barriers.size(), barriers.data());
} }
void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext, void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext,
@ -796,7 +804,7 @@ namespace dawn_native { namespace vulkan {
} }
if (mExternalState != ExternalState::InternalOnly) { if (mExternalState != ExternalState::InternalOnly) {
TweakTransitionForExternalUsage(recordingContext, &barriers); TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
} }
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format); VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);

View File

@ -73,7 +73,10 @@ namespace dawn_native { namespace vulkan {
uint32_t baseArrayLayer, uint32_t baseArrayLayer,
uint32_t layerCount); uint32_t layerCount);
void TransitionUsageForPass(CommandRecordingContext* recordingContext, void TransitionUsageForPass(CommandRecordingContext* recordingContext,
const std::vector<wgpu::TextureUsage>& subresourceUsages); const std::vector<wgpu::TextureUsage>& subresourceUsages,
std::vector<VkImageMemoryBarrier>* imageBarriers,
VkPipelineStageFlags* srcStages,
VkPipelineStageFlags* dstStages);
void EnsureSubresourceContentInitialized(CommandRecordingContext* recordingContext, void EnsureSubresourceContentInitialized(CommandRecordingContext* recordingContext,
uint32_t baseMipLevel, uint32_t baseMipLevel,
@ -107,7 +110,8 @@ namespace dawn_native { namespace vulkan {
TextureBase::ClearValue); TextureBase::ClearValue);
void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext, void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
std::vector<VkImageMemoryBarrier>* barriers); std::vector<VkImageMemoryBarrier>* barriers,
size_t transitionBarrierStart);
VkImage mHandle = VK_NULL_HANDLE; VkImage mHandle = VK_NULL_HANDLE;
ResourceMemoryAllocation mMemoryAllocation; ResourceMemoryAllocation mMemoryAllocation;