From 0a79bee135cd916e5d85495c47f4f77746e1c5f6 Mon Sep 17 00:00:00 2001 From: Yunchao He Date: Mon, 8 Jun 2020 22:27:18 +0000 Subject: [PATCH] Vulkan: Combine all pass barriers in a single call This patch combines barriers in a render/compute pass into one function call. Previously, we needed to dispatch barrier(s) for each buffer/texture in a pass. So we may need quite a lot of function calls to deliver barriers in a pass in real web applications. For example, in Aquarium (WebGPU port) we saw that the large number of function calls needed to deliver barriers contributed to CPU usage and became a bottleneck. Bug: dawn:441 Change-Id: Ibe44967fefd2e1e6e64df4587146c4fb7fbe8e73 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/22700 Commit-Queue: Yunchao He Reviewed-by: Austin Eng --- src/dawn_native/vulkan/BufferVk.cpp | 27 ++++++++++--- src/dawn_native/vulkan/BufferVk.h | 5 +++ src/dawn_native/vulkan/CommandBufferVk.cpp | 26 +++++++++--- src/dawn_native/vulkan/TextureVk.cpp | 46 +++++++++++++--------- src/dawn_native/vulkan/TextureVk.h | 8 +++- 5 files changed, 81 insertions(+), 31 deletions(-) diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp index 0b4584bbdf..6bd2987051 100644 --- a/src/dawn_native/vulkan/BufferVk.cpp +++ b/src/dawn_native/vulkan/BufferVk.cpp @@ -186,6 +186,25 @@ namespace dawn_native { namespace vulkan { void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage) { + std::vector barriers; + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + + TransitionUsageNow(recordingContext, usage, &barriers, &srcStages, &dstStages); + + if (barriers.size() > 0) { + ASSERT(barriers.size() == 1); + ToBackend(GetDevice()) + ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, + nullptr, barriers.size(), barriers.data(), 0, nullptr); + } + } + + void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext, + wgpu::BufferUsage 
usage, + std::vector* bufferBarriers, + VkPipelineStageFlags* srcStages, + VkPipelineStageFlags* dstStages) { bool lastIncludesTarget = (mLastUsage & usage) == usage; bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage; @@ -200,8 +219,8 @@ namespace dawn_native { namespace vulkan { return; } - VkPipelineStageFlags srcStages = VulkanPipelineStage(mLastUsage); - VkPipelineStageFlags dstStages = VulkanPipelineStage(usage); + *srcStages |= VulkanPipelineStage(mLastUsage); + *dstStages |= VulkanPipelineStage(usage); VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -214,9 +233,7 @@ namespace dawn_native { namespace vulkan { barrier.offset = 0; barrier.size = GetSize(); - ToBackend(GetDevice()) - ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, - nullptr, 1, &barrier, 0, nullptr); + bufferBarriers->push_back(barrier); mLastUsage = usage; } diff --git a/src/dawn_native/vulkan/BufferVk.h b/src/dawn_native/vulkan/BufferVk.h index 021e4abc92..93669b61e8 100644 --- a/src/dawn_native/vulkan/BufferVk.h +++ b/src/dawn_native/vulkan/BufferVk.h @@ -39,6 +39,11 @@ namespace dawn_native { namespace vulkan { // `commands`. // TODO(cwallez@chromium.org): coalesce barriers and do them early when possible. 
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage); + void TransitionUsageNow(CommandRecordingContext* recordingContext, + wgpu::BufferUsage usage, + std::vector* bufferBarriers, + VkPipelineStageFlags* srcStages, + VkPipelineStageFlags* dstStages); private: ~Buffer() override; diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp index c220176e2c..54c06860a3 100644 --- a/src/dawn_native/vulkan/CommandBufferVk.cpp +++ b/src/dawn_native/vulkan/CommandBufferVk.cpp @@ -376,12 +376,19 @@ namespace dawn_native { namespace vulkan { VkCommandBuffer commands = recordingContext->commandBuffer; // Records the necessary barriers for the resource usage pre-computed by the frontend - auto TransitionForPass = [](CommandRecordingContext* recordingContext, + auto TransitionForPass = [](Device* device, CommandRecordingContext* recordingContext, const PassResourceUsage& usages) { + std::vector bufferBarriers; + std::vector imageBarriers; + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + for (size_t i = 0; i < usages.buffers.size(); ++i) { Buffer* buffer = ToBackend(usages.buffers[i]); - buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i]); + buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i], + &bufferBarriers, &srcStages, &dstStages); } + for (size_t i = 0; i < usages.textures.size(); ++i) { Texture* texture = ToBackend(usages.textures[i]); // Clear textures that are not output attachments. 
Output attachments will be @@ -393,9 +400,18 @@ namespace dawn_native { namespace vulkan { texture->GetArrayLayers()); } texture->TransitionUsageForPass(recordingContext, - usages.textureUsages[i].subresourceUsages); + usages.textureUsages[i].subresourceUsages, + &imageBarriers, &srcStages, &dstStages); + } + + if (bufferBarriers.size() || imageBarriers.size()) { + device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, + 0, 0, nullptr, bufferBarriers.size(), + bufferBarriers.data(), imageBarriers.size(), + imageBarriers.data()); } }; + const std::vector& passResourceUsages = GetResourceUsages().perPass; size_t nextPassNumber = 0; @@ -562,7 +578,7 @@ namespace dawn_native { namespace vulkan { case Command::BeginRenderPass: { BeginRenderPassCmd* cmd = mCommands.NextCommand(); - TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]); + TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]); LazyClearRenderPassAttachments(cmd); DAWN_TRY(RecordRenderPass(recordingContext, cmd)); @@ -574,7 +590,7 @@ namespace dawn_native { namespace vulkan { case Command::BeginComputePass: { mCommands.NextCommand(); - TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]); + TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]); RecordComputePass(recordingContext); nextPassNumber++; diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp index c9ad0eb653..15d713750a 100644 --- a/src/dawn_native/vulkan/TextureVk.cpp +++ b/src/dawn_native/vulkan/TextureVk.cpp @@ -670,34 +670,41 @@ namespace dawn_native { namespace vulkan { } void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext, - std::vector* barriers) { + std::vector* barriers, + size_t transitionBarrierStart) { ASSERT(GetNumMipLevels() == 1 && GetArrayLayers() == 1); - ASSERT(barriers->size() <= 1); + + // transitionBarrierStart specify the index where 
barriers for current transition start in + // the vector. barriers->size() - transitionBarrierStart is the number of barriers that we + // have already added into the vector during current transition. + ASSERT(barriers->size() - transitionBarrierStart <= 1); if (mExternalState == ExternalState::PendingAcquire) { - if (!barriers->size()) { + if (barriers->size() == transitionBarrierStart) { barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle, wgpu::TextureUsage::None, wgpu::TextureUsage::None, 0, 0)); } // Transfer texture from external queue to graphics queue - (*barriers)[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; - (*barriers)[0].dstQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily(); + (*barriers)[transitionBarrierStart].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; + (*barriers)[transitionBarrierStart].dstQueueFamilyIndex = + ToBackend(GetDevice())->GetGraphicsQueueFamily(); // Don't override oldLayout to leave it as VK_IMAGE_LAYOUT_UNDEFINED // TODO(http://crbug.com/dawn/200) mExternalState = ExternalState::Acquired; } else if (mExternalState == ExternalState::PendingRelease) { - if (!barriers->size()) { + if (barriers->size() == transitionBarrierStart) { barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle, wgpu::TextureUsage::None, wgpu::TextureUsage::None, 0, 0)); } // Transfer texture from graphics queue to external queue - (*barriers)[0].srcQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily(); - (*barriers)[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; - (*barriers)[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; + (*barriers)[transitionBarrierStart].srcQueueFamilyIndex = + ToBackend(GetDevice())->GetGraphicsQueueFamily(); + (*barriers)[transitionBarrierStart].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; + (*barriers)[transitionBarrierStart].newLayout = VK_IMAGE_LAYOUT_GENERAL; mExternalState = ExternalState::Released; } @@ -714,8 +721,11 @@ namespace dawn_native { namespace vulkan { 
} void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext, - const std::vector& subresourceUsages) { - std::vector barriers; + const std::vector& subresourceUsages, + std::vector* imageBarriers, + VkPipelineStageFlags* srcStages, + VkPipelineStageFlags* dstStages) { + size_t transitionBarrierStart = imageBarriers->size(); const Format& format = GetFormat(); wgpu::TextureUsage allUsages = wgpu::TextureUsage::None; @@ -740,7 +750,7 @@ namespace dawn_native { namespace vulkan { continue; } - barriers.push_back( + imageBarriers->push_back( BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index], subresourceUsages[index], mipLevel, arrayLayer)); @@ -751,14 +761,12 @@ namespace dawn_native { namespace vulkan { } if (mExternalState != ExternalState::InternalOnly) { - TweakTransitionForExternalUsage(recordingContext, &barriers); + TweakTransitionForExternalUsage(recordingContext, imageBarriers, + transitionBarrierStart); } - VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format); - VkPipelineStageFlags dstStages = VulkanPipelineStage(allUsages, format); - ToBackend(GetDevice()) - ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, - nullptr, 0, nullptr, barriers.size(), barriers.data()); + *srcStages |= VulkanPipelineStage(allLastUsages, format); + *dstStages |= VulkanPipelineStage(allUsages, format); } void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext, @@ -796,7 +804,7 @@ namespace dawn_native { namespace vulkan { } if (mExternalState != ExternalState::InternalOnly) { - TweakTransitionForExternalUsage(recordingContext, &barriers); + TweakTransitionForExternalUsage(recordingContext, &barriers, 0); } VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format); diff --git a/src/dawn_native/vulkan/TextureVk.h b/src/dawn_native/vulkan/TextureVk.h index 1ac37c4992..a1df372a42 100644 --- a/src/dawn_native/vulkan/TextureVk.h +++ 
b/src/dawn_native/vulkan/TextureVk.h @@ -73,7 +73,10 @@ namespace dawn_native { namespace vulkan { uint32_t baseArrayLayer, uint32_t layerCount); void TransitionUsageForPass(CommandRecordingContext* recordingContext, - const std::vector& subresourceUsages); + const std::vector& subresourceUsages, + std::vector* imageBarriers, + VkPipelineStageFlags* srcStages, + VkPipelineStageFlags* dstStages); void EnsureSubresourceContentInitialized(CommandRecordingContext* recordingContext, uint32_t baseMipLevel, @@ -107,7 +110,8 @@ namespace dawn_native { namespace vulkan { TextureBase::ClearValue); void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext, - std::vector* barriers); + std::vector* barriers, + size_t transitionBarrierStart); VkImage mHandle = VK_NULL_HANDLE; ResourceMemoryAllocation mMemoryAllocation;