Vulkan: Combine all pass barriers in a single call
This patch combines all barriers in a render/compute pass into a single function call. Previously, we had to issue separate barrier call(s) for each buffer/texture in a pass, so a real web application could need a large number of function calls to deliver the barriers for one pass. For example, we saw that the many function calls needed to deliver barriers in Aquarium (the WebGPU port) contributed to CPU usage and became a bottleneck. Bug: dawn:441 Change-Id: Ibe44967fefd2e1e6e64df4587146c4fb7fbe8e73 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/22700 Commit-Queue: Yunchao He <yunchao.he@intel.com> Reviewed-by: Austin Eng <enga@chromium.org>
This commit is contained in:
parent
f5657af110
commit
0a79bee135
|
@ -186,6 +186,25 @@ namespace dawn_native { namespace vulkan {
|
|||
|
||||
// Convenience overload: transitions this buffer to `usage` and records the resulting barrier
// right away. It gathers the barrier and the stage masks through the five-argument overload,
// then issues one CmdPipelineBarrier call on the recording context's command buffer if a
// barrier was produced.
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||
wgpu::BufferUsage usage) {
|
||||
// Local accumulators that the five-argument overload fills in.
std::vector<VkBufferMemoryBarrier> barriers;
|
||||
VkPipelineStageFlags srcStages = 0;
|
||||
VkPipelineStageFlags dstStages = 0;
|
||||
|
||||
TransitionUsageNow(recordingContext, usage, &barriers, &srcStages, &dstStages);
|
||||
|
||||
// An empty vector means the other overload produced no barrier, so nothing is recorded.
if (barriers.size() > 0) {
|
||||
// Only this one buffer is transitioned here, so at most one barrier is expected.
ASSERT(barriers.size() == 1);
|
||||
ToBackend(GetDevice())
|
||||
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
|
||||
nullptr, barriers.size(), barriers.data(), 0, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||
wgpu::BufferUsage usage,
|
||||
std::vector<VkBufferMemoryBarrier>* bufferBarriers,
|
||||
VkPipelineStageFlags* srcStages,
|
||||
VkPipelineStageFlags* dstStages) {
|
||||
bool lastIncludesTarget = (mLastUsage & usage) == usage;
|
||||
bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage;
|
||||
|
||||
|
@ -200,8 +219,8 @@ namespace dawn_native { namespace vulkan {
|
|||
return;
|
||||
}
|
||||
|
||||
VkPipelineStageFlags srcStages = VulkanPipelineStage(mLastUsage);
|
||||
VkPipelineStageFlags dstStages = VulkanPipelineStage(usage);
|
||||
*srcStages |= VulkanPipelineStage(mLastUsage);
|
||||
*dstStages |= VulkanPipelineStage(usage);
|
||||
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
|
@ -214,9 +233,7 @@ namespace dawn_native { namespace vulkan {
|
|||
barrier.offset = 0;
|
||||
barrier.size = GetSize();
|
||||
|
||||
ToBackend(GetDevice())
|
||||
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
|
||||
nullptr, 1, &barrier, 0, nullptr);
|
||||
bufferBarriers->push_back(barrier);
|
||||
|
||||
mLastUsage = usage;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,11 @@ namespace dawn_native { namespace vulkan {
|
|||
// `commands`.
|
||||
// TODO(cwallez@chromium.org): coalesce barriers and do them early when possible.
|
||||
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
|
||||
void TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||
wgpu::BufferUsage usage,
|
||||
std::vector<VkBufferMemoryBarrier>* bufferBarriers,
|
||||
VkPipelineStageFlags* srcStages,
|
||||
VkPipelineStageFlags* dstStages);
|
||||
|
||||
private:
|
||||
~Buffer() override;
|
||||
|
|
|
@ -376,12 +376,19 @@ namespace dawn_native { namespace vulkan {
|
|||
VkCommandBuffer commands = recordingContext->commandBuffer;
|
||||
|
||||
// Records the necessary barriers for the resource usage pre-computed by the frontend
|
||||
auto TransitionForPass = [](CommandRecordingContext* recordingContext,
|
||||
auto TransitionForPass = [](Device* device, CommandRecordingContext* recordingContext,
|
||||
const PassResourceUsage& usages) {
|
||||
std::vector<VkBufferMemoryBarrier> bufferBarriers;
|
||||
std::vector<VkImageMemoryBarrier> imageBarriers;
|
||||
VkPipelineStageFlags srcStages = 0;
|
||||
VkPipelineStageFlags dstStages = 0;
|
||||
|
||||
for (size_t i = 0; i < usages.buffers.size(); ++i) {
|
||||
Buffer* buffer = ToBackend(usages.buffers[i]);
|
||||
buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i]);
|
||||
buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i],
|
||||
&bufferBarriers, &srcStages, &dstStages);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < usages.textures.size(); ++i) {
|
||||
Texture* texture = ToBackend(usages.textures[i]);
|
||||
// Clear textures that are not output attachments. Output attachments will be
|
||||
|
@ -393,9 +400,18 @@ namespace dawn_native { namespace vulkan {
|
|||
texture->GetArrayLayers());
|
||||
}
|
||||
texture->TransitionUsageForPass(recordingContext,
|
||||
usages.textureUsages[i].subresourceUsages);
|
||||
usages.textureUsages[i].subresourceUsages,
|
||||
&imageBarriers, &srcStages, &dstStages);
|
||||
}
|
||||
|
||||
if (bufferBarriers.size() || imageBarriers.size()) {
|
||||
device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages,
|
||||
0, 0, nullptr, bufferBarriers.size(),
|
||||
bufferBarriers.data(), imageBarriers.size(),
|
||||
imageBarriers.data());
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
|
||||
size_t nextPassNumber = 0;
|
||||
|
||||
|
@ -562,7 +578,7 @@ namespace dawn_native { namespace vulkan {
|
|||
case Command::BeginRenderPass: {
|
||||
BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
|
||||
|
||||
TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]);
|
||||
TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
|
||||
|
||||
LazyClearRenderPassAttachments(cmd);
|
||||
DAWN_TRY(RecordRenderPass(recordingContext, cmd));
|
||||
|
@ -574,7 +590,7 @@ namespace dawn_native { namespace vulkan {
|
|||
case Command::BeginComputePass: {
|
||||
mCommands.NextCommand<BeginComputePassCmd>();
|
||||
|
||||
TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]);
|
||||
TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
|
||||
RecordComputePass(recordingContext);
|
||||
|
||||
nextPassNumber++;
|
||||
|
|
|
@ -670,34 +670,41 @@ namespace dawn_native { namespace vulkan {
|
|||
}
|
||||
|
||||
void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
|
||||
std::vector<VkImageMemoryBarrier>* barriers) {
|
||||
std::vector<VkImageMemoryBarrier>* barriers,
|
||||
size_t transitionBarrierStart) {
|
||||
ASSERT(GetNumMipLevels() == 1 && GetArrayLayers() == 1);
|
||||
ASSERT(barriers->size() <= 1);
|
||||
|
||||
// transitionBarrierStart specify the index where barriers for current transition start in
|
||||
// the vector. barriers->size() - transitionBarrierStart is the number of barriers that we
|
||||
// have already added into the vector during current transition.
|
||||
ASSERT(barriers->size() - transitionBarrierStart <= 1);
|
||||
|
||||
if (mExternalState == ExternalState::PendingAcquire) {
|
||||
if (!barriers->size()) {
|
||||
if (barriers->size() == transitionBarrierStart) {
|
||||
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
|
||||
wgpu::TextureUsage::None,
|
||||
wgpu::TextureUsage::None, 0, 0));
|
||||
}
|
||||
|
||||
// Transfer texture from external queue to graphics queue
|
||||
(*barriers)[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
||||
(*barriers)[0].dstQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
||||
(*barriers)[transitionBarrierStart].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
||||
(*barriers)[transitionBarrierStart].dstQueueFamilyIndex =
|
||||
ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
||||
// Don't override oldLayout to leave it as VK_IMAGE_LAYOUT_UNDEFINED
|
||||
// TODO(http://crbug.com/dawn/200)
|
||||
mExternalState = ExternalState::Acquired;
|
||||
} else if (mExternalState == ExternalState::PendingRelease) {
|
||||
if (!barriers->size()) {
|
||||
if (barriers->size() == transitionBarrierStart) {
|
||||
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
|
||||
wgpu::TextureUsage::None,
|
||||
wgpu::TextureUsage::None, 0, 0));
|
||||
}
|
||||
|
||||
// Transfer texture from graphics queue to external queue
|
||||
(*barriers)[0].srcQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
||||
(*barriers)[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
||||
(*barriers)[0].newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
(*barriers)[transitionBarrierStart].srcQueueFamilyIndex =
|
||||
ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
||||
(*barriers)[transitionBarrierStart].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
||||
(*barriers)[transitionBarrierStart].newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
mExternalState = ExternalState::Released;
|
||||
}
|
||||
|
||||
|
@ -714,8 +721,11 @@ namespace dawn_native { namespace vulkan {
|
|||
}
|
||||
|
||||
void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext,
|
||||
const std::vector<wgpu::TextureUsage>& subresourceUsages) {
|
||||
std::vector<VkImageMemoryBarrier> barriers;
|
||||
const std::vector<wgpu::TextureUsage>& subresourceUsages,
|
||||
std::vector<VkImageMemoryBarrier>* imageBarriers,
|
||||
VkPipelineStageFlags* srcStages,
|
||||
VkPipelineStageFlags* dstStages) {
|
||||
size_t transitionBarrierStart = imageBarriers->size();
|
||||
const Format& format = GetFormat();
|
||||
|
||||
wgpu::TextureUsage allUsages = wgpu::TextureUsage::None;
|
||||
|
@ -740,7 +750,7 @@ namespace dawn_native { namespace vulkan {
|
|||
continue;
|
||||
}
|
||||
|
||||
barriers.push_back(
|
||||
imageBarriers->push_back(
|
||||
BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index],
|
||||
subresourceUsages[index], mipLevel, arrayLayer));
|
||||
|
||||
|
@ -751,14 +761,12 @@ namespace dawn_native { namespace vulkan {
|
|||
}
|
||||
|
||||
if (mExternalState != ExternalState::InternalOnly) {
|
||||
TweakTransitionForExternalUsage(recordingContext, &barriers);
|
||||
TweakTransitionForExternalUsage(recordingContext, imageBarriers,
|
||||
transitionBarrierStart);
|
||||
}
|
||||
|
||||
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
|
||||
VkPipelineStageFlags dstStages = VulkanPipelineStage(allUsages, format);
|
||||
ToBackend(GetDevice())
|
||||
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
|
||||
nullptr, 0, nullptr, barriers.size(), barriers.data());
|
||||
*srcStages |= VulkanPipelineStage(allLastUsages, format);
|
||||
*dstStages |= VulkanPipelineStage(allUsages, format);
|
||||
}
|
||||
|
||||
void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||
|
@ -796,7 +804,7 @@ namespace dawn_native { namespace vulkan {
|
|||
}
|
||||
|
||||
if (mExternalState != ExternalState::InternalOnly) {
|
||||
TweakTransitionForExternalUsage(recordingContext, &barriers);
|
||||
TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
|
||||
}
|
||||
|
||||
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
|
||||
|
|
|
@ -73,7 +73,10 @@ namespace dawn_native { namespace vulkan {
|
|||
uint32_t baseArrayLayer,
|
||||
uint32_t layerCount);
|
||||
void TransitionUsageForPass(CommandRecordingContext* recordingContext,
|
||||
const std::vector<wgpu::TextureUsage>& subresourceUsages);
|
||||
const std::vector<wgpu::TextureUsage>& subresourceUsages,
|
||||
std::vector<VkImageMemoryBarrier>* imageBarriers,
|
||||
VkPipelineStageFlags* srcStages,
|
||||
VkPipelineStageFlags* dstStages);
|
||||
|
||||
void EnsureSubresourceContentInitialized(CommandRecordingContext* recordingContext,
|
||||
uint32_t baseMipLevel,
|
||||
|
@ -107,7 +110,8 @@ namespace dawn_native { namespace vulkan {
|
|||
TextureBase::ClearValue);
|
||||
|
||||
void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
|
||||
std::vector<VkImageMemoryBarrier>* barriers);
|
||||
std::vector<VkImageMemoryBarrier>* barriers,
|
||||
size_t transitionBarrierStart);
|
||||
|
||||
VkImage mHandle = VK_NULL_HANDLE;
|
||||
ResourceMemoryAllocation mMemoryAllocation;
|
||||
|
|
Loading…
Reference in New Issue