Vulkan: Combine all pass barriers in a single call
This patch combines all barriers in a render/compute pass into a single function call. Previously, we needed to dispatch separate barrier(s) for each buffer/texture in a pass, so real web applications could require a large number of function calls to deliver the barriers for a single pass. For example, we observed that the many function calls used to deliver barriers in Aquarium (the WebGPU port) contributed to CPU usage and became a bottleneck. Bug: dawn:441 Change-Id: Ibe44967fefd2e1e6e64df4587146c4fb7fbe8e73 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/22700 Commit-Queue: Yunchao He <yunchao.he@intel.com> Reviewed-by: Austin Eng <enga@chromium.org>
This commit is contained in:
parent
f5657af110
commit
0a79bee135
|
@ -186,6 +186,25 @@ namespace dawn_native { namespace vulkan {
|
||||||
|
|
||||||
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||||
wgpu::BufferUsage usage) {
|
wgpu::BufferUsage usage) {
|
||||||
|
std::vector<VkBufferMemoryBarrier> barriers;
|
||||||
|
VkPipelineStageFlags srcStages = 0;
|
||||||
|
VkPipelineStageFlags dstStages = 0;
|
||||||
|
|
||||||
|
TransitionUsageNow(recordingContext, usage, &barriers, &srcStages, &dstStages);
|
||||||
|
|
||||||
|
if (barriers.size() > 0) {
|
||||||
|
ASSERT(barriers.size() == 1);
|
||||||
|
ToBackend(GetDevice())
|
||||||
|
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
|
||||||
|
nullptr, barriers.size(), barriers.data(), 0, nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||||
|
wgpu::BufferUsage usage,
|
||||||
|
std::vector<VkBufferMemoryBarrier>* bufferBarriers,
|
||||||
|
VkPipelineStageFlags* srcStages,
|
||||||
|
VkPipelineStageFlags* dstStages) {
|
||||||
bool lastIncludesTarget = (mLastUsage & usage) == usage;
|
bool lastIncludesTarget = (mLastUsage & usage) == usage;
|
||||||
bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage;
|
bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage;
|
||||||
|
|
||||||
|
@ -200,8 +219,8 @@ namespace dawn_native { namespace vulkan {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipelineStageFlags srcStages = VulkanPipelineStage(mLastUsage);
|
*srcStages |= VulkanPipelineStage(mLastUsage);
|
||||||
VkPipelineStageFlags dstStages = VulkanPipelineStage(usage);
|
*dstStages |= VulkanPipelineStage(usage);
|
||||||
|
|
||||||
VkBufferMemoryBarrier barrier;
|
VkBufferMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
|
@ -214,9 +233,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
barrier.offset = 0;
|
barrier.offset = 0;
|
||||||
barrier.size = GetSize();
|
barrier.size = GetSize();
|
||||||
|
|
||||||
ToBackend(GetDevice())
|
bufferBarriers->push_back(barrier);
|
||||||
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
|
|
||||||
nullptr, 1, &barrier, 0, nullptr);
|
|
||||||
|
|
||||||
mLastUsage = usage;
|
mLastUsage = usage;
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,11 @@ namespace dawn_native { namespace vulkan {
|
||||||
// `commands`.
|
// `commands`.
|
||||||
// TODO(cwallez@chromium.org): coalesce barriers and do them early when possible.
|
// TODO(cwallez@chromium.org): coalesce barriers and do them early when possible.
|
||||||
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
|
void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
|
||||||
|
void TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||||
|
wgpu::BufferUsage usage,
|
||||||
|
std::vector<VkBufferMemoryBarrier>* bufferBarriers,
|
||||||
|
VkPipelineStageFlags* srcStages,
|
||||||
|
VkPipelineStageFlags* dstStages);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
~Buffer() override;
|
~Buffer() override;
|
||||||
|
|
|
@ -376,12 +376,19 @@ namespace dawn_native { namespace vulkan {
|
||||||
VkCommandBuffer commands = recordingContext->commandBuffer;
|
VkCommandBuffer commands = recordingContext->commandBuffer;
|
||||||
|
|
||||||
// Records the necessary barriers for the resource usage pre-computed by the frontend
|
// Records the necessary barriers for the resource usage pre-computed by the frontend
|
||||||
auto TransitionForPass = [](CommandRecordingContext* recordingContext,
|
auto TransitionForPass = [](Device* device, CommandRecordingContext* recordingContext,
|
||||||
const PassResourceUsage& usages) {
|
const PassResourceUsage& usages) {
|
||||||
|
std::vector<VkBufferMemoryBarrier> bufferBarriers;
|
||||||
|
std::vector<VkImageMemoryBarrier> imageBarriers;
|
||||||
|
VkPipelineStageFlags srcStages = 0;
|
||||||
|
VkPipelineStageFlags dstStages = 0;
|
||||||
|
|
||||||
for (size_t i = 0; i < usages.buffers.size(); ++i) {
|
for (size_t i = 0; i < usages.buffers.size(); ++i) {
|
||||||
Buffer* buffer = ToBackend(usages.buffers[i]);
|
Buffer* buffer = ToBackend(usages.buffers[i]);
|
||||||
buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i]);
|
buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i],
|
||||||
|
&bufferBarriers, &srcStages, &dstStages);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < usages.textures.size(); ++i) {
|
for (size_t i = 0; i < usages.textures.size(); ++i) {
|
||||||
Texture* texture = ToBackend(usages.textures[i]);
|
Texture* texture = ToBackend(usages.textures[i]);
|
||||||
// Clear textures that are not output attachments. Output attachments will be
|
// Clear textures that are not output attachments. Output attachments will be
|
||||||
|
@ -393,9 +400,18 @@ namespace dawn_native { namespace vulkan {
|
||||||
texture->GetArrayLayers());
|
texture->GetArrayLayers());
|
||||||
}
|
}
|
||||||
texture->TransitionUsageForPass(recordingContext,
|
texture->TransitionUsageForPass(recordingContext,
|
||||||
usages.textureUsages[i].subresourceUsages);
|
usages.textureUsages[i].subresourceUsages,
|
||||||
|
&imageBarriers, &srcStages, &dstStages);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bufferBarriers.size() || imageBarriers.size()) {
|
||||||
|
device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages,
|
||||||
|
0, 0, nullptr, bufferBarriers.size(),
|
||||||
|
bufferBarriers.data(), imageBarriers.size(),
|
||||||
|
imageBarriers.data());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
|
const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
|
||||||
size_t nextPassNumber = 0;
|
size_t nextPassNumber = 0;
|
||||||
|
|
||||||
|
@ -562,7 +578,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
case Command::BeginRenderPass: {
|
case Command::BeginRenderPass: {
|
||||||
BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
|
BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
|
||||||
|
|
||||||
TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]);
|
TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
|
||||||
|
|
||||||
LazyClearRenderPassAttachments(cmd);
|
LazyClearRenderPassAttachments(cmd);
|
||||||
DAWN_TRY(RecordRenderPass(recordingContext, cmd));
|
DAWN_TRY(RecordRenderPass(recordingContext, cmd));
|
||||||
|
@ -574,7 +590,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
case Command::BeginComputePass: {
|
case Command::BeginComputePass: {
|
||||||
mCommands.NextCommand<BeginComputePassCmd>();
|
mCommands.NextCommand<BeginComputePassCmd>();
|
||||||
|
|
||||||
TransitionForPass(recordingContext, passResourceUsages[nextPassNumber]);
|
TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
|
||||||
RecordComputePass(recordingContext);
|
RecordComputePass(recordingContext);
|
||||||
|
|
||||||
nextPassNumber++;
|
nextPassNumber++;
|
||||||
|
|
|
@ -670,34 +670,41 @@ namespace dawn_native { namespace vulkan {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
|
void Texture::TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
|
||||||
std::vector<VkImageMemoryBarrier>* barriers) {
|
std::vector<VkImageMemoryBarrier>* barriers,
|
||||||
|
size_t transitionBarrierStart) {
|
||||||
ASSERT(GetNumMipLevels() == 1 && GetArrayLayers() == 1);
|
ASSERT(GetNumMipLevels() == 1 && GetArrayLayers() == 1);
|
||||||
ASSERT(barriers->size() <= 1);
|
|
||||||
|
// transitionBarrierStart specifies the index where barriers for the current transition start in
|
||||||
|
// the vector. barriers->size() - transitionBarrierStart is the number of barriers that we
|
||||||
|
// have already added into the vector during current transition.
|
||||||
|
ASSERT(barriers->size() - transitionBarrierStart <= 1);
|
||||||
|
|
||||||
if (mExternalState == ExternalState::PendingAcquire) {
|
if (mExternalState == ExternalState::PendingAcquire) {
|
||||||
if (!barriers->size()) {
|
if (barriers->size() == transitionBarrierStart) {
|
||||||
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
|
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
|
||||||
wgpu::TextureUsage::None,
|
wgpu::TextureUsage::None,
|
||||||
wgpu::TextureUsage::None, 0, 0));
|
wgpu::TextureUsage::None, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transfer texture from external queue to graphics queue
|
// Transfer texture from external queue to graphics queue
|
||||||
(*barriers)[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
(*barriers)[transitionBarrierStart].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
||||||
(*barriers)[0].dstQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
(*barriers)[transitionBarrierStart].dstQueueFamilyIndex =
|
||||||
|
ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
||||||
// Don't override oldLayout to leave it as VK_IMAGE_LAYOUT_UNDEFINED
|
// Don't override oldLayout to leave it as VK_IMAGE_LAYOUT_UNDEFINED
|
||||||
// TODO(http://crbug.com/dawn/200)
|
// TODO(http://crbug.com/dawn/200)
|
||||||
mExternalState = ExternalState::Acquired;
|
mExternalState = ExternalState::Acquired;
|
||||||
} else if (mExternalState == ExternalState::PendingRelease) {
|
} else if (mExternalState == ExternalState::PendingRelease) {
|
||||||
if (!barriers->size()) {
|
if (barriers->size() == transitionBarrierStart) {
|
||||||
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
|
barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
|
||||||
wgpu::TextureUsage::None,
|
wgpu::TextureUsage::None,
|
||||||
wgpu::TextureUsage::None, 0, 0));
|
wgpu::TextureUsage::None, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transfer texture from graphics queue to external queue
|
// Transfer texture from graphics queue to external queue
|
||||||
(*barriers)[0].srcQueueFamilyIndex = ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
(*barriers)[transitionBarrierStart].srcQueueFamilyIndex =
|
||||||
(*barriers)[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
ToBackend(GetDevice())->GetGraphicsQueueFamily();
|
||||||
(*barriers)[0].newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
(*barriers)[transitionBarrierStart].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR;
|
||||||
|
(*barriers)[transitionBarrierStart].newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||||
mExternalState = ExternalState::Released;
|
mExternalState = ExternalState::Released;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -714,8 +721,11 @@ namespace dawn_native { namespace vulkan {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext,
|
void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext,
|
||||||
const std::vector<wgpu::TextureUsage>& subresourceUsages) {
|
const std::vector<wgpu::TextureUsage>& subresourceUsages,
|
||||||
std::vector<VkImageMemoryBarrier> barriers;
|
std::vector<VkImageMemoryBarrier>* imageBarriers,
|
||||||
|
VkPipelineStageFlags* srcStages,
|
||||||
|
VkPipelineStageFlags* dstStages) {
|
||||||
|
size_t transitionBarrierStart = imageBarriers->size();
|
||||||
const Format& format = GetFormat();
|
const Format& format = GetFormat();
|
||||||
|
|
||||||
wgpu::TextureUsage allUsages = wgpu::TextureUsage::None;
|
wgpu::TextureUsage allUsages = wgpu::TextureUsage::None;
|
||||||
|
@ -740,7 +750,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
barriers.push_back(
|
imageBarriers->push_back(
|
||||||
BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index],
|
BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index],
|
||||||
subresourceUsages[index], mipLevel, arrayLayer));
|
subresourceUsages[index], mipLevel, arrayLayer));
|
||||||
|
|
||||||
|
@ -751,14 +761,12 @@ namespace dawn_native { namespace vulkan {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mExternalState != ExternalState::InternalOnly) {
|
if (mExternalState != ExternalState::InternalOnly) {
|
||||||
TweakTransitionForExternalUsage(recordingContext, &barriers);
|
TweakTransitionForExternalUsage(recordingContext, imageBarriers,
|
||||||
|
transitionBarrierStart);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
|
*srcStages |= VulkanPipelineStage(allLastUsages, format);
|
||||||
VkPipelineStageFlags dstStages = VulkanPipelineStage(allUsages, format);
|
*dstStages |= VulkanPipelineStage(allUsages, format);
|
||||||
ToBackend(GetDevice())
|
|
||||||
->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
|
|
||||||
nullptr, 0, nullptr, barriers.size(), barriers.data());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext,
|
||||||
|
@ -796,7 +804,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mExternalState != ExternalState::InternalOnly) {
|
if (mExternalState != ExternalState::InternalOnly) {
|
||||||
TweakTransitionForExternalUsage(recordingContext, &barriers);
|
TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
|
VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
|
||||||
|
|
|
@ -73,7 +73,10 @@ namespace dawn_native { namespace vulkan {
|
||||||
uint32_t baseArrayLayer,
|
uint32_t baseArrayLayer,
|
||||||
uint32_t layerCount);
|
uint32_t layerCount);
|
||||||
void TransitionUsageForPass(CommandRecordingContext* recordingContext,
|
void TransitionUsageForPass(CommandRecordingContext* recordingContext,
|
||||||
const std::vector<wgpu::TextureUsage>& subresourceUsages);
|
const std::vector<wgpu::TextureUsage>& subresourceUsages,
|
||||||
|
std::vector<VkImageMemoryBarrier>* imageBarriers,
|
||||||
|
VkPipelineStageFlags* srcStages,
|
||||||
|
VkPipelineStageFlags* dstStages);
|
||||||
|
|
||||||
void EnsureSubresourceContentInitialized(CommandRecordingContext* recordingContext,
|
void EnsureSubresourceContentInitialized(CommandRecordingContext* recordingContext,
|
||||||
uint32_t baseMipLevel,
|
uint32_t baseMipLevel,
|
||||||
|
@ -107,7 +110,8 @@ namespace dawn_native { namespace vulkan {
|
||||||
TextureBase::ClearValue);
|
TextureBase::ClearValue);
|
||||||
|
|
||||||
void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
|
void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
|
||||||
std::vector<VkImageMemoryBarrier>* barriers);
|
std::vector<VkImageMemoryBarrier>* barriers,
|
||||||
|
size_t transitionBarrierStart);
|
||||||
|
|
||||||
VkImage mHandle = VK_NULL_HANDLE;
|
VkImage mHandle = VK_NULL_HANDLE;
|
||||||
ResourceMemoryAllocation mMemoryAllocation;
|
ResourceMemoryAllocation mMemoryAllocation;
|
||||||
|
|
Loading…
Reference in New Issue