Transition bind group resource states before dispatch in compute pass

This patch fixes a crash in both the D3D12 and Vulkan backends. Previously, on D3D12 and Vulkan, we transitioned the states of all the resources used in a compute pass once, before the pass began, and before each dispatch call we only checked whether the states of storage buffers, read-only storage textures and write-only storage textures needed to be transitioned. This behavior causes two issues: 1. Within one compute pass a buffer or texture can be used with a read-only usage in one dispatch call and a writable usage in another (e.g. as a storage buffer in the first dispatch and as a uniform buffer in the next dispatch), but combining these states up front is an invalid state combination on D3D12 and is rejected by the D3D12 validation layer. 2. In the above case, the buffer is never transitioned into the UNIFORM state, so its state does not match the state required by the next dispatch. This patch fixes these issues by transitioning the states of all the resources in the current bind groups before each dispatch() call instead of at the beginning of the compute pass. BUG=dawn:522 TEST=dawn_end2end_tests Change-Id: Ibeb6c41dc493ee1068b43bd89ed5a15f2331ef75 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/27942 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
2025-12-10 05:57:51 +00:00 · 2020-09-09 01:14:38 +00:00
parent 7b7e098b11
commit 12e97ed6a7
7 changed files with 414 additions and 154 deletions
--- a/src/dawn_native/BUILD.gn
+++ b/src/dawn_native/BUILD.gn
@@ -153,7 +153,6 @@ source_set("dawn_native_sources") {
    "BackendConnection.h",
    "BindGroup.cpp",
    "BindGroup.h",
-    "BindGroupAndStorageBarrierTracker.h",
    "BindGroupLayout.cpp",
    "BindGroupLayout.h",
    "BindGroupTracker.h",
--- a/src/dawn_native/BindGroupAndStorageBarrierTracker.h
+++ b/src/dawn_native/BindGroupAndStorageBarrierTracker.h
@@ -1,112 +0,0 @@
-// Copyright 2019 The Dawn Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
-#define DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
-
-#include "common/ityp_bitset.h"
-#include "common/ityp_stack_vec.h"
-#include "dawn_native/BindGroup.h"
-#include "dawn_native/BindGroupTracker.h"
-#include "dawn_native/Buffer.h"
-#include "dawn_native/Texture.h"
-
-namespace dawn_native {
-
-    // Extends BindGroupTrackerBase to also keep track of resources that need a usage transition.
-    template <bool CanInheritBindGroups, typename DynamicOffset>
-    class BindGroupAndStorageBarrierTrackerBase
-        : public BindGroupTrackerBase<CanInheritBindGroups, DynamicOffset> {
-        using Base = BindGroupTrackerBase<CanInheritBindGroups, DynamicOffset>;
-
-      public:
-        BindGroupAndStorageBarrierTrackerBase() = default;
-
-        void OnSetBindGroup(BindGroupIndex index,
-                            BindGroupBase* bindGroup,
-                            uint32_t dynamicOffsetCount,
-                            uint32_t* dynamicOffsets) {
-            ASSERT(index < kMaxBindGroupsTyped);
-
-            if (this->mBindGroups[index] != bindGroup) {
-                const BindGroupLayoutBase* layout = bindGroup->GetLayout();
-
-                mBindings[index].resize(layout->GetBindingCount());
-                mBindingTypes[index].resize(layout->GetBindingCount());
-                mBindingsNeedingBarrier[index] = {};
-
-                for (BindingIndex bindingIndex{0}; bindingIndex < layout->GetBindingCount();
-                     ++bindingIndex) {
-                    const BindingInfo& bindingInfo = layout->GetBindingInfo(bindingIndex);
-
-                    if ((bindingInfo.visibility & wgpu::ShaderStage::Compute) == 0) {
-                        continue;
-                    }
-
-                    mBindingTypes[index][bindingIndex] = bindingInfo.type;
-                    switch (bindingInfo.type) {
-                        case wgpu::BindingType::UniformBuffer:
-                        case wgpu::BindingType::ReadonlyStorageBuffer:
-                        case wgpu::BindingType::Sampler:
-                        case wgpu::BindingType::ComparisonSampler:
-                        case wgpu::BindingType::SampledTexture:
-                            // Don't require barriers.
-                            break;
-
-                        case wgpu::BindingType::StorageBuffer:
-                            mBindingsNeedingBarrier[index].set(bindingIndex);
-                            mBindings[index][bindingIndex] = static_cast<ObjectBase*>(
-                                bindGroup->GetBindingAsBufferBinding(bindingIndex).buffer);
-                            break;
-
-                        // Read-only and write-only storage textures must use general layout
-                        // because load and store operations on storage images can only be done on
-                        // the images in VK_IMAGE_LAYOUT_GENERAL layout.
-                        case wgpu::BindingType::ReadonlyStorageTexture:
-                        case wgpu::BindingType::WriteonlyStorageTexture:
-                            mBindingsNeedingBarrier[index].set(bindingIndex);
-                            mBindings[index][bindingIndex] = static_cast<ObjectBase*>(
-                                bindGroup->GetBindingAsTextureView(bindingIndex));
-                            break;
-
-                        case wgpu::BindingType::StorageTexture:
-                            // Not implemented.
-                        default:
-                            UNREACHABLE();
-                            break;
-                    }
-                }
-            }
-
-            Base::OnSetBindGroup(index, bindGroup, dynamicOffsetCount, dynamicOffsets);
-        }
-
-      protected:
-        ityp::array<BindGroupIndex,
-                    ityp::bitset<BindingIndex, kMaxBindingsPerPipelineLayout>,
-                    kMaxBindGroups>
-            mBindingsNeedingBarrier = {};
-        ityp::array<BindGroupIndex,
-                    ityp::stack_vec<BindingIndex, wgpu::BindingType, kMaxOptimalBindingsPerGroup>,
-                    kMaxBindGroups>
-            mBindingTypes = {};
-        ityp::array<BindGroupIndex,
-                    ityp::stack_vec<BindingIndex, ObjectBase*, kMaxOptimalBindingsPerGroup>,
-                    kMaxBindGroups>
-            mBindings = {};
-    };
-
-}  // namespace dawn_native
-
-#endif  // DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
--- a/src/dawn_native/CMakeLists.txt
+++ b/src/dawn_native/CMakeLists.txt
@@ -31,7 +31,6 @@ target_sources(dawn_native PRIVATE
    "BackendConnection.h"
    "BindGroup.cpp"
    "BindGroup.h"
-    "BindGroupAndStorageBarrierTracker.h"
    "BindGroupLayout.cpp"
    "BindGroupLayout.h"
    "BindGroupTracker.h"
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -15,7 +15,7 @@
 #include "dawn_native/d3d12/CommandBufferD3D12.h"

 #include "common/Assert.h"
-#include "dawn_native/BindGroupAndStorageBarrierTracker.h"
+#include "dawn_native/BindGroupTracker.h"
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/CommandValidation.h"
 #include "dawn_native/Commands.h"
@@ -143,12 +143,12 @@ namespace dawn_native { namespace d3d12 {
        }
    }  // anonymous namespace

-    class BindGroupStateTracker : public BindGroupAndStorageBarrierTrackerBase<false, uint64_t> {
-        using Base = BindGroupAndStorageBarrierTrackerBase;
+    class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
+        using Base = BindGroupTrackerBase;

      public:
        BindGroupStateTracker(Device* device)
-            : BindGroupAndStorageBarrierTrackerBase(),
+            : BindGroupTrackerBase(),
              mDevice(device),
              mViewAllocator(device->GetViewShaderVisibleDescriptorAllocator()),
              mSamplerAllocator(device->GetSamplerShaderVisibleDescriptorAllocator()) {
@@ -225,12 +225,14 @@ namespace dawn_native { namespace d3d12 {
            if (mInCompute) {
                std::vector<D3D12_RESOURCE_BARRIER> barriers;
                for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
-                    for (BindingIndex binding : IterateBitSet(mBindingsNeedingBarrier[index])) {
-                        wgpu::BindingType bindingType = mBindingTypes[index][binding];
-                        switch (bindingType) {
+                    BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
+                    for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
+                        switch (layout->GetBindingInfo(binding).type) {
                            case wgpu::BindingType::StorageBuffer: {
                                D3D12_RESOURCE_BARRIER barrier;
-                                if (static_cast<Buffer*>(mBindings[index][binding])
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
                                        ->TrackUsageAndGetResourceBarrier(
                                            commandContext, &barrier, wgpu::BufferUsage::Storage)) {
                                    barriers.push_back(barrier);
@@ -240,7 +242,7 @@ namespace dawn_native { namespace d3d12 {

                            case wgpu::BindingType::ReadonlyStorageTexture: {
                                TextureViewBase* view =
-                                    static_cast<TextureViewBase*>(mBindings[index][binding]);
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
                                ToBackend(view->GetTexture())
                                    ->TransitionUsageAndGetResourceBarrier(
                                        commandContext, &barriers, kReadonlyStorageTexture,
@@ -249,22 +251,52 @@ namespace dawn_native { namespace d3d12 {
                            }
                            case wgpu::BindingType::WriteonlyStorageTexture: {
                                TextureViewBase* view =
-                                    static_cast<TextureViewBase*>(mBindings[index][binding]);
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
                                ToBackend(view->GetTexture())
                                    ->TransitionUsageAndGetResourceBarrier(
                                        commandContext, &barriers, wgpu::TextureUsage::Storage,
                                        view->GetSubresourceRange());
                                break;
                            }
-                            case wgpu::BindingType::StorageTexture:
-                                // Not implemented.
+                            case wgpu::BindingType::ReadonlyStorageBuffer: {
+                                D3D12_RESOURCE_BARRIER barrier;
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
+                                        ->TrackUsageAndGetResourceBarrier(commandContext, &barrier,
+                                                                          kReadOnlyStorageBuffer)) {
+                                    barriers.push_back(barrier);
+                                }
+                                break;
+                            }
+                            case wgpu::BindingType::SampledTexture: {
+                                TextureViewBase* view =
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
+                                ToBackend(view->GetTexture())
+                                    ->TransitionUsageAndGetResourceBarrier(
+                                        commandContext, &barriers, wgpu::TextureUsage::Sampled,
+                                        view->GetSubresourceRange());
+                                break;
+                            }
+                            case wgpu::BindingType::UniformBuffer: {
+                                D3D12_RESOURCE_BARRIER barrier;
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
+                                        ->TrackUsageAndGetResourceBarrier(
+                                            commandContext, &barrier, wgpu::BufferUsage::Uniform)) {
+                                    barriers.push_back(barrier);
+                                }
+                                break;
+                            }

-                            case wgpu::BindingType::UniformBuffer:
-                            case wgpu::BindingType::ReadonlyStorageBuffer:
                            case wgpu::BindingType::Sampler:
                            case wgpu::BindingType::ComparisonSampler:
-                            case wgpu::BindingType::SampledTexture:
                                // Don't require barriers.
+                                break;
+
+                            case wgpu::BindingType::StorageTexture:
+                                // Not implemented.

                            default:
                                UNREACHABLE();
@@ -572,7 +604,7 @@ namespace dawn_native { namespace d3d12 {
        bindingTracker.SetID3D12DescriptorHeaps(commandList);

        // Records the necessary barriers for the resource usage pre-computed by the frontend
-        auto PrepareResourcesForSubmission = [](CommandRecordingContext* commandContext,
+        auto PrepareResourcesForRenderPass = [](CommandRecordingContext* commandContext,
                                                const PassResourceUsage& usages) -> bool {
            std::vector<D3D12_RESOURCE_BARRIER> barriers;

@@ -595,6 +627,8 @@ namespace dawn_native { namespace d3d12 {
                bufferUsages |= usages.bufferUsages[i];
            }

+            wgpu::TextureUsage textureUsages = wgpu::TextureUsage::None;
+
            for (size_t i = 0; i < usages.textures.size(); ++i) {
                Texture* texture = ToBackend(usages.textures[i]);
                // Clear textures that are not output attachments. Output attachments will be
@@ -604,11 +638,7 @@ namespace dawn_native { namespace d3d12 {
                    texture->EnsureSubresourceContentInitialized(commandContext,
                                                                 texture->GetAllSubresources());
                }
-            }

-            wgpu::TextureUsage textureUsages = wgpu::TextureUsage::None;
-
-            for (size_t i = 0; i < usages.textures.size(); ++i) {
                ToBackend(usages.textures[i])
                    ->TrackUsageAndGetResourceBarrierForPass(commandContext, &barriers,
                                                             usages.textureUsages[i]);
@@ -623,6 +653,25 @@ namespace dawn_native { namespace d3d12 {
                    textureUsages & wgpu::TextureUsage::Storage);
        };

+        // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking
+        // for compute passes.
+        auto PrepareResourcesForComputePass = [](CommandRecordingContext* commandContext,
+                                                 const PassResourceUsage& usages) -> void {
+            for (size_t i = 0; i < usages.buffers.size(); ++i) {
+                Buffer* buffer = ToBackend(usages.buffers[i]);
+
+                // TODO(jiawei.shao@intel.com): clear storage buffers with
+                // ClearUnorderedAccessView*().
+                buffer->GetDevice()->ConsumedError(buffer->EnsureDataInitialized(commandContext));
+            }
+
+            for (size_t i = 0; i < usages.textures.size(); ++i) {
+                Texture* texture = ToBackend(usages.textures[i]);
+                texture->EnsureSubresourceContentInitialized(commandContext,
+                                                             texture->GetAllSubresources());
+            }
+        };
+
        const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
        uint32_t nextPassNumber = 0;

@@ -632,7 +681,7 @@ namespace dawn_native { namespace d3d12 {
                case Command::BeginComputePass: {
                    mCommands.NextCommand<BeginComputePassCmd>();

-                    PrepareResourcesForSubmission(commandContext,
+                    PrepareResourcesForComputePass(commandContext,
                                                   passResourceUsages[nextPassNumber]);
                    bindingTracker.SetInComputePass(true);
                    DAWN_TRY(RecordComputePass(commandContext, &bindingTracker));
@@ -645,7 +694,7 @@ namespace dawn_native { namespace d3d12 {
                    BeginRenderPassCmd* beginRenderPassCmd =
                        mCommands.NextCommand<BeginRenderPassCmd>();

-                    const bool passHasUAV = PrepareResourcesForSubmission(
+                    const bool passHasUAV = PrepareResourcesForRenderPass(
                        commandContext, passResourceUsages[nextPassNumber]);
                    bindingTracker.SetInComputePass(false);

@@ -892,6 +941,7 @@ namespace dawn_native { namespace d3d12 {

                    DAWN_TRY(bindingTracker->Apply(commandContext));
                    Buffer* buffer = ToBackend(dispatch->indirectBuffer.Get());
+                    buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::Indirect);
                    ComPtr<ID3D12CommandSignature> signature =
                        ToBackend(GetDevice())->GetDispatchIndirectSignature();
                    commandList->ExecuteIndirect(signature.Get(), 1, buffer->GetD3D12Resource(),
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -14,7 +14,7 @@

 #include "dawn_native/vulkan/CommandBufferVk.h"

-#include "dawn_native/BindGroupAndStorageBarrierTracker.h"
+#include "dawn_native/BindGroupTracker.h"
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/CommandValidation.h"
 #include "dawn_native/Commands.h"
@@ -134,8 +134,7 @@ namespace dawn_native { namespace vulkan {
            }
        };

-        class ComputeDescriptorSetTracker
-            : public BindGroupAndStorageBarrierTrackerBase<true, uint32_t> {
+        class ComputeDescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
          public:
            ComputeDescriptorSetTracker() = default;

@@ -147,12 +146,16 @@ namespace dawn_native { namespace vulkan {
                                    mDirtyBindGroupsObjectChangedOrIsDynamic, mBindGroups,
                                    mDynamicOffsetCounts, mDynamicOffsets);

+                // TODO(jiawei.shao@intel.com): combine the following barriers in one
+                // vkCmdPipelineBarrier() call.
                for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
-                    for (BindingIndex bindingIndex :
-                         IterateBitSet(mBindingsNeedingBarrier[index])) {
-                        switch (mBindingTypes[index][bindingIndex]) {
+                    BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
+                    for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
+                        switch (layout->GetBindingInfo(binding).type) {
                            case wgpu::BindingType::StorageBuffer:
-                                static_cast<Buffer*>(mBindings[index][bindingIndex])
+                            case wgpu::BindingType::ReadonlyStorageBuffer:
+                                ToBackend(
+                                    mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
                                    ->TransitionUsageNow(recordingContext,
                                                         wgpu::BufferUsage::Storage);
                                break;
@@ -160,22 +163,36 @@ namespace dawn_native { namespace vulkan {
                            case wgpu::BindingType::ReadonlyStorageTexture:
                            case wgpu::BindingType::WriteonlyStorageTexture: {
                                TextureViewBase* view =
-                                    static_cast<TextureViewBase*>(mBindings[index][bindingIndex]);
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
                                ToBackend(view->GetTexture())
                                    ->TransitionUsageNow(recordingContext,
                                                         wgpu::TextureUsage::Storage,
                                                         view->GetSubresourceRange());
                                break;
                            }
-                            case wgpu::BindingType::StorageTexture:
-                                // Not implemented.
-
                            case wgpu::BindingType::UniformBuffer:
-                            case wgpu::BindingType::ReadonlyStorageBuffer:
+                                ToBackend(
+                                    mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
+                                    ->TransitionUsageNow(recordingContext,
+                                                         wgpu::BufferUsage::Uniform);
+                                break;
+                            case wgpu::BindingType::SampledTexture: {
+                                TextureViewBase* view =
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
+                                ToBackend(view->GetTexture())
+                                    ->TransitionUsageNow(recordingContext,
+                                                         wgpu::TextureUsage::Sampled,
+                                                         view->GetSubresourceRange());
+                                break;
+                            }
+
                            case wgpu::BindingType::Sampler:
                            case wgpu::BindingType::ComparisonSampler:
-                            case wgpu::BindingType::SampledTexture:
                                // Don't require barriers.
+                                break;
+
+                            case wgpu::BindingType::StorageTexture:
+                                // Not implemented.

                            default:
                                UNREACHABLE();
@@ -431,7 +448,8 @@ namespace dawn_native { namespace vulkan {
        VkCommandBuffer commands = recordingContext->commandBuffer;

        // Records the necessary barriers for the resource usage pre-computed by the frontend
-        auto TransitionForPass = [](Device* device, CommandRecordingContext* recordingContext,
+        auto PrepareResourcesForRenderPass = [](Device* device,
+                                                CommandRecordingContext* recordingContext,
                                                const PassResourceUsage& usages) {
            std::vector<VkBufferMemoryBarrier> bufferBarriers;
            std::vector<VkImageMemoryBarrier> imageBarriers;
@@ -466,6 +484,23 @@ namespace dawn_native { namespace vulkan {
            }
        };

+        // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking
+        // for compute passes.
+        auto PrepareResourcesForComputePass = [](Device* device,
+                                                 CommandRecordingContext* recordingContext,
+                                                 const PassResourceUsage& usages) {
+            for (size_t i = 0; i < usages.buffers.size(); ++i) {
+                Buffer* buffer = ToBackend(usages.buffers[i]);
+                buffer->EnsureDataInitialized(recordingContext);
+            }
+
+            for (size_t i = 0; i < usages.textures.size(); ++i) {
+                Texture* texture = ToBackend(usages.textures[i]);
+                texture->EnsureSubresourceContentInitialized(recordingContext,
+                                                             texture->GetAllSubresources());
+            }
+        };
+
        const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
        size_t nextPassNumber = 0;

@@ -644,7 +679,8 @@ namespace dawn_native { namespace vulkan {
                case Command::BeginRenderPass: {
                    BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();

-                    TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
+                    PrepareResourcesForRenderPass(device, recordingContext,
+                                                  passResourceUsages[nextPassNumber]);

                    LazyClearRenderPassAttachments(cmd);
                    DAWN_TRY(RecordRenderPass(recordingContext, cmd));
@@ -656,7 +692,8 @@ namespace dawn_native { namespace vulkan {
                case Command::BeginComputePass: {
                    mCommands.NextCommand<BeginComputePassCmd>();

-                    TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
+                    PrepareResourcesForComputePass(device, recordingContext,
+                                                   passResourceUsages[nextPassNumber]);
                    DAWN_TRY(RecordComputePass(recordingContext));

                    nextPassNumber++;
@@ -705,6 +742,8 @@ namespace dawn_native { namespace vulkan {

                case Command::DispatchIndirect: {
                    DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();
+                    ToBackend(dispatch->indirectBuffer)
+                        ->TransitionUsageNow(recordingContext, wgpu::BufferUsage::Indirect);
                    VkBuffer indirectBuffer = ToBackend(dispatch->indirectBuffer)->GetHandle();

                    descriptorSets.Apply(device, recordingContext, VK_PIPELINE_BIND_POINT_COMPUTE);
--- a/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
+++ b/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
@@ -127,6 +127,70 @@ TEST_P(ComputeStorageBufferBarrierTests, AddPingPong) {
    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
 }

+// Test that multiple dispatches to increment values by ping-ponging between storage buffers and
+// read-only storage buffers are synchronized in one compute pass.
+TEST_P(ComputeStorageBufferBarrierTests, StorageAndReadonlyStoragePingPongInOnePass) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
+    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+
+    wgpu::Buffer bufferA = utils::CreateBufferFromData(
+        device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    wgpu::Buffer bufferB = utils::CreateBufferFromData(
+        device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        #define kNumValues 100
+        layout(std430, set = 0, binding = 0) readonly buffer Src { uint src[kNumValues]; };
+        layout(std430, set = 0, binding = 1) buffer Dst { uint dst[kNumValues]; };
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            dst[index] = src[index] + 0x1234;
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferA, 0, bufferSize},
+                                                          {1, bufferB, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferB, 0, bufferSize},
+                                                          {1, bufferA, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB};
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    for (uint32_t i = 0; i < kIterations / 2; ++i) {
+        pass.SetBindGroup(0, bindGroups[0]);
+        pass.Dispatch(kNumValues);
+        pass.SetBindGroup(0, bindGroups[1]);
+        pass.Dispatch(kNumValues);
+    }
+    pass.EndPass();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
+}
+
 // Test that Storage to Uniform buffer transitions work and synchronize correctly
 // by ping-ponging between Storage/Uniform usage in sequential compute passes.
 TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
@@ -192,6 +256,70 @@ TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
 }

+// Test that Storage to Uniform buffer transitions work and synchronize correctly
+// by ping-ponging between Storage/Uniform usage in one compute pass.
+TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPongInOnePass) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
+    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+
+    wgpu::Buffer bufferA = utils::CreateBufferFromData(
+        device, data.data(), bufferSize,
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+    wgpu::Buffer bufferB = utils::CreateBufferFromData(
+        device, data.data(), bufferSize,
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        #define kNumValues 100
+        layout(std140, set = 0, binding = 0) uniform Src { uvec4 src[kNumValues / 4]; };
+        layout(std430, set = 0, binding = 1) buffer Dst { uvec4 dst[kNumValues / 4]; };
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            dst[index] = src[index] + 0x1234;
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferA, 0, bufferSize},
+                                                          {1, bufferB, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferB, 0, bufferSize},
+                                                          {1, bufferA, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB};  // [0]: A -> B, [1]: B -> A
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    for (uint32_t i = 0, b = 0; i < kIterations; ++i, b = 1 - b) {  // b alternates 0, 1, 0, ...
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroups[b]);
+        pass.Dispatch(kNumValues / 4);  // shader arrays hold kNumValues / 4 uvec4 elements
+    }
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
+}
+
 DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests,
                      D3D12Backend(),
                      MetalBackend(),
--- a/src/tests/end2end/StorageTextureTests.cpp
+++ b/src/tests/end2end/StorageTextureTests.cpp
@@ -944,6 +944,163 @@ TEST_P(StorageTextureTests, Writeonly2DArrayStorageTexture) {
    CheckOutputStorageTexture(writeonlyStorageTexture, kTextureFormat, kArrayLayerCount);
 }

+// Test that multiple dispatches to increment values by ping-ponging between a read-only storage
+// texture and a write-only storage texture are synchronized in one pass.
+TEST_P(StorageTextureTests, ReadonlyAndWriteonlyStorageTexturePingPong) {
+    constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
+    wgpu::Texture storageTexture1 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u, 1u);
+    wgpu::Texture storageTexture2 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u, 1u);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        layout(set = 0, binding = 0, r32ui) uniform readonly uimage2D Src;
+        layout(set = 0, binding = 1, r32ui) uniform writeonly uimage2D Dst;
+        void main() {
+            uvec4 srcValue = imageLoad(Src, ivec2(0, 0));
+            ++srcValue.x;
+            imageStore(Dst, ivec2(0, 0), srcValue);
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // In bindGroupA storageTexture1 is bound as read-only storage texture and storageTexture2 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, storageTexture1.CreateView()},
+                                                          {1, storageTexture2.CreateView()},
+                                                      });
+
+    // In bindGroupB storageTexture2 is bound as read-only storage texture and storageTexture1 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, storageTexture2.CreateView()},
+                                                          {1, storageTexture1.CreateView()},
+                                                      });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    // After the first dispatch the value in storageTexture2 should be 1u.
+    pass.SetBindGroup(0, bindGroupA);
+    pass.Dispatch(1);
+
+    // After the second dispatch the value in storageTexture1 should be 2u.
+    pass.SetBindGroup(0, bindGroupB);
+    pass.Dispatch(1);
+
+    pass.EndPass();
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = sizeof(uint32_t);
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer resultBuffer = device.CreateBuffer(&bufferDescriptor);
+
+    wgpu::TextureCopyView textureCopyView;
+    textureCopyView.texture = storageTexture1;  // written by the second dispatch
+
+    wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(resultBuffer, 0, 256, 1);
+    wgpu::Extent3D extent3D = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &extent3D);
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    constexpr uint32_t kFinalPixelValueInTexture1 = 2u;
+    EXPECT_BUFFER_U32_EQ(kFinalPixelValueInTexture1, resultBuffer, 0);
+}
+
+// Test that multiple dispatches to increment values by ping-ponging between a sampled texture and
+// a write-only storage texture are synchronized in one pass.
+TEST_P(StorageTextureTests, SampledAndWriteonlyStorageTexturePingPong) {
+    constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
+    wgpu::Texture storageTexture1 = CreateTexture(
+        kTextureFormat,
+        wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u,
+        1u);
+    wgpu::Texture storageTexture2 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage, 1u, 1u);
+    wgpu::SamplerDescriptor samplerDesc;
+    wgpu::Sampler sampler = device.CreateSampler(&samplerDesc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        layout(set = 0, binding = 0) uniform sampler mySampler;
+        layout(set = 0, binding = 1) uniform utexture2D Src;
+        layout(set = 0, binding = 2, r32ui) uniform writeonly uimage2D Dst;
+        void main() {
+            uvec4 srcValue = texelFetch(usampler2D(Src, mySampler), ivec2(0, 0), 0);
+            ++srcValue.x;
+            imageStore(Dst, ivec2(0, 0), srcValue);
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // In bindGroupA storageTexture1 is bound as sampled texture and storageTexture2 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, sampler},
+                                                          {1, storageTexture1.CreateView()},
+                                                          {2, storageTexture2.CreateView()},
+                                                      });
+
+    // In bindGroupB storageTexture2 is bound as sampled texture and storageTexture1 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, sampler},
+                                                          {1, storageTexture2.CreateView()},
+                                                          {2, storageTexture1.CreateView()},
+                                                      });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    // After the first dispatch the value in storageTexture2 should be 1u.
+    pass.SetBindGroup(0, bindGroupA);
+    pass.Dispatch(1);
+
+    // After the second dispatch the value in storageTexture1 should be 2u.
+    pass.SetBindGroup(0, bindGroupB);
+    pass.Dispatch(1);
+
+    pass.EndPass();
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = sizeof(uint32_t);
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer resultBuffer = device.CreateBuffer(&bufferDescriptor);
+
+    wgpu::TextureCopyView textureCopyView;
+    textureCopyView.texture = storageTexture1;  // written by the second dispatch
+
+    wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(resultBuffer, 0, 256, 1);
+    wgpu::Extent3D extent3D = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &extent3D);
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    constexpr uint32_t kFinalPixelValueInTexture1 = 2u;
+    EXPECT_BUFFER_U32_EQ(kFinalPixelValueInTexture1, resultBuffer, 0);
+}
+
 DAWN_INSTANTIATE_TEST(StorageTextureTests,
                      D3D12Backend(),
                      MetalBackend(),