Transition bind group resource states before dispatch in compute pass

This patch fixes a crash issue in both D3D12 and Vulkan backends. Previously on D3D12 and Vulkan before a compute pass we transitioned the states of all the resources used in the pass, and before each dispatch call we only checked if the states of the storage buffers, read-only storage textures and write-only storage textures needed to be transitioned. This behavior causes two issues: 1. In a compute pass a buffer or texture can be used with both read-only and writable usages in different dispatch calls (e.g. as a storage buffer in the first dispatch, and as a uniform buffer in the next dispatch), but combining these usages is an invalid state combination on D3D12 and isn't allowed by the D3D12 validation layer. 2. In the above case, the state of the buffer is not transitioned into UNIFORM, which does not match the required state in the next dispatch. This patch fixes these issues by transitioning all the states in the current bind group before each dispatch() instead of at the beginning of the compute pass. BUG=dawn:522 TEST=dawn_end2end_tests Change-Id: Ibeb6c41dc493ee1068b43bd89ed5a15f2331ef75 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/27942 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
2025-12-14 23:56:16 +00:00 · 2020-09-09 01:14:38 +00:00
parent 7b7e098b11
commit 12e97ed6a7
7 changed files with 414 additions and 154 deletions
--- a/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
+++ b/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
@@ -127,6 +127,70 @@ TEST_P(ComputeStorageBufferBarrierTests, AddPingPong) {
    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
 }

+// Test that dispatches ping-ponging two buffers between read-only storage (src) and storage (dst)
+// bindings are synchronized in one compute pass. Expected values assume kIterations is even.
+TEST_P(ComputeStorageBufferBarrierTests, StorageAndReadonlyStoragePingPongInOnePass) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
+    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+
+    wgpu::Buffer bufferA = utils::CreateBufferFromData(
+        device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    wgpu::Buffer bufferB = utils::CreateBufferFromData(
+        device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        #define kNumValues 100
+        layout(std430, set = 0, binding = 0) readonly buffer Src { uint src[kNumValues]; };
+        layout(std430, set = 0, binding = 1) buffer Dst { uint dst[kNumValues]; };
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            dst[index] = src[index] + 0x1234;
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferA, 0, bufferSize},
+                                                          {1, bufferB, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferB, 0, bufferSize},
+                                                          {1, bufferA, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB};  // [0]: A -> B, [1]: B -> A
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    for (uint32_t i = 0; i < kIterations / 2; ++i) {  // two dispatches per loop iteration
+        pass.SetBindGroup(0, bindGroups[0]);
+        pass.Dispatch(kNumValues);
+        pass.SetBindGroup(0, bindGroups[1]);
+        pass.Dispatch(kNumValues);
+    }
+    pass.EndPass();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
+}
+
 // Test that Storage to Uniform buffer transitions work and synchronize correctly
 // by ping-ponging between Storage/Uniform usage in sequential compute passes.
 TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
@@ -192,6 +256,70 @@ TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
 }

+// Test that Storage to Uniform buffer transitions work and synchronize correctly when two
+// buffers ping-pong between Uniform (src) and Storage (dst) usage within one compute pass.
+TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPongInOnePass) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
+    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+
+    wgpu::Buffer bufferA = utils::CreateBufferFromData(
+        device, data.data(), bufferSize,
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+    wgpu::Buffer bufferB = utils::CreateBufferFromData(
+        device, data.data(), bufferSize,
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        #define kNumValues 100
+        layout(std140, set = 0, binding = 0) uniform Src { uvec4 src[kNumValues / 4]; };
+        layout(std430, set = 0, binding = 1) buffer Dst { uvec4 dst[kNumValues / 4]; };
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            dst[index] = src[index] + 0x1234;
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferA, 0, bufferSize},
+                                                          {1, bufferB, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferB, 0, bufferSize},
+                                                          {1, bufferA, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB};
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    for (uint32_t i = 0, b = 0; i < kIterations; ++i, b = 1 - b) {  // b toggles between 0 and 1
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroups[b]);
+        pass.Dispatch(kNumValues / 4);  // the shader handles one uvec4 (4 values) per invocation
+    }
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
+}
+
 DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests,
                      D3D12Backend(),
                      MetalBackend(),
--- a/src/tests/end2end/StorageTextureTests.cpp
+++ b/src/tests/end2end/StorageTextureTests.cpp
@@ -944,6 +944,163 @@ TEST_P(StorageTextureTests, Writeonly2DArrayStorageTexture) {
    CheckOutputStorageTexture(writeonlyStorageTexture, kTextureFormat, kArrayLayerCount);
 }

+// Test that multiple dispatches to increment values by ping-ponging between a read-only storage
+// texture and a write-only storage texture are synchronized in one pass.
+TEST_P(StorageTextureTests, ReadonlyAndWriteonlyStorageTexturePingPong) {
+    constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
+    wgpu::Texture storageTexture1 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u, 1u);
+    wgpu::Texture storageTexture2 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u, 1u);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        layout(set = 0, binding = 0, r32ui) uniform readonly uimage2D Src;
+        layout(set = 0, binding = 1, r32ui) uniform writeonly uimage2D Dst;
+        void main() {
+            uvec4 srcValue = imageLoad(Src, ivec2(0, 0));
+            ++srcValue.x;
+            imageStore(Dst, ivec2(0, 0), srcValue);
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // In bindGroupA storageTexture1 is bound as read-only storage texture and storageTexture2 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, storageTexture1.CreateView()},
+                                                          {1, storageTexture2.CreateView()},
+                                                      });
+
+    // In bindGroupB storageTexture2 is bound as read-only storage texture and storageTexture1 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, storageTexture2.CreateView()},
+                                                          {1, storageTexture1.CreateView()},
+                                                      });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    // After the first dispatch the value in storageTexture2 should be 1u.
+    pass.SetBindGroup(0, bindGroupA);
+    pass.Dispatch(1);
+
+    // After the second dispatch the value in storageTexture1 should be 2u.
+    pass.SetBindGroup(0, bindGroupB);
+    pass.Dispatch(1);
+
+    pass.EndPass();
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = sizeof(uint32_t);
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer resultBuffer = device.CreateBuffer(&bufferDescriptor);
+
+    wgpu::TextureCopyView textureCopyView;
+    textureCopyView.texture = storageTexture1;
+
+    wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(resultBuffer, 0, 256, 1);
+    wgpu::Extent3D extent3D = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &extent3D);
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    constexpr uint32_t kFinalPixelValueInTexture1 = 2u;
+    EXPECT_BUFFER_U32_EQ(kFinalPixelValueInTexture1, resultBuffer, 0);
+}
+
+// Test that multiple dispatches to increment values by ping-ponging between a sampled texture and
+// a write-only storage texture are synchronized in one pass.
+TEST_P(StorageTextureTests, SampledAndWriteonlyStorageTexturePingPong) {
+    constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
+    wgpu::Texture storageTexture1 = CreateTexture(
+        kTextureFormat,
+        wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u,
+        1u);
+    wgpu::Texture storageTexture2 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage, 1u, 1u);
+    wgpu::SamplerDescriptor samplerDesc;
+    wgpu::Sampler sampler = device.CreateSampler(&samplerDesc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        layout(set = 0, binding = 0) uniform sampler mySampler;
+        layout(set = 0, binding = 1) uniform utexture2D Src;
+        layout(set = 0, binding = 2, r32ui) uniform writeonly uimage2D Dst;
+        void main() {
+            uvec4 srcValue = texelFetch(usampler2D(Src, mySampler), ivec2(0, 0), 0);
+            ++srcValue.x;
+            imageStore(Dst, ivec2(0, 0), srcValue);
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // In bindGroupA storageTexture1 is bound as sampled texture and storageTexture2 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, sampler},
+                                                          {1, storageTexture1.CreateView()},
+                                                          {2, storageTexture2.CreateView()},
+                                                      });
+
+    // In bindGroupB storageTexture2 is bound as sampled texture and storageTexture1 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, sampler},
+                                                          {1, storageTexture2.CreateView()},
+                                                          {2, storageTexture1.CreateView()},
+                                                      });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    // After the first dispatch the value in storageTexture2 should be 1u.
+    pass.SetBindGroup(0, bindGroupA);
+    pass.Dispatch(1);
+
+    // After the second dispatch the value in storageTexture1 should be 2u.
+    pass.SetBindGroup(0, bindGroupB);
+    pass.Dispatch(1);
+
+    pass.EndPass();
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = sizeof(uint32_t);
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer resultBuffer = device.CreateBuffer(&bufferDescriptor);
+
+    wgpu::TextureCopyView textureCopyView;
+    textureCopyView.texture = storageTexture1;
+
+    wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(resultBuffer, 0, 256, 1);
+    wgpu::Extent3D extent3D = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &extent3D);
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    constexpr uint32_t kFinalPixelValueInTexture1 = 2u;
+    EXPECT_BUFFER_U32_EQ(kFinalPixelValueInTexture1, resultBuffer, 0);
+}
+
 DAWN_INSTANTIATE_TEST(StorageTextureTests,
                      D3D12Backend(),
                      MetalBackend(),