Add missing lock to APIBeginRender/ComputePass.

Add tests to verify multithreading behaviors of encoding render/compute
passes.

Bug: dawn:1662
Change-Id: I9bc6a0dd5d94b53b59e7e49a4611d4d55cc36e60
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/126980
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
parent 3dba94c1d4
commit 3e7a114a6e
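For orientation, the locking shape this change applies can be reduced to a self-contained C++ sketch. TrackedMutex, Device, and CommandEncoder below are simplified stand-ins, not Dawn's real classes; only the overall pattern mirrors the diff that follows: public API entry points that create new objects take the device-wide lock through an RAII guard, and the internal paths they call merely assert that the lock is already held.

#include <atomic>
#include <cassert>
#include <mutex>
#include <thread>

// Mutex that remembers which thread currently owns it, so code can assert
// "the current thread holds the lock" instead of re-locking.
class TrackedMutex {
  public:
    void Lock() {
        mMutex.lock();
        mOwner.store(std::this_thread::get_id());
    }
    void Unlock() {
        mOwner.store(std::thread::id());
        mMutex.unlock();
    }
    bool IsLockedByCurrentThread() const {
        return mOwner.load() == std::this_thread::get_id();
    }

  private:
    std::mutex mMutex;
    std::atomic<std::thread::id> mOwner{std::thread::id()};
};

class Device {
  public:
    // RAII guard analogous to a scoped device lock; a null mutex means
    // implicit synchronization is disabled, so nothing is locked.
    class ScopedLock {
      public:
        explicit ScopedLock(TrackedMutex* mutex) : mMutex(mutex) {
            if (mMutex != nullptr) {
                mMutex->Lock();
            }
        }
        ~ScopedLock() {
            if (mMutex != nullptr) {
                mMutex->Unlock();
            }
        }
        ScopedLock(const ScopedLock&) = delete;
        ScopedLock& operator=(const ScopedLock&) = delete;

      private:
        TrackedMutex* mMutex;
    };

    void EnableImplicitSynchronization() { mMutex = &mMutexStorage; }

    ScopedLock GetScopedLock() { return ScopedLock(mMutex); }

    // True when no mutex exists (feature off) or the current thread holds it.
    bool IsLockedByCurrentThreadIfNeeded() const {
        return mMutex == nullptr || mMutex->IsLockedByCurrentThread();
    }

  private:
    TrackedMutex mMutexStorage;
    TrackedMutex* mMutex = nullptr;  // Non-null only when the feature is on.
};

class CommandEncoder {
  public:
    explicit CommandEncoder(Device* device) : mDevice(device) {}

    // Public entry point: creates new objects, so it must take the device lock.
    void APIBeginComputePass() {
        auto deviceLock(mDevice->GetScopedLock());
        BeginComputePass();
    }

  private:
    // Internal path: relies on the caller already holding the lock.
    void BeginComputePass() { assert(mDevice->IsLockedByCurrentThreadIfNeeded()); }

    Device* mDevice;
};

int main() {
    Device device;
    device.EnableImplicitSynchronization();
    CommandEncoder encoder(&device);
    encoder.APIBeginComputePass();  // Locks, asserts inside, unlocks on scope exit.
    return 0;
}

Keeping the acquisition at the API boundary lets internal helpers stay lock-free while still being checkable with an assertion when implicit device synchronization is enabled; that is the same division of responsibility the hunks below introduce.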
@@ -771,11 +771,15 @@ void CommandEncoder::TrackQueryAvailability(QuerySetBase* querySet, uint32_t que
 // Implementation of the API's command recording methods
 
 ComputePassEncoder* CommandEncoder::APIBeginComputePass(const ComputePassDescriptor* descriptor) {
+    // This function will create new object, need to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginComputePass(descriptor).Detach();
 }
 
 Ref<ComputePassEncoder> CommandEncoder::BeginComputePass(const ComputePassDescriptor* descriptor) {
     DeviceBase* device = GetDevice();
+    ASSERT(device->IsLockedByCurrentThreadIfNeeded());
 
     bool success = mEncodingContext.TryEncode(
         this,
@@ -830,11 +834,15 @@ Ref<ComputePassEncoder> CommandEncoder::BeginComputePass(const ComputePassDescri
 }
 
 RenderPassEncoder* CommandEncoder::APIBeginRenderPass(const RenderPassDescriptor* descriptor) {
+    // This function will create new object, need to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginRenderPass(descriptor).Detach();
 }
 
 Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescriptor* descriptor) {
     DeviceBase* device = GetDevice();
+    ASSERT(device->IsLockedByCurrentThreadIfNeeded());
 
     RenderPassResourceUsageTracker usageTracker;
 
@@ -1047,13 +1055,14 @@ ResultOrError<std::function<void()>> CommandEncoder::ApplyRenderPassWorkarounds(
     descriptor.dimension = wgpu::TextureDimension::e2D;
     descriptor.mipLevelCount = 1;
 
-    // We are creating new resources. Need to lock the Device.
+    // We are creating new resources. Device must already be locked via
+    // APIBeginRenderPass -> ApplyRenderPassWorkarounds.
     // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
     // Command Submit time, so the locking would be removed from here at that point.
     Ref<TextureBase> temporaryResolveTexture;
     Ref<TextureViewBase> temporaryResolveView;
     {
-        auto deviceLock(GetDevice()->GetScopedLock());
+        ASSERT(device->IsLockedByCurrentThreadIfNeeded());
 
         DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
 
@@ -92,10 +92,6 @@ struct DeviceBase::DeprecationWarnings {
 };
 
 namespace {
-bool IsMutexLockedByCurrentThreadIfNeeded(const Ref<Mutex>& mutex) {
-    return mutex == nullptr || mutex->IsLockedByCurrentThread();
-}
-
 struct LoggingCallbackTask : CallbackTask {
   public:
     LoggingCallbackTask() = delete;
@@ -873,7 +869,7 @@ Ref<RenderPipelineBase> DeviceBase::GetCachedRenderPipeline(
 
 Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
     Ref<ComputePipelineBase> computePipeline) {
-    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
+    ASSERT(IsLockedByCurrentThreadIfNeeded());
     auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get());
     if (inserted) {
         computePipeline->SetIsCachedReference();
@@ -885,7 +881,7 @@ Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
 
 Ref<RenderPipelineBase> DeviceBase::AddOrGetCachedRenderPipeline(
     Ref<RenderPipelineBase> renderPipeline) {
-    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
+    ASSERT(IsLockedByCurrentThreadIfNeeded());
     auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get());
     if (inserted) {
         renderPipeline->SetIsCachedReference();
@@ -2048,6 +2044,10 @@ Mutex::AutoLock DeviceBase::GetScopedLock() {
     return Mutex::AutoLock(mMutex.Get());
 }
 
+bool DeviceBase::IsLockedByCurrentThreadIfNeeded() const {
+    return mMutex == nullptr || mMutex->IsLockedByCurrentThread();
+}
+
 IgnoreLazyClearCountScope::IgnoreLazyClearCountScope(DeviceBase* device)
     : mDevice(device), mLazyClearCountForTesting(device->mLazyClearCountForTesting) {}
 
@@ -431,6 +431,12 @@ class DeviceBase : public RefCountedWithExternalCount {
     // AutoLock. It would crash if such thing happens.
     [[nodiscard]] Mutex::AutoLock GetScopedLock();
 
+    // This method returns true if Feature::ImplicitDeviceSynchronization is turned on and the
+    // device is locked by current thread. This method is only enabled when DAWN_ENABLE_ASSERTS is
+    // turned on. Thus it should only be wrapped inside ASSERT() macro. i.e.
+    // ASSERT(device.IsLockedByCurrentThread())
+    bool IsLockedByCurrentThreadIfNeeded() const;
+
     // In the 'Normal' mode, currently recorded commands in the backend normally will be actually
     // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed
     // as late as possible, for example, until the client has explictly issued a submission.
@@ -531,6 +531,7 @@ source_set("end2end_tests_sources") {
     "end2end/MemoryAllocationStressTests.cpp",
     "end2end/MultisampledRenderingTests.cpp",
     "end2end/MultisampledSamplingTests.cpp",
+    "end2end/MultithreadTests.cpp",
     "end2end/NonzeroBufferCreationTests.cpp",
     "end2end/NonzeroTextureCreationTests.cpp",
     "end2end/ObjectCachingTests.cpp",
@@ -0,0 +1,180 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <functional>
+#include <limits>
+#include <memory>
+#include <thread>
+#include <vector>
+
+#include "dawn/common/Constants.h"
+#include "dawn/common/Math.h"
+#include "dawn/tests/DawnTest.h"
+#include "dawn/utils/TestUtils.h"
+#include "dawn/utils/TextureUtils.h"
+#include "dawn/utils/WGPUHelpers.h"
+
+class MultithreadTests : public DawnTest {
+  protected:
+    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
+        std::vector<wgpu::FeatureName> features;
+        // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet.
+        if (!UsesWire()) {
+            features.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
+        }
+        return features;
+    }
+
+    void SetUp() override {
+        DawnTest::SetUp();
+        // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet.
+        DAWN_TEST_UNSUPPORTED_IF(UsesWire());
+
+        // TODO(crbug.com/dawn/1679): OpenGL/D3D11 backend doesn't support thread safe API yet.
+        DAWN_TEST_UNSUPPORTED_IF(IsOpenGL() || IsOpenGLES() || IsD3D11());
+    }
+
+    wgpu::Buffer CreateBuffer(uint32_t size, wgpu::BufferUsage usage) {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = size;
+        descriptor.usage = usage;
+        return device.CreateBuffer(&descriptor);
+    }
+
+    wgpu::Texture CreateTexture(uint32_t width,
+                                uint32_t height,
+                                wgpu::TextureFormat format,
+                                wgpu::TextureUsage usage,
+                                uint32_t mipLevelCount = 1,
+                                uint32_t sampleCount = 1) {
+        wgpu::TextureDescriptor texDescriptor = {};
+        texDescriptor.size = {width, height, 1};
+        texDescriptor.format = format;
+        texDescriptor.usage = usage;
+        texDescriptor.mipLevelCount = mipLevelCount;
+        texDescriptor.sampleCount = sampleCount;
+        return device.CreateTexture(&texDescriptor);
+    }
+
+    void RunInParallel(uint32_t numThreads, const std::function<void(uint32_t)>& workerFunc) {
+        std::vector<std::unique_ptr<std::thread>> threads(numThreads);
+
+        for (uint32_t i = 0; i < threads.size(); ++i) {
+            threads[i] = std::make_unique<std::thread>([i, workerFunc] { workerFunc(i); });
+        }
+
+        for (auto& thread : threads) {
+            thread->join();
+        }
+    }
+};
+
+class MultithreadEncodingTests : public MultithreadTests {};
+
+// Test that encoding render passes in parallel should work
+TEST_P(MultithreadEncodingTests, RenderPassEncodersInParallel) {
+    constexpr uint32_t kRTSize = 16;
+    constexpr uint32_t kNumThreads = 10;
+
+    wgpu::Texture msaaRenderTarget =
+        CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm,
+                      wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc,
+                      /*mipLevelCount=*/1, /*sampleCount=*/4);
+    wgpu::TextureView msaaRenderTargetView = msaaRenderTarget.CreateView();
+
+    wgpu::Texture resolveTarget =
+        CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm,
+                      wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc);
+    wgpu::TextureView resolveTargetView = resolveTarget.CreateView();
+
+    std::vector<wgpu::CommandBuffer> commandBuffers(kNumThreads);
+
+    RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+        // Clear the renderTarget to red.
+        utils::ComboRenderPassDescriptor renderPass({msaaRenderTargetView});
+        renderPass.cColorAttachments[0].resolveTarget = resolveTargetView;
+        renderPass.cColorAttachments[0].clearValue = {1.0f, 0.0f, 0.0f, 1.0f};
+
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.End();
+
+        commandBuffers[index] = encoder.Finish();
+    });
+
+    // Verify that the command buffers executed correctly.
+    for (auto& commandBuffer : commandBuffers) {
+        queue.Submit(1, &commandBuffer);
+
+        EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {0, 0});
+        EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {kRTSize - 1, kRTSize - 1});
+    }
+}
+
+// Test that encoding compute passes in parallel should work
+TEST_P(MultithreadEncodingTests, ComputePassEncodersInParallel) {
+    constexpr uint32_t kNumThreads = 10;
+    constexpr uint32_t kExpected = 0xFFFFFFFFu;
+
+    wgpu::ShaderModule module = utils::CreateShaderModule(device, R"(
+        @group(0) @binding(0) var<storage, read_write> output : u32;
+
+        @compute @workgroup_size(1, 1, 1)
+        fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u) {
+            output = 0xFFFFFFFFu;
+        })");
+    wgpu::ComputePipelineDescriptor csDesc;
+    csDesc.compute.module = module;
+    csDesc.compute.entryPoint = "main";
+    auto pipeline = device.CreateComputePipeline(&csDesc);
+
+    wgpu::Buffer dstBuffer =
+        CreateBuffer(sizeof(uint32_t), wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc |
+                                           wgpu::BufferUsage::CopyDst);
+    wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                     {
+                                                         {0, dstBuffer, 0, sizeof(uint32_t)},
+                                                     });
+
+    std::vector<wgpu::CommandBuffer> commandBuffers(kNumThreads);
+
+    RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroup);
+        pass.DispatchWorkgroups(1, 1, 1);
+        pass.End();
+
+        commandBuffers[index] = encoder.Finish();
+    });
+
+    // Verify that the command buffers executed correctly.
+    for (auto& commandBuffer : commandBuffers) {
+        constexpr uint32_t kSentinelData = 0;
+        queue.WriteBuffer(dstBuffer, 0, &kSentinelData, sizeof(kSentinelData));
+        queue.Submit(1, &commandBuffer);
+
+        EXPECT_BUFFER_U32_EQ(kExpected, dstBuffer, 0);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(MultithreadEncodingTests,
+                      D3D11Backend(),
+                      D3D12Backend(),
+                      MetalBackend(),
+                      OpenGLBackend(),
+                      OpenGLESBackend(),
+                      VulkanBackend());
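The new suites are ordinary gtest parameterized tests, so locally they can be selected with a filter such as --gtest_filter=*MultithreadEncodingTests* on the end2end test binary (dawn_end2end_tests in a typical build; the exact target name and output directory depend on the local build configuration).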