Lock ApiObjectBase::APIRelease()

The normal behavior of ApiObjectBase's APIRelease(), which only locks the
device when the last ref is dropped, is not thread safe if the object is
cached as a raw pointer by the device. Examples of cached objects: bind
group layout, pipeline, sampler, shader module.

The following scenario could happen:
 - thread A:
    - shaderModuleA.APIRelease()
    - shaderModuleA.refCount.Decrement() == true (ref count has reached zero)
    - going to call shaderModuleA.LockAndDeleteThis().
 - thread B:
    - device.CreateShaderModule().
    - lock()
    - device.GetOrCreateShaderModule()
    - shaderModuleA is in the cache, so return it.
    - unlock()
 - thread A:
    - starting to call shaderModuleA.LockAndDeleteThis()
    - lock()
    - erase shaderModuleA from the cache.
    - delete shaderModuleA.
    - unlock()

This CL fixes this bug by locking the entire APIRelease() method until
we find a better solution.

Bug: dawn:1769
Change-Id: I1161af66fc24f3a7bafee22b9614b783e0dc4503
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/128441
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
This commit is contained in:
Le Hoang Quyen 2023-04-24 17:33:37 +00:00 committed by Dawn LUCI CQ
parent 0c33c143dc
commit c5c2fce3ed
3 changed files with 119 additions and 0 deletions

View File

@ -86,6 +86,15 @@ void ApiObjectBase::APISetLabel(const char* label) {
SetLabelImpl();
}
void ApiObjectBase::APIRelease() {
    // TODO(crbug.com/dawn/1769): The whole of APIRelease() has to run under the device
    // lock for now. The device caches some objects (bind group layouts, pipelines,
    // samplers, shader modules) as raw pointers, so a cache lookup on another thread
    // could race with the ref count decrement performed below if no lock were held.
    // This is a temporary solution until the cache's implementation is improved.
    auto deviceLock = GetDevice()->GetScopedLockSafeForDelete();
    Release();
}
// Returns the object's debug label by const reference (empty if never set).
const std::string& ApiObjectBase::GetLabel() const {
    return mLabel;
}

View File

@ -94,6 +94,7 @@ class ApiObjectBase : public ObjectBase, public LinkNode<ApiObjectBase> {
// Dawn API
void APISetLabel(const char* label);
void APIRelease();
protected:
// Overriding of the RefCounted's DeleteThis function ensures that instances of objects

View File

@ -91,6 +91,108 @@ class MultithreadTests : public DawnTest {
dawn::Mutex mutex;
};
// Fixture for tests that hammer the device's object caches (shader modules, bind
// group layouts, pipelines, samplers) from many threads at once.
class MultithreadCachingTests : public MultithreadTests {
  protected:
    // Returns a trivial compute shader module. Creating it repeatedly with the same
    // WGSL source is expected to hit the device's shader module cache.
    wgpu::ShaderModule CreateComputeShaderModule() const {
        return utils::CreateShaderModule(device, R"(
struct SSBO {
value : u32
}
@group(0) @binding(0) var<storage, read_write> ssbo : SSBO;
@compute @workgroup_size(1) fn main() {
ssbo.value = 1;
})");
    }
    // Returns a single-entry bind group layout (storage buffer, compute-visible);
    // bind group layouts are also cached by the device (see commit message).
    wgpu::BindGroupLayout CreateComputeBindGroupLayout() const {
        return utils::MakeBindGroupLayout(
            device, {
                        {0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage},
                    });
    }
};
// Test that creating the same shader module (which returns the cached module) and
// releasing it on multiple threads won't race.
TEST_P(MultithreadCachingTests, RefAndReleaseCachedShaderModulesInParallel) {
    RunInParallel(100, [this](uint32_t) {
        wgpu::ShaderModule module = CreateComputeShaderModule();
        EXPECT_NE(nullptr, module.Get());
    });
}
// Test that creating the same compute pipeline (which returns the cached pipeline) and
// releasing it on multiple threads won't race.
TEST_P(MultithreadCachingTests, RefAndReleaseCachedComputePipelinesInParallel) {
    wgpu::ShaderModule shaderModule = CreateComputeShaderModule();
    wgpu::BindGroupLayout bindGroupLayout = CreateComputeBindGroupLayout();
    wgpu::PipelineLayout layout = utils::MakePipelineLayout(device, {bindGroupLayout});

    // All threads create pipelines from this single shared descriptor.
    wgpu::ComputePipelineDescriptor descriptor;
    descriptor.layout = layout;
    descriptor.compute.module = shaderModule;
    descriptor.compute.entryPoint = "main";

    RunInParallel(100, [&, this](uint32_t) {
        wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&descriptor);
        EXPECT_NE(nullptr, pipeline.Get());
    });
}
// Test that creating the same bind group layout (which returns the cached layout) and
// releasing it on multiple threads won't race.
TEST_P(MultithreadCachingTests, RefAndReleaseCachedBindGroupLayoutsInParallel) {
    RunInParallel(100, [&, this](uint32_t) {
        wgpu::BindGroupLayout bgl = CreateComputeBindGroupLayout();
        EXPECT_NE(nullptr, bgl.Get());
    });
}
// Test that creating the same pipeline layout (which returns the cached layout) and
// releasing it on multiple threads won't race.
TEST_P(MultithreadCachingTests, RefAndReleaseCachedPipelineLayoutsInParallel) {
    wgpu::BindGroupLayout bindGroupLayout = CreateComputeBindGroupLayout();
    RunInParallel(100, [&, this](uint32_t) {
        wgpu::PipelineLayout layout = utils::MakePipelineLayout(device, {bindGroupLayout});
        EXPECT_NE(nullptr, layout.Get());
    });
}
// Test that creating the same render pipeline (which will return the cached pipeline)
// and releasing it on multiple threads won't race.
TEST_P(MultithreadCachingTests, RefAndReleaseCachedRenderPipelinesInParallel) {
    utils::ComboRenderPipelineDescriptor renderPipelineDescriptor;
    // Minimal passthrough vertex shader and constant-color fragment shader; every
    // thread below builds its pipeline from these shared modules.
    wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"(
@vertex fn main() -> @builtin(position) vec4f {
return vec4f(0.0, 0.0, 0.0, 1.0);
})");
    wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"(
@fragment fn main() -> @location(0) vec4f {
return vec4f(0.0, 1.0, 0.0, 1.0);
})");
    renderPipelineDescriptor.vertex.module = vsModule;
    renderPipelineDescriptor.cFragment.module = fsModule;
    renderPipelineDescriptor.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
    renderPipelineDescriptor.primitive.topology = wgpu::PrimitiveTopology::PointList;
    RunInParallel(100, [&, this](uint32_t) {
        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDescriptor);
        EXPECT_NE(nullptr, pipeline.Get());
    });
}
// Test that creating the same sampler (which returns the cached sampler) and releasing
// it on multiple threads won't race.
TEST_P(MultithreadCachingTests, RefAndReleaseCachedSamplersInParallel) {
    wgpu::SamplerDescriptor samplerDesc = {};
    RunInParallel(100, [&, this](uint32_t) {
        wgpu::Sampler sampler = device.CreateSampler(&samplerDesc);
        EXPECT_NE(nullptr, sampler.Get());
    });
}
// Fixture for the parallel encoding tests that follow; adds nothing on top of the base.
class MultithreadEncodingTests : public MultithreadTests {};
// Test that encoding render passes in parallel should work
@ -388,6 +490,13 @@ TEST_P(MultithreadTimestampQueryTests, ResolveQuerySets_InParallel) {
} // namespace
// Instantiate the caching tests on each backend listed below.
DAWN_INSTANTIATE_TEST(MultithreadCachingTests,
                      D3D12Backend(),
                      MetalBackend(),
                      OpenGLBackend(),
                      OpenGLESBackend(),
                      VulkanBackend());
DAWN_INSTANTIATE_TEST(MultithreadEncodingTests,
D3D12Backend(),
MetalBackend(),