Revert "Disable frontend cache when implicit device sync is on."

This reverts commit 8cc6205bf7cfba2816052a0cc5d81debd8d52028. Reason for revert: Graphite actually reuses the bind groups between draw calls using different pipelines and this change prevents it from happening. Original change's description: > Disable frontend cache when implicit device sync is on. > > Normal behavior of ApiObjectBase's APIRelease() which only locks the > device when last ref dropped is not thread safe if the object is cached > as raw pointers by the device. Example of cached objects: bind group > layout, pipeline, sampler, shader module. > > The following scenario could happen: > - thread A: > - shaderModuleA.APIRealease() > - shaderModuleA.refCount.Decrement() == true (ref count has reached zero) > - going to call shaderModuleA.LockAndDeleteThis(). > - thread B: > - device.CreateShaderModule(). > - lock() > - device.GetOrCreateShaderModule() > - shaderModuleA is in the cache, so return it. > - unlock() > - thread A: > - starting to call shaderModuleA.LockAndDeleteThis() > - lock() > - erase shaderModuleA from the cache. > - delete shaderModuleA. > - unlock() > > This CL disables caching when ImplicitDeviceSynchronization is turned on > until we find a better solution. > > Bug: dawn:1769 > Change-Id: Ideb2a717ece0a40e18bd1c2bef00817262bd25da > Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127900 > Commit-Queue: Quyen Le <lehoangquyen@chromium.org> > Reviewed-by: Austin Eng <enga@chromium.org> > Kokoro: Kokoro <noreply+kokoro@google.com> TBR=cwallez@chromium.org,enga@chromium.org,noreply+kokoro@google.com,dawn-scoped@luci-project-accounts.iam.gserviceaccount.com,lehoangquyen@chromium.org Change-Id: Ib13bba8005402d06963865fae919388a91e718f0 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: dawn:1769 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/128440 Reviewed-by: Austin Eng <enga@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
2025-07-30 17:05:48 +00:00 · 2023-04-21 03:10:12 +00:00 · 2023-04-21 03:10:12 +00:00 · 87f51237b4
commit 87f51237b4
parent cf54bb8c2a
3 changed files with 8 additions and 182 deletions
--- a/docs/dawn/features/implicit_device_sync.md
+++ b/docs/dawn/features/implicit_device_sync.md
@ -1,31 +0,0 @@
-# Implicit Device Synchronization
-
-`implicit-device-sync` is an experimental feature that offers multithreading support for `dawn_native`.
-
-Additional functionality:
- - `wgpu::Device` and most of its child objects are safe to be used on multiple threads, including:
-   - `wgpu::Queue`.
-   - `wgpu::BindGroup`.
-   - `wgpu::BindGroupLayout`.
-   - `wgpu::Buffer`. See Notes.
-   - `wgpu::Texture`.
-   - `wgpu::TextureView`.
-   - `wgpu::ComputePipeline`.
-   - `wgpu::RenderPipeline`.
-   - `wgpu::PipelineLayout`.
-   - `wgpu::Sampler`.
-   - `wgpu::ShaderModule`.
-   - `wgpu::SwapChain`.
- - These objects are *not* safe to be used concurrently:
-   - `wgpu:CommandEncoder`.
-   - `wgpu:ComputePassEncoder`.
-   - `wgpu:RenderPassEncoder`.
-   - `wgpu:RenderBundleEncoder`.
-   - Except that the creation, referencing, releasing and destruction of these objects are guaranteed to be thread safe.
-
-
-Notes:
- - This feature is experimental, meaning currently it has some limitations:
-   - For `wgpu::Buffer` to be safe on multiple threads. Proper manual synchronization must be done by users to ensure that the buffer is not currently being mapped and being used by any render/compute pass encoder at the same time on different threads.
-   - Enabling this feature will disable the compatibility between `wgpu::BindGroupLayout`. That means the two different `wgpu::BindGroupLayout` objects created with equivalent `wgpu::BindGroupLayoutDescriptor` won't be considered "group-equivalent" anymore. You can only use a `wgpu::BindGroup` with a `wgpu::RenderPipeline`/`wgpu::ComputePipeline` if they are both created with the same `wgpu::BindGroupLayout`.
-     - Consider using `wgpu::RenderPipeline::GetBindGroupLayout()`/`wgpu::ComputePipeline::GetBindGroupLayout()` when creating a `wgpu::BindGroup`.
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@ -818,14 +818,9 @@ ResultOrError<Ref<BindGroupLayoutBase>> DeviceBase::GetOrCreateBindGroupLayout(
        result = *iter;
    } else {
        DAWN_TRY_ASSIGN(result, CreateBindGroupLayoutImpl(descriptor, pipelineCompatibilityToken));
+        result->SetIsCachedReference();
        result->SetContentHash(blueprintHash);
-        // TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
-        // would have been racing with the object's ref counting since it only stores raw pointers
-        // to the objects.
-        if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
-            result->SetIsCachedReference();
-            mCaches->bindGroupLayouts.insert(result.Get());
-        }
+        mCaches->bindGroupLayouts.insert(result.Get());
    }

    return std::move(result);
@ -875,13 +870,6 @@ Ref<RenderPipelineBase> DeviceBase::GetCachedRenderPipeline(
 Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
    Ref<ComputePipelineBase> computePipeline) {
    ASSERT(IsLockedByCurrentThreadIfNeeded());
-    // TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache would
-    // have been racing with the object's ref counting since it only stores raw pointers to the
-    // objects.
-    if (HasFeature(Feature::ImplicitDeviceSynchronization)) {
-        return computePipeline;
-    }
-
    auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get());
    if (inserted) {
        computePipeline->SetIsCachedReference();
@ -894,13 +882,6 @@ Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
 Ref<RenderPipelineBase> DeviceBase::AddOrGetCachedRenderPipeline(
    Ref<RenderPipelineBase> renderPipeline) {
    ASSERT(IsLockedByCurrentThreadIfNeeded());
-    // TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache would
-    // have been racing with the object's ref counting since it only stores raw pointers to the
-    // objects.
-    if (HasFeature(Feature::ImplicitDeviceSynchronization)) {
-        return renderPipeline;
-    }
-
    auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get());
    if (inserted) {
        renderPipeline->SetIsCachedReference();
@ -958,14 +939,9 @@ ResultOrError<Ref<PipelineLayoutBase>> DeviceBase::GetOrCreatePipelineLayout(
        result = *iter;
    } else {
        DAWN_TRY_ASSIGN(result, CreatePipelineLayoutImpl(descriptor));
+        result->SetIsCachedReference();
        result->SetContentHash(blueprintHash);
-        // TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
-        // would have been racing with the object's ref counting since it only stores raw pointers
-        // to the objects.
-        if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
-            result->SetIsCachedReference();
-            mCaches->pipelineLayouts.insert(result.Get());
-        }
+        mCaches->pipelineLayouts.insert(result.Get());
    }

    return std::move(result);
@ -996,14 +972,9 @@ ResultOrError<Ref<SamplerBase>> DeviceBase::GetOrCreateSampler(
        result = *iter;
    } else {
        DAWN_TRY_ASSIGN(result, CreateSamplerImpl(descriptor));
+        result->SetIsCachedReference();
        result->SetContentHash(blueprintHash);
-        // TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
-        // would have been racing with the object's ref counting since it only stores raw pointers
-        // to the objects.
-        if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
-            result->SetIsCachedReference();
-            mCaches->samplers.insert(result.Get());
-        }
+        mCaches->samplers.insert(result.Get());
    }

    return std::move(result);
@ -1041,14 +1012,9 @@ ResultOrError<Ref<ShaderModuleBase>> DeviceBase::GetOrCreateShaderModule(
        }
        DAWN_TRY_ASSIGN(result,
                        CreateShaderModuleImpl(descriptor, parseResult, compilationMessages));
+        result->SetIsCachedReference();
        result->SetContentHash(blueprintHash);
-        // TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
-        // would have been racing with the object's ref counting since it only stores raw pointers
-        // to the objects.
-        if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
-            result->SetIsCachedReference();
-            mCaches->shaderModules.insert(result.Get());
-        }
+        mCaches->shaderModules.insert(result.Get());
    }

    return std::move(result);
--- a/src/dawn/tests/end2end/MultithreadTests.cpp
+++ b/src/dawn/tests/end2end/MultithreadTests.cpp
@ -91,108 +91,6 @@ class MultithreadTests : public DawnTest {
    dawn::Mutex mutex;
 };

-class MultithreadCachingTests : public MultithreadTests {
-  protected:
-    wgpu::ShaderModule CreateComputeShaderModule() const {
-        return utils::CreateShaderModule(device, R"(
-            struct SSBO {
-                value : u32
-            }
-            @group(0) @binding(0) var<storage, read_write> ssbo : SSBO;
-
-            @compute @workgroup_size(1) fn main() {
-                ssbo.value = 1;
-            })");
-    }
-
-    wgpu::BindGroupLayout CreateComputeBindGroupLayout() const {
-        return utils::MakeBindGroupLayout(
-            device, {
-                        {0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage},
-                    });
-    }
-};
-
-// Test that creating a same shader module (which will return the cached shader module) and release
-// it on multiple threads won't race.
-TEST_P(MultithreadCachingTests, RefAndReleaseCachedShaderModulesInParallel) {
-    RunInParallel(100, [this](uint32_t) {
-        wgpu::ShaderModule csModule = CreateComputeShaderModule();
-        EXPECT_NE(nullptr, csModule.Get());
-    });
-}
-
-// Test that creating a same compute pipeline (which will return the cached pipeline) and release it
-// on multiple threads won't race.
-TEST_P(MultithreadCachingTests, RefAndReleaseCachedComputePipelinesInParallel) {
-    wgpu::ShaderModule csModule = CreateComputeShaderModule();
-    wgpu::BindGroupLayout bglayout = CreateComputeBindGroupLayout();
-    wgpu::PipelineLayout pipelineLayout = utils::MakePipelineLayout(device, {bglayout});
-
-    wgpu::ComputePipelineDescriptor csDesc;
-    csDesc.compute.module = csModule;
-    csDesc.compute.entryPoint = "main";
-    csDesc.layout = pipelineLayout;
-
-    RunInParallel(100, [&, this](uint32_t) {
-        wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);
-        EXPECT_NE(nullptr, pipeline.Get());
-    });
-}
-
-// Test that creating a same bind group layout (which will return the cached layout) and
-// release it on multiple threads won't race.
-TEST_P(MultithreadCachingTests, RefAndReleaseCachedBindGroupLayoutsInParallel) {
-    RunInParallel(100, [&, this](uint32_t) {
-        wgpu::BindGroupLayout layout = CreateComputeBindGroupLayout();
-        EXPECT_NE(nullptr, layout.Get());
-    });
-}
-
-// Test that creating a same pipeline layout (which will return the cached layout) and
-// release it on multiple threads won't race.
-TEST_P(MultithreadCachingTests, RefAndReleaseCachedPipelineLayoutsInParallel) {
-    wgpu::BindGroupLayout bglayout = CreateComputeBindGroupLayout();
-
-    RunInParallel(100, [&, this](uint32_t) {
-        wgpu::PipelineLayout pipelineLayout = utils::MakePipelineLayout(device, {bglayout});
-        EXPECT_NE(nullptr, pipelineLayout.Get());
-    });
-}
-
-// Test that creating a same render pipeline (which will return the cached pipeline) and release it
-// on multiple threads won't race.
-TEST_P(MultithreadCachingTests, RefAndReleaseCachedRenderPipelinesInParallel) {
-    utils::ComboRenderPipelineDescriptor renderPipelineDescriptor;
-    wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"(
-        @vertex fn main() -> @builtin(position) vec4f {
-            return vec4f(0.0, 0.0, 0.0, 1.0);
-        })");
-    wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"(
-        @fragment fn main() -> @location(0) vec4f {
-            return vec4f(0.0, 1.0, 0.0, 1.0);
-        })");
-    renderPipelineDescriptor.vertex.module = vsModule;
-    renderPipelineDescriptor.cFragment.module = fsModule;
-    renderPipelineDescriptor.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
-    renderPipelineDescriptor.primitive.topology = wgpu::PrimitiveTopology::PointList;
-
-    RunInParallel(100, [&, this](uint32_t) {
-        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDescriptor);
-        EXPECT_NE(nullptr, pipeline.Get());
-    });
-}
-
-// Test that creating a same sampler pipeline (which will return the cached sampler) and release it
-// on multiple threads won't race.
-TEST_P(MultithreadCachingTests, RefAndReleaseCachedSamplersInParallel) {
-    wgpu::SamplerDescriptor desc = {};
-    RunInParallel(100, [&, this](uint32_t) {
-        wgpu::Sampler sampler = device.CreateSampler(&desc);
-        EXPECT_NE(nullptr, sampler.Get());
-    });
-}
-
 class MultithreadEncodingTests : public MultithreadTests {};

 // Test that encoding render passes in parallel should work
@ -490,13 +388,6 @@ TEST_P(MultithreadTimestampQueryTests, ResolveQuerySets_InParallel) {

 }  // namespace

-DAWN_INSTANTIATE_TEST(MultithreadCachingTests,
-                      D3D12Backend(),
-                      MetalBackend(),
-                      OpenGLBackend(),
-                      OpenGLESBackend(),
-                      VulkanBackend());
-
 DAWN_INSTANTIATE_TEST(MultithreadEncodingTests,
                      D3D12Backend(),
                      MetalBackend(),