Make child objects ref the device and add a mechanism to break cycles

Update child objects to ref the device. This allows them to outlive
the device, making the implementation more robust such that it is OK
to drop the device before other objects.

Dropping the last external reference to the device is currently an
implicit device.destroy(). This destruction breaks possible ref cycles
where the device refs internal objects which have a back ref to the
device.
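To illustrate the semantics, a minimal standalone sketch in plain C++ (not Dawn's actual classes or names): the device keeps a total refcount (external plus internal refs) and a separate external refcount, and destroys itself as soon as the external count reaches zero, even though internal back-refs may keep the allocation alive longer.

    #include <cstdio>

    // Hypothetical stand-in for the device; the real code uses RefCounted and
    // RefCountedWithExternalCount (see the diff below) rather than raw counters.
    class FakeDevice {
      public:
        void AddExternalRef() { ++mExternalRefs; ++mTotalRefs; }
        void ReleaseExternalRef() {
            if (--mExternalRefs == 0) {
                Destroy();  // implicit destroy: drop cached child refs, breaking cycles
            }
            ReleaseInternalRef();  // an external ref also counts toward the total
        }
        void AddInternalRef() { ++mTotalRefs; }
        void ReleaseInternalRef() {
            if (--mTotalRefs == 0) {
                delete this;  // freed only once all refs, including children's, are gone
            }
        }

      private:
        void Destroy() { std::printf("device destroyed; cached child refs cleared\n"); }
        int mExternalRefs = 1;  // the initial ref is the external (application) ref
        int mTotalRefs = 1;
    };

    int main() {
        FakeDevice* device = new FakeDevice();  // application holds the initial external ref
        device->AddInternalRef();               // a child object takes a back-ref to the device
        device->ReleaseExternalRef();           // application drops its handle: Destroy() runs now
        device->ReleaseInternalRef();           // the child goes away later: memory is freed
    }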

Bug: dawn:1164
Change-Id: I02d8e32a21dcc5f05e531bd690baac4a234b5f6b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/90360
Reviewed-by: Loko Kung <lokokung@google.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
Austin Eng 2022-05-20 16:57:01 +00:00 committed by Dawn LUCI CQ
parent ece20a7948
commit a526167e33
14 changed files with 660 additions and 28 deletions

View File

@ -22,15 +22,15 @@ static constexpr size_t kPayloadBits = 1;
static constexpr uint64_t kPayloadMask = (uint64_t(1) << kPayloadBits) - 1;
static constexpr uint64_t kRefCountIncrement = (uint64_t(1) << kPayloadBits);
RefCounted::RefCounted(uint64_t payload) : mRefCount(kRefCountIncrement + payload) {
RefCount::RefCount(uint64_t payload) : mRefCount(kRefCountIncrement + payload) {
ASSERT((payload & kPayloadMask) == payload);
}
uint64_t RefCounted::GetRefCountForTesting() const {
uint64_t RefCount::GetValueForTesting() const {
return mRefCount >> kPayloadBits;
}
uint64_t RefCounted::GetRefCountPayload() const {
uint64_t RefCount::GetPayload() const {
// We only care about the payload bits of the refcount. These never change after
// initialization so we can use the relaxed memory order. The order doesn't guarantee
// anything except the atomicity of the load, which is enough since any past values of the
@ -38,7 +38,7 @@ uint64_t RefCounted::GetRefCountPayload() const {
return kPayloadMask & mRefCount.load(std::memory_order_relaxed);
}
void RefCounted::Reference() {
void RefCount::Increment() {
ASSERT((mRefCount & ~kPayloadMask) != 0);
// The relaxed ordering guarantees only the atomicity of the update, which is enough here
@ -49,7 +49,7 @@ void RefCounted::Reference() {
mRefCount.fetch_add(kRefCountIncrement, std::memory_order_relaxed);
}
void RefCounted::Release() {
bool RefCount::Decrement() {
ASSERT((mRefCount & ~kPayloadMask) != 0);
// The release fence here is to make sure all accesses to the object on a thread A
@ -69,18 +69,32 @@ void RefCounted::Release() {
// memory barrier, when an acquire load on mRefCount (using the `ldar` instruction)
// should be enough and could end up being faster.
std::atomic_thread_fence(std::memory_order_acquire);
return true;
}
return false;
}
RefCounted::RefCounted(uint64_t payload) : mRefCount(payload) {}
RefCounted::~RefCounted() = default;
uint64_t RefCounted::GetRefCountForTesting() const {
return mRefCount.GetValueForTesting();
}
uint64_t RefCounted::GetRefCountPayload() const {
return mRefCount.GetPayload();
}
void RefCounted::Reference() {
mRefCount.Increment();
}
void RefCounted::Release() {
if (mRefCount.Decrement()) {
DeleteThis();
}
}
void RefCounted::APIReference() {
Reference();
}
void RefCounted::APIRelease() {
Release();
}
void RefCounted::DeleteThis() {
delete this;
}
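For context, a rough illustration of the payload packing the extracted RefCount class keeps (assumed semantics, inferred from the code above): the low kPayloadBits bits store immutable per-object flags, and the actual count lives in the remaining high bits of the same atomic word.

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    static constexpr uint64_t kPayloadBits = 1;
    static constexpr uint64_t kPayloadMask = (uint64_t(1) << kPayloadBits) - 1;
    static constexpr uint64_t kRefCountIncrement = uint64_t(1) << kPayloadBits;

    int main() {
        // Start with one reference and a payload of 1 (e.g. an "is error object" flag).
        std::atomic<uint64_t> refCount(kRefCountIncrement + 1);

        // The payload bits never change, so a relaxed load is sufficient to read them.
        assert((refCount.load(std::memory_order_relaxed) & kPayloadMask) == 1);

        // Adding a ref bumps the counter by the increment and leaves the payload intact.
        refCount.fetch_add(kRefCountIncrement, std::memory_order_relaxed);
        assert((refCount.load(std::memory_order_relaxed) >> kPayloadBits) == 2);

        // Releasing a ref; the last release sees the count (ignoring the payload) hit zero.
        uint64_t previous = refCount.fetch_sub(kRefCountIncrement, std::memory_order_acq_rel);
        bool wasLast = (previous & ~kPayloadMask) == kRefCountIncrement;
        assert(!wasLast);  // one reference is still outstanding
    }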

View File

@ -20,6 +20,24 @@
#include "dawn/common/RefBase.h"
class RefCount {
public:
// Create a refcount with a payload. The refcount is initialized to one.
explicit RefCount(uint64_t payload = 0);
uint64_t GetValueForTesting() const;
uint64_t GetPayload() const;
// Add a reference.
void Increment();
// Remove a reference. Returns true if this was the last reference.
bool Decrement();
private:
std::atomic<uint64_t> mRefCount;
};
class RefCounted {
public:
explicit RefCounted(uint64_t payload = 0);
@ -30,16 +48,17 @@ class RefCounted {
void Reference();
void Release();
void APIReference();
void APIRelease();
void APIReference() { Reference(); }
void APIRelease() { Release(); }
protected:
virtual ~RefCounted() = default;
virtual ~RefCounted();
// A Derived class may override this if they require a custom deleter.
virtual void DeleteThis();
private:
std::atomic<uint64_t> mRefCount;
RefCount mRefCount;
};
template <typename T>

View File

@ -155,9 +155,7 @@ source_set("sources") {
libs = []
data_deps = []
configs += [
":internal",
]
configs += [ ":internal" ]
# Enable -Wglobal-constructors here only, instead of in internal_config,
# because gtest and some other targets don't build with it.
@ -286,6 +284,8 @@ source_set("sources") {
"QuerySet.h",
"Queue.cpp",
"Queue.h",
"RefCountedWithExternalCount.cpp",
"RefCountedWithExternalCount.h",
"RenderBundle.cpp",
"RenderBundle.h",
"RenderBundleEncoder.cpp",

View File

@ -141,6 +141,8 @@ target_sources(dawn_native PRIVATE
"QuerySet.h"
"Queue.cpp"
"Queue.h"
"RefCountedWithExternalCount.cpp"
"RefCountedWithExternalCount.h"
"RenderBundle.cpp"
"RenderBundle.h"
"RenderBundleEncoder.cpp"

View File

@ -288,6 +288,39 @@ MaybeError DeviceBase::Initialize(Ref<QueueBase> defaultQueue) {
return {};
}
void DeviceBase::WillDropLastExternalRef() {
// DeviceBase uses RefCountedWithExternalCount to break refcycles.
//
// DeviceBase holds multiple Refs to various API objects (pipelines, buffers, etc.) which are
// used to implement various device-level facilities. These objects are cached on the device,
// so we want to keep them around instead of making transient allocations. However, many of
// the objects also hold a Ref<Device> back to their parent device.
//
// In order to break this cycle and prevent leaks, when the application drops the last external
// ref and WillDropLastExternalRef is called, the device clears out any member refs to API
// objects that hold back-refs to the device - thus breaking any reference cycles.
//
// Currently, this is done by calling Destroy on the device to cease all in-flight work and
// drop references to internal objects. We may want to lift this in the future, but it would
// make things more complex because there might be pending tasks which hold a ref back to the
// device - either directly or indirectly. We would need to ensure those tasks don't create new
// reference cycles, and we would need to continuously try draining the pending tasks to clear
// out all remaining refs.
Destroy();
// Reset the callbacks since, after dropping the last external reference, the
// application may have freed any device-scope memory needed to run the callback.
mUncapturedErrorCallback = [](WGPUErrorType, char const* message, void*) {
dawn::WarningLog() << "Uncaptured error after last external device reference dropped.\n"
<< message;
};
mDeviceLostCallback = [](WGPUDeviceLostReason, char const* message, void*) {
dawn::WarningLog() << "Device lost after last external device reference dropped.\n"
<< message;
};
}
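The cycle being broken here can be shown with a small self-contained sketch (std::shared_ptr stands in for Dawn's Ref<T>, and the member names are illustrative):

    #include <memory>

    struct Device;

    struct CachedObject {
        std::shared_ptr<Device> device;  // back-ref keeping the device alive
    };

    struct Device {
        std::shared_ptr<CachedObject> cached;  // device-owned cache entry

        // Stand-in for DeviceBase::Destroy(): clearing the member refs that point at
        // objects holding back-refs is what breaks the cycle.
        void Destroy() { cached = nullptr; }
    };

    int main() {
        auto device = std::make_shared<Device>();
        device->cached = std::make_shared<CachedObject>();
        device->cached->device = device;  // cycle: device -> cached -> device

        device->Destroy();  // without this, resetting |device| below would leak both objects
        device.reset();     // now both objects are freed
    }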
void DeviceBase::DestroyObjects() {
// List of object types in reverse "dependency" order so we can iterate and delete the
// objects safely. We define dependent here such that if B has a ref to A, then B depends on
@ -345,6 +378,15 @@ void DeviceBase::Destroy() {
return;
}
// This function may be called re-entrantly inside APITick(). Tick triggers callbacks
// inside which the application may destroy the device. Thus, we should be careful not
// to delete objects that are needed inside Tick after callbacks have been called.
// - mCallbackTaskManager is not deleted since we flush the callback queue at the end
// of Tick(). Note: that flush should always be empty since all callbacks are drained
// inside Destroy() so there should be no outstanding tasks holding objects alive.
// - Similarly, mAsyncTaskManager is not deleted since we use it to return a status
// from Tick() whether or not there is any more pending work.
// Skip handling device facilities if they haven't even been created (or failed doing so)
if (mState != State::BeingCreated) {
// The device is being destroyed so it will be lost, call the application callback.
@ -413,11 +455,10 @@ void DeviceBase::Destroy() {
mState = State::Disconnected;
mDynamicUploader = nullptr;
mCallbackTaskManager = nullptr;
mAsyncTaskManager = nullptr;
mEmptyBindGroupLayout = nullptr;
mInternalPipelineStore = nullptr;
mExternalTexturePlaceholderView = nullptr;
mQueue = nullptr;
AssumeCommandsComplete();
@ -1162,6 +1203,9 @@ BufferBase* DeviceBase::APICreateErrorBuffer() {
// Returns true if future ticking is needed.
bool DeviceBase::APITick() {
// Tick may trigger callbacks which drop a ref to the device itself. Hold a Ref to ourselves
// to avoid deleting |this| in the middle of this function call.
Ref<DeviceBase> self(this);
if (IsLost() || ConsumedError(Tick())) {
return false;
}
@ -1334,6 +1378,7 @@ void DeviceBase::APIInjectError(wgpu::ErrorType type, const char* message) {
}
QueueBase* DeviceBase::GetQueue() const {
ASSERT(mQueue != nullptr);
return mQueue.Get();
}
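The self-ref idiom used at the top of APITick() can be illustrated with a standalone sketch (std::shared_ptr and std::function stand in for Dawn's Ref<T> and callback machinery; names are illustrative):

    #include <functional>
    #include <memory>

    class Device : public std::enable_shared_from_this<Device> {
      public:
        std::function<void()> callback;  // set by the "application"

        void Tick() {
            // Keep ourselves alive for the duration of the call; the callback fired
            // below may drop the application's last reference to this device.
            std::shared_ptr<Device> self = shared_from_this();
            if (callback) {
                callback();  // may destroy the caller's handle to this device
            }
            // Still safe to touch members here: |self| keeps the object alive.
            callback = nullptr;
        }
    };

    int main() {
        auto device = std::make_shared<Device>();
        // The callback drops the application's only reference to the device.
        device->callback = [&device]() { device.reset(); };
        device->Tick();  // would be a use-after-free without the self ref
    }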

View File

@ -32,6 +32,7 @@
#include "dawn/native/Limits.h"
#include "dawn/native/ObjectBase.h"
#include "dawn/native/ObjectType_autogen.h"
#include "dawn/native/RefCountedWithExternalCount.h"
#include "dawn/native/StagingBuffer.h"
#include "dawn/native/Toggles.h"
@ -57,7 +58,7 @@ struct ShaderModuleParseResult;
using WGSLExtensionSet = std::unordered_set<std::string>;
class DeviceBase : public RefCounted {
class DeviceBase : public RefCountedWithExternalCount {
public:
DeviceBase(AdapterBase* adapter, const DeviceDescriptor* descriptor);
~DeviceBase() override;
@ -404,6 +405,8 @@ class DeviceBase : public RefCounted {
void IncrementLastSubmittedCommandSerial();
private:
void WillDropLastExternalRef() override;
virtual ResultOrError<Ref<BindGroupBase>> CreateBindGroupImpl(
const BindGroupDescriptor* descriptor) = 0;
virtual ResultOrError<Ref<BindGroupLayoutBase>> CreateBindGroupLayoutImpl(

View File

@ -27,7 +27,7 @@ ObjectBase::ObjectBase(DeviceBase* device) : RefCounted(kNotErrorPayload), mDevi
ObjectBase::ObjectBase(DeviceBase* device, ErrorTag) : RefCounted(kErrorPayload), mDevice(device) {}
DeviceBase* ObjectBase::GetDevice() const {
return mDevice;
return mDevice.Get();
}
bool ObjectBase::IsError() const {

View File

@ -37,8 +37,8 @@ class ObjectBase : public RefCounted {
bool IsError() const;
private:
// Pointer to owning device.
DeviceBase* mDevice;
// Ref to owning device.
Ref<DeviceBase> mDevice;
};
class ApiObjectBase : public ObjectBase, public LinkNode<ApiObjectBase> {

View File

@ -0,0 +1,31 @@
// Copyright 2022 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn/native/RefCountedWithExternalCount.h"
namespace dawn::native {
void RefCountedWithExternalCount::APIReference() {
mExternalRefCount.Increment();
RefCounted::APIReference();
}
void RefCountedWithExternalCount::APIRelease() {
if (mExternalRefCount.Decrement()) {
WillDropLastExternalRef();
}
RefCounted::APIRelease();
}
} // namespace dawn::native

View File

@ -0,0 +1,45 @@
// Copyright 2022 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SRC_DAWN_NATIVE_REFCOUNTEDWITHEXTERNALCOUNT_H_
#define SRC_DAWN_NATIVE_REFCOUNTEDWITHEXTERNALCOUNT_H_
#include "dawn/common/RefCounted.h"
namespace dawn::native {
// RefCountedWithExternalCount is a version of RefCounted which tracks a separate
// refcount for calls to APIReference/APIRelease (refs added/removed by the application).
// The external refcount starts at 1, and the total refcount starts at 1 - i.e. the first
// ref is the external ref.
// Then, when the external refcount drops to zero, WillDropLastExternalRef is called.
// The derived class should override the behavior of WillDropLastExternalRef.
class RefCountedWithExternalCount : private RefCounted {
public:
using RefCounted::RefCounted;
using RefCounted::Reference;
using RefCounted::Release;
void APIReference();
void APIRelease();
private:
virtual void WillDropLastExternalRef() = 0;
RefCount mExternalRefCount;
};
} // namespace dawn::native
#endif // SRC_DAWN_NATIVE_REFCOUNTEDWITHEXTERNALCOUNT_H_
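A hypothetical subclass, to show how the class is meant to be used (the name MyDeviceLike is illustrative; in this change the only real user is DeviceBase):

    #include "dawn/native/RefCountedWithExternalCount.h"

    namespace dawn::native {

    class MyDeviceLike : public RefCountedWithExternalCount {
      private:
        // Called exactly once, when the application drops its last external ref
        // (APIRelease brings the external count to zero). Internal refs held by
        // child objects may still keep the allocation alive after this point.
        void WillDropLastExternalRef() override {
            // Clear any member Refs that point back at this object, breaking
            // reference cycles. DeviceBase does this by calling Destroy().
        }
    };

    }  // namespace dawn::native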

View File

@ -704,6 +704,8 @@ void Device::AppendDebugLayerMessages(ErrorData* error) {
void Device::DestroyImpl() {
ASSERT(GetState() == State::Disconnected);
mZeroBuffer = nullptr;
// Immediately forget about all pending commands for the case where the device is lost on its
// own and WaitForIdleForDestruction isn't called.
mPendingCommands.Release();

View File

@ -440,6 +440,7 @@ source_set("end2end_tests_sources") {
"end2end/DepthStencilStateTests.cpp",
"end2end/DestroyTests.cpp",
"end2end/DeviceInitializationTests.cpp",
"end2end/DeviceLifetimeTests.cpp",
"end2end/DeviceLostTests.cpp",
"end2end/DrawIndexedIndirectTests.cpp",
"end2end/DrawIndexedTests.cpp",

View File

@ -1066,7 +1066,7 @@ void DawnTestBase::TearDown() {
mReadbackSlots[i].buffer.Unmap();
}
if (!UsesWire()) {
if (!UsesWire() && device) {
EXPECT_EQ(mLastWarningCount,
dawn::native::GetDeprecationWarningCountForTesting(device.Get()));
}
@ -1454,7 +1454,9 @@ std::ostringstream& DawnTestBase::ExpectAttachmentDepthStencilTestData(
}
void DawnTestBase::WaitABit() {
device.Tick();
if (device) {
device.Tick();
}
FlushWire();
utils::USleep(100);

View File

@ -0,0 +1,468 @@
// Copyright 2022 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <utility>
#include "dawn/tests/DawnTest.h"
#include "dawn/utils/WGPUHelpers.h"
class DeviceLifetimeTests : public DawnTest {
void SetUp() override {
DawnTest::SetUp();
// The wire currently has a different device / device-child lifetime mechanism
// which will be removed soon, at which point these tests will be enabled.
DAWN_TEST_UNSUPPORTED_IF(UsesWire());
}
};
// Test that the device can be dropped before its queue.
TEST_P(DeviceLifetimeTests, DroppedBeforeQueue) {
wgpu::Queue queue = device.GetQueue();
device = nullptr;
}
// Test that the device can be dropped while an onSubmittedWorkDone callback is in flight.
TEST_P(DeviceLifetimeTests, DroppedWhileQueueOnSubmittedWorkDone) {
// Submit some work.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(nullptr);
wgpu::CommandBuffer commandBuffer = encoder.Finish();
queue.Submit(1, &commandBuffer);
// Ask for an onSubmittedWorkDone callback and drop the device.
queue.OnSubmittedWorkDone(
0,
[](WGPUQueueWorkDoneStatus status, void*) {
EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
},
nullptr);
device = nullptr;
}
// Test that the device can be dropped inside an onSubmittedWorkDone callback.
TEST_P(DeviceLifetimeTests, DroppedInsideQueueOnSubmittedWorkDone) {
// Submit some work.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(nullptr);
wgpu::CommandBuffer commandBuffer = encoder.Finish();
queue.Submit(1, &commandBuffer);
struct Userdata {
wgpu::Device device;
bool done;
};
// Ask for an onSubmittedWorkDone callback and drop the device inside the callback.
Userdata data = Userdata{std::move(device), false};
queue.OnSubmittedWorkDone(
0,
[](WGPUQueueWorkDoneStatus status, void* userdata) {
EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
static_cast<Userdata*>(userdata)->device = nullptr;
static_cast<Userdata*>(userdata)->done = true;
},
&data);
while (!data.done) {
// WaitABit no longer can call tick since we've moved the device from the fixture into the
// userdata.
if (data.device) {
data.device.Tick();
}
WaitABit();
}
}
// Test that the device can be dropped while a popErrorScope callback is in flight.
TEST_P(DeviceLifetimeTests, DroppedWhilePopErrorScope) {
device.PushErrorScope(wgpu::ErrorFilter::Validation);
device.PopErrorScope(
[](WGPUErrorType type, const char*, void*) { EXPECT_EQ(type, WGPUErrorType_NoError); },
nullptr);
device = nullptr;
}
// Test that the device can be dropped inside a popErrorScope callback.
TEST_P(DeviceLifetimeTests, DroppedInsidePopErrorScope) {
struct Userdata {
wgpu::Device device;
bool done;
};
device.PushErrorScope(wgpu::ErrorFilter::Validation);
// Ask for a popErrorScope callback and drop the device inside the callback.
Userdata data = Userdata{std::move(device), false};
data.device.PopErrorScope(
[](WGPUErrorType type, const char*, void* userdata) {
EXPECT_EQ(type, WGPUErrorType_NoError);
static_cast<Userdata*>(userdata)->device = nullptr;
static_cast<Userdata*>(userdata)->done = true;
},
&data);
while (!data.done) {
// WaitABit no longer can call tick since we've moved the device from the fixture into the
// userdata.
if (data.device) {
data.device.Tick();
}
WaitABit();
}
}
// Test that the device can be dropped before a buffer created from it.
TEST_P(DeviceLifetimeTests, DroppedBeforeBuffer) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
device = nullptr;
}
// Test that the device can be dropped while a buffer created from it is being mapped.
TEST_P(DeviceLifetimeTests, DroppedWhileMappingBuffer) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
buffer.MapAsync(
wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void*) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_DestroyedBeforeCallback);
},
nullptr);
device = nullptr;
}
// Test that the device can be dropped before a mapped buffer created from it.
TEST_P(DeviceLifetimeTests, DroppedBeforeMappedBuffer) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
bool done = false;
buffer.MapAsync(
wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
*static_cast<bool*>(userdata) = true;
},
&done);
while (!done) {
WaitABit();
}
device = nullptr;
}
// Test that the device can be dropped before a mapped-at-creation buffer created from it.
TEST_P(DeviceLifetimeTests, DroppedBeforeMappedAtCreationBuffer) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
desc.mappedAtCreation = true;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
device = nullptr;
}
// Test that the device can be dropped before a buffer created from it, then mapping the buffer
// fails.
TEST_P(DeviceLifetimeTests, DroppedThenMapBuffer) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
device = nullptr;
bool done = false;
buffer.MapAsync(
wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_DeviceLost);
*static_cast<bool*>(userdata) = true;
},
&done);
while (!done) {
WaitABit();
}
}
// Test that the device can be dropped inside a buffer map callback.
TEST_P(DeviceLifetimeTests, DroppedInsideBufferMapCallback) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
struct Userdata {
wgpu::Device device;
wgpu::Buffer buffer;
bool done;
};
// Ask for a mapAsync callback and drop the device inside the callback.
Userdata data = Userdata{std::move(device), buffer, false};
buffer.MapAsync(
wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
[](WGPUBufferMapAsyncStatus status, void* userdata) {
EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
auto* data = static_cast<Userdata*>(userdata);
data->device = nullptr;
data->done = true;
// Mapped data should be null since the buffer is implicitly destroyed.
EXPECT_EQ(data->buffer.GetConstMappedRange(), nullptr);
},
&data);
while (!data.done) {
// WaitABit no longer can call tick since we've moved the device from the fixture into the
// userdata.
if (data.device) {
data.device.Tick();
}
WaitABit();
}
// Mapped data should be null since the buffer is implicitly destroyed.
EXPECT_EQ(buffer.GetConstMappedRange(), nullptr);
}
// Test that the device can be dropped while a write buffer operation is enqueued.
TEST_P(DeviceLifetimeTests, DroppedWhileWriteBuffer) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
uint32_t value = 7;
queue.WriteBuffer(buffer, 0, &value, sizeof(value));
device = nullptr;
}
// Test that the device can be dropped while a write buffer operation is enqueued and then
// a queue submit occurs. This is slightly different from the former test since it ensures
// that pending work is flushed.
TEST_P(DeviceLifetimeTests, DroppedWhileWriteBufferAndSubmit) {
wgpu::BufferDescriptor desc = {};
desc.size = 4;
desc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
wgpu::Buffer buffer = device.CreateBuffer(&desc);
uint32_t value = 7;
queue.WriteBuffer(buffer, 0, &value, sizeof(value));
queue.Submit(0, nullptr);
device = nullptr;
}
// Test that the device can be dropped while createPipelineAsync is in flight
TEST_P(DeviceLifetimeTests, DroppedWhileCreatePipelineAsync) {
wgpu::ComputePipelineDescriptor desc;
desc.compute.module = utils::CreateShaderModule(device, R"(
@stage(compute) @workgroup_size(1) fn main() {
})");
desc.compute.entryPoint = "main";
device.CreateComputePipelineAsync(
&desc,
[](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline cPipeline, const char* message,
void* userdata) {
wgpu::ComputePipeline::Acquire(cPipeline);
EXPECT_EQ(status, WGPUCreatePipelineAsyncStatus_DeviceDestroyed);
},
nullptr);
device = nullptr;
}
// Test that the device can be dropped inside a createPipelineAsync callback
TEST_P(DeviceLifetimeTests, DroppedInsideCreatePipelineAsync) {
wgpu::ComputePipelineDescriptor desc;
desc.compute.module = utils::CreateShaderModule(device, R"(
@stage(compute) @workgroup_size(1) fn main() {
})");
desc.compute.entryPoint = "main";
struct Userdata {
wgpu::Device device;
bool done;
};
// Call CreateComputePipelineAsync and drop the device inside the callback.
Userdata data = Userdata{std::move(device), false};
data.device.CreateComputePipelineAsync(
&desc,
[](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline cPipeline, const char* message,
void* userdata) {
wgpu::ComputePipeline::Acquire(cPipeline);
EXPECT_EQ(status, WGPUCreatePipelineAsyncStatus_Success);
static_cast<Userdata*>(userdata)->device = nullptr;
static_cast<Userdata*>(userdata)->done = true;
},
&data);
while (!data.done) {
// WaitABit no longer can call tick since we've moved the device from the fixture into the
// userdata.
if (data.device) {
data.device.Tick();
}
WaitABit();
}
}
// Test that the device can be dropped while a createPipelineAsync call which will hit the
// frontend cache is in flight.
TEST_P(DeviceLifetimeTests, DroppedWhileCreatePipelineAsyncAlreadyCached) {
wgpu::ComputePipelineDescriptor desc;
desc.compute.module = utils::CreateShaderModule(device, R"(
@stage(compute) @workgroup_size(1) fn main() {
})");
desc.compute.entryPoint = "main";
// Create a pipeline ahead of time so it's in the cache.
wgpu::ComputePipeline p = device.CreateComputePipeline(&desc);
device.CreateComputePipelineAsync(
&desc,
[](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline cPipeline, const char* message,
void* userdata) {
wgpu::ComputePipeline::Acquire(cPipeline);
// Success because it hits the frontend cache immediately.
EXPECT_EQ(status, WGPUCreatePipelineAsyncStatus_Success);
},
nullptr);
device = nullptr;
}
// Test that the device can be dropped inside a createPipelineAsync callback which will hit the
// frontend cache
TEST_P(DeviceLifetimeTests, DroppedInsideCreatePipelineAsyncAlreadyCached) {
wgpu::ComputePipelineDescriptor desc;
desc.compute.module = utils::CreateShaderModule(device, R"(
@stage(compute) @workgroup_size(1) fn main() {
})");
desc.compute.entryPoint = "main";
// Create a pipeline ahead of time so it's in the cache.
wgpu::ComputePipeline p = device.CreateComputePipeline(&desc);
struct Userdata {
wgpu::Device device;
bool done;
};
// Call CreateComputePipelineAsync and drop the device inside the callback.
Userdata data = Userdata{std::move(device), false};
data.device.CreateComputePipelineAsync(
&desc,
[](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline cPipeline, const char* message,
void* userdata) {
wgpu::ComputePipeline::Acquire(cPipeline);
// Success because it hits the frontend cache immediately.
EXPECT_EQ(status, WGPUCreatePipelineAsyncStatus_Success);
static_cast<Userdata*>(userdata)->device = nullptr;
static_cast<Userdata*>(userdata)->done = true;
},
&data);
while (!data.done) {
// WaitABit no longer can call tick since we've moved the device from the fixture into the
// userdata.
if (data.device) {
data.device.Tick();
}
WaitABit();
}
}
// Test that the device can be dropped while a createPipelineAsync call is in flight which will
// race with a synchronous compilation to add the same pipeline to the frontend cache.
TEST_P(DeviceLifetimeTests, DroppedWhileCreatePipelineAsyncRaceCache) {
wgpu::ComputePipelineDescriptor desc;
desc.compute.module = utils::CreateShaderModule(device, R"(
@stage(compute) @workgroup_size(1) fn main() {
})");
desc.compute.entryPoint = "main";
device.CreateComputePipelineAsync(
&desc,
[](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline cPipeline, const char* message,
void* userdata) {
wgpu::ComputePipeline::Acquire(cPipeline);
EXPECT_EQ(status, WGPUCreatePipelineAsyncStatus_DeviceDestroyed);
},
nullptr);
// Create the same pipeline synchronously which will get added to the cache.
wgpu::ComputePipeline p = device.CreateComputePipeline(&desc);
device = nullptr;
}
// Test that the device can be dropped inside a createPipelineAsync callback which will race
// with a synchronous compilation to add the same pipeline to the frontend cache.
TEST_P(DeviceLifetimeTests, DroppedInsideCreatePipelineAsyncRaceCache) {
wgpu::ComputePipelineDescriptor desc;
desc.compute.module = utils::CreateShaderModule(device, R"(
@stage(compute) @workgroup_size(1) fn main() {
})");
desc.compute.entryPoint = "main";
struct Userdata {
wgpu::Device device;
bool done;
};
// Call CreateComputePipelineAsync and drop the device inside the callback.
Userdata data = Userdata{std::move(device), false};
data.device.CreateComputePipelineAsync(
&desc,
[](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline cPipeline, const char* message,
void* userdata) {
wgpu::ComputePipeline::Acquire(cPipeline);
EXPECT_EQ(status, WGPUCreatePipelineAsyncStatus_Success);
static_cast<Userdata*>(userdata)->device = nullptr;
static_cast<Userdata*>(userdata)->done = true;
},
&data);
// Create the same pipeline synchronously which will get added to the cache.
wgpu::ComputePipeline p = data.device.CreateComputePipeline(&desc);
while (!data.done) {
// WaitABit no longer can call tick since we've moved the device from the fixture into the
// userdata.
if (data.device) {
data.device.Tick();
}
WaitABit();
}
}
DAWN_INSTANTIATE_TEST(DeviceLifetimeTests,
D3D12Backend(),
MetalBackend(),
NullBackend(),
OpenGLBackend(),
OpenGLESBackend(),
VulkanBackend());