diff --git a/dawn.json b/dawn.json
index 268b83576c..6b4804f3c0 100644
--- a/dawn.json
+++ b/dawn.json
@@ -77,6 +77,7 @@
     },
     "adapter": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "get instance",
@@ -579,6 +580,7 @@
     },
     "command encoder": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "finish",
@@ -792,6 +794,7 @@
     },
     "compute pass encoder": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "insert debug marker",
@@ -1203,10 +1206,12 @@
             },
             {
                 "name": "tick",
+                "no autolock": true,
                 "tags": ["dawn"]
             },
             {
                 "name": "set uncaptured error callback",
+                "no autolock": true,
                 "args": [
                     {"name": "callback", "type": "error callback"},
                     {"name": "userdata", "type": "void", "annotation": "*"}
@@ -1214,6 +1219,7 @@
             },
             {
                 "name": "set logging callback",
+                "no autolock": true,
                 "tags": ["dawn"],
                 "args": [
                     {"name": "callback", "type": "logging callback"},
@@ -1222,6 +1228,7 @@
             },
             {
                 "name": "set device lost callback",
+                "no autolock": true,
                 "args": [
                     {"name": "callback", "type": "device lost callback"},
                     {"name": "userdata", "type": "void", "annotation": "*"}
@@ -1469,7 +1476,8 @@
             {"value": 1003, "name": "dawn multi planar formats", "tags": ["dawn"]},
             {"value": 1004, "name": "dawn native", "tags": ["dawn", "native"]},
             {"value": 1005, "name": "chromium experimental dp4a", "tags": ["dawn"]},
-            {"value": 1006, "name": "timestamp query inside passes", "tags": ["dawn"]}
+            {"value": 1006, "name": "timestamp query inside passes", "tags": ["dawn"]},
+            {"value": 1007, "name": "implicit device synchronization", "tags": ["dawn", "native"]}
         ]
     },
     "filter mode": {
@@ -1527,6 +1535,7 @@
     },
     "instance": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "create surface",
@@ -1856,6 +1865,7 @@
 
     "render bundle encoder": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "set pipeline",
@@ -2027,6 +2037,7 @@
     },
     "render pass encoder": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "set pipeline",
@@ -2482,6 +2493,7 @@
     },
     "surface": {
         "category": "object",
+        "no autolock": true,
         "methods": [
             {
                 "name": "get preferred format",
diff --git a/docs/dawn/codegen.md b/docs/dawn/codegen.md
index 359fddea7e..1b0868dd45 100644
--- a/docs/dawn/codegen.md
+++ b/docs/dawn/codegen.md
@@ -76,6 +76,8 @@ A **record** is a list of **record members**, each of which is a dictionary with
    - `"name"` a string
    - `"return_type"` (default to no return type) a string that's the name of the return type.
    - `"arguments"` a **record**, so an array of **record members**
+   - `"no autolock"`: a boolean flag (default is false) indicates that the method's generated code won't automatically do thread synchronization. This flag can only be true on device or device child objects currently.
+ - `"no autolock"`: a boolean flag (default is false) to indicate that the object's generated code won't automatically do thread synchronization. This will override individual method's `"no autolock"` flag. This flag can only be true on device or device child objects currently.
 
 **`"constant"`**
  - `"type"`: a string, the name of the base data type
diff --git a/generator/dawn_json_generator.py b/generator/dawn_json_generator.py
index 705948cd49..5b2d5f9450 100644
--- a/generator/dawn_json_generator.py
+++ b/generator/dawn_json_generator.py
@@ -189,8 +189,8 @@ class RecordMember:
         self.handle_type = handle_type
 
 
-Method = namedtuple('Method',
-                    ['name', 'return_type', 'arguments', 'json_data'])
+Method = namedtuple(
+    'Method', ['name', 'return_type', 'arguments', 'autolock', 'json_data'])
 
 
 class ObjectType(Type):
@@ -337,11 +337,16 @@ def linked_record_members(json_data, types):
 
 
 def link_object(obj, types):
+    # Disable method's autolock if obj's "no autolock" = True
+    obj_scoped_autolock_enabled = not obj.json_data.get('no autolock', False)
+
     def make_method(json_data):
         arguments = linked_record_members(json_data.get('args', []), types)
+        autolock_enabled = obj_scoped_autolock_enabled and not json_data.get(
+            'no autolock', False)
         return Method(Name(json_data['name']),
-                      types[json_data.get('returns',
-                                          'void')], arguments, json_data)
+                      types[json_data.get('returns', 'void')], arguments,
+                      autolock_enabled, json_data)
 
     obj.methods = [make_method(m) for m in obj.json_data.get('methods', [])]
     obj.methods.sort(key=lambda method: method.name.canonical_case())
@@ -769,9 +774,9 @@ def as_formatType(typ):
 def c_methods(params, typ):
     return typ.methods + [
         x for x in [
-            Method(Name('reference'), params['types']['void'], [],
+            Method(Name('reference'), params['types']['void'], [], False,
                    {'tags': ['dawn', 'emscripten']}),
-            Method(Name('release'), params['types']['void'], [],
+            Method(Name('release'), params['types']['void'], [], False,
                    {'tags': ['dawn', 'emscripten']}),
         ] if item_is_enabled(params['enabled_tags'], x.json_data)
         and not item_is_disabled(params['disabled_tags'], x.json_data)
diff --git a/generator/templates/dawn/native/ProcTable.cpp b/generator/templates/dawn/native/ProcTable.cpp
index f9980ffc61..7b1922b1f5 100644
--- a/generator/templates/dawn/native/ProcTable.cpp
+++ b/generator/templates/dawn/native/ProcTable.cpp
@@ -56,6 +56,17 @@ namespace {{native_namespace}} {
                     {% endif %}
                 {%- endfor-%}
 
+                {% if method.autolock %}
+                    {% if type.name.get() != "device" %}
+                        auto device = self->GetDevice();
+                    {% else %}
+                        auto device = self;
+                    {% endif %}
+                    auto deviceLock(device->GetScopedLock());
+                {% else %}
+                    // This method is specified to not use AutoLock in json script.
+                {% endif %}
+
                 {% if method.return_type.name.canonical_case() != "void" %}
                     auto result =
                 {%- endif %}
diff --git a/src/dawn/common/BUILD.gn b/src/dawn/common/BUILD.gn
index df20a6c498..808e8b7f4b 100644
--- a/src/dawn/common/BUILD.gn
+++ b/src/dawn/common/BUILD.gn
@@ -246,6 +246,8 @@ if (is_win || is_linux || is_chromeos || is_mac || is_fuchsia || is_android) {
       "Log.h",
       "Math.cpp",
       "Math.h",
+      "Mutex.cpp",
+      "Mutex.h",
       "NSRef.h",
       "NonCopyable.h",
       "Numeric.h",
diff --git a/src/dawn/common/CMakeLists.txt b/src/dawn/common/CMakeLists.txt
index 45de3dc0cb..c78d6b0c2e 100644
--- a/src/dawn/common/CMakeLists.txt
+++ b/src/dawn/common/CMakeLists.txt
@@ -52,6 +52,8 @@ target_sources(dawn_common PRIVATE
     "Log.h"
     "Math.cpp"
     "Math.h"
+    "Mutex.cpp"
+    "Mutex.h"
     "NSRef.h"
     "NonCopyable.h"
     "Numeric.h"
diff --git a/src/dawn/common/Mutex.cpp b/src/dawn/common/Mutex.cpp
new file mode 100644
index 0000000000..b8b891f93d
--- /dev/null
+++ b/src/dawn/common/Mutex.cpp
@@ -0,0 +1,49 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dawn/common/Mutex.h"
+
+namespace dawn {
+Mutex::~Mutex() = default;
+
+void Mutex::Lock() {
+#if defined(DAWN_ENABLE_ASSERTS)
+    auto currentThread = std::this_thread::get_id();
+    ASSERT(mOwner.load(std::memory_order_acquire) != currentThread);
+#endif  // DAWN_ENABLE_ASSERTS
+
+    mNativeMutex.lock();
+
+#if defined(DAWN_ENABLE_ASSERTS)
+    mOwner.store(currentThread, std::memory_order_release);
+#endif  // DAWN_ENABLE_ASSERTS
+}
+void Mutex::Unlock() {
+#if defined(DAWN_ENABLE_ASSERTS)
+    ASSERT(IsLockedByCurrentThread());
+    mOwner.store(std::thread::id(), std::memory_order_release);
+#endif  // DAWN_ENABLE_ASSERTS
+
+    mNativeMutex.unlock();
+}
+
+bool Mutex::IsLockedByCurrentThread() {
+#if defined(DAWN_ENABLE_ASSERTS)
+    return mOwner.load(std::memory_order_acquire) == std::this_thread::get_id();
+#else
+    // This is not supported.
+    abort();
+#endif
+}
+}  // namespace dawn
diff --git a/src/dawn/common/Mutex.h b/src/dawn/common/Mutex.h
new file mode 100644
index 0000000000..c5a3a8cbf7
--- /dev/null
+++ b/src/dawn/common/Mutex.h
@@ -0,0 +1,72 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SRC_DAWN_COMMON_MUTEX_H_
+#define SRC_DAWN_COMMON_MUTEX_H_
+
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include <utility>
+
+#include "dawn/common/Assert.h"
+#include "dawn/common/NonCopyable.h"
+#include "dawn/common/RefCounted.h"
+
+namespace dawn {
+class Mutex : public RefCounted, NonCopyable {
+  public:
+    template <typename MutexRef>
+    struct AutoLockBase : NonMovable {
+        AutoLockBase() = delete;
+        explicit AutoLockBase(MutexRef mutex) : mMutex(std::move(mutex)) {
+            if (mMutex != nullptr) {
+                mMutex->Lock();
+            }
+        }
+        ~AutoLockBase() {
+            if (mMutex != nullptr) {
+                mMutex->Unlock();
+            }
+        }
+
+      private:
+        MutexRef mMutex;
+    };
+
+    // This scoped lock won't keep the mutex alive.
+    using AutoLock = AutoLockBase<Mutex*>;
+    // This scoped lock will keep the mutex alive until out of scope.
+    using AutoLockAndHoldRef = AutoLockBase<Ref<Mutex>>;
+
+    ~Mutex() override;
+
+    void Lock();
+    void Unlock();
+
+    // This method is only enabled when DAWN_ENABLE_ASSERTS is turned on.
+    // Thus it should only be wrapped inside ASSERT() macro.
+    // i.e. ASSERT(mutex.IsLockedByCurrentThread())
+    bool IsLockedByCurrentThread();
+
+  private:
+#if defined(DAWN_ENABLE_ASSERTS)
+    std::atomic<std::thread::id> mOwner;
+#endif  // DAWN_ENABLE_ASSERTS
+    std::mutex mNativeMutex;
+};
+
+}  // namespace dawn
+
+#endif
diff --git a/src/dawn/common/RefCounted.cpp b/src/dawn/common/RefCounted.cpp
index 14bd0b1df7..b5d244134a 100644
--- a/src/dawn/common/RefCounted.cpp
+++ b/src/dawn/common/RefCounted.cpp
@@ -15,6 +15,11 @@
 #include "dawn/common/RefCounted.h"
 
 #include <cstddef>
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#include <sanitizer/tsan_interface.h>
+#endif
+#endif
 
 #include "dawn/common/Assert.h"
 
@@ -68,6 +73,14 @@ bool RefCount::Decrement() {
         // Note that on ARM64 this will generate a `dmb ish` instruction which is a global
         // memory barrier, when an acquire load on mRefCount (using the `ldar` instruction)
         // should be enough and could end up being faster.
+
+        // https://github.com/google/sanitizers/issues/1415 There is false positive bug in TSAN
+        // when using standalone fence.
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+        __tsan_acquire(&mRefCount);
+#endif
+#endif
         std::atomic_thread_fence(std::memory_order_acquire);
         return true;
     }
@@ -95,6 +108,16 @@ void RefCounted::Release() {
     }
 }
 
+void RefCounted::ReleaseAndLockBeforeDestroy() {
+    if (mRefCount.Decrement()) {
+        LockAndDeleteThis();
+    }
+}
+
 void RefCounted::DeleteThis() {
     delete this;
 }
+
+void RefCounted::LockAndDeleteThis() {
+    DeleteThis();
+}
diff --git a/src/dawn/common/RefCounted.h b/src/dawn/common/RefCounted.h
index b5d74f70ea..839b47dc16 100644
--- a/src/dawn/common/RefCounted.h
+++ b/src/dawn/common/RefCounted.h
@@ -46,18 +46,25 @@ class RefCounted {
     uint64_t GetRefCountPayload() const;
 
     void Reference();
+    // Release() is called by internal code, so it's assumed that there is already a thread
+    // synchronization in place for destruction.
     void Release();
 
     void APIReference() { Reference(); }
-    void APIRelease() { Release(); }
+    // APIRelease() can be called without any synchronization guarantees so we need to use a Release
+    // method that will call LockAndDeleteThis() on destruction.
+    void APIRelease() { ReleaseAndLockBeforeDestroy(); }
 
   protected:
     virtual ~RefCounted();
 
-    // A Derived class may override this if they require a custom deleter.
-    virtual void DeleteThis();
+    void ReleaseAndLockBeforeDestroy();
+
+    // A Derived class may override these if they require a custom deleter.
+    virtual void DeleteThis();
+    // This calls DeleteThis() by default.
+    virtual void LockAndDeleteThis();
 
-  private:
     RefCount mRefCount;
 };
 
diff --git a/src/dawn/native/Adapter.cpp b/src/dawn/native/Adapter.cpp
index 0a6d93eac1..a83d836a6b 100644
--- a/src/dawn/native/Adapter.cpp
+++ b/src/dawn/native/Adapter.cpp
@@ -42,6 +42,7 @@ MaybeError AdapterBase::Initialize() {
 
     EnableFeature(Feature::DawnNative);
     EnableFeature(Feature::DawnInternalUsages);
+    EnableFeature(Feature::ImplicitDeviceSynchronization);
     InitializeSupportedFeaturesImpl();
 
     DAWN_TRY_CONTEXT(
diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index cc4c30e725..1a72c90de0 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp
@@ -43,9 +43,14 @@ struct MapRequestTask : TrackTaskCallback {
 
   private:
     void FinishImpl() override {
-        ASSERT(mSerial != kMaxExecutionSerial);
-        TRACE_EVENT1(mPlatform, General, "Buffer::TaskInFlight::Finished", "serial",
-                     uint64_t(mSerial));
+        {
+            // This is called from a callback, and no lock will be held by default. Hence, we need
+            // to lock the mutex now because mSerial might be changed by another thread.
+            auto deviceLock(buffer->GetDevice()->GetScopedLock());
+            ASSERT(mSerial != kMaxExecutionSerial);
+            TRACE_EVENT1(mPlatform, General, "Buffer::TaskInFlight::Finished", "serial",
+                         uint64_t(mSerial));
+        }
         buffer->CallbackOnMapRequestCompleted(id, WGPUBufferMapAsyncStatus_Success);
     }
     void HandleDeviceLossImpl() override {
@@ -597,9 +602,14 @@ MaybeError BufferBase::ValidateUnmap() const {
 
 void BufferBase::CallbackOnMapRequestCompleted(MapRequestID mapID,
                                                WGPUBufferMapAsyncStatus status) {
-    if (mapID == mLastMapID && status == WGPUBufferMapAsyncStatus_Success &&
-        mState == BufferState::PendingMap) {
-        mState = BufferState::Mapped;
+    {
+        // This is called from a callback, and no lock will be held by default. Hence, we need to
+        // lock the mutex now because this will modify the buffer's states.
+        auto deviceLock(GetDevice()->GetScopedLock());
+        if (mapID == mLastMapID && status == WGPUBufferMapAsyncStatus_Success &&
+            mState == BufferState::PendingMap) {
+            mState = BufferState::Mapped;
+        }
     }
 
     auto cb = PrepareMappingCallback(mapID, status);
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index a696c41ee2..0279bdc7af 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -1047,14 +1047,21 @@ ResultOrError<std::function<void()>> CommandEncoder::ApplyRenderPassWorkarounds(
                 descriptor.dimension = wgpu::TextureDimension::e2D;
                 descriptor.mipLevelCount = 1;
 
+                // We are creating new resources. Need to lock the Device.
+                // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
+                // Command Submit time, so the locking would be removed from here at that point.
                 Ref<TextureBase> temporaryResolveTexture;
-                DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
-
-                TextureViewDescriptor viewDescriptor = {};
                 Ref<TextureViewBase> temporaryResolveView;
-                DAWN_TRY_ASSIGN(
-                    temporaryResolveView,
-                    device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor));
+                {
+                    auto deviceLock(GetDevice()->GetScopedLock());
+
+                    DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
+
+                    TextureViewDescriptor viewDescriptor = {};
+                    DAWN_TRY_ASSIGN(
+                        temporaryResolveView,
+                        device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor));
+                }
 
                 // Save the temporary and given render targets together for copying after
                 // the render pass ends.
@@ -1200,6 +1207,11 @@ void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source,
 
             if (dst.aspect == Aspect::Depth &&
                 GetDevice()->IsToggleEnabled(Toggle::UseBlitForBufferToDepthTextureCopy)) {
+                // The below function might create new resources. Need to lock the Device.
+                // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
+                // Command Submit time, so the locking would be removed from here at that point.
+                auto deviceLock(GetDevice()->GetScopedLock());
+
                 DAWN_TRY_CONTEXT(
                     BlitBufferToDepth(GetDevice(), this, source->buffer, srcLayout, dst, *copySize),
                     "copying from %s to depth aspect of %s using blit workaround.", source->buffer,
@@ -1207,6 +1219,11 @@ void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source,
                 return {};
             } else if (dst.aspect == Aspect::Stencil &&
                        GetDevice()->IsToggleEnabled(Toggle::UseBlitForBufferToStencilTextureCopy)) {
+                // The below function might create new resources. Need to lock the Device.
+                // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
+                // Command Submit time, so the locking would be removed from here at that point.
+                auto deviceLock(GetDevice()->GetScopedLock());
+
                 DAWN_TRY_CONTEXT(BlitBufferToStencil(GetDevice(), this, source->buffer, srcLayout,
                                                      dst, *copySize),
                                  "copying from %s to stencil aspect of %s using blit workaround.",
@@ -1375,6 +1392,11 @@ void CommandEncoder::APICopyTextureToTextureHelper(const ImageCopyTexture* sourc
 
             // Use a blit to copy the depth aspect.
             if (blitDepth) {
+                // This function might create new resources. Need to lock the Device.
+                // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
+                // Command Submit time, so the locking would be removed from here at that point.
+                auto deviceLock(GetDevice()->GetScopedLock());
+
                 DAWN_TRY_CONTEXT(BlitDepthToDepth(GetDevice(), this, src, dst, *copySize),
                                  "copying depth aspect from %s to %s using blit workaround.",
                                  source->texture, destination->texture);
@@ -1586,6 +1608,9 @@ void CommandEncoder::APIWriteTimestamp(QuerySetBase* querySet, uint32_t queryInd
 }
 
 CommandBufferBase* CommandEncoder::APIFinish(const CommandBufferDescriptor* descriptor) {
+    // This function will create new object, need to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     Ref<CommandBufferBase> commandBuffer;
     if (GetDevice()->ConsumedError(Finish(descriptor), &commandBuffer)) {
         return CommandBufferBase::MakeError(GetDevice());
diff --git a/src/dawn/native/ComputePassEncoder.cpp b/src/dawn/native/ComputePassEncoder.cpp
index 63505c7a44..7fbf82b1c3 100644
--- a/src/dawn/native/ComputePassEncoder.cpp
+++ b/src/dawn/native/ComputePassEncoder.cpp
@@ -238,6 +238,10 @@ ResultOrError<std::pair<Ref<BufferBase>, uint64_t>>
 ComputePassEncoder::TransformIndirectDispatchBuffer(Ref<BufferBase> indirectBuffer,
                                                     uint64_t indirectOffset) {
     DeviceBase* device = GetDevice();
+    // This function creates new resources, need to lock the Device.
+    // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at Command Submit
+    // time, so the locking would be removed from here at that point.
+    auto deviceLock(GetDevice()->GetScopedLock());
 
     const bool shouldDuplicateNumWorkgroups =
         device->ShouldDuplicateNumWorkgroupsForDispatchIndirect(
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 0079baa3c7..047131a9e5 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -92,6 +92,10 @@ struct DeviceBase::DeprecationWarnings {
 };
 
 namespace {
+bool IsMutexLockedByCurrentThreadIfNeeded(const Ref<Mutex>& mutex) {
+    return mutex == nullptr || mutex->IsLockedByCurrentThread();
+}
+
 struct LoggingCallbackTask : CallbackTask {
   public:
     LoggingCallbackTask() = delete;
@@ -277,6 +281,13 @@ MaybeError DeviceBase::Initialize(Ref<QueueBase> defaultQueue) {
                         CreateShaderModule(&descriptor));
     }
 
+    if (HasFeature(Feature::ImplicitDeviceSynchronization)) {
+        mMutex = AcquireRef(new Mutex);
+    } else {
+        mMutex = nullptr;
+    }
+
+    // mAdapter is not set for mock test devices.
     // TODO(crbug.com/dawn/1702): using a mock adapter could avoid the null checking.
     if (mAdapter != nullptr) {
         mAdapter->GetInstance()->AddDevice(this);
@@ -286,30 +297,38 @@ MaybeError DeviceBase::Initialize(Ref<QueueBase> defaultQueue) {
 }
 
 void DeviceBase::WillDropLastExternalRef() {
-    // DeviceBase uses RefCountedWithExternalCount to break refcycles.
-    //
-    // DeviceBase holds multiple Refs to various API objects (pipelines, buffers, etc.) which are
-    // used to implement various device-level facilities. These objects are cached on the device,
-    // so we want to keep them around instead of making transient allocations. However, many of
-    // the objects also hold a Ref<Device> back to their parent device.
-    //
-    // In order to break this cycle and prevent leaks, when the application drops the last external
-    // ref and WillDropLastExternalRef is called, the device clears out any member refs to API
-    // objects that hold back-refs to the device - thus breaking any reference cycles.
-    //
-    // Currently, this is done by calling Destroy on the device to cease all in-flight work and
-    // drop references to internal objects. We may want to lift this in the future, but it would
-    // make things more complex because there might be pending tasks which hold a ref back to the
-    // device - either directly or indirectly. We would need to ensure those tasks don't create new
-    // reference cycles, and we would need to continuously try draining the pending tasks to clear
-    // out all remaining refs.
-    Destroy();
+    {
+        // This will be invoked by API side, so we need to lock.
+        // Note: we cannot hold the lock when flushing the callbacks so have to limit the scope of
+        // the lock.
+        auto deviceLock(GetScopedLock());
+
+        // DeviceBase uses RefCountedWithExternalCount to break refcycles.
+        //
+        // DeviceBase holds multiple Refs to various API objects (pipelines, buffers, etc.) which
+        // are used to implement various device-level facilities. These objects are cached on the
+        // device, so we want to keep them around instead of making transient allocations. However,
+        // many of the objects also hold a Ref<Device> back to their parent device.
+        //
+        // In order to break this cycle and prevent leaks, when the application drops the last
+        // external ref and WillDropLastExternalRef is called, the device clears out any member refs
+        // to API objects that hold back-refs to the device - thus breaking any reference cycles.
+        //
+        // Currently, this is done by calling Destroy on the device to cease all in-flight work and
+        // drop references to internal objects. We may want to lift this in the future, but it would
+        // make things more complex because there might be pending tasks which hold a ref back to
+        // the device - either directly or indirectly. We would need to ensure those tasks don't
+        // create new reference cycles, and we would need to continuously try draining the pending
+        // tasks to clear out all remaining refs.
+        Destroy();
+    }
 
     // Flush last remaining callback tasks.
     do {
-        mCallbackTaskManager->Flush();
+        FlushCallbackTaskQueue();
     } while (!mCallbackTaskManager->IsEmpty());
 
+    auto deviceLock(GetScopedLock());
     // Drop te device's reference to the queue. Because the application dropped the last external
     // references, they can no longer get the queue from APIGetQueue().
     mQueue = nullptr;
@@ -570,7 +589,8 @@ void DeviceBase::APISetLoggingCallback(wgpu::LoggingCallback callback, void* use
     // resetting) the resources pointed by such pointer may be freed. Flush all deferred
     // callback tasks to guarantee we are never going to use the previous callback after
     // this call.
-    mCallbackTaskManager->Flush();
+    FlushCallbackTaskQueue();
+    auto deviceLock(GetScopedLock());
     if (IsLost()) {
         return;
     }
@@ -584,7 +604,8 @@ void DeviceBase::APISetUncapturedErrorCallback(wgpu::ErrorCallback callback, voi
     // resetting) the resources pointed by such pointer may be freed. Flush all deferred
     // callback tasks to guarantee we are never going to use the previous callback after
     // this call.
-    mCallbackTaskManager->Flush();
+    FlushCallbackTaskQueue();
+    auto deviceLock(GetScopedLock());
     if (IsLost()) {
         return;
     }
@@ -598,7 +619,8 @@ void DeviceBase::APISetDeviceLostCallback(wgpu::DeviceLostCallback callback, voi
     // resetting) the resources pointed by such pointer may be freed. Flush all deferred
     // callback tasks to guarantee we are never going to use the previous callback after
     // this call.
-    mCallbackTaskManager->Flush();
+    FlushCallbackTaskQueue();
+    auto deviceLock(GetScopedLock());
     if (IsLost()) {
         return;
     }
@@ -851,6 +873,7 @@ Ref<RenderPipelineBase> DeviceBase::GetCachedRenderPipeline(
 
 Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
     Ref<ComputePipelineBase> computePipeline) {
+    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
     auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get());
     if (inserted) {
         computePipeline->SetIsCachedReference();
@@ -862,6 +885,7 @@ Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
 
 Ref<RenderPipelineBase> DeviceBase::AddOrGetCachedRenderPipeline(
     Ref<RenderPipelineBase> renderPipeline) {
+    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
     auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get());
     if (inserted) {
         renderPipeline->SetIsCachedReference();
@@ -1267,15 +1291,23 @@ bool DeviceBase::APITick() {
     // Tick may trigger callbacks which drop a ref to the device itself. Hold a Ref to ourselves
     // to avoid deleting |this| in the middle of this function call.
     Ref<DeviceBase> self(this);
-    if (ConsumedError(Tick())) {
-        mCallbackTaskManager->Flush();
-        return false;
+    bool tickError;
+    {
+        // Note: we cannot hold the lock when flushing the callbacks so have to limit the scope of
+        // the lock here.
+        auto deviceLock(GetScopedLock());
+        tickError = ConsumedError(Tick());
     }
 
     // We have to check callback tasks in every APITick because it is not related to any global
     // serials.
-    mCallbackTaskManager->Flush();
+    FlushCallbackTaskQueue();
 
+    if (tickError) {
+        return false;
+    }
+
+    auto deviceLock(GetScopedLock());
     // We don't throw an error when device is lost. This allows pending callbacks to be
     // executed even after the Device is lost/destroyed.
     if (IsLost()) {
@@ -1806,6 +1838,26 @@ void DeviceBase::ForceSetToggleForTesting(Toggle toggle, bool isEnabled) {
     mToggles.ForceSet(toggle, isEnabled);
 }
 
+void DeviceBase::FlushCallbackTaskQueue() {
+    // Callbacks might cause re-entrances. Mutex shouldn't be locked. So we expect there is no
+    // locked mutex before entering this method.
+    ASSERT(mMutex == nullptr || !mMutex->IsLockedByCurrentThread());
+
+    Ref<CallbackTaskManager> callbackTaskManager;
+
+    {
+        // This is a data race with the assignment to InstanceBase's callback queue manager in
+        // WillDropLastExternalRef(). Need to protect with a lock and keep the old
+        // mCallbackTaskManager alive.
+        // TODO(crbug.com/dawn/752): In future, all devices should use InstanceBase's callback queue
+        // manager from the start. So we won't need to care about data race at that point.
+        auto deviceLock(GetScopedLock());
+        callbackTaskManager = mCallbackTaskManager;
+    }
+
+    callbackTaskManager->Flush();
+}
+
 const CombinedLimits& DeviceBase::GetLimits() const {
     return mLimits;
 }
@@ -1836,7 +1888,15 @@ void DeviceBase::AddComputePipelineAsyncCallbackTask(
             // CreateComputePipelineAsyncTaskImpl::Run() when the front-end pipeline cache is
             // thread-safe.
             ASSERT(mPipeline != nullptr);
-            mPipeline = mPipeline->GetDevice()->AddOrGetCachedComputePipeline(mPipeline);
+            {
+                // This is called inside a callback, and no lock will be held by default so we have
+                // to lock now to protect the cache.
+                // Note: we don't lock inside AddOrGetCachedComputePipeline() to avoid deadlock
+                // because many places calling that method might already have the lock held. For
+                // example, APICreateComputePipeline()
+                auto deviceLock(mPipeline->GetDevice()->GetScopedLock());
+                mPipeline = mPipeline->GetDevice()->AddOrGetCachedComputePipeline(mPipeline);
+            }
 
             CreateComputePipelineAsyncCallbackTask::FinishImpl();
         }
@@ -1861,6 +1921,12 @@ void DeviceBase::AddRenderPipelineAsyncCallbackTask(Ref<RenderPipelineBase> pipe
             // CreateRenderPipelineAsyncTaskImpl::Run() when the front-end pipeline cache is
             // thread-safe.
             if (mPipeline.Get() != nullptr) {
+                // This is called inside a callback, and no lock will be held by default so we have
+                // to lock now to protect the cache.
+                // Note: we don't lock inside AddOrGetCachedRenderPipeline() to avoid deadlock
+                // because many places calling that method might already have the lock held. For
+                // example, APICreateRenderPipeline()
+                auto deviceLock(mPipeline->GetDevice()->GetScopedLock());
                 mPipeline = mPipeline->GetDevice()->AddOrGetCachedRenderPipeline(mPipeline);
             }
 
@@ -1974,6 +2040,14 @@ MaybeError DeviceBase::CopyFromStagingToTexture(BufferBase* source,
     return {};
 }
 
+Mutex::AutoLockAndHoldRef DeviceBase::GetScopedLockSafeForDelete() {
+    return Mutex::AutoLockAndHoldRef(mMutex);
+}
+
+Mutex::AutoLock DeviceBase::GetScopedLock() {
+    return Mutex::AutoLock(mMutex.Get());
+}
+
 IgnoreLazyClearCountScope::IgnoreLazyClearCountScope(DeviceBase* device)
     : mDevice(device), mLazyClearCountForTesting(device->mLazyClearCountForTesting) {}
 
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index 2de70894c7..b47b98c761 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -16,12 +16,12 @@
 #define SRC_DAWN_NATIVE_DEVICE_H_
 
 #include <memory>
-#include <mutex>
 #include <string>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 
+#include "dawn/common/Mutex.h"
 #include "dawn/native/CacheKey.h"
 #include "dawn/native/Commands.h"
 #include "dawn/native/ComputePipeline.h"
@@ -424,6 +424,13 @@ class DeviceBase : public RefCountedWithExternalCount {
     // method makes them to be submitted as soon as possbile in next ticks.
     virtual void ForceEventualFlushOfCommands() = 0;
 
+    // It is guaranteed that the wrapped mutex will outlive the Device (if the Device is deleted
+    // before the AutoLockAndHoldRef).
+    [[nodiscard]] Mutex::AutoLockAndHoldRef GetScopedLockSafeForDelete();
+    // This lock won't guarantee the wrapped mutex will be alive if the Device is deleted before the
+    // AutoLock. It would crash if such thing happens.
+    [[nodiscard]] Mutex::AutoLock GetScopedLock();
+
     // In the 'Normal' mode, currently recorded commands in the backend normally will be actually
     // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed
     // as late as possible, for example, until the client has explictly issued a submission.
@@ -480,6 +487,7 @@ class DeviceBase : public RefCountedWithExternalCount {
     virtual void SetLabelImpl();
 
     virtual MaybeError TickImpl() = 0;
+    void FlushCallbackTaskQueue();
 
     ResultOrError<Ref<BindGroupLayoutBase>> CreateEmptyBindGroupLayout();
 
@@ -595,6 +603,9 @@ class DeviceBase : public RefCountedWithExternalCount {
     std::unique_ptr<dawn::platform::WorkerTaskPool> mWorkerTaskPool;
     std::string mLabel;
     CacheKey mDeviceCacheKey;
+
+    // This pointer is non-null if Feature::ImplicitDeviceSynchronization is turned on.
+    Ref<Mutex> mMutex = nullptr;
 };
 
 ResultOrError<Ref<PipelineLayoutBase>> ValidateLayoutAndGetComputePipelineDescriptorWithDefaults(
diff --git a/src/dawn/native/EncodingContext.cpp b/src/dawn/native/EncodingContext.cpp
index 8628e282fe..0682a5a696 100644
--- a/src/dawn/native/EncodingContext.cpp
+++ b/src/dawn/native/EncodingContext.cpp
@@ -87,6 +87,9 @@ void EncodingContext::HandleError(std::unique_ptr<ErrorData> error) {
             mError = std::move(error);
         }
     } else {
+        // EncodingContext is unprotected from multiple threads by default, but this code will
+        // modify Device's internal states so we need to lock the device now.
+        auto deviceLock(mDevice->GetScopedLock());
         mDevice->HandleError(std::move(error));
     }
 }
diff --git a/src/dawn/native/Features.cpp b/src/dawn/native/Features.cpp
index f1098bc060..3d6cdce4bd 100644
--- a/src/dawn/native/Features.cpp
+++ b/src/dawn/native/Features.cpp
@@ -97,6 +97,11 @@ static constexpr FeatureEnumAndInfoList kFeatureNameAndInfoList = {{
       "https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/dawn/features/"
       "dawn_native.md",
       FeatureInfo::FeatureState::Stable}},
+    {Feature::ImplicitDeviceSynchronization,
+     {"implicit-device-sync",
+      "Public API methods (except encoding) will have implicit device synchronization. So they "
+      "will be safe to be used on multiple threads.",
+      "https://bugs.chromium.org/p/dawn/issues/detail?id=1662", FeatureInfo::FeatureState::Stable}},
 }};
 
 Feature FromAPIFeature(wgpu::FeatureName feature) {
@@ -139,6 +144,8 @@ Feature FromAPIFeature(wgpu::FeatureName feature) {
             return Feature::RG11B10UfloatRenderable;
         case wgpu::FeatureName::BGRA8UnormStorage:
             return Feature::BGRA8UnormStorage;
+        case wgpu::FeatureName::ImplicitDeviceSynchronization:
+            return Feature::ImplicitDeviceSynchronization;
     }
     return Feature::InvalidEnum;
 }
@@ -177,6 +184,8 @@ wgpu::FeatureName ToAPIFeature(Feature feature) {
             return wgpu::FeatureName::RG11B10UfloatRenderable;
         case Feature::BGRA8UnormStorage:
             return wgpu::FeatureName::BGRA8UnormStorage;
+        case Feature::ImplicitDeviceSynchronization:
+            return wgpu::FeatureName::ImplicitDeviceSynchronization;
 
         case Feature::EnumCount:
             break;
diff --git a/src/dawn/native/Features.h b/src/dawn/native/Features.h
index 064d55d864..44d1e7ff23 100644
--- a/src/dawn/native/Features.h
+++ b/src/dawn/native/Features.h
@@ -45,6 +45,7 @@ enum class Feature {
     DawnInternalUsages,
     MultiPlanarFormats,
     DawnNative,
+    ImplicitDeviceSynchronization,
 
     EnumCount,
     InvalidEnum = EnumCount,
diff --git a/src/dawn/native/ObjectBase.cpp b/src/dawn/native/ObjectBase.cpp
index 6a2f151a16..be9331c97a 100644
--- a/src/dawn/native/ObjectBase.cpp
+++ b/src/dawn/native/ObjectBase.cpp
@@ -96,6 +96,11 @@ void ApiObjectBase::DeleteThis() {
     RefCounted::DeleteThis();
 }
 
+void ApiObjectBase::LockAndDeleteThis() {
+    auto deviceLock(GetDevice()->GetScopedLockSafeForDelete());
+    DeleteThis();
+}
+
 ApiObjectList* ApiObjectBase::GetObjectTrackingList() {
     ASSERT(GetDevice() != nullptr);
     return GetDevice()->GetObjectTrackingList(GetType());
diff --git a/src/dawn/native/ObjectBase.h b/src/dawn/native/ObjectBase.h
index c1268bb0c1..3bb77d20ee 100644
--- a/src/dawn/native/ObjectBase.h
+++ b/src/dawn/native/ObjectBase.h
@@ -107,6 +107,7 @@ class ApiObjectBase : public ObjectBase, public LinkNode<ApiObjectBase> {
     // and they should ensure that their overriding versions call this underlying version
     // somewhere.
     void DeleteThis() override;
+    void LockAndDeleteThis() override;
 
     // Returns the list where this object may be tracked for future destruction. This can be
     // overrided to create hierarchical object tracking ownership:
diff --git a/src/dawn/tests/BUILD.gn b/src/dawn/tests/BUILD.gn
index d0b70f5928..fff8168c6e 100644
--- a/src/dawn/tests/BUILD.gn
+++ b/src/dawn/tests/BUILD.gn
@@ -293,6 +293,7 @@ dawn_test("dawn_unittests") {
     "unittests/LimitsTests.cpp",
     "unittests/LinkedListTests.cpp",
     "unittests/MathTests.cpp",
+    "unittests/MutexTests.cpp",
     "unittests/NumericTests.cpp",
     "unittests/ObjectBaseTests.cpp",
     "unittests/PerStageTests.cpp",
diff --git a/src/dawn/tests/unittests/MutexTests.cpp b/src/dawn/tests/unittests/MutexTests.cpp
new file mode 100644
index 0000000000..6a4398b966
--- /dev/null
+++ b/src/dawn/tests/unittests/MutexTests.cpp
@@ -0,0 +1,97 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dawn/common/Mutex.h"
+#include "gtest/gtest.h"
+
+namespace {
+#if defined(DAWN_ENABLE_ASSERTS)
+constexpr bool kAssertEnabled = true;
+#else
+constexpr bool kAssertEnabled = false;
+#endif
+}  // namespace
+
+class MutexTest : public ::testing::Test {
+  protected:
+    void SetUp() override {
+        // IsLockedByCurrentThread() requires DAWN_ENABLE_ASSERTS flag enabled.
+        if (!kAssertEnabled) {
+            GTEST_SKIP() << "DAWN_ENABLE_ASSERTS is not enabled";
+        }
+    }
+
+    dawn::Mutex mMutex;
+};
+
+// Simple Lock() then Unlock() test.
+TEST_F(MutexTest, SimpleLockUnlock) {
+    mMutex.Lock();
+    EXPECT_TRUE(mMutex.IsLockedByCurrentThread());
+    mMutex.Unlock();
+    EXPECT_FALSE(mMutex.IsLockedByCurrentThread());
+}
+
+// Test AutoLock automatically locks the mutex and unlocks it when out of scope.
+TEST_F(MutexTest, AutoLock) {
+    {
+        dawn::Mutex::AutoLock autoLock(&mMutex);
+        EXPECT_TRUE(mMutex.IsLockedByCurrentThread());
+    }
+    EXPECT_FALSE(mMutex.IsLockedByCurrentThread());
+}
+
+// Test AutoLockAndHoldRef will keep the mutex alive
+TEST_F(MutexTest, AutoLockAndHoldRef) {
+    auto* mutex = new dawn::Mutex();
+    EXPECT_EQ(mutex->GetRefCountForTesting(), 1u);
+    {
+        dawn::Mutex::AutoLockAndHoldRef autoLock(mutex);
+        EXPECT_TRUE(mutex->IsLockedByCurrentThread());
+        EXPECT_EQ(mutex->GetRefCountForTesting(), 2u);
+
+        mutex->Release();
+        EXPECT_EQ(mutex->GetRefCountForTesting(), 1u);
+    }
+}
+
+using MutexDeathTest = MutexTest;
+
+// Test that Unlock() call on unlocked mutex will cause assertion failure.
+TEST_F(MutexDeathTest, UnlockWhenNotLocked) {
+    ASSERT_DEATH({ mMutex.Unlock(); }, "");
+}
+
+// Double Lock() calls should be cause assertion failure
+TEST_F(MutexDeathTest, DoubleLockCalls) {
+    mMutex.Lock();
+    EXPECT_TRUE(mMutex.IsLockedByCurrentThread());
+    ASSERT_DEATH({ mMutex.Lock(); }, "");
+    mMutex.Unlock();
+}
+
+// Lock() then use AutoLock should cause assertion failure.
+TEST_F(MutexDeathTest, LockThenAutoLock) {
+    mMutex.Lock();
+    EXPECT_TRUE(mMutex.IsLockedByCurrentThread());
+    ASSERT_DEATH({ dawn::Mutex::AutoLock autoLock(&mMutex); }, "");
+    mMutex.Unlock();
+}
+
+// Use AutoLock then call Lock() should cause assertion failure.
+TEST_F(MutexDeathTest, AutoLockThenLock) {
+    dawn::Mutex::AutoLock autoLock(&mMutex);
+    EXPECT_TRUE(mMutex.IsLockedByCurrentThread());
+    ASSERT_DEATH({ mMutex.Lock(); }, "");
+}
diff --git a/src/dawn/wire/SupportedFeatures.cpp b/src/dawn/wire/SupportedFeatures.cpp
index d3b0849a71..c4d02c5f14 100644
--- a/src/dawn/wire/SupportedFeatures.cpp
+++ b/src/dawn/wire/SupportedFeatures.cpp
@@ -24,6 +24,7 @@ bool IsFeatureSupported(WGPUFeatureName feature) {
         case WGPUFeatureName_Force32:
         case WGPUFeatureName_DawnNative:
         case WGPUFeatureName_DawnShaderFloat16:  // Deprecated
+        case WGPUFeatureName_ImplicitDeviceSynchronization:
             return false;
         case WGPUFeatureName_Depth32FloatStencil8:
         case WGPUFeatureName_TimestampQuery: