diff --git a/dawn.json b/dawn.json index 268b83576c..6b4804f3c0 100644 --- a/dawn.json +++ b/dawn.json @@ -77,6 +77,7 @@ }, "adapter": { "category": "object", + "no autolock": true, "methods": [ { "name": "get instance", @@ -579,6 +580,7 @@ }, "command encoder": { "category": "object", + "no autolock": true, "methods": [ { "name": "finish", @@ -792,6 +794,7 @@ }, "compute pass encoder": { "category": "object", + "no autolock": true, "methods": [ { "name": "insert debug marker", @@ -1203,10 +1206,12 @@ }, { "name": "tick", + "no autolock": true, "tags": ["dawn"] }, { "name": "set uncaptured error callback", + "no autolock": true, "args": [ {"name": "callback", "type": "error callback"}, {"name": "userdata", "type": "void", "annotation": "*"} @@ -1214,6 +1219,7 @@ }, { "name": "set logging callback", + "no autolock": true, "tags": ["dawn"], "args": [ {"name": "callback", "type": "logging callback"}, @@ -1222,6 +1228,7 @@ }, { "name": "set device lost callback", + "no autolock": true, "args": [ {"name": "callback", "type": "device lost callback"}, {"name": "userdata", "type": "void", "annotation": "*"} @@ -1469,7 +1476,8 @@ {"value": 1003, "name": "dawn multi planar formats", "tags": ["dawn"]}, {"value": 1004, "name": "dawn native", "tags": ["dawn", "native"]}, {"value": 1005, "name": "chromium experimental dp4a", "tags": ["dawn"]}, - {"value": 1006, "name": "timestamp query inside passes", "tags": ["dawn"]} + {"value": 1006, "name": "timestamp query inside passes", "tags": ["dawn"]}, + {"value": 1007, "name": "implicit device synchronization", "tags": ["dawn", "native"]} ] }, "filter mode": { @@ -1527,6 +1535,7 @@ }, "instance": { "category": "object", + "no autolock": true, "methods": [ { "name": "create surface", @@ -1856,6 +1865,7 @@ "render bundle encoder": { "category": "object", + "no autolock": true, "methods": [ { "name": "set pipeline", @@ -2027,6 +2037,7 @@ }, "render pass encoder": { "category": "object", + "no autolock": true, "methods": [ { "name": "set pipeline", @@ -2482,6 +2493,7 @@ }, "surface": { "category": "object", + "no autolock": true, "methods": [ { "name": "get preferred format", diff --git a/docs/dawn/codegen.md b/docs/dawn/codegen.md index 359fddea7e..1b0868dd45 100644 --- a/docs/dawn/codegen.md +++ b/docs/dawn/codegen.md @@ -76,6 +76,8 @@ A **record** is a list of **record members**, each of which is a dictionary with - `"name"` a string - `"return_type"` (default to no return type) a string that's the name of the return type. - `"arguments"` a **record**, so an array of **record members** + - `"no autolock"`: a boolean flag (default is false) indicates that the method's generated code won't automatically do thread synchronization. This flag can only be true on device or device child objects currently. + - `"no autolock"`: a boolean flag (default is false) to indicate that the object's generated code won't automatically do thread synchronization. This will override individual method's `"no autolock"` flag. This flag can only be true on device or device child objects currently. **`"constant"`** - `"type"`: a string, the name of the base data type diff --git a/generator/dawn_json_generator.py b/generator/dawn_json_generator.py index 705948cd49..5b2d5f9450 100644 --- a/generator/dawn_json_generator.py +++ b/generator/dawn_json_generator.py @@ -189,8 +189,8 @@ class RecordMember: self.handle_type = handle_type -Method = namedtuple('Method', - ['name', 'return_type', 'arguments', 'json_data']) +Method = namedtuple( + 'Method', ['name', 'return_type', 'arguments', 'autolock', 'json_data']) class ObjectType(Type): @@ -337,11 +337,16 @@ def linked_record_members(json_data, types): def link_object(obj, types): + # Disable method's autolock if obj's "no autolock" = True + obj_scoped_autolock_enabled = not obj.json_data.get('no autolock', False) + def make_method(json_data): arguments = linked_record_members(json_data.get('args', []), types) + autolock_enabled = obj_scoped_autolock_enabled and not json_data.get( + 'no autolock', False) return Method(Name(json_data['name']), - types[json_data.get('returns', - 'void')], arguments, json_data) + types[json_data.get('returns', 'void')], arguments, + autolock_enabled, json_data) obj.methods = [make_method(m) for m in obj.json_data.get('methods', [])] obj.methods.sort(key=lambda method: method.name.canonical_case()) @@ -769,9 +774,9 @@ def as_formatType(typ): def c_methods(params, typ): return typ.methods + [ x for x in [ - Method(Name('reference'), params['types']['void'], [], + Method(Name('reference'), params['types']['void'], [], False, {'tags': ['dawn', 'emscripten']}), - Method(Name('release'), params['types']['void'], [], + Method(Name('release'), params['types']['void'], [], False, {'tags': ['dawn', 'emscripten']}), ] if item_is_enabled(params['enabled_tags'], x.json_data) and not item_is_disabled(params['disabled_tags'], x.json_data) diff --git a/generator/templates/dawn/native/ProcTable.cpp b/generator/templates/dawn/native/ProcTable.cpp index f9980ffc61..7b1922b1f5 100644 --- a/generator/templates/dawn/native/ProcTable.cpp +++ b/generator/templates/dawn/native/ProcTable.cpp @@ -56,6 +56,17 @@ namespace {{native_namespace}} { {% endif %} {%- endfor-%} + {% if method.autolock %} + {% if type.name.get() != "device" %} + auto device = self->GetDevice(); + {% else %} + auto device = self; + {% endif %} + auto deviceLock(device->GetScopedLock()); + {% else %} + // This method is specified to not use AutoLock in json script. + {% endif %} + {% if method.return_type.name.canonical_case() != "void" %} auto result = {%- endif %} diff --git a/src/dawn/common/BUILD.gn b/src/dawn/common/BUILD.gn index df20a6c498..808e8b7f4b 100644 --- a/src/dawn/common/BUILD.gn +++ b/src/dawn/common/BUILD.gn @@ -246,6 +246,8 @@ if (is_win || is_linux || is_chromeos || is_mac || is_fuchsia || is_android) { "Log.h", "Math.cpp", "Math.h", + "Mutex.cpp", + "Mutex.h", "NSRef.h", "NonCopyable.h", "Numeric.h", diff --git a/src/dawn/common/CMakeLists.txt b/src/dawn/common/CMakeLists.txt index 45de3dc0cb..c78d6b0c2e 100644 --- a/src/dawn/common/CMakeLists.txt +++ b/src/dawn/common/CMakeLists.txt @@ -52,6 +52,8 @@ target_sources(dawn_common PRIVATE "Log.h" "Math.cpp" "Math.h" + "Mutex.cpp" + "Mutex.h" "NSRef.h" "NonCopyable.h" "Numeric.h" diff --git a/src/dawn/common/Mutex.cpp b/src/dawn/common/Mutex.cpp new file mode 100644 index 0000000000..b8b891f93d --- /dev/null +++ b/src/dawn/common/Mutex.cpp @@ -0,0 +1,49 @@ +// Copyright 2023 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dawn/common/Mutex.h" + +namespace dawn { +Mutex::~Mutex() = default; + +void Mutex::Lock() { +#if defined(DAWN_ENABLE_ASSERTS) + auto currentThread = std::this_thread::get_id(); + ASSERT(mOwner.load(std::memory_order_acquire) != currentThread); +#endif // DAWN_ENABLE_ASSERTS + + mNativeMutex.lock(); + +#if defined(DAWN_ENABLE_ASSERTS) + mOwner.store(currentThread, std::memory_order_release); +#endif // DAWN_ENABLE_ASSERTS +} +void Mutex::Unlock() { +#if defined(DAWN_ENABLE_ASSERTS) + ASSERT(IsLockedByCurrentThread()); + mOwner.store(std::thread::id(), std::memory_order_release); +#endif // DAWN_ENABLE_ASSERTS + + mNativeMutex.unlock(); +} + +bool Mutex::IsLockedByCurrentThread() { +#if defined(DAWN_ENABLE_ASSERTS) + return mOwner.load(std::memory_order_acquire) == std::this_thread::get_id(); +#else + // This is not supported. + abort(); +#endif +} +} // namespace dawn diff --git a/src/dawn/common/Mutex.h b/src/dawn/common/Mutex.h new file mode 100644 index 0000000000..c5a3a8cbf7 --- /dev/null +++ b/src/dawn/common/Mutex.h @@ -0,0 +1,72 @@ +// Copyright 2023 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_DAWN_COMMON_MUTEX_H_ +#define SRC_DAWN_COMMON_MUTEX_H_ + +#include +#include +#include +#include + +#include "dawn/common/Assert.h" +#include "dawn/common/NonCopyable.h" +#include "dawn/common/RefCounted.h" + +namespace dawn { +class Mutex : public RefCounted, NonCopyable { + public: + template + struct AutoLockBase : NonMovable { + AutoLockBase() = delete; + explicit AutoLockBase(MutexRef mutex) : mMutex(std::move(mutex)) { + if (mMutex != nullptr) { + mMutex->Lock(); + } + } + ~AutoLockBase() { + if (mMutex != nullptr) { + mMutex->Unlock(); + } + } + + private: + MutexRef mMutex; + }; + + // This scoped lock won't keep the mutex alive. + using AutoLock = AutoLockBase; + // This scoped lock will keep the mutex alive until out of scope. + using AutoLockAndHoldRef = AutoLockBase>; + + ~Mutex() override; + + void Lock(); + void Unlock(); + + // This method is only enabled when DAWN_ENABLE_ASSERTS is turned on. + // Thus it should only be wrapped inside ASSERT() macro. + // i.e. ASSERT(mutex.IsLockedByCurrentThread()) + bool IsLockedByCurrentThread(); + + private: +#if defined(DAWN_ENABLE_ASSERTS) + std::atomic mOwner; +#endif // DAWN_ENABLE_ASSERTS + std::mutex mNativeMutex; +}; + +} // namespace dawn + +#endif diff --git a/src/dawn/common/RefCounted.cpp b/src/dawn/common/RefCounted.cpp index 14bd0b1df7..b5d244134a 100644 --- a/src/dawn/common/RefCounted.cpp +++ b/src/dawn/common/RefCounted.cpp @@ -15,6 +15,11 @@ #include "dawn/common/RefCounted.h" #include +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#include +#endif +#endif #include "dawn/common/Assert.h" @@ -68,6 +73,14 @@ bool RefCount::Decrement() { // Note that on ARM64 this will generate a `dmb ish` instruction which is a global // memory barrier, when an acquire load on mRefCount (using the `ldar` instruction) // should be enough and could end up being faster. + + // https://github.com/google/sanitizers/issues/1415 There is false positive bug in TSAN + // when using standalone fence. +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) + __tsan_acquire(&mRefCount); +#endif +#endif std::atomic_thread_fence(std::memory_order_acquire); return true; } @@ -95,6 +108,16 @@ void RefCounted::Release() { } } +void RefCounted::ReleaseAndLockBeforeDestroy() { + if (mRefCount.Decrement()) { + LockAndDeleteThis(); + } +} + void RefCounted::DeleteThis() { delete this; } + +void RefCounted::LockAndDeleteThis() { + DeleteThis(); +} diff --git a/src/dawn/common/RefCounted.h b/src/dawn/common/RefCounted.h index b5d74f70ea..839b47dc16 100644 --- a/src/dawn/common/RefCounted.h +++ b/src/dawn/common/RefCounted.h @@ -46,18 +46,25 @@ class RefCounted { uint64_t GetRefCountPayload() const; void Reference(); + // Release() is called by internal code, so it's assumed that there is already a thread + // synchronization in place for destruction. void Release(); void APIReference() { Reference(); } - void APIRelease() { Release(); } + // APIRelease() can be called without any synchronization guarantees so we need to use a Release + // method that will call LockAndDeleteThis() on destruction. + void APIRelease() { ReleaseAndLockBeforeDestroy(); } protected: virtual ~RefCounted(); - // A Derived class may override this if they require a custom deleter. - virtual void DeleteThis(); + void ReleaseAndLockBeforeDestroy(); + + // A Derived class may override these if they require a custom deleter. + virtual void DeleteThis(); + // This calls DeleteThis() by default. + virtual void LockAndDeleteThis(); - private: RefCount mRefCount; }; diff --git a/src/dawn/native/Adapter.cpp b/src/dawn/native/Adapter.cpp index 0a6d93eac1..a83d836a6b 100644 --- a/src/dawn/native/Adapter.cpp +++ b/src/dawn/native/Adapter.cpp @@ -42,6 +42,7 @@ MaybeError AdapterBase::Initialize() { EnableFeature(Feature::DawnNative); EnableFeature(Feature::DawnInternalUsages); + EnableFeature(Feature::ImplicitDeviceSynchronization); InitializeSupportedFeaturesImpl(); DAWN_TRY_CONTEXT( diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp index cc4c30e725..1a72c90de0 100644 --- a/src/dawn/native/Buffer.cpp +++ b/src/dawn/native/Buffer.cpp @@ -43,9 +43,14 @@ struct MapRequestTask : TrackTaskCallback { private: void FinishImpl() override { - ASSERT(mSerial != kMaxExecutionSerial); - TRACE_EVENT1(mPlatform, General, "Buffer::TaskInFlight::Finished", "serial", - uint64_t(mSerial)); + { + // This is called from a callback, and no lock will be held by default. Hence, we need + // to lock the mutex now because mSerial might be changed by another thread. + auto deviceLock(buffer->GetDevice()->GetScopedLock()); + ASSERT(mSerial != kMaxExecutionSerial); + TRACE_EVENT1(mPlatform, General, "Buffer::TaskInFlight::Finished", "serial", + uint64_t(mSerial)); + } buffer->CallbackOnMapRequestCompleted(id, WGPUBufferMapAsyncStatus_Success); } void HandleDeviceLossImpl() override { @@ -597,9 +602,14 @@ MaybeError BufferBase::ValidateUnmap() const { void BufferBase::CallbackOnMapRequestCompleted(MapRequestID mapID, WGPUBufferMapAsyncStatus status) { - if (mapID == mLastMapID && status == WGPUBufferMapAsyncStatus_Success && - mState == BufferState::PendingMap) { - mState = BufferState::Mapped; + { + // This is called from a callback, and no lock will be held by default. Hence, we need to + // lock the mutex now because this will modify the buffer's states. + auto deviceLock(GetDevice()->GetScopedLock()); + if (mapID == mLastMapID && status == WGPUBufferMapAsyncStatus_Success && + mState == BufferState::PendingMap) { + mState = BufferState::Mapped; + } } auto cb = PrepareMappingCallback(mapID, status); diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp index a696c41ee2..0279bdc7af 100644 --- a/src/dawn/native/CommandEncoder.cpp +++ b/src/dawn/native/CommandEncoder.cpp @@ -1047,14 +1047,21 @@ ResultOrError> CommandEncoder::ApplyRenderPassWorkarounds( descriptor.dimension = wgpu::TextureDimension::e2D; descriptor.mipLevelCount = 1; + // We are creating new resources. Need to lock the Device. + // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at + // Command Submit time, so the locking would be removed from here at that point. Ref temporaryResolveTexture; - DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor)); - - TextureViewDescriptor viewDescriptor = {}; Ref temporaryResolveView; - DAWN_TRY_ASSIGN( - temporaryResolveView, - device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor)); + { + auto deviceLock(GetDevice()->GetScopedLock()); + + DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor)); + + TextureViewDescriptor viewDescriptor = {}; + DAWN_TRY_ASSIGN( + temporaryResolveView, + device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor)); + } // Save the temporary and given render targets together for copying after // the render pass ends. @@ -1200,6 +1207,11 @@ void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source, if (dst.aspect == Aspect::Depth && GetDevice()->IsToggleEnabled(Toggle::UseBlitForBufferToDepthTextureCopy)) { + // The below function might create new resources. Need to lock the Device. + // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at + // Command Submit time, so the locking would be removed from here at that point. + auto deviceLock(GetDevice()->GetScopedLock()); + DAWN_TRY_CONTEXT( BlitBufferToDepth(GetDevice(), this, source->buffer, srcLayout, dst, *copySize), "copying from %s to depth aspect of %s using blit workaround.", source->buffer, @@ -1207,6 +1219,11 @@ void CommandEncoder::APICopyBufferToTexture(const ImageCopyBuffer* source, return {}; } else if (dst.aspect == Aspect::Stencil && GetDevice()->IsToggleEnabled(Toggle::UseBlitForBufferToStencilTextureCopy)) { + // The below function might create new resources. Need to lock the Device. + // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at + // Command Submit time, so the locking would be removed from here at that point. + auto deviceLock(GetDevice()->GetScopedLock()); + DAWN_TRY_CONTEXT(BlitBufferToStencil(GetDevice(), this, source->buffer, srcLayout, dst, *copySize), "copying from %s to stencil aspect of %s using blit workaround.", @@ -1375,6 +1392,11 @@ void CommandEncoder::APICopyTextureToTextureHelper(const ImageCopyTexture* sourc // Use a blit to copy the depth aspect. if (blitDepth) { + // This function might create new resources. Need to lock the Device. + // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at + // Command Submit time, so the locking would be removed from here at that point. + auto deviceLock(GetDevice()->GetScopedLock()); + DAWN_TRY_CONTEXT(BlitDepthToDepth(GetDevice(), this, src, dst, *copySize), "copying depth aspect from %s to %s using blit workaround.", source->texture, destination->texture); @@ -1586,6 +1608,9 @@ void CommandEncoder::APIWriteTimestamp(QuerySetBase* querySet, uint32_t queryInd } CommandBufferBase* CommandEncoder::APIFinish(const CommandBufferDescriptor* descriptor) { + // This function will create new object, need to lock the Device. + auto deviceLock(GetDevice()->GetScopedLock()); + Ref commandBuffer; if (GetDevice()->ConsumedError(Finish(descriptor), &commandBuffer)) { return CommandBufferBase::MakeError(GetDevice()); diff --git a/src/dawn/native/ComputePassEncoder.cpp b/src/dawn/native/ComputePassEncoder.cpp index 63505c7a44..7fbf82b1c3 100644 --- a/src/dawn/native/ComputePassEncoder.cpp +++ b/src/dawn/native/ComputePassEncoder.cpp @@ -238,6 +238,10 @@ ResultOrError, uint64_t>> ComputePassEncoder::TransformIndirectDispatchBuffer(Ref indirectBuffer, uint64_t indirectOffset) { DeviceBase* device = GetDevice(); + // This function creates new resources, need to lock the Device. + // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at Command Submit + // time, so the locking would be removed from here at that point. + auto deviceLock(GetDevice()->GetScopedLock()); const bool shouldDuplicateNumWorkgroups = device->ShouldDuplicateNumWorkgroupsForDispatchIndirect( diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp index 0079baa3c7..047131a9e5 100644 --- a/src/dawn/native/Device.cpp +++ b/src/dawn/native/Device.cpp @@ -92,6 +92,10 @@ struct DeviceBase::DeprecationWarnings { }; namespace { +bool IsMutexLockedByCurrentThreadIfNeeded(const Ref& mutex) { + return mutex == nullptr || mutex->IsLockedByCurrentThread(); +} + struct LoggingCallbackTask : CallbackTask { public: LoggingCallbackTask() = delete; @@ -277,6 +281,13 @@ MaybeError DeviceBase::Initialize(Ref defaultQueue) { CreateShaderModule(&descriptor)); } + if (HasFeature(Feature::ImplicitDeviceSynchronization)) { + mMutex = AcquireRef(new Mutex); + } else { + mMutex = nullptr; + } + + // mAdapter is not set for mock test devices. // TODO(crbug.com/dawn/1702): using a mock adapter could avoid the null checking. if (mAdapter != nullptr) { mAdapter->GetInstance()->AddDevice(this); @@ -286,30 +297,38 @@ MaybeError DeviceBase::Initialize(Ref defaultQueue) { } void DeviceBase::WillDropLastExternalRef() { - // DeviceBase uses RefCountedWithExternalCount to break refcycles. - // - // DeviceBase holds multiple Refs to various API objects (pipelines, buffers, etc.) which are - // used to implement various device-level facilities. These objects are cached on the device, - // so we want to keep them around instead of making transient allocations. However, many of - // the objects also hold a Ref back to their parent device. - // - // In order to break this cycle and prevent leaks, when the application drops the last external - // ref and WillDropLastExternalRef is called, the device clears out any member refs to API - // objects that hold back-refs to the device - thus breaking any reference cycles. - // - // Currently, this is done by calling Destroy on the device to cease all in-flight work and - // drop references to internal objects. We may want to lift this in the future, but it would - // make things more complex because there might be pending tasks which hold a ref back to the - // device - either directly or indirectly. We would need to ensure those tasks don't create new - // reference cycles, and we would need to continuously try draining the pending tasks to clear - // out all remaining refs. - Destroy(); + { + // This will be invoked by API side, so we need to lock. + // Note: we cannot hold the lock when flushing the callbacks so have to limit the scope of + // the lock. + auto deviceLock(GetScopedLock()); + + // DeviceBase uses RefCountedWithExternalCount to break refcycles. + // + // DeviceBase holds multiple Refs to various API objects (pipelines, buffers, etc.) which + // are used to implement various device-level facilities. These objects are cached on the + // device, so we want to keep them around instead of making transient allocations. However, + // many of the objects also hold a Ref back to their parent device. + // + // In order to break this cycle and prevent leaks, when the application drops the last + // external ref and WillDropLastExternalRef is called, the device clears out any member refs + // to API objects that hold back-refs to the device - thus breaking any reference cycles. + // + // Currently, this is done by calling Destroy on the device to cease all in-flight work and + // drop references to internal objects. We may want to lift this in the future, but it would + // make things more complex because there might be pending tasks which hold a ref back to + // the device - either directly or indirectly. We would need to ensure those tasks don't + // create new reference cycles, and we would need to continuously try draining the pending + // tasks to clear out all remaining refs. + Destroy(); + } // Flush last remaining callback tasks. do { - mCallbackTaskManager->Flush(); + FlushCallbackTaskQueue(); } while (!mCallbackTaskManager->IsEmpty()); + auto deviceLock(GetScopedLock()); // Drop te device's reference to the queue. Because the application dropped the last external // references, they can no longer get the queue from APIGetQueue(). mQueue = nullptr; @@ -570,7 +589,8 @@ void DeviceBase::APISetLoggingCallback(wgpu::LoggingCallback callback, void* use // resetting) the resources pointed by such pointer may be freed. Flush all deferred // callback tasks to guarantee we are never going to use the previous callback after // this call. - mCallbackTaskManager->Flush(); + FlushCallbackTaskQueue(); + auto deviceLock(GetScopedLock()); if (IsLost()) { return; } @@ -584,7 +604,8 @@ void DeviceBase::APISetUncapturedErrorCallback(wgpu::ErrorCallback callback, voi // resetting) the resources pointed by such pointer may be freed. Flush all deferred // callback tasks to guarantee we are never going to use the previous callback after // this call. - mCallbackTaskManager->Flush(); + FlushCallbackTaskQueue(); + auto deviceLock(GetScopedLock()); if (IsLost()) { return; } @@ -598,7 +619,8 @@ void DeviceBase::APISetDeviceLostCallback(wgpu::DeviceLostCallback callback, voi // resetting) the resources pointed by such pointer may be freed. Flush all deferred // callback tasks to guarantee we are never going to use the previous callback after // this call. - mCallbackTaskManager->Flush(); + FlushCallbackTaskQueue(); + auto deviceLock(GetScopedLock()); if (IsLost()) { return; } @@ -851,6 +873,7 @@ Ref DeviceBase::GetCachedRenderPipeline( Ref DeviceBase::AddOrGetCachedComputePipeline( Ref computePipeline) { + ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex)); auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get()); if (inserted) { computePipeline->SetIsCachedReference(); @@ -862,6 +885,7 @@ Ref DeviceBase::AddOrGetCachedComputePipeline( Ref DeviceBase::AddOrGetCachedRenderPipeline( Ref renderPipeline) { + ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex)); auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get()); if (inserted) { renderPipeline->SetIsCachedReference(); @@ -1267,15 +1291,23 @@ bool DeviceBase::APITick() { // Tick may trigger callbacks which drop a ref to the device itself. Hold a Ref to ourselves // to avoid deleting |this| in the middle of this function call. Ref self(this); - if (ConsumedError(Tick())) { - mCallbackTaskManager->Flush(); - return false; + bool tickError; + { + // Note: we cannot hold the lock when flushing the callbacks so have to limit the scope of + // the lock here. + auto deviceLock(GetScopedLock()); + tickError = ConsumedError(Tick()); } // We have to check callback tasks in every APITick because it is not related to any global // serials. - mCallbackTaskManager->Flush(); + FlushCallbackTaskQueue(); + if (tickError) { + return false; + } + + auto deviceLock(GetScopedLock()); // We don't throw an error when device is lost. This allows pending callbacks to be // executed even after the Device is lost/destroyed. if (IsLost()) { @@ -1806,6 +1838,26 @@ void DeviceBase::ForceSetToggleForTesting(Toggle toggle, bool isEnabled) { mToggles.ForceSet(toggle, isEnabled); } +void DeviceBase::FlushCallbackTaskQueue() { + // Callbacks might cause re-entrances. Mutex shouldn't be locked. So we expect there is no + // locked mutex before entering this method. + ASSERT(mMutex == nullptr || !mMutex->IsLockedByCurrentThread()); + + Ref callbackTaskManager; + + { + // This is a data race with the assignment to InstanceBase's callback queue manager in + // WillDropLastExternalRef(). Need to protect with a lock and keep the old + // mCallbackTaskManager alive. + // TODO(crbug.com/dawn/752): In future, all devices should use InstanceBase's callback queue + // manager from the start. So we won't need to care about data race at that point. + auto deviceLock(GetScopedLock()); + callbackTaskManager = mCallbackTaskManager; + } + + callbackTaskManager->Flush(); +} + const CombinedLimits& DeviceBase::GetLimits() const { return mLimits; } @@ -1836,7 +1888,15 @@ void DeviceBase::AddComputePipelineAsyncCallbackTask( // CreateComputePipelineAsyncTaskImpl::Run() when the front-end pipeline cache is // thread-safe. ASSERT(mPipeline != nullptr); - mPipeline = mPipeline->GetDevice()->AddOrGetCachedComputePipeline(mPipeline); + { + // This is called inside a callback, and no lock will be held by default so we have + // to lock now to protect the cache. + // Note: we don't lock inside AddOrGetCachedComputePipeline() to avoid deadlock + // because many places calling that method might already have the lock held. For + // example, APICreateComputePipeline() + auto deviceLock(mPipeline->GetDevice()->GetScopedLock()); + mPipeline = mPipeline->GetDevice()->AddOrGetCachedComputePipeline(mPipeline); + } CreateComputePipelineAsyncCallbackTask::FinishImpl(); } @@ -1861,6 +1921,12 @@ void DeviceBase::AddRenderPipelineAsyncCallbackTask(Ref pipe // CreateRenderPipelineAsyncTaskImpl::Run() when the front-end pipeline cache is // thread-safe. if (mPipeline.Get() != nullptr) { + // This is called inside a callback, and no lock will be held by default so we have + // to lock now to protect the cache. + // Note: we don't lock inside AddOrGetCachedRenderPipeline() to avoid deadlock + // because many places calling that method might already have the lock held. For + // example, APICreateRenderPipeline() + auto deviceLock(mPipeline->GetDevice()->GetScopedLock()); mPipeline = mPipeline->GetDevice()->AddOrGetCachedRenderPipeline(mPipeline); } @@ -1974,6 +2040,14 @@ MaybeError DeviceBase::CopyFromStagingToTexture(BufferBase* source, return {}; } +Mutex::AutoLockAndHoldRef DeviceBase::GetScopedLockSafeForDelete() { + return Mutex::AutoLockAndHoldRef(mMutex); +} + +Mutex::AutoLock DeviceBase::GetScopedLock() { + return Mutex::AutoLock(mMutex.Get()); +} + IgnoreLazyClearCountScope::IgnoreLazyClearCountScope(DeviceBase* device) : mDevice(device), mLazyClearCountForTesting(device->mLazyClearCountForTesting) {} diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h index 2de70894c7..b47b98c761 100644 --- a/src/dawn/native/Device.h +++ b/src/dawn/native/Device.h @@ -16,12 +16,12 @@ #define SRC_DAWN_NATIVE_DEVICE_H_ #include -#include #include #include #include #include +#include "dawn/common/Mutex.h" #include "dawn/native/CacheKey.h" #include "dawn/native/Commands.h" #include "dawn/native/ComputePipeline.h" @@ -424,6 +424,13 @@ class DeviceBase : public RefCountedWithExternalCount { // method makes them to be submitted as soon as possbile in next ticks. virtual void ForceEventualFlushOfCommands() = 0; + // It is guaranteed that the wrapped mutex will outlive the Device (if the Device is deleted + // before the AutoLockAndHoldRef). + [[nodiscard]] Mutex::AutoLockAndHoldRef GetScopedLockSafeForDelete(); + // This lock won't guarantee the wrapped mutex will be alive if the Device is deleted before the + // AutoLock. It would crash if such thing happens. + [[nodiscard]] Mutex::AutoLock GetScopedLock(); + // In the 'Normal' mode, currently recorded commands in the backend normally will be actually // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed // as late as possible, for example, until the client has explictly issued a submission. @@ -480,6 +487,7 @@ class DeviceBase : public RefCountedWithExternalCount { virtual void SetLabelImpl(); virtual MaybeError TickImpl() = 0; + void FlushCallbackTaskQueue(); ResultOrError> CreateEmptyBindGroupLayout(); @@ -595,6 +603,9 @@ class DeviceBase : public RefCountedWithExternalCount { std::unique_ptr mWorkerTaskPool; std::string mLabel; CacheKey mDeviceCacheKey; + + // This pointer is non-null if Feature::ImplicitDeviceSynchronization is turned on. + Ref mMutex = nullptr; }; ResultOrError> ValidateLayoutAndGetComputePipelineDescriptorWithDefaults( diff --git a/src/dawn/native/EncodingContext.cpp b/src/dawn/native/EncodingContext.cpp index 8628e282fe..0682a5a696 100644 --- a/src/dawn/native/EncodingContext.cpp +++ b/src/dawn/native/EncodingContext.cpp @@ -87,6 +87,9 @@ void EncodingContext::HandleError(std::unique_ptr error) { mError = std::move(error); } } else { + // EncodingContext is unprotected from multiple threads by default, but this code will + // modify Device's internal states so we need to lock the device now. + auto deviceLock(mDevice->GetScopedLock()); mDevice->HandleError(std::move(error)); } } diff --git a/src/dawn/native/Features.cpp b/src/dawn/native/Features.cpp index f1098bc060..3d6cdce4bd 100644 --- a/src/dawn/native/Features.cpp +++ b/src/dawn/native/Features.cpp @@ -97,6 +97,11 @@ static constexpr FeatureEnumAndInfoList kFeatureNameAndInfoList = {{ "https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/dawn/features/" "dawn_native.md", FeatureInfo::FeatureState::Stable}}, + {Feature::ImplicitDeviceSynchronization, + {"implicit-device-sync", + "Public API methods (except encoding) will have implicit device synchronization. So they " + "will be safe to be used on multiple threads.", + "https://bugs.chromium.org/p/dawn/issues/detail?id=1662", FeatureInfo::FeatureState::Stable}}, }}; Feature FromAPIFeature(wgpu::FeatureName feature) { @@ -139,6 +144,8 @@ Feature FromAPIFeature(wgpu::FeatureName feature) { return Feature::RG11B10UfloatRenderable; case wgpu::FeatureName::BGRA8UnormStorage: return Feature::BGRA8UnormStorage; + case wgpu::FeatureName::ImplicitDeviceSynchronization: + return Feature::ImplicitDeviceSynchronization; } return Feature::InvalidEnum; } @@ -177,6 +184,8 @@ wgpu::FeatureName ToAPIFeature(Feature feature) { return wgpu::FeatureName::RG11B10UfloatRenderable; case Feature::BGRA8UnormStorage: return wgpu::FeatureName::BGRA8UnormStorage; + case Feature::ImplicitDeviceSynchronization: + return wgpu::FeatureName::ImplicitDeviceSynchronization; case Feature::EnumCount: break; diff --git a/src/dawn/native/Features.h b/src/dawn/native/Features.h index 064d55d864..44d1e7ff23 100644 --- a/src/dawn/native/Features.h +++ b/src/dawn/native/Features.h @@ -45,6 +45,7 @@ enum class Feature { DawnInternalUsages, MultiPlanarFormats, DawnNative, + ImplicitDeviceSynchronization, EnumCount, InvalidEnum = EnumCount, diff --git a/src/dawn/native/ObjectBase.cpp b/src/dawn/native/ObjectBase.cpp index 6a2f151a16..be9331c97a 100644 --- a/src/dawn/native/ObjectBase.cpp +++ b/src/dawn/native/ObjectBase.cpp @@ -96,6 +96,11 @@ void ApiObjectBase::DeleteThis() { RefCounted::DeleteThis(); } +void ApiObjectBase::LockAndDeleteThis() { + auto deviceLock(GetDevice()->GetScopedLockSafeForDelete()); + DeleteThis(); +} + ApiObjectList* ApiObjectBase::GetObjectTrackingList() { ASSERT(GetDevice() != nullptr); return GetDevice()->GetObjectTrackingList(GetType()); diff --git a/src/dawn/native/ObjectBase.h b/src/dawn/native/ObjectBase.h index c1268bb0c1..3bb77d20ee 100644 --- a/src/dawn/native/ObjectBase.h +++ b/src/dawn/native/ObjectBase.h @@ -107,6 +107,7 @@ class ApiObjectBase : public ObjectBase, public LinkNode { // and they should ensure that their overriding versions call this underlying version // somewhere. void DeleteThis() override; + void LockAndDeleteThis() override; // Returns the list where this object may be tracked for future destruction. This can be // overrided to create hierarchical object tracking ownership: diff --git a/src/dawn/tests/BUILD.gn b/src/dawn/tests/BUILD.gn index d0b70f5928..fff8168c6e 100644 --- a/src/dawn/tests/BUILD.gn +++ b/src/dawn/tests/BUILD.gn @@ -293,6 +293,7 @@ dawn_test("dawn_unittests") { "unittests/LimitsTests.cpp", "unittests/LinkedListTests.cpp", "unittests/MathTests.cpp", + "unittests/MutexTests.cpp", "unittests/NumericTests.cpp", "unittests/ObjectBaseTests.cpp", "unittests/PerStageTests.cpp", diff --git a/src/dawn/tests/unittests/MutexTests.cpp b/src/dawn/tests/unittests/MutexTests.cpp new file mode 100644 index 0000000000..6a4398b966 --- /dev/null +++ b/src/dawn/tests/unittests/MutexTests.cpp @@ -0,0 +1,97 @@ +// Copyright 2023 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dawn/common/Mutex.h" +#include "gtest/gtest.h" + +namespace { +#if defined(DAWN_ENABLE_ASSERTS) +constexpr bool kAssertEnabled = true; +#else +constexpr bool kAssertEnabled = false; +#endif +} // namespace + +class MutexTest : public ::testing::Test { + protected: + void SetUp() override { + // IsLockedByCurrentThread() requires DAWN_ENABLE_ASSERTS flag enabled. + if (!kAssertEnabled) { + GTEST_SKIP() << "DAWN_ENABLE_ASSERTS is not enabled"; + } + } + + dawn::Mutex mMutex; +}; + +// Simple Lock() then Unlock() test. +TEST_F(MutexTest, SimpleLockUnlock) { + mMutex.Lock(); + EXPECT_TRUE(mMutex.IsLockedByCurrentThread()); + mMutex.Unlock(); + EXPECT_FALSE(mMutex.IsLockedByCurrentThread()); +} + +// Test AutoLock automatically locks the mutex and unlocks it when out of scope. +TEST_F(MutexTest, AutoLock) { + { + dawn::Mutex::AutoLock autoLock(&mMutex); + EXPECT_TRUE(mMutex.IsLockedByCurrentThread()); + } + EXPECT_FALSE(mMutex.IsLockedByCurrentThread()); +} + +// Test AutoLockAndHoldRef will keep the mutex alive +TEST_F(MutexTest, AutoLockAndHoldRef) { + auto* mutex = new dawn::Mutex(); + EXPECT_EQ(mutex->GetRefCountForTesting(), 1u); + { + dawn::Mutex::AutoLockAndHoldRef autoLock(mutex); + EXPECT_TRUE(mutex->IsLockedByCurrentThread()); + EXPECT_EQ(mutex->GetRefCountForTesting(), 2u); + + mutex->Release(); + EXPECT_EQ(mutex->GetRefCountForTesting(), 1u); + } +} + +using MutexDeathTest = MutexTest; + +// Test that Unlock() call on unlocked mutex will cause assertion failure. +TEST_F(MutexDeathTest, UnlockWhenNotLocked) { + ASSERT_DEATH({ mMutex.Unlock(); }, ""); +} + +// Double Lock() calls should be cause assertion failure +TEST_F(MutexDeathTest, DoubleLockCalls) { + mMutex.Lock(); + EXPECT_TRUE(mMutex.IsLockedByCurrentThread()); + ASSERT_DEATH({ mMutex.Lock(); }, ""); + mMutex.Unlock(); +} + +// Lock() then use AutoLock should cause assertion failure. +TEST_F(MutexDeathTest, LockThenAutoLock) { + mMutex.Lock(); + EXPECT_TRUE(mMutex.IsLockedByCurrentThread()); + ASSERT_DEATH({ dawn::Mutex::AutoLock autoLock(&mMutex); }, ""); + mMutex.Unlock(); +} + +// Use AutoLock then call Lock() should cause assertion failure. +TEST_F(MutexDeathTest, AutoLockThenLock) { + dawn::Mutex::AutoLock autoLock(&mMutex); + EXPECT_TRUE(mMutex.IsLockedByCurrentThread()); + ASSERT_DEATH({ mMutex.Lock(); }, ""); +} diff --git a/src/dawn/wire/SupportedFeatures.cpp b/src/dawn/wire/SupportedFeatures.cpp index d3b0849a71..c4d02c5f14 100644 --- a/src/dawn/wire/SupportedFeatures.cpp +++ b/src/dawn/wire/SupportedFeatures.cpp @@ -24,6 +24,7 @@ bool IsFeatureSupported(WGPUFeatureName feature) { case WGPUFeatureName_Force32: case WGPUFeatureName_DawnNative: case WGPUFeatureName_DawnShaderFloat16: // Deprecated + case WGPUFeatureName_ImplicitDeviceSynchronization: return false; case WGPUFeatureName_Depth32FloatStencil8: case WGPUFeatureName_TimestampQuery: