From 3e7a114a6e3e2d9594d8ede45fb404339dc4cbfd Mon Sep 17 00:00:00 2001 From: Le Hoang Quyen Date: Wed, 12 Apr 2023 21:07:31 +0000 Subject: [PATCH] Add missing lock to APIBeginRender/ComputePass. Add tests to verify multithreading behaviors of encoding render/compute passes. Bug: dawn:1662 Change-Id: I9bc6a0dd5d94b53b59e7e49a4611d4d55cc36e60 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/126980 Reviewed-by: Austin Eng Commit-Queue: Quyen Le Kokoro: Kokoro --- src/dawn/native/CommandEncoder.cpp | 13 +- src/dawn/native/Device.cpp | 12 +- src/dawn/native/Device.h | 6 + src/dawn/tests/BUILD.gn | 1 + src/dawn/tests/end2end/MultithreadTests.cpp | 180 ++++++++++++++++++++ 5 files changed, 204 insertions(+), 8 deletions(-) create mode 100644 src/dawn/tests/end2end/MultithreadTests.cpp diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp index 0279bdc7af..8850157538 100644 --- a/src/dawn/native/CommandEncoder.cpp +++ b/src/dawn/native/CommandEncoder.cpp @@ -771,11 +771,15 @@ void CommandEncoder::TrackQueryAvailability(QuerySetBase* querySet, uint32_t que // Implementation of the API's command recording methods ComputePassEncoder* CommandEncoder::APIBeginComputePass(const ComputePassDescriptor* descriptor) { + // This function will create new object, need to lock the Device. + auto deviceLock(GetDevice()->GetScopedLock()); + return BeginComputePass(descriptor).Detach(); } Ref CommandEncoder::BeginComputePass(const ComputePassDescriptor* descriptor) { DeviceBase* device = GetDevice(); + ASSERT(device->IsLockedByCurrentThreadIfNeeded()); bool success = mEncodingContext.TryEncode( this, @@ -830,11 +834,15 @@ Ref CommandEncoder::BeginComputePass(const ComputePassDescri } RenderPassEncoder* CommandEncoder::APIBeginRenderPass(const RenderPassDescriptor* descriptor) { + // This function will create new object, need to lock the Device. 
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginRenderPass(descriptor).Detach();
 }
 
 Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescriptor* descriptor) {
DeviceBase::AddOrGetCachedRenderPipeline( Ref renderPipeline) { - ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex)); + ASSERT(IsLockedByCurrentThreadIfNeeded()); auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get()); if (inserted) { renderPipeline->SetIsCachedReference(); @@ -2048,6 +2044,10 @@ Mutex::AutoLock DeviceBase::GetScopedLock() { return Mutex::AutoLock(mMutex.Get()); } +bool DeviceBase::IsLockedByCurrentThreadIfNeeded() const { + return mMutex == nullptr || mMutex->IsLockedByCurrentThread(); +} + IgnoreLazyClearCountScope::IgnoreLazyClearCountScope(DeviceBase* device) : mDevice(device), mLazyClearCountForTesting(device->mLazyClearCountForTesting) {} diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h index 3fb9c29294..c3ff510e38 100644 --- a/src/dawn/native/Device.h +++ b/src/dawn/native/Device.h @@ -431,6 +431,12 @@ class DeviceBase : public RefCountedWithExternalCount { // AutoLock. It would crash if such thing happens. [[nodiscard]] Mutex::AutoLock GetScopedLock(); + // This method returns true if Feature::ImplicitDeviceSynchronization is turned on and the + // device is locked by current thread. This method is only enabled when DAWN_ENABLE_ASSERTS is + // turned on. Thus it should only be wrapped inside ASSERT() macro. i.e. + // ASSERT(device.IsLockedByCurrentThread()) + bool IsLockedByCurrentThreadIfNeeded() const; + // In the 'Normal' mode, currently recorded commands in the backend normally will be actually // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed // as late as possible, for example, until the client has explictly issued a submission. 
diff --git a/src/dawn/tests/BUILD.gn b/src/dawn/tests/BUILD.gn index fff8168c6e..f0a29321c8 100644 --- a/src/dawn/tests/BUILD.gn +++ b/src/dawn/tests/BUILD.gn @@ -531,6 +531,7 @@ source_set("end2end_tests_sources") { "end2end/MemoryAllocationStressTests.cpp", "end2end/MultisampledRenderingTests.cpp", "end2end/MultisampledSamplingTests.cpp", + "end2end/MultithreadTests.cpp", "end2end/NonzeroBufferCreationTests.cpp", "end2end/NonzeroTextureCreationTests.cpp", "end2end/ObjectCachingTests.cpp", diff --git a/src/dawn/tests/end2end/MultithreadTests.cpp b/src/dawn/tests/end2end/MultithreadTests.cpp new file mode 100644 index 0000000000..fdd0fae178 --- /dev/null +++ b/src/dawn/tests/end2end/MultithreadTests.cpp @@ -0,0 +1,180 @@ +// Copyright 2023 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "dawn/common/Constants.h" +#include "dawn/common/Math.h" +#include "dawn/tests/DawnTest.h" +#include "dawn/utils/TestUtils.h" +#include "dawn/utils/TextureUtils.h" +#include "dawn/utils/WGPUHelpers.h" + +class MultithreadTests : public DawnTest { + protected: + std::vector GetRequiredFeatures() override { + std::vector features; + // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet. 
+ if (!UsesWire()) { + features.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization); + } + return features; + } + + void SetUp() override { + DawnTest::SetUp(); + // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet. + DAWN_TEST_UNSUPPORTED_IF(UsesWire()); + + // TODO(crbug.com/dawn/1679): OpenGL/D3D11 backend doesn't support thread safe API yet. + DAWN_TEST_UNSUPPORTED_IF(IsOpenGL() || IsOpenGLES() || IsD3D11()); + } + + wgpu::Buffer CreateBuffer(uint32_t size, wgpu::BufferUsage usage) { + wgpu::BufferDescriptor descriptor; + descriptor.size = size; + descriptor.usage = usage; + return device.CreateBuffer(&descriptor); + } + + wgpu::Texture CreateTexture(uint32_t width, + uint32_t height, + wgpu::TextureFormat format, + wgpu::TextureUsage usage, + uint32_t mipLevelCount = 1, + uint32_t sampleCount = 1) { + wgpu::TextureDescriptor texDescriptor = {}; + texDescriptor.size = {width, height, 1}; + texDescriptor.format = format; + texDescriptor.usage = usage; + texDescriptor.mipLevelCount = mipLevelCount; + texDescriptor.sampleCount = sampleCount; + return device.CreateTexture(&texDescriptor); + } + + void RunInParallel(uint32_t numThreads, const std::function& workerFunc) { + std::vector> threads(numThreads); + + for (uint32_t i = 0; i < threads.size(); ++i) { + threads[i] = std::make_unique([i, workerFunc] { workerFunc(i); }); + } + + for (auto& thread : threads) { + thread->join(); + } + } +}; + +class MultithreadEncodingTests : public MultithreadTests {}; + +// Test that encoding render passes in parallel should work +TEST_P(MultithreadEncodingTests, RenderPassEncodersInParallel) { + constexpr uint32_t kRTSize = 16; + constexpr uint32_t kNumThreads = 10; + + wgpu::Texture msaaRenderTarget = + CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc, + /*mipLevelCount=*/1, /*sampleCount=*/4); + wgpu::TextureView msaaRenderTargetView = 
msaaRenderTarget.CreateView(); + + wgpu::Texture resolveTarget = + CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc); + wgpu::TextureView resolveTargetView = resolveTarget.CreateView(); + + std::vector commandBuffers(kNumThreads); + + RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) { + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + + // Clear the renderTarget to red. + utils::ComboRenderPassDescriptor renderPass({msaaRenderTargetView}); + renderPass.cColorAttachments[0].resolveTarget = resolveTargetView; + renderPass.cColorAttachments[0].clearValue = {1.0f, 0.0f, 0.0f, 1.0f}; + + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass); + pass.End(); + + commandBuffers[index] = encoder.Finish(); + }); + + // Verify that the command buffers executed correctly. + for (auto& commandBuffer : commandBuffers) { + queue.Submit(1, &commandBuffer); + + EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {0, 0}); + EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {kRTSize - 1, kRTSize - 1}); + } +} + +// Test that encoding compute passes in parallel should work +TEST_P(MultithreadEncodingTests, ComputePassEncodersInParallel) { + constexpr uint32_t kNumThreads = 10; + constexpr uint32_t kExpected = 0xFFFFFFFFu; + + wgpu::ShaderModule module = utils::CreateShaderModule(device, R"( + @group(0) @binding(0) var output : u32; + + @compute @workgroup_size(1, 1, 1) + fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u) { + output = 0xFFFFFFFFu; + })"); + wgpu::ComputePipelineDescriptor csDesc; + csDesc.compute.module = module; + csDesc.compute.entryPoint = "main"; + auto pipeline = device.CreateComputePipeline(&csDesc); + + wgpu::Buffer dstBuffer = + CreateBuffer(sizeof(uint32_t), wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | + wgpu::BufferUsage::CopyDst); + wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, 
pipeline.GetBindGroupLayout(0), + { + {0, dstBuffer, 0, sizeof(uint32_t)}, + }); + + std::vector commandBuffers(kNumThreads); + + RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) { + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + pass.SetPipeline(pipeline); + pass.SetBindGroup(0, bindGroup); + pass.DispatchWorkgroups(1, 1, 1); + pass.End(); + + commandBuffers[index] = encoder.Finish(); + }); + + // Verify that the command buffers executed correctly. + for (auto& commandBuffer : commandBuffers) { + constexpr uint32_t kSentinelData = 0; + queue.WriteBuffer(dstBuffer, 0, &kSentinelData, sizeof(kSentinelData)); + queue.Submit(1, &commandBuffer); + + EXPECT_BUFFER_U32_EQ(kExpected, dstBuffer, 0); + } +} + +DAWN_INSTANTIATE_TEST(MultithreadEncodingTests, + D3D11Backend(), + D3D12Backend(), + MetalBackend(), + OpenGLBackend(), + OpenGLESBackend(), + VulkanBackend());