Add missing lock to APIBeginRender/ComputePass.
Add tests to verify multithreading behaviors of encoding render/compute passes.

Bug: dawn:1662
Change-Id: I9bc6a0dd5d94b53b59e7e49a4611d4d55cc36e60
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/126980
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
parent 3dba94c1d4
commit 3e7a114a6e
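For context when reading the diff below: API-facing methods that create new objects now acquire the device-wide lock through an RAII scope, while the internal helpers they forward to only assert that the lock is already held. The following is a minimal standalone sketch of that pattern; FakeDevice and FakeCommandEncoder are illustrative stand-ins, not Dawn's actual types, and a plain std::mutex stands in for Dawn's Mutex (which, per the Device.h comment further down, must not be re-acquired on the same thread).

#include <mutex>

// Illustrative stand-in for DeviceBase: owns a mutex and hands out RAII lock scopes.
struct FakeDevice {
    std::mutex mutex;

    std::unique_lock<std::mutex> GetScopedLock() {
        return std::unique_lock<std::mutex>(mutex);
    }
};

// Illustrative stand-in for CommandEncoder.
struct FakeCommandEncoder {
    FakeDevice* device = nullptr;

    // API entry point: it creates a new object, so it takes the device lock.
    void APIBeginComputePass() {
        auto deviceLock = device->GetScopedLock();
        BeginComputePass();
    }

    // Internal helper: relies on the caller already holding the lock and must not
    // try to lock again. In Dawn this precondition is checked with
    // ASSERT(device->IsLockedByCurrentThreadIfNeeded()).
    void BeginComputePass() {
        // ... record the compute pass ...
    }
};

Only the API boundary locks; everything reachable from it assumes the lock is held, which is what the ASSERTs added in this change enforce.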
@@ -771,11 +771,15 @@ void CommandEncoder::TrackQueryAvailability(QuerySetBase* querySet, uint32_t que

 // Implementation of the API's command recording methods

 ComputePassEncoder* CommandEncoder::APIBeginComputePass(const ComputePassDescriptor* descriptor) {
+    // This function will create a new object, so it needs to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginComputePass(descriptor).Detach();
 }

 Ref<ComputePassEncoder> CommandEncoder::BeginComputePass(const ComputePassDescriptor* descriptor) {
     DeviceBase* device = GetDevice();
+    ASSERT(device->IsLockedByCurrentThreadIfNeeded());

     bool success = mEncodingContext.TryEncode(
         this,
@@ -830,11 +834,15 @@ Ref<ComputePassEncoder> CommandEncoder::BeginComputePass(const ComputePassDescri
 }

 RenderPassEncoder* CommandEncoder::APIBeginRenderPass(const RenderPassDescriptor* descriptor) {
+    // This function will create a new object, so it needs to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginRenderPass(descriptor).Detach();
 }

 Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescriptor* descriptor) {
     DeviceBase* device = GetDevice();
+    ASSERT(device->IsLockedByCurrentThreadIfNeeded());

     RenderPassResourceUsageTracker usageTracker;
@@ -1047,13 +1055,14 @@ ResultOrError<std::function<void()>> CommandEncoder::ApplyRenderPassWorkarounds(
             descriptor.dimension = wgpu::TextureDimension::e2D;
             descriptor.mipLevelCount = 1;

-            // We are creating new resources. Need to lock the Device.
+            // We are creating new resources. Device must already be locked via
+            // APIBeginRenderPass -> ApplyRenderPassWorkarounds.
             // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
             // Command Submit time, so the locking would be removed from here at that point.
             Ref<TextureBase> temporaryResolveTexture;
             Ref<TextureViewBase> temporaryResolveView;
             {
-                auto deviceLock(GetDevice()->GetScopedLock());
+                ASSERT(device->IsLockedByCurrentThreadIfNeeded());

                 DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
@@ -92,10 +92,6 @@ struct DeviceBase::DeprecationWarnings {
 };

 namespace {
-bool IsMutexLockedByCurrentThreadIfNeeded(const Ref<Mutex>& mutex) {
-    return mutex == nullptr || mutex->IsLockedByCurrentThread();
-}
-
 struct LoggingCallbackTask : CallbackTask {
   public:
     LoggingCallbackTask() = delete;
@@ -873,7 +869,7 @@ Ref<RenderPipelineBase> DeviceBase::GetCachedRenderPipeline(

 Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
     Ref<ComputePipelineBase> computePipeline) {
-    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
+    ASSERT(IsLockedByCurrentThreadIfNeeded());
     auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get());
     if (inserted) {
         computePipeline->SetIsCachedReference();
@@ -885,7 +881,7 @@ Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(

 Ref<RenderPipelineBase> DeviceBase::AddOrGetCachedRenderPipeline(
     Ref<RenderPipelineBase> renderPipeline) {
-    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
+    ASSERT(IsLockedByCurrentThreadIfNeeded());
     auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get());
     if (inserted) {
         renderPipeline->SetIsCachedReference();
@@ -2048,6 +2044,10 @@ Mutex::AutoLock DeviceBase::GetScopedLock() {
     return Mutex::AutoLock(mMutex.Get());
 }

+bool DeviceBase::IsLockedByCurrentThreadIfNeeded() const {
+    return mMutex == nullptr || mMutex->IsLockedByCurrentThread();
+}
+
 IgnoreLazyClearCountScope::IgnoreLazyClearCountScope(DeviceBase* device)
     : mDevice(device), mLazyClearCountForTesting(device->mLazyClearCountForTesting) {}
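Mutex::IsLockedByCurrentThread(), which the new DeviceBase::IsLockedByCurrentThreadIfNeeded() delegates to above, is not part of this diff. A common way to support that kind of query is to record the owning thread's id while the lock is held; the sketch below shows that technique with standard-library types and is purely illustrative, not Dawn's actual Mutex.

#include <atomic>
#include <mutex>
#include <thread>

// Hypothetical mutex that can answer "does the current thread hold me?".
class OwnershipTrackingMutex {
  public:
    void Lock() {
        mMutex.lock();
        // Only the owner writes mOwner, and only while holding mMutex.
        mOwner.store(std::this_thread::get_id(), std::memory_order_relaxed);
    }

    void Unlock() {
        mOwner.store(std::thread::id(), std::memory_order_relaxed);
        mMutex.unlock();
    }

    bool IsLockedByCurrentThread() const {
        return mOwner.load(std::memory_order_relaxed) == std::this_thread::get_id();
    }

  private:
    std::mutex mMutex;
    std::atomic<std::thread::id> mOwner{std::thread::id()};
};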
@@ -431,6 +431,12 @@ class DeviceBase : public RefCountedWithExternalCount {
     // AutoLock. It would crash if such thing happens.
     [[nodiscard]] Mutex::AutoLock GetScopedLock();

+    // This method returns true if Feature::ImplicitDeviceSynchronization is turned off, or if it
+    // is turned on and the device is locked by the current thread. The check is only enabled when
+    // DAWN_ENABLE_ASSERTS is turned on, so it should only be wrapped inside the ASSERT() macro,
+    // i.e. ASSERT(device.IsLockedByCurrentThreadIfNeeded()).
+    bool IsLockedByCurrentThreadIfNeeded() const;
+
     // In the 'Normal' mode, currently recorded commands in the backend normally will be actually
     // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed
     // as late as possible, for example, until the client has explicitly issued a submission.
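The usage note added above means the method should only ever appear inside an assertion, never drive control flow. A small illustrative example of the intended style (DeviceLike is a made-up stand-in and assert() stands in for Dawn's ASSERT macro):

#include <cassert>

// Made-up stand-in exposing only the method under discussion.
struct DeviceLike {
    bool IsLockedByCurrentThreadIfNeeded() const { return true; }  // placeholder body
};

void EncodeSomething(const DeviceLike& device) {
    // Intended: document and enforce the locking precondition in builds with asserts.
    assert(device.IsLockedByCurrentThreadIfNeeded());

    // Not intended: branching on the result, since the check is only meaningful when
    // assertions are enabled and must never change program behavior.
    // if (device.IsLockedByCurrentThreadIfNeeded()) { ... }
}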
@@ -531,6 +531,7 @@ source_set("end2end_tests_sources") {
     "end2end/MemoryAllocationStressTests.cpp",
     "end2end/MultisampledRenderingTests.cpp",
     "end2end/MultisampledSamplingTests.cpp",
+    "end2end/MultithreadTests.cpp",
     "end2end/NonzeroBufferCreationTests.cpp",
     "end2end/NonzeroTextureCreationTests.cpp",
     "end2end/ObjectCachingTests.cpp",
@@ -0,0 +1,180 @@
// Copyright 2023 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <limits>
#include <memory>
#include <thread>
#include <vector>

#include "dawn/common/Constants.h"
#include "dawn/common/Math.h"
#include "dawn/tests/DawnTest.h"
#include "dawn/utils/TestUtils.h"
#include "dawn/utils/TextureUtils.h"
#include "dawn/utils/WGPUHelpers.h"

class MultithreadTests : public DawnTest {
  protected:
    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
        std::vector<wgpu::FeatureName> features;
        // TODO(crbug.com/dawn/1678): DawnWire doesn't support the thread-safe API yet.
        if (!UsesWire()) {
            features.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
        }
        return features;
    }

    void SetUp() override {
        DawnTest::SetUp();
        // TODO(crbug.com/dawn/1678): DawnWire doesn't support the thread-safe API yet.
        DAWN_TEST_UNSUPPORTED_IF(UsesWire());

        // TODO(crbug.com/dawn/1679): OpenGL/D3D11 backends don't support the thread-safe API yet.
        DAWN_TEST_UNSUPPORTED_IF(IsOpenGL() || IsOpenGLES() || IsD3D11());
    }

    wgpu::Buffer CreateBuffer(uint32_t size, wgpu::BufferUsage usage) {
        wgpu::BufferDescriptor descriptor;
        descriptor.size = size;
        descriptor.usage = usage;
        return device.CreateBuffer(&descriptor);
    }

    wgpu::Texture CreateTexture(uint32_t width,
                                uint32_t height,
                                wgpu::TextureFormat format,
                                wgpu::TextureUsage usage,
                                uint32_t mipLevelCount = 1,
                                uint32_t sampleCount = 1) {
        wgpu::TextureDescriptor texDescriptor = {};
        texDescriptor.size = {width, height, 1};
        texDescriptor.format = format;
        texDescriptor.usage = usage;
        texDescriptor.mipLevelCount = mipLevelCount;
        texDescriptor.sampleCount = sampleCount;
        return device.CreateTexture(&texDescriptor);
    }

    void RunInParallel(uint32_t numThreads, const std::function<void(uint32_t)>& workerFunc) {
        std::vector<std::unique_ptr<std::thread>> threads(numThreads);

        for (uint32_t i = 0; i < threads.size(); ++i) {
            threads[i] = std::make_unique<std::thread>([i, workerFunc] { workerFunc(i); });
        }

        for (auto& thread : threads) {
            thread->join();
        }
    }
};

class MultithreadEncodingTests : public MultithreadTests {};

// Test that encoding render passes in parallel works.
TEST_P(MultithreadEncodingTests, RenderPassEncodersInParallel) {
    constexpr uint32_t kRTSize = 16;
    constexpr uint32_t kNumThreads = 10;

    wgpu::Texture msaaRenderTarget =
        CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm,
                      wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc,
                      /*mipLevelCount=*/1, /*sampleCount=*/4);
    wgpu::TextureView msaaRenderTargetView = msaaRenderTarget.CreateView();

    wgpu::Texture resolveTarget =
        CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm,
                      wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc);
    wgpu::TextureView resolveTargetView = resolveTarget.CreateView();

    std::vector<wgpu::CommandBuffer> commandBuffers(kNumThreads);

    RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) {
        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();

        // Clear the render target to red.
        utils::ComboRenderPassDescriptor renderPass({msaaRenderTargetView});
        renderPass.cColorAttachments[0].resolveTarget = resolveTargetView;
        renderPass.cColorAttachments[0].clearValue = {1.0f, 0.0f, 0.0f, 1.0f};

        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
        pass.End();

        commandBuffers[index] = encoder.Finish();
    });

    // Verify that the command buffers executed correctly.
    for (auto& commandBuffer : commandBuffers) {
        queue.Submit(1, &commandBuffer);

        EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {0, 0});
        EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {kRTSize - 1, kRTSize - 1});
    }
}

// Test that encoding compute passes in parallel works.
TEST_P(MultithreadEncodingTests, ComputePassEncodersInParallel) {
    constexpr uint32_t kNumThreads = 10;
    constexpr uint32_t kExpected = 0xFFFFFFFFu;

    wgpu::ShaderModule module = utils::CreateShaderModule(device, R"(
        @group(0) @binding(0) var<storage, read_write> output : u32;

        @compute @workgroup_size(1, 1, 1)
        fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u) {
            output = 0xFFFFFFFFu;
        })");
    wgpu::ComputePipelineDescriptor csDesc;
    csDesc.compute.module = module;
    csDesc.compute.entryPoint = "main";
    auto pipeline = device.CreateComputePipeline(&csDesc);

    wgpu::Buffer dstBuffer =
        CreateBuffer(sizeof(uint32_t), wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc |
                                           wgpu::BufferUsage::CopyDst);
    wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                                                     {
                                                         {0, dstBuffer, 0, sizeof(uint32_t)},
                                                     });

    std::vector<wgpu::CommandBuffer> commandBuffers(kNumThreads);

    RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) {
        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
        wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
        pass.SetPipeline(pipeline);
        pass.SetBindGroup(0, bindGroup);
        pass.DispatchWorkgroups(1, 1, 1);
        pass.End();

        commandBuffers[index] = encoder.Finish();
    });

    // Verify that the command buffers executed correctly.
    for (auto& commandBuffer : commandBuffers) {
        constexpr uint32_t kSentinelData = 0;
        queue.WriteBuffer(dstBuffer, 0, &kSentinelData, sizeof(kSentinelData));
        queue.Submit(1, &commandBuffer);

        EXPECT_BUFFER_U32_EQ(kExpected, dstBuffer, 0);
    }
}

DAWN_INSTANTIATE_TEST(MultithreadEncodingTests,
                      D3D11Backend(),
                      D3D12Backend(),
                      MetalBackend(),
                      OpenGLBackend(),
                      OpenGLESBackend(),
                      VulkanBackend());