Add missing lock to APIBeginRender/ComputePass.

Add tests to verify multithreading behaviors of encoding render/compute
passes.

Bug: dawn:1662
Change-Id: I9bc6a0dd5d94b53b59e7e49a4611d4d55cc36e60
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/126980
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
parent 3dba94c1d4
commit 3e7a114a6e
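For orientation, the locking shape this change applies can be reduced to a self-contained C++ sketch. TrackedMutex, Device, and CommandEncoder below are simplified stand-ins, not Dawn's real classes; only the overall pattern mirrors the diff that follows: public API entry points that create new objects take the device-wide lock through an RAII guard, and the internal paths they call merely assert that the lock is already held.

#include <atomic>
#include <cassert>
#include <mutex>
#include <thread>

// Mutex that remembers which thread currently owns it, so code can assert
// "the current thread holds the lock" instead of re-locking.
class TrackedMutex {
  public:
    void Lock() {
        mMutex.lock();
        mOwner.store(std::this_thread::get_id());
    }
    void Unlock() {
        mOwner.store(std::thread::id());
        mMutex.unlock();
    }
    bool IsLockedByCurrentThread() const {
        return mOwner.load() == std::this_thread::get_id();
    }

  private:
    std::mutex mMutex;
    std::atomic<std::thread::id> mOwner{std::thread::id()};
};

class Device {
  public:
    // RAII guard analogous to a scoped device lock; a null mutex means
    // implicit synchronization is disabled, so nothing is locked.
    class ScopedLock {
      public:
        explicit ScopedLock(TrackedMutex* mutex) : mMutex(mutex) {
            if (mMutex != nullptr) {
                mMutex->Lock();
            }
        }
        ~ScopedLock() {
            if (mMutex != nullptr) {
                mMutex->Unlock();
            }
        }
        ScopedLock(const ScopedLock&) = delete;
        ScopedLock& operator=(const ScopedLock&) = delete;

      private:
        TrackedMutex* mMutex;
    };

    void EnableImplicitSynchronization() { mMutex = &mMutexStorage; }

    ScopedLock GetScopedLock() { return ScopedLock(mMutex); }

    // True when no mutex exists (feature off) or the current thread holds it.
    bool IsLockedByCurrentThreadIfNeeded() const {
        return mMutex == nullptr || mMutex->IsLockedByCurrentThread();
    }

  private:
    TrackedMutex mMutexStorage;
    TrackedMutex* mMutex = nullptr;  // Non-null only when the feature is on.
};

class CommandEncoder {
  public:
    explicit CommandEncoder(Device* device) : mDevice(device) {}

    // Public entry point: creates new objects, so it must take the device lock.
    void APIBeginComputePass() {
        auto deviceLock(mDevice->GetScopedLock());
        BeginComputePass();
    }

  private:
    // Internal path: relies on the caller already holding the lock.
    void BeginComputePass() { assert(mDevice->IsLockedByCurrentThreadIfNeeded()); }

    Device* mDevice;
};

int main() {
    Device device;
    device.EnableImplicitSynchronization();
    CommandEncoder encoder(&device);
    encoder.APIBeginComputePass();  // Locks, asserts inside, unlocks on scope exit.
    return 0;
}

Keeping the acquisition at the API boundary lets internal helpers stay lock-free while still being checkable with an assertion when implicit device synchronization is enabled; that is the same division of responsibility the hunks below introduce.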
@@ -771,11 +771,15 @@ void CommandEncoder::TrackQueryAvailability(QuerySetBase* querySet, uint32_t que
 // Implementation of the API's command recording methods
 
 ComputePassEncoder* CommandEncoder::APIBeginComputePass(const ComputePassDescriptor* descriptor) {
+    // This function will create new object, need to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginComputePass(descriptor).Detach();
 }
 
 Ref<ComputePassEncoder> CommandEncoder::BeginComputePass(const ComputePassDescriptor* descriptor) {
     DeviceBase* device = GetDevice();
+    ASSERT(device->IsLockedByCurrentThreadIfNeeded());
 
     bool success = mEncodingContext.TryEncode(
         this,
@@ -830,11 +834,15 @@ Ref<ComputePassEncoder> CommandEncoder::BeginComputePass(const ComputePassDescri
 }
 
 RenderPassEncoder* CommandEncoder::APIBeginRenderPass(const RenderPassDescriptor* descriptor) {
+    // This function will create new object, need to lock the Device.
+    auto deviceLock(GetDevice()->GetScopedLock());
+
     return BeginRenderPass(descriptor).Detach();
 }
 
 Ref<RenderPassEncoder> CommandEncoder::BeginRenderPass(const RenderPassDescriptor* descriptor) {
     DeviceBase* device = GetDevice();
+    ASSERT(device->IsLockedByCurrentThreadIfNeeded());
 
     RenderPassResourceUsageTracker usageTracker;
 
@@ -1047,13 +1055,14 @@ ResultOrError<std::function<void()>> CommandEncoder::ApplyRenderPassWorkarounds(
     descriptor.dimension = wgpu::TextureDimension::e2D;
     descriptor.mipLevelCount = 1;
 
-    // We are creating new resources. Need to lock the Device.
+    // We are creating new resources. Device must already be locked via
+    // APIBeginRenderPass -> ApplyRenderPassWorkarounds.
     // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
     // Command Submit time, so the locking would be removed from here at that point.
     Ref<TextureBase> temporaryResolveTexture;
     Ref<TextureViewBase> temporaryResolveView;
     {
-        auto deviceLock(GetDevice()->GetScopedLock());
+        ASSERT(device->IsLockedByCurrentThreadIfNeeded());
 
         DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
 
@@ -92,10 +92,6 @@ struct DeviceBase::DeprecationWarnings {
 };
 
 namespace {
-bool IsMutexLockedByCurrentThreadIfNeeded(const Ref<Mutex>& mutex) {
-    return mutex == nullptr || mutex->IsLockedByCurrentThread();
-}
-
 struct LoggingCallbackTask : CallbackTask {
   public:
     LoggingCallbackTask() = delete;
@@ -873,7 +869,7 @@ Ref<RenderPipelineBase> DeviceBase::GetCachedRenderPipeline(
 
 Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
     Ref<ComputePipelineBase> computePipeline) {
-    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
+    ASSERT(IsLockedByCurrentThreadIfNeeded());
     auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get());
     if (inserted) {
         computePipeline->SetIsCachedReference();
@@ -885,7 +881,7 @@ Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
 
 Ref<RenderPipelineBase> DeviceBase::AddOrGetCachedRenderPipeline(
     Ref<RenderPipelineBase> renderPipeline) {
-    ASSERT(IsMutexLockedByCurrentThreadIfNeeded(mMutex));
+    ASSERT(IsLockedByCurrentThreadIfNeeded());
     auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get());
     if (inserted) {
         renderPipeline->SetIsCachedReference();
@@ -2048,6 +2044,10 @@ Mutex::AutoLock DeviceBase::GetScopedLock() {
     return Mutex::AutoLock(mMutex.Get());
 }
 
+bool DeviceBase::IsLockedByCurrentThreadIfNeeded() const {
+    return mMutex == nullptr || mMutex->IsLockedByCurrentThread();
+}
+
 IgnoreLazyClearCountScope::IgnoreLazyClearCountScope(DeviceBase* device)
     : mDevice(device), mLazyClearCountForTesting(device->mLazyClearCountForTesting) {}
 
@@ -431,6 +431,12 @@ class DeviceBase : public RefCountedWithExternalCount {
     // AutoLock. It would crash if such thing happens.
     [[nodiscard]] Mutex::AutoLock GetScopedLock();
 
+    // This method returns true if Feature::ImplicitDeviceSynchronization is turned on and the
+    // device is locked by current thread. This method is only enabled when DAWN_ENABLE_ASSERTS is
+    // turned on. Thus it should only be wrapped inside ASSERT() macro. i.e.
+    // ASSERT(device.IsLockedByCurrentThread())
+    bool IsLockedByCurrentThreadIfNeeded() const;
+
     // In the 'Normal' mode, currently recorded commands in the backend normally will be actually
     // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed
     // as late as possible, for example, until the client has explictly issued a submission.
@@ -531,6 +531,7 @@ source_set("end2end_tests_sources") {
     "end2end/MemoryAllocationStressTests.cpp",
     "end2end/MultisampledRenderingTests.cpp",
     "end2end/MultisampledSamplingTests.cpp",
+    "end2end/MultithreadTests.cpp",
     "end2end/NonzeroBufferCreationTests.cpp",
     "end2end/NonzeroTextureCreationTests.cpp",
     "end2end/ObjectCachingTests.cpp",
@@ -0,0 +1,180 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <functional>
+#include <limits>
+#include <memory>
+#include <thread>
+#include <vector>
+
+#include "dawn/common/Constants.h"
+#include "dawn/common/Math.h"
+#include "dawn/tests/DawnTest.h"
+#include "dawn/utils/TestUtils.h"
+#include "dawn/utils/TextureUtils.h"
+#include "dawn/utils/WGPUHelpers.h"
+
+class MultithreadTests : public DawnTest {
+  protected:
+    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
+        std::vector<wgpu::FeatureName> features;
+        // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet.
+        if (!UsesWire()) {
+            features.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
+        }
+        return features;
+    }
+
+    void SetUp() override {
+        DawnTest::SetUp();
+        // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet.
+        DAWN_TEST_UNSUPPORTED_IF(UsesWire());
+
+        // TODO(crbug.com/dawn/1679): OpenGL/D3D11 backend doesn't support thread safe API yet.
+        DAWN_TEST_UNSUPPORTED_IF(IsOpenGL() || IsOpenGLES() || IsD3D11());
+    }
+
+    wgpu::Buffer CreateBuffer(uint32_t size, wgpu::BufferUsage usage) {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = size;
+        descriptor.usage = usage;
+        return device.CreateBuffer(&descriptor);
+    }
+
+    wgpu::Texture CreateTexture(uint32_t width,
+                                uint32_t height,
+                                wgpu::TextureFormat format,
+                                wgpu::TextureUsage usage,
+                                uint32_t mipLevelCount = 1,
+                                uint32_t sampleCount = 1) {
+        wgpu::TextureDescriptor texDescriptor = {};
+        texDescriptor.size = {width, height, 1};
+        texDescriptor.format = format;
+        texDescriptor.usage = usage;
+        texDescriptor.mipLevelCount = mipLevelCount;
+        texDescriptor.sampleCount = sampleCount;
+        return device.CreateTexture(&texDescriptor);
+    }
+
+    void RunInParallel(uint32_t numThreads, const std::function<void(uint32_t)>& workerFunc) {
+        std::vector<std::unique_ptr<std::thread>> threads(numThreads);
+
+        for (uint32_t i = 0; i < threads.size(); ++i) {
+            threads[i] = std::make_unique<std::thread>([i, workerFunc] { workerFunc(i); });
+        }
+
+        for (auto& thread : threads) {
+            thread->join();
+        }
+    }
+};
+
+class MultithreadEncodingTests : public MultithreadTests {};
+
+// Test that encoding render passes in parallel should work
+TEST_P(MultithreadEncodingTests, RenderPassEncodersInParallel) {
+    constexpr uint32_t kRTSize = 16;
+    constexpr uint32_t kNumThreads = 10;
+
+    wgpu::Texture msaaRenderTarget =
+        CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm,
+                      wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc,
+                      /*mipLevelCount=*/1, /*sampleCount=*/4);
+    wgpu::TextureView msaaRenderTargetView = msaaRenderTarget.CreateView();
+
+    wgpu::Texture resolveTarget =
+        CreateTexture(kRTSize, kRTSize, wgpu::TextureFormat::RGBA8Unorm,
+                      wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc);
+    wgpu::TextureView resolveTargetView = resolveTarget.CreateView();
+
+    std::vector<wgpu::CommandBuffer> commandBuffers(kNumThreads);
+
+    RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+        // Clear the renderTarget to red.
+        utils::ComboRenderPassDescriptor renderPass({msaaRenderTargetView});
+        renderPass.cColorAttachments[0].resolveTarget = resolveTargetView;
+        renderPass.cColorAttachments[0].clearValue = {1.0f, 0.0f, 0.0f, 1.0f};
+
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.End();
+
+        commandBuffers[index] = encoder.Finish();
+    });
+
+    // Verify that the command buffers executed correctly.
+    for (auto& commandBuffer : commandBuffers) {
+        queue.Submit(1, &commandBuffer);
+
+        EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {0, 0});
+        EXPECT_TEXTURE_EQ(utils::RGBA8::kRed, resolveTarget, {kRTSize - 1, kRTSize - 1});
+    }
+}
+
+// Test that encoding compute passes in parallel should work
+TEST_P(MultithreadEncodingTests, ComputePassEncodersInParallel) {
+    constexpr uint32_t kNumThreads = 10;
+    constexpr uint32_t kExpected = 0xFFFFFFFFu;
+
+    wgpu::ShaderModule module = utils::CreateShaderModule(device, R"(
+        @group(0) @binding(0) var<storage, read_write> output : u32;
+
+        @compute @workgroup_size(1, 1, 1)
+        fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u) {
+            output = 0xFFFFFFFFu;
+        })");
+    wgpu::ComputePipelineDescriptor csDesc;
+    csDesc.compute.module = module;
+    csDesc.compute.entryPoint = "main";
+    auto pipeline = device.CreateComputePipeline(&csDesc);
+
+    wgpu::Buffer dstBuffer =
+        CreateBuffer(sizeof(uint32_t), wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc |
+                                           wgpu::BufferUsage::CopyDst);
+    wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                     {
+                                                         {0, dstBuffer, 0, sizeof(uint32_t)},
+                                                     });
+
+    std::vector<wgpu::CommandBuffer> commandBuffers(kNumThreads);
+
+    RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroup);
+        pass.DispatchWorkgroups(1, 1, 1);
+        pass.End();
+
+        commandBuffers[index] = encoder.Finish();
+    });
+
+    // Verify that the command buffers executed correctly.
+    for (auto& commandBuffer : commandBuffers) {
+        constexpr uint32_t kSentinelData = 0;
+        queue.WriteBuffer(dstBuffer, 0, &kSentinelData, sizeof(kSentinelData));
+        queue.Submit(1, &commandBuffer);
+
+        EXPECT_BUFFER_U32_EQ(kExpected, dstBuffer, 0);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(MultithreadEncodingTests,
+                      D3D11Backend(),
+                      D3D12Backend(),
+                      MetalBackend(),
+                      OpenGLBackend(),
+                      OpenGLESBackend(),
+                      VulkanBackend());
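The new suites are ordinary gtest parameterized tests, so locally they can be selected with a filter such as --gtest_filter=*MultithreadEncodingTests* on the end2end test binary (dawn_end2end_tests in a typical build; the exact target name and output directory depend on the local build configuration).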