From 23f43961779d0dea6f39aaa87fb2366b1052f6ee Mon Sep 17 00:00:00 2001 From: Le Hoang Quyen Date: Mon, 24 Apr 2023 20:12:00 +0000 Subject: [PATCH] Add more multithread tests. Bug: dawn:1662 Change-Id: I2b2c66c6f9a7b512ae9f8010a082e7306feaa6f3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122060 Commit-Queue: Quyen Le Kokoro: Kokoro Reviewed-by: Austin Eng --- src/dawn/tests/DawnTest.cpp | 30 +- src/dawn/tests/DawnTest.h | 6 +- .../end2end/D3D12ResourceWrappingTests.cpp | 156 +++- .../tests/end2end/DeviceLifetimeTests.cpp | 41 + .../tests/end2end/IOSurfaceWrappingTests.cpp | 65 ++ src/dawn/tests/end2end/MultithreadTests.cpp | 879 +++++++++++++++++- src/dawn/utils/TestUtils.cpp | 20 + src/dawn/utils/TestUtils.h | 5 + 8 files changed, 1141 insertions(+), 61 deletions(-) diff --git a/src/dawn/tests/DawnTest.cpp b/src/dawn/tests/DawnTest.cpp index 97f930130b..3ed7ebd232 100644 --- a/src/dawn/tests/DawnTest.cpp +++ b/src/dawn/tests/DawnTest.cpp @@ -15,6 +15,7 @@ #include "dawn/tests/DawnTest.h" #include +#include #include #include #include @@ -1146,6 +1147,9 @@ std::ostringstream& DawnTestBase::AddBufferExpectation(const char* file, deferred.size = size; deferred.expectation.reset(expectation); + // This expectation might be called from multiple threads + dawn::Mutex::AutoLock lg(&mMutex); + mDeferredExpectations.push_back(std::move(deferred)); mDeferredExpectations.back().message = std::make_unique(); return *(mDeferredExpectations.back().message.get()); @@ -1200,6 +1204,9 @@ std::ostringstream& DawnTestBase::AddTextureExpectationImpl(const char* file, deferred.bytesPerRow = bytesPerRow; deferred.expectation.reset(expectation); + // This expectation might be called from multiple threads + dawn::Mutex::AutoLock lg(&mMutex); + mDeferredExpectations.push_back(std::move(deferred)); mDeferredExpectations.back().message = std::make_unique(); return *(mDeferredExpectations.back().message.get()); @@ -1504,11 +1511,16 @@ void DawnTestBase::FlushWire() { } void DawnTestBase::WaitForAllOperations() { - bool done = false; + // Callback might be invoked on another thread that calls the same WaitABit() method, not + // necessarily the current thread. So we need to use atomic here. + std::atomic done(false); device.GetQueue().OnSubmittedWorkDone( - 0u, [](WGPUQueueWorkDoneStatus, void* userdata) { *static_cast(userdata) = true; }, + 0u, + [](WGPUQueueWorkDoneStatus, void* userdata) { + *static_cast*>(userdata) = true; + }, &done); - while (!done) { + while (!done.load()) { WaitABit(); } } @@ -1526,6 +1538,9 @@ DawnTestBase::ReadbackReservation DawnTestBase::ReserveReadback(wgpu::Device tar utils::CreateBufferFromData(targetDevice, initialBufferData.data(), readbackSize, wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst); + // This readback might be called from multiple threads + dawn::Mutex::AutoLock lg(&mMutex); + ReadbackReservation reservation; reservation.device = targetDevice; reservation.buffer = slot.buffer; @@ -1551,7 +1566,7 @@ void DawnTestBase::MapSlotsSynchronously() { } // Busy wait until all map operations are done. - while (mNumPendingMapOperations != 0) { + while (mNumPendingMapOperations.load(std::memory_order_acquire) != 0) { WaitABit(); } } @@ -1562,7 +1577,8 @@ void DawnTestBase::SlotMapCallback(WGPUBufferMapAsyncStatus status, void* userda status == WGPUBufferMapAsyncStatus_DeviceLost); std::unique_ptr userdata(static_cast(userdata_)); DawnTestBase* test = userdata->test; - test->mNumPendingMapOperations--; + + dawn::Mutex::AutoLock lg(&test->mMutex); ReadbackSlot* slot = &test->mReadbackSlots[userdata->slot]; if (status == WGPUBufferMapAsyncStatus_Success) { @@ -1571,6 +1587,8 @@ void DawnTestBase::SlotMapCallback(WGPUBufferMapAsyncStatus status, void* userda } else { slot->mappedData = nullptr; } + + test->mNumPendingMapOperations.fetch_sub(1, std::memory_order_release); } void DawnTestBase::ResolveExpectations() { @@ -1629,6 +1647,8 @@ void DawnTestBase::ResolveDeferredExpectationsNow() { FlushWire(); MapSlotsSynchronously(); + + dawn::Mutex::AutoLock lg(&mMutex); ResolveExpectations(); mDeferredExpectations.clear(); diff --git a/src/dawn/tests/DawnTest.h b/src/dawn/tests/DawnTest.h index 8482c7c9ae..9c917c1328 100644 --- a/src/dawn/tests/DawnTest.h +++ b/src/dawn/tests/DawnTest.h @@ -15,6 +15,7 @@ #ifndef SRC_DAWN_TESTS_DAWNTEST_H_ #define SRC_DAWN_TESTS_DAWNTEST_H_ +#include #include #include #include @@ -23,6 +24,7 @@ #include #include "dawn/common/Log.h" +#include "dawn/common/Mutex.h" #include "dawn/common/Platform.h" #include "dawn/common/Preprocessor.h" #include "dawn/dawn_proc_table.h" @@ -623,7 +625,7 @@ class DawnTestBase { // Maps all the buffers and fill ReadbackSlot::mappedData void MapSlotsSynchronously(); static void SlotMapCallback(WGPUBufferMapAsyncStatus status, void* userdata); - size_t mNumPendingMapOperations = 0; + std::atomic mNumPendingMapOperations = 0; // Reserve space where the data for an expectation can be copied struct ReadbackReservation { @@ -656,6 +658,8 @@ class DawnTestBase { WGPUDevice mLastCreatedBackendDevice; std::unique_ptr mTestPlatform; + + dawn::Mutex mMutex; }; #define DAWN_SKIP_TEST_IF_BASE(condition, type, reason) \ diff --git a/src/dawn/tests/end2end/D3D12ResourceWrappingTests.cpp b/src/dawn/tests/end2end/D3D12ResourceWrappingTests.cpp index 3329126f91..702a0bf854 100644 --- a/src/dawn/tests/end2end/D3D12ResourceWrappingTests.cpp +++ b/src/dawn/tests/end2end/D3D12ResourceWrappingTests.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -395,14 +396,11 @@ class D3D12SharedHandleUsageTests : public D3D12ResourceTestBase { queue.Submit(1, &commands); } - void WrapAndClearD3D11Texture( - const wgpu::TextureDescriptor& dawnDescriptor, - const D3D11_TEXTURE2D_DESC& d3dDescriptor, - const wgpu::Color& clearColor, - wgpu::Texture* dawnTextureOut, - ID3D11Texture2D** d3d11TextureOut, - std::unique_ptr* externalImageOut, - bool isInitialized = true) const { + void CreateSharedD3D11Texture(const D3D11_TEXTURE2D_DESC& d3dDescriptor, + ID3D11Texture2D** d3d11TextureOut, + ID3D11Fence** d3d11FenceOut, + HANDLE* sharedHandleOut, + HANDLE* fenceSharedHandleOut) const { ComPtr d3d11Texture; HRESULT hr = mD3d11Device->CreateTexture2D(&d3dDescriptor, nullptr, &d3d11Texture); ASSERT_EQ(hr, S_OK); @@ -417,20 +415,10 @@ class D3D12SharedHandleUsageTests : public D3D12ResourceTestBase { &sharedHandle); ASSERT_EQ(hr, S_OK); - ComPtr dxgiKeyedMutex; - HANDLE fenceSharedHandle = nullptr; ComPtr d3d11Fence; - ComPtr d3d11DeviceContext4; - - if (GetParam().mSyncMode == SyncMode::kKeyedMutex) { - hr = d3d11Texture.As(&dxgiKeyedMutex); - ASSERT_EQ(hr, S_OK); - - hr = dxgiKeyedMutex->AcquireSync(kDXGIKeyedMutexAcquireReleaseKey, INFINITE); - ASSERT_EQ(hr, S_OK); - } else { + if (GetParam().mSyncMode == SyncMode::kFence) { ComPtr d3d11Device5; hr = mD3d11Device.As(&d3d11Device5); ASSERT_EQ(hr, S_OK); @@ -442,6 +430,33 @@ class D3D12SharedHandleUsageTests : public D3D12ResourceTestBase { ASSERT_EQ(hr, S_OK); } + *d3d11TextureOut = d3d11Texture.Detach(); + *d3d11FenceOut = d3d11Fence.Detach(); + *sharedHandleOut = sharedHandle; + *fenceSharedHandleOut = fenceSharedHandle; + } + + void ClearD3D11Texture(const wgpu::Color& clearColor, + ID3D11Texture2D* d3d11TexturePtr, + ID3D11Fence* d3d11Fence, + uint64_t fenceSignalValue) const { + ComPtr d3d11Texture = d3d11TexturePtr; + ComPtr dxgiResource; + HRESULT hr = d3d11Texture.As(&dxgiResource); + ASSERT_EQ(hr, S_OK); + + ComPtr dxgiKeyedMutex; + + ComPtr d3d11DeviceContext4; + + if (GetParam().mSyncMode == SyncMode::kKeyedMutex) { + hr = d3d11Texture.As(&dxgiKeyedMutex); + ASSERT_EQ(hr, S_OK); + + hr = dxgiKeyedMutex->AcquireSync(kDXGIKeyedMutexAcquireReleaseKey, INFINITE); + ASSERT_EQ(hr, S_OK); + } + ComPtr d3d11RTV; hr = mD3d11Device->CreateRenderTargetView(d3d11Texture.Get(), nullptr, &d3d11RTV); ASSERT_EQ(hr, S_OK); @@ -451,7 +466,6 @@ class D3D12SharedHandleUsageTests : public D3D12ResourceTestBase { static_cast(clearColor.b), static_cast(clearColor.a)}; mD3d11DeviceContext->ClearRenderTargetView(d3d11RTV.Get(), colorRGBA); - constexpr uint64_t kFenceSignalValue = 1; if (dxgiKeyedMutex) { hr = dxgiKeyedMutex->ReleaseSync(kDXGIKeyedMutexAcquireReleaseKey); ASSERT_EQ(hr, S_OK); @@ -460,9 +474,18 @@ class D3D12SharedHandleUsageTests : public D3D12ResourceTestBase { ASSERT_EQ(hr, S_OK); // The fence starts with 0 signaled, but that won't capture the render target view clear // above, so signal explicitly with 1 and make the next Dawn access wait on 1. - d3d11DeviceContext4->Signal(d3d11Fence.Get(), kFenceSignalValue); + d3d11DeviceContext4->Signal(d3d11Fence, fenceSignalValue); } + } + void WaitAndWrapD3D11Texture( + const wgpu::TextureDescriptor& dawnDescriptor, + HANDLE sharedHandle, + HANDLE fenceSharedHandle, + uint64_t fenceWaitValue, + wgpu::Texture* dawnTextureOut, + std::unique_ptr* externalImageOut, + bool isInitialized) const { dawn::native::d3d12::ExternalImageDescriptorDXGISharedHandle externalImageDesc = {}; externalImageDesc.sharedHandle = sharedHandle; externalImageDesc.cTextureDescriptor = @@ -476,12 +499,36 @@ class D3D12SharedHandleUsageTests : public D3D12ResourceTestBase { externalAccessDesc.isInitialized = isInitialized; externalAccessDesc.usage = static_cast(dawnDescriptor.usage); if (fenceSharedHandle != nullptr) { - externalAccessDesc.waitFences.push_back({fenceSharedHandle, kFenceSignalValue}); + externalAccessDesc.waitFences.push_back({fenceSharedHandle, fenceWaitValue}); } *dawnTextureOut = wgpu::Texture::Acquire(externalImage->BeginAccess(&externalAccessDesc)); - *d3d11TextureOut = d3d11Texture.Detach(); *externalImageOut = std::move(externalImage); + } + + void WrapAndClearD3D11Texture( + const wgpu::TextureDescriptor& dawnDescriptor, + const D3D11_TEXTURE2D_DESC& d3dDescriptor, + const wgpu::Color& clearColor, + wgpu::Texture* dawnTextureOut, + ID3D11Texture2D** d3d11TextureOut, + std::unique_ptr* externalImageOut, + bool isInitialized = true) const { + ComPtr d3d11Texture; + ComPtr d3d11Fence; + HANDLE sharedHandle = nullptr; + HANDLE fenceSharedHandle = nullptr; + CreateSharedD3D11Texture(d3dDescriptor, &d3d11Texture, &d3d11Fence, &sharedHandle, + &fenceSharedHandle); + + constexpr uint64_t kFenceSignalValue = 1; + ClearD3D11Texture(clearColor, d3d11Texture.Get(), d3d11Fence.Get(), kFenceSignalValue); + + WaitAndWrapD3D11Texture(dawnDescriptor, sharedHandle, fenceSharedHandle, + /*fenceWaitValue=*/kFenceSignalValue, dawnTextureOut, + externalImageOut, isInitialized); + + *d3d11TextureOut = d3d11Texture.Detach(); if (fenceSharedHandle != nullptr) { ::CloseHandle(fenceSharedHandle); @@ -1123,6 +1170,69 @@ TEST_P(D3D12SharedHandleMultithreadTests, DestroyDeviceAndUseImageInParallel) { thread2.join(); } +// 1. Create and clear a D3D11 texture +// 2. On 2nd thread: Wrap it in a Dawn texture and clear it to a different color +// 3. Readback the texture with D3D11 and ensure we receive the color we cleared with Dawn. +TEST_P(D3D12SharedHandleMultithreadTests, ClearInD3D12ReadbackInD3D11_TwoThreads) { + // TODO(crbug.com/dawn/735): This test appears to hang for + // D3D12_Microsoft_Basic_Render_Driver_CPU when validation is enabled. + DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsWARP() && IsBackendValidationEnabled()); + + // KeyedMutex doesn't guarantee the order of commands so skip it. + DAWN_TEST_UNSUPPORTED_IF(GetParam().mSyncMode != SyncMode::kFence); + + const wgpu::Color d3d11ClearColor{1.0f, 1.0f, 0.0f, 1.0f}; + const wgpu::Color d3d12ClearColor{0.0f, 0.0f, 1.0f, 1.0f}; + + constexpr uint64_t kD3D11FenceSignalValue = 1; + + ComPtr d3d11Texture; + ComPtr d3d11Fence; + HANDLE sharedHandle = nullptr; + HANDLE fenceSharedHandle = nullptr; + CreateSharedD3D11Texture(baseD3dDescriptor, &d3d11Texture, &d3d11Fence, &sharedHandle, + &fenceSharedHandle); + + dawn::native::d3d12::ExternalImageDXGIFenceDescriptor d3d12SignalFence; + + std::thread d3d12Thread([=, &d3d12SignalFence] { + wgpu::Texture dawnTexture; + std::unique_ptr externalImage; + WaitAndWrapD3D11Texture(baseDawnDescriptor, sharedHandle, fenceSharedHandle, + /*fenceWaitValue=*/kD3D11FenceSignalValue, &dawnTexture, + &externalImage, /*isInitialized=*/true); + + ASSERT_NE(dawnTexture.Get(), nullptr); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(d3d11ClearColor.r * 255, d3d11ClearColor.g * 255, + d3d11ClearColor.b * 255, d3d11ClearColor.a * 255), + dawnTexture, 0, 0); + + ClearImage(dawnTexture, d3d12ClearColor, device); + + externalImage->EndAccess(dawnTexture.Get(), &d3d12SignalFence); + + dawnTexture.Destroy(); + }); + + ClearD3D11Texture(d3d11ClearColor, d3d11Texture.Get(), d3d11Fence.Get(), + /*fenceSignalValue=*/kD3D11FenceSignalValue); + + d3d12Thread.join(); + // Now that Dawn (via D3D12) has finished writing to the texture, we should be + // able to read it back by copying it to a staging texture and verifying the + // color matches the D3D12 clear color. + ExpectPixelRGBA8EQ(d3d11Texture.Get(), d3d12ClearColor, &d3d12SignalFence); + + if (sharedHandle != nullptr) { + ::CloseHandle(sharedHandle); + } + + if (fenceSharedHandle != nullptr) { + ::CloseHandle(fenceSharedHandle); + } +} + DAWN_INSTANTIATE_TEST_P(D3D12SharedHandleValidation, {D3D12Backend()}, {SyncMode::kKeyedMutex, SyncMode::kFence}); diff --git a/src/dawn/tests/end2end/DeviceLifetimeTests.cpp b/src/dawn/tests/end2end/DeviceLifetimeTests.cpp index cec6374bf1..83d13b0041 100644 --- a/src/dawn/tests/end2end/DeviceLifetimeTests.cpp +++ b/src/dawn/tests/end2end/DeviceLifetimeTests.cpp @@ -211,6 +211,47 @@ TEST_P(DeviceLifetimeTests, DroppedThenMapBuffer) { } } +// Test that the device can be dropped before a buffer created from it, then mapping the buffer +// twice (one inside callback) will both fail. +TEST_P(DeviceLifetimeTests, Dropped_ThenMapBuffer_ThenMapBufferInCallback) { + wgpu::BufferDescriptor desc = {}; + desc.size = 4; + desc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst; + wgpu::Buffer buffer = device.CreateBuffer(&desc); + + device = nullptr; + + struct UserData { + wgpu::Buffer buffer; + bool done = false; + }; + + UserData userData; + userData.buffer = buffer; + + // First mapping. + buffer.MapAsync( + wgpu::MapMode::Read, 0, wgpu::kWholeMapSize, + [](WGPUBufferMapAsyncStatus status, void* userdataPtr) { + EXPECT_EQ(status, WGPUBufferMapAsyncStatus_DeviceLost); + auto userdata = static_cast(userdataPtr); + + // Second mapping. + userdata->buffer.MapAsync( + wgpu::MapMode::Read, 0, wgpu::kWholeMapSize, + [](WGPUBufferMapAsyncStatus status, void* userdataPtr) { + EXPECT_EQ(status, WGPUBufferMapAsyncStatus_DeviceLost); + *static_cast(userdataPtr) = true; + }, + &userdata->done); + }, + &userData); + + while (!userData.done) { + WaitABit(); + } +} + // Test that the device can be dropped inside a buffer map callback. TEST_P(DeviceLifetimeTests, DroppedInsideBufferMapCallback) { wgpu::BufferDescriptor desc = {}; diff --git a/src/dawn/tests/end2end/IOSurfaceWrappingTests.cpp b/src/dawn/tests/end2end/IOSurfaceWrappingTests.cpp index 4f54f110e8..0c86cde098 100644 --- a/src/dawn/tests/end2end/IOSurfaceWrappingTests.cpp +++ b/src/dawn/tests/end2end/IOSurfaceWrappingTests.cpp @@ -16,6 +16,10 @@ #include #include +#include +#include +#include + #include "dawn/tests/DawnTest.h" #include "dawn/native/MetalBackend.h" @@ -588,5 +592,66 @@ TEST_P(IOSurfaceUsageTests, WriteThenConcurrentReadThenWrite) { EXPECT_TRUE(endWriteAccessDesc.isInitialized); } +class IOSurfaceMultithreadTests : public IOSurfaceUsageTests { + protected: + std::vector GetRequiredFeatures() override { + std::vector features; + // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet. + if (!UsesWire()) { + features.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization); + } + return features; + } + + void SetUp() override { + IOSurfaceUsageTests::SetUp(); + // TODO(crbug.com/dawn/1678): DawnWire doesn't support thread safe API yet. + DAWN_TEST_UNSUPPORTED_IF(UsesWire()); + } +}; + +// Test that texture with color is cleared when isInitialized = false. There shoudn't be any data +// race if multiple of them are created on multiple threads. +TEST_P(IOSurfaceMultithreadTests, UninitializedTexturesAreCleared_OnMultipleThreads) { + utils::RunInParallel(10, [this](uint32_t) { + ScopedIOSurfaceRef ioSurface = + CreateSinglePlaneIOSurface(1, 1, kCVPixelFormatType_32RGBA, 4); + uint32_t data = 0x04030201; + + IOSurfaceLock(ioSurface.get(), 0, nullptr); + memcpy(IOSurfaceGetBaseAddress(ioSurface.get()), &data, sizeof(data)); + IOSurfaceUnlock(ioSurface.get(), 0, nullptr); + + wgpu::TextureDescriptor textureDescriptor; + textureDescriptor.dimension = wgpu::TextureDimension::e2D; + textureDescriptor.format = wgpu::TextureFormat::RGBA8Unorm; + textureDescriptor.size = {1, 1, 1}; + textureDescriptor.sampleCount = 1; + textureDescriptor.mipLevelCount = 1; + textureDescriptor.usage = + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc; + + // wrap ioSurface and ensure color is not visible when isInitialized set to false + wgpu::Texture ioSurfaceTexture = WrapIOSurface(&textureDescriptor, ioSurface.get(), false); + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 0, 0, 0), ioSurfaceTexture, 0, 0); + + dawn::native::metal::ExternalImageIOSurfaceEndAccessDescriptor endAccessDesc; + dawn::native::metal::IOSurfaceEndAccess(ioSurfaceTexture.Get(), &endAccessDesc); + EXPECT_TRUE(endAccessDesc.isInitialized); + }); +} + +// Test that wrapping multiple IOSurface and clear them on multiple threads work. +TEST_P(IOSurfaceMultithreadTests, WrapAndClear_OnMultipleThreads) { + utils::RunInParallel(10, [this](uint32_t) { + ScopedIOSurfaceRef ioSurface = + CreateSinglePlaneIOSurface(1, 1, kCVPixelFormatType_32BGRA, 4); + + uint32_t data = 0x04010203; + DoClearTest(ioSurface.get(), wgpu::TextureFormat::BGRA8Unorm, &data, sizeof(data)); + }); +} + DAWN_INSTANTIATE_TEST(IOSurfaceValidationTests, MetalBackend()); DAWN_INSTANTIATE_TEST(IOSurfaceUsageTests, MetalBackend()); +DAWN_INSTANTIATE_TEST(IOSurfaceMultithreadTests, MetalBackend()); diff --git a/src/dawn/tests/end2end/MultithreadTests.cpp b/src/dawn/tests/end2end/MultithreadTests.cpp index 94f5a35723..652866017d 100644 --- a/src/dawn/tests/end2end/MultithreadTests.cpp +++ b/src/dawn/tests/end2end/MultithreadTests.cpp @@ -12,28 +12,50 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include #include #include +#include +#include +#include #include +#include #include #include "dawn/common/Constants.h" #include "dawn/common/Math.h" -#include "dawn/common/Mutex.h" #include "dawn/tests/DawnTest.h" #include "dawn/utils/ComboRenderPipelineDescriptor.h" #include "dawn/utils/TestUtils.h" #include "dawn/utils/TextureUtils.h" #include "dawn/utils/WGPUHelpers.h" -#define LOCKED_CMD(CMD) \ - do { \ - dawn::Mutex::AutoLock lk(&mutex); \ - CMD; \ - } while (0) - namespace { +template +class LockStep { + public: + LockStep() = delete; + explicit LockStep(Step startStep) : mStep(startStep) {} + + void Signal(Step step) { + std::lock_guard lg(mMutex); + mStep = step; + mCv.notify_all(); + } + + void Wait(Step step) { + std::unique_lock lg(mMutex); + mCv.wait(lg, [=] { return mStep == step; }); + } + + private: + Step mStep; + std::mutex mMutex; + std::condition_variable mCv; +}; + class MultithreadTests : public DawnTest { protected: std::vector GetRequiredFeatures() override { @@ -75,21 +97,309 @@ class MultithreadTests : public DawnTest { texDescriptor.sampleCount = sampleCount; return device.CreateTexture(&texDescriptor); } +}; - void RunInParallel(uint32_t numThreads, const std::function& workerFunc) { - std::vector> threads(numThreads); +// Test that dropping a device's last ref on another thread won't crash Instance::ProcessEvents. +TEST_P(MultithreadTests, Device_DroppedOnAnotherThread) { + std::vector devices(5); - for (uint32_t i = 0; i < threads.size(); ++i) { - threads[i] = std::make_unique([i, workerFunc] { workerFunc(i); }); - } - - for (auto& thread : threads) { - thread->join(); - } + // Create devices. + for (size_t i = 0; i < devices.size(); ++i) { + devices[i] = CreateDevice(); } - dawn::Mutex mutex; -}; + std::atomic numAliveDevices = static_cast(devices.size()); + + // Create threads + utils::RunInParallel( + numAliveDevices.load(), + [&devices, &numAliveDevices](uint32_t index) { + EXPECT_NE(devices[index].Get(), nullptr); + + // Drop device. + devices[index] = nullptr; + + numAliveDevices--; + }, + [this, &numAliveDevices] { + while (numAliveDevices.load() > 0) { + // main thread process events from all devices + WaitABit(); + } + }); +} + +// Test that dropping a device's last ref inside a callback on another thread won't crash +// Instance::ProcessEvents. +TEST_P(MultithreadTests, Device_DroppedInCallback_OnAnotherThread) { + std::vector devices(10); + + // Create devices. + for (auto& device : devices) { + device = CreateDevice(); + } + + // Create threads + utils::RunInParallel(static_cast(devices.size()), [&devices, this](uint32_t index) { + auto additionalDevice = std::move(devices[index]); + struct UserData { + wgpu::Device device2ndRef; + std::atomic_bool isCompleted{false}; + } userData; + + userData.device2ndRef = additionalDevice; + + // Drop the last ref inside a callback. + additionalDevice.PushErrorScope(wgpu::ErrorFilter::Validation); + additionalDevice.PopErrorScope( + [](WGPUErrorType type, const char*, void* userdataPtr) { + auto userdata = static_cast(userdataPtr); + userdata->device2ndRef = nullptr; + userdata->isCompleted = true; + }, + &userData); + // main ref dropped. + additionalDevice = nullptr; + + do { + WaitABit(); + } while (!userData.isCompleted.load()); + + EXPECT_EQ(userData.device2ndRef, nullptr); + }); +} + +// Test that multiple buffers being created and mapped on multiple threads won't interfere with +// each other. +TEST_P(MultithreadTests, Buffers_MapInParallel) { + constexpr uint32_t kDataSize = 1000; + std::vector myData; + for (uint32_t i = 0; i < kDataSize; ++i) { + myData.push_back(i); + } + + constexpr uint32_t kSize = static_cast(kDataSize * sizeof(uint32_t)); + + utils::RunInParallel(10, [=, &myData = std::as_const(myData)](uint32_t) { + wgpu::Buffer buffer; + std::atomic mapCompleted(false); + + // Create buffer and request mapping. + buffer = CreateBuffer(kSize, wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc); + + buffer.MapAsync( + wgpu::MapMode::Write, 0, kSize, + [](WGPUBufferMapAsyncStatus status, void* userdata) { + EXPECT_EQ(WGPUBufferMapAsyncStatus_Success, status); + (*static_cast*>(userdata)) = true; + }, + &mapCompleted); + + // Wait for the mapping to complete + while (!mapCompleted.load()) { + device.Tick(); + FlushWire(); + } + + // Buffer is mapped, write into it and unmap . + memcpy(buffer.GetMappedRange(0, kSize), myData.data(), kSize); + buffer.Unmap(); + + // Check the content of the buffer. + EXPECT_BUFFER_U32_RANGE_EQ(myData.data(), buffer, 0, kDataSize); + }); +} + +// Test CreateComputePipelineAsync on multiple threads. +TEST_P(MultithreadTests, CreateComputePipelineAsyncInParallel) { + // TODO(crbug.com/dawn/1766): TSAN reported race conditions in NVIDIA's vk driver. + DAWN_SUPPRESS_TEST_IF(IsVulkan() && IsNvidia() && IsTsan()); + + std::vector pipelines(10); + std::vector shaderSources(pipelines.size()); + std::vector expectedValues(shaderSources.size()); + + for (uint32_t i = 0; i < pipelines.size(); ++i) { + expectedValues[i] = i + 1; + + std::ostringstream ss; + ss << R"( + struct SSBO { + value : u32 + } + @group(0) @binding(0) var ssbo : SSBO; + + @compute @workgroup_size(1) fn main() { + ssbo.value = + )"; + ss << expectedValues[i]; + ss << ";}"; + + shaderSources[i] = ss.str(); + } + + // Create pipelines in parallel + utils::RunInParallel(static_cast(pipelines.size()), [&](uint32_t index) { + wgpu::ComputePipelineDescriptor csDesc; + csDesc.compute.module = utils::CreateShaderModule(device, shaderSources[index].c_str()); + csDesc.compute.entryPoint = "main"; + + struct Task { + wgpu::ComputePipeline computePipeline; + std::atomic isCompleted{false}; + } task; + device.CreateComputePipelineAsync( + &csDesc, + [](WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline returnPipeline, + const char* message, void* userdata) { + EXPECT_EQ(WGPUCreatePipelineAsyncStatus::WGPUCreatePipelineAsyncStatus_Success, + status); + + auto task = static_cast(userdata); + task->computePipeline = wgpu::ComputePipeline::Acquire(returnPipeline); + task->isCompleted = true; + }, + &task); + + while (!task.isCompleted.load()) { + WaitABit(); + } + + pipelines[index] = task.computePipeline; + }); + + // Verify pipelines' executions + for (uint32_t i = 0; i < pipelines.size(); ++i) { + wgpu::Buffer ssbo = + CreateBuffer(sizeof(uint32_t), wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); + + wgpu::CommandBuffer commands; + { + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + + ASSERT_NE(nullptr, pipelines[i].Get()); + wgpu::BindGroup bindGroup = + utils::MakeBindGroup(device, pipelines[i].GetBindGroupLayout(0), + { + {0, ssbo, 0, sizeof(uint32_t)}, + }); + pass.SetBindGroup(0, bindGroup); + pass.SetPipeline(pipelines[i]); + + pass.DispatchWorkgroups(1); + pass.End(); + + commands = encoder.Finish(); + } + + queue.Submit(1, &commands); + + EXPECT_BUFFER_U32_EQ(expectedValues[i], ssbo, 0); + } +} + +// Test CreateRenderPipelineAsync on multiple threads. +TEST_P(MultithreadTests, CreateRenderPipelineAsyncInParallel) { + // TODO(crbug.com/dawn/1766): TSAN reported race conditions in NVIDIA's vk driver. + DAWN_SUPPRESS_TEST_IF(IsVulkan() && IsNvidia() && IsTsan()); + + constexpr uint32_t kNumThreads = 10; + constexpr wgpu::TextureFormat kRenderAttachmentFormat = wgpu::TextureFormat::RGBA8Unorm; + constexpr uint8_t kColorStep = 250 / kNumThreads; + + std::vector pipelines(kNumThreads); + std::vector fragmentShaderSources(kNumThreads); + std::vector minExpectedValues(kNumThreads); + std::vector maxExpectedValues(kNumThreads); + + for (uint32_t i = 0; i < kNumThreads; ++i) { + // Due to floating point precision, we need to use min & max values to compare the + // expectations. + auto expectedGreen = kColorStep * i; + minExpectedValues[i] = + utils::RGBA8(0, expectedGreen == 0 ? 0 : (expectedGreen - 2), 0, 255); + maxExpectedValues[i] = + utils::RGBA8(0, expectedGreen == 255 ? 255 : (expectedGreen + 2), 0, 255); + + std::ostringstream ss; + ss << R"( + @fragment fn main() -> @location(0) vec4f { + return vec4f(0.0, + )"; + ss << expectedGreen / 255.0; + ss << ", 0.0, 1.0);}"; + + fragmentShaderSources[i] = ss.str(); + } + + // Create pipelines in parallel + utils::RunInParallel(kNumThreads, [&](uint32_t index) { + utils::ComboRenderPipelineDescriptor renderPipelineDescriptor; + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( + @vertex fn main() -> @builtin(position) vec4f { + return vec4f(0.0, 0.0, 0.0, 1.0); + })"); + wgpu::ShaderModule fsModule = + utils::CreateShaderModule(device, fragmentShaderSources[index].c_str()); + renderPipelineDescriptor.vertex.module = vsModule; + renderPipelineDescriptor.cFragment.module = fsModule; + renderPipelineDescriptor.cTargets[0].format = kRenderAttachmentFormat; + renderPipelineDescriptor.primitive.topology = wgpu::PrimitiveTopology::PointList; + + struct Task { + wgpu::RenderPipeline renderPipeline; + std::atomic isCompleted{false}; + } task; + device.CreateRenderPipelineAsync( + &renderPipelineDescriptor, + [](WGPUCreatePipelineAsyncStatus status, WGPURenderPipeline returnPipeline, + const char* message, void* userdata) { + EXPECT_EQ(WGPUCreatePipelineAsyncStatus::WGPUCreatePipelineAsyncStatus_Success, + status); + + auto* task = static_cast(userdata); + task->renderPipeline = wgpu::RenderPipeline::Acquire(returnPipeline); + task->isCompleted = true; + }, + &task); + + while (!task.isCompleted) { + WaitABit(); + } + + pipelines[index] = task.renderPipeline; + }); + + // Verify pipelines' executions + for (uint32_t i = 0; i < pipelines.size(); ++i) { + wgpu::Texture outputTexture = + CreateTexture(1, 1, kRenderAttachmentFormat, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc); + + utils::ComboRenderPassDescriptor renderPassDescriptor({outputTexture.CreateView()}); + renderPassDescriptor.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear; + renderPassDescriptor.cColorAttachments[0].clearValue = {1.f, 0.f, 0.f, 1.f}; + + wgpu::CommandBuffer commands; + { + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder renderPassEncoder = + encoder.BeginRenderPass(&renderPassDescriptor); + + ASSERT_NE(nullptr, pipelines[i].Get()); + + renderPassEncoder.SetPipeline(pipelines[i]); + renderPassEncoder.Draw(1); + renderPassEncoder.End(); + commands = encoder.Finish(); + } + + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_BETWEEN(minExpectedValues[i], maxExpectedValues[i], outputTexture, 0, 0); + } +} class MultithreadCachingTests : public MultithreadTests { protected: @@ -116,7 +426,7 @@ class MultithreadCachingTests : public MultithreadTests { // Test that creating a same shader module (which will return the cached shader module) and release // it on multiple threads won't race. TEST_P(MultithreadCachingTests, RefAndReleaseCachedShaderModulesInParallel) { - RunInParallel(100, [this](uint32_t) { + utils::RunInParallel(100, [this](uint32_t) { wgpu::ShaderModule csModule = CreateComputeShaderModule(); EXPECT_NE(nullptr, csModule.Get()); }); @@ -134,7 +444,7 @@ TEST_P(MultithreadCachingTests, RefAndReleaseCachedComputePipelinesInParallel) { csDesc.compute.entryPoint = "main"; csDesc.layout = pipelineLayout; - RunInParallel(100, [&, this](uint32_t) { + utils::RunInParallel(100, [&, this](uint32_t) { wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc); EXPECT_NE(nullptr, pipeline.Get()); }); @@ -143,7 +453,7 @@ TEST_P(MultithreadCachingTests, RefAndReleaseCachedComputePipelinesInParallel) { // Test that creating a same bind group layout (which will return the cached layout) and // release it on multiple threads won't race. TEST_P(MultithreadCachingTests, RefAndReleaseCachedBindGroupLayoutsInParallel) { - RunInParallel(100, [&, this](uint32_t) { + utils::RunInParallel(100, [&, this](uint32_t) { wgpu::BindGroupLayout layout = CreateComputeBindGroupLayout(); EXPECT_NE(nullptr, layout.Get()); }); @@ -154,7 +464,7 @@ TEST_P(MultithreadCachingTests, RefAndReleaseCachedBindGroupLayoutsInParallel) { TEST_P(MultithreadCachingTests, RefAndReleaseCachedPipelineLayoutsInParallel) { wgpu::BindGroupLayout bglayout = CreateComputeBindGroupLayout(); - RunInParallel(100, [&, this](uint32_t) { + utils::RunInParallel(100, [&, this](uint32_t) { wgpu::PipelineLayout pipelineLayout = utils::MakePipelineLayout(device, {bglayout}); EXPECT_NE(nullptr, pipelineLayout.Get()); }); @@ -177,7 +487,7 @@ TEST_P(MultithreadCachingTests, RefAndReleaseCachedRenderPipelinesInParallel) { renderPipelineDescriptor.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm; renderPipelineDescriptor.primitive.topology = wgpu::PrimitiveTopology::PointList; - RunInParallel(100, [&, this](uint32_t) { + utils::RunInParallel(100, [&, this](uint32_t) { wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDescriptor); EXPECT_NE(nullptr, pipeline.Get()); }); @@ -187,7 +497,7 @@ TEST_P(MultithreadCachingTests, RefAndReleaseCachedRenderPipelinesInParallel) { // on multiple threads won't race. TEST_P(MultithreadCachingTests, RefAndReleaseCachedSamplersInParallel) { wgpu::SamplerDescriptor desc = {}; - RunInParallel(100, [&, this](uint32_t) { + utils::RunInParallel(100, [&, this](uint32_t) { wgpu::Sampler sampler = device.CreateSampler(&desc); EXPECT_NE(nullptr, sampler.Get()); }); @@ -213,7 +523,7 @@ TEST_P(MultithreadEncodingTests, RenderPassEncodersInParallel) { std::vector commandBuffers(kNumThreads); - RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) { + utils::RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); // Clear the renderTarget to red. @@ -263,7 +573,7 @@ TEST_P(MultithreadEncodingTests, ComputePassEncodersInParallel) { std::vector commandBuffers(kNumThreads); - RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) { + utils::RunInParallel(kNumThreads, [=, &commandBuffers](uint32_t index) { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); pass.SetPipeline(pipeline); @@ -284,6 +594,494 @@ TEST_P(MultithreadEncodingTests, ComputePassEncodersInParallel) { } } +class MultithreadTextureCopyTests : public MultithreadTests { + protected: + void SetUp() override { + MultithreadTests::SetUp(); + + // TODO(crbug.com/dawn/1291): These tests are failing on GLES (both native and ANGLE) + // when using Tint/GLSL. + DAWN_TEST_UNSUPPORTED_IF(IsOpenGLES()); + } + + wgpu::Texture CreateAndWriteTexture(uint32_t width, + uint32_t height, + wgpu::TextureFormat format, + wgpu::TextureUsage usage, + const void* data, + size_t dataSize) { + auto texture = CreateTexture(width, height, format, wgpu::TextureUsage::CopyDst | usage); + + wgpu::Extent3D textureSize = {width, height, 1}; + + wgpu::ImageCopyTexture imageCopyTexture = + utils::CreateImageCopyTexture(texture, 0, {0, 0, 0}, wgpu::TextureAspect::All); + wgpu::TextureDataLayout textureDataLayout = + utils::CreateTextureDataLayout(0, dataSize / height); + + queue.WriteTexture(&imageCopyTexture, data, dataSize, &textureDataLayout, &textureSize); + + return texture; + } + + uint32_t BufferSizeForTextureCopy(uint32_t width, uint32_t height, wgpu::TextureFormat format) { + uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(format, width); + return utils::RequiredBytesInCopy(bytesPerRow, height, {width, height, 1}, format); + } + + void CopyTextureToTextureHelper( + const wgpu::Texture& srcTexture, + const wgpu::ImageCopyTexture& dst, + const wgpu::Extent3D& dstSize, + const wgpu::CommandEncoder& encoder, + const wgpu::CopyTextureForBrowserOptions* copyForBrowerOptions = nullptr) { + wgpu::ImageCopyTexture srcView = + utils::CreateImageCopyTexture(srcTexture, 0, {0, 0, 0}, wgpu::TextureAspect::All); + + if (copyForBrowerOptions == nullptr) { + encoder.CopyTextureToTexture(&srcView, &dst, &dstSize); + + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + } else { + // Don't need encoder + ASSERT(encoder == nullptr); + queue.CopyTextureForBrowser(&srcView, &dst, &dstSize, copyForBrowerOptions); + } + } + + void CopyBufferToTextureHelper(const wgpu::Buffer& srcBuffer, + uint32_t srcBytesPerRow, + const wgpu::ImageCopyTexture& dst, + const wgpu::Extent3D& dstSize, + const wgpu::CommandEncoder& encoder) { + wgpu::ImageCopyBuffer srcView = + utils::CreateImageCopyBuffer(srcBuffer, 0, srcBytesPerRow, dstSize.height); + + encoder.CopyBufferToTexture(&srcView, &dst, &dstSize); + + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + } +}; + +// Test that depth texture's CopyTextureToTexture() can work in parallel with other commands (such +// resources creation and texture to buffer copy for texture expectations). +// This test is needed since most of command encoder's commands are not synchronized, but +// CopyTextureToTexture() command might internally allocate resources and we need to make sure that +// it won't race with other threads' works. +TEST_P(MultithreadTextureCopyTests, CopyDepthToDepthNoRace) { + enum class Step { + Begin, + WriteTexture, + }; + + constexpr uint32_t kWidth = 4; + constexpr uint32_t kHeight = 4; + + const std::vector kExpectedData32 = { + 0, 0, 0, 0, // + 0, 0, 0.4f, 0, // + 1.0f, 1.0f, 0, 0, // + 1.0f, 1.0f, 0, 0, // + }; + + std::vector kExpectedData16(kExpectedData32.size()); + for (size_t i = 0; i < kExpectedData32.size(); ++i) { + kExpectedData16[i] = kExpectedData32[i] * std::numeric_limits::max(); + } + + const size_t kExpectedDataSize16 = kExpectedData16.size() * sizeof(kExpectedData16[0]); + + LockStep lockStep(Step::Begin); + + wgpu::Texture depthTexture; + std::thread writeThread([&] { + depthTexture = CreateAndWriteTexture( + kWidth, kHeight, wgpu::TextureFormat::Depth16Unorm, + wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::RenderAttachment, + kExpectedData16.data(), kExpectedDataSize16); + + lockStep.Signal(Step::WriteTexture); + + // Verify the initial data + ExpectAttachmentDepthTestData(depthTexture, wgpu::TextureFormat::Depth16Unorm, kWidth, + kHeight, 0, /*mipLevel=*/0, kExpectedData32); + }); + + std::thread copyThread([&] { + auto destTexture = + CreateTexture(kWidth * 2, kHeight * 2, wgpu::TextureFormat::Depth16Unorm, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopyDst | + wgpu::TextureUsage::CopySrc, + /*mipLevelCount=*/2); + + // Copy from depthTexture to destTexture. + const wgpu::Extent3D dstSize = {kWidth, kHeight, 1}; + wgpu::ImageCopyTexture dest = utils::CreateImageCopyTexture( + destTexture, /*dstMipLevel=*/1, {0, 0, 0}, wgpu::TextureAspect::All); + auto encoder = device.CreateCommandEncoder(); + lockStep.Wait(Step::WriteTexture); + CopyTextureToTextureHelper(depthTexture, dest, dstSize, encoder); + + // Verify the copied data + ExpectAttachmentDepthTestData(destTexture, wgpu::TextureFormat::Depth16Unorm, kWidth, + kHeight, 0, /*mipLevel=*/1, kExpectedData32); + }); + + writeThread.join(); + copyThread.join(); +} + +// Test that depth texture's CopyBufferToTexture() can work in parallel with other commands (such +// resources creation and texture to buffer copy for texture expectations). +// This test is needed since most of command encoder's commands are not synchronized, but +// CopyBufferToTexture() command might internally allocate resources and we need to make sure that +// it won't race with other threads' works. +TEST_P(MultithreadTextureCopyTests, CopyBufferToDepthNoRace) { + enum class Step { + Begin, + WriteBuffer, + }; + + constexpr uint32_t kWidth = 16; + constexpr uint32_t kHeight = 1; + + const std::vector kExpectedData32 = { + 0, 0, 0, 0, // + 0, 0, 0.4f, 0, // + 1.0f, 1.0f, 0, 0, // + 1.0f, 1.0f, 0, 0, // + }; + + std::vector kExpectedData16(kExpectedData32.size()); + for (size_t i = 0; i < kExpectedData32.size(); ++i) { + kExpectedData16[i] = kExpectedData32[i] * std::numeric_limits::max(); + } + + const uint32_t kExpectedDataSize16 = kExpectedData16.size() * sizeof(kExpectedData16[0]); + + const wgpu::Extent3D kSize = {kWidth, kHeight, 1}; + LockStep lockStep(Step::Begin); + + wgpu::Buffer buffer; + std::thread writeThread([&] { + buffer = CreateBuffer( + BufferSizeForTextureCopy(kWidth, kHeight, wgpu::TextureFormat::Depth16Unorm), + wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc); + + queue.WriteBuffer(buffer, 0, kExpectedData16.data(), kExpectedDataSize16); + device.Tick(); + + lockStep.Signal(Step::WriteBuffer); + + EXPECT_BUFFER_U16_RANGE_EQ(kExpectedData16.data(), buffer, 0, kExpectedData16.size()); + }); + + std::thread copyThread([&] { + auto destTexture = + CreateTexture(kWidth, kHeight, wgpu::TextureFormat::Depth16Unorm, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopyDst | + wgpu::TextureUsage::CopySrc); + + auto encoder = device.CreateCommandEncoder(); + + wgpu::ImageCopyTexture dest = utils::CreateImageCopyTexture( + destTexture, /*dstMipLevel=*/0, {0, 0, 0}, wgpu::TextureAspect::All); + + // Wait until src buffer is written. + lockStep.Wait(Step::WriteBuffer); + CopyBufferToTextureHelper(buffer, kTextureBytesPerRowAlignment, dest, kSize, encoder); + + // Verify the copied data + ExpectAttachmentDepthTestData(destTexture, wgpu::TextureFormat::Depth16Unorm, kWidth, + kHeight, 0, /*mipLevel=*/0, kExpectedData32); + }); + + writeThread.join(); + copyThread.join(); +} + +// Test that stencil texture's CopyTextureToTexture() can work in parallel with other commands (such +// resources creation and texture to buffer copy for texture expectations). +// This test is needed since most of command encoder's commands are not synchronized, but +// CopyTextureToTexture() command might internally allocate resources and we need to make sure that +// it won't race with other threads' works. +TEST_P(MultithreadTextureCopyTests, CopyStencilToStencilNoRace) { + // TODO(crbug.com/dawn/1497): glReadPixels: GL error: HIGH: Invalid format and type + // combination. + DAWN_SUPPRESS_TEST_IF(IsANGLE()); + + // TODO(crbug.com/dawn/667): Work around the fact that some platforms are unable to read + // stencil. + DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("disable_depth_stencil_read")); + + enum class Step { + Begin, + WriteTexture, + }; + + constexpr uint32_t kWidth = 1; + constexpr uint32_t kHeight = 1; + + constexpr uint8_t kExpectedData = 177; + constexpr size_t kExpectedDataSize = sizeof(kExpectedData); + + LockStep lockStep(Step::Begin); + + wgpu::Texture stencilTexture; + std::thread writeThread([&] { + stencilTexture = CreateAndWriteTexture( + kWidth, kHeight, wgpu::TextureFormat::Stencil8, + wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::RenderAttachment, &kExpectedData, + kExpectedDataSize); + + lockStep.Signal(Step::WriteTexture); + + // Verify the initial data + ExpectAttachmentStencilTestData(stencilTexture, wgpu::TextureFormat::Stencil8, kWidth, + kHeight, 0, /*mipLevel=*/0, kExpectedData); + }); + + std::thread copyThread([&] { + auto destTexture = + CreateTexture(kWidth * 2, kHeight * 2, wgpu::TextureFormat::Stencil8, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopyDst | + wgpu::TextureUsage::CopySrc, + /*mipLevelCount=*/2); + + // Copy from stencilTexture to destTexture. + const wgpu::Extent3D dstSize = {kWidth, kHeight, 1}; + wgpu::ImageCopyTexture dest = utils::CreateImageCopyTexture( + destTexture, /*dstMipLevel=*/1, {0, 0, 0}, wgpu::TextureAspect::All); + auto encoder = device.CreateCommandEncoder(); + lockStep.Wait(Step::WriteTexture); + + CopyTextureToTextureHelper(stencilTexture, dest, dstSize, encoder); + + // Verify the copied data + ExpectAttachmentStencilTestData(destTexture, wgpu::TextureFormat::Stencil8, kWidth, kHeight, + 0, /*mipLevel=*/1, kExpectedData); + }); + + writeThread.join(); + copyThread.join(); +} + +// Test that stencil texture's CopyBufferToTexture() can work in parallel with other commands (such +// resources creation and texture to buffer copy for texture expectations). +// This test is needed since most of command encoder's commands are not synchronized, but +// CopyBufferToTexture() command might internally allocate resources and we need to make sure that +// it won't race with other threads' works. +TEST_P(MultithreadTextureCopyTests, CopyBufferToStencilNoRace) { + enum class Step { + Begin, + WriteBuffer, + }; + + constexpr uint32_t kWidth = 1; + constexpr uint32_t kHeight = 1; + + constexpr uint8_t kExpectedData = 177; + + const wgpu::Extent3D kSize = {kWidth, kHeight, 1}; + LockStep lockStep(Step::Begin); + + wgpu::Buffer buffer; + std::thread writeThread([&] { + const auto kBufferSize = kTextureBytesPerRowAlignment; + buffer = CreateBuffer(kBufferSize, wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc); + + std::vector bufferData(kBufferSize); + bufferData[0] = kExpectedData; + + queue.WriteBuffer(buffer.Get(), 0, bufferData.data(), kBufferSize); + device.Tick(); + + lockStep.Signal(Step::WriteBuffer); + + EXPECT_BUFFER_U8_EQ(kExpectedData, buffer, 0); + }); + + std::thread copyThread([&] { + auto destTexture = + CreateTexture(kWidth, kHeight, wgpu::TextureFormat::Stencil8, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopyDst | + wgpu::TextureUsage::CopySrc); + + auto encoder = device.CreateCommandEncoder(); + + wgpu::ImageCopyTexture dest = utils::CreateImageCopyTexture( + destTexture, /*dstMipLevel=*/0, {0, 0, 0}, wgpu::TextureAspect::All); + + // Wait until src buffer is written. + lockStep.Wait(Step::WriteBuffer); + CopyBufferToTextureHelper(buffer, kTextureBytesPerRowAlignment, dest, kSize, encoder); + + // Verify the copied data + ExpectAttachmentStencilTestData(destTexture, wgpu::TextureFormat::Stencil8, kWidth, kHeight, + 0, /*mipLevel=*/0, kExpectedData); + }); + + writeThread.join(); + copyThread.join(); +} + +// Test that color texture's CopyTextureForBrowser() can work in parallel with other commands (such +// resources creation and texture to buffer copy for texture expectations). +// This test is needed since CopyTextureForBrowser() command might internally allocate resources and +// we need to make sure that it won't race with other threads' works. +TEST_P(MultithreadTextureCopyTests, CopyTextureForBrowserNoRace) { + // TODO(crbug.com/dawn/1232): Program link error on OpenGLES backend + DAWN_SUPPRESS_TEST_IF(IsOpenGLES()); + DAWN_SUPPRESS_TEST_IF(IsOpenGL() && IsLinux()); + + enum class Step { + Begin, + WriteTexture, + }; + + constexpr uint32_t kWidth = 4; + constexpr uint32_t kHeight = 4; + + const std::vector kExpectedData = { + utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kGreen, utils::RGBA8::kBlack, // + utils::RGBA8::kRed, utils::RGBA8::kRed, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + utils::RGBA8::kRed, utils::RGBA8::kBlue, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + }; + + const std::vector kExpectedFlippedData = { + utils::RGBA8::kRed, utils::RGBA8::kBlue, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + utils::RGBA8::kRed, utils::RGBA8::kRed, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kGreen, utils::RGBA8::kBlack, // + utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + }; + + const size_t kExpectedDataSize = kExpectedData.size() * sizeof(kExpectedData[0]); + + LockStep lockStep(Step::Begin); + + wgpu::Texture srcTexture; + std::thread writeThread([&] { + srcTexture = + CreateAndWriteTexture(kWidth, kHeight, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::TextureBinding, + kExpectedData.data(), kExpectedDataSize); + + lockStep.Signal(Step::WriteTexture); + + // Verify the initial data + EXPECT_TEXTURE_EQ(kExpectedData.data(), srcTexture, {0, 0}, {kWidth, kHeight}); + }); + + std::thread copyThread([&] { + auto destTexture = + CreateTexture(kWidth, kHeight, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopyDst | + wgpu::TextureUsage::CopySrc); + + // Copy from srcTexture to destTexture. + const wgpu::Extent3D dstSize = {kWidth, kHeight, 1}; + wgpu::ImageCopyTexture dest = utils::CreateImageCopyTexture( + destTexture, /*dstMipLevel=*/0, {0, 0, 0}, wgpu::TextureAspect::All); + wgpu::CopyTextureForBrowserOptions options; + options.flipY = true; + + lockStep.Wait(Step::WriteTexture); + CopyTextureToTextureHelper(srcTexture, dest, dstSize, nullptr, &options); + + // Verify the copied data + EXPECT_TEXTURE_EQ(kExpectedFlippedData.data(), destTexture, {0, 0}, {kWidth, kHeight}); + }); + + writeThread.join(); + copyThread.join(); +} + +// Test that error from CopyTextureForBrowser() won't cause deadlock. +TEST_P(MultithreadTextureCopyTests, CopyTextureForBrowserErrorNoDeadLock) { + // TODO(crbug.com/dawn/1232): Program link error on OpenGLES backend + DAWN_SUPPRESS_TEST_IF(IsOpenGLES()); + DAWN_SUPPRESS_TEST_IF(IsOpenGL() && IsLinux()); + + DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("skip_validation")); + + enum class Step { + Begin, + WriteTexture, + }; + + constexpr uint32_t kWidth = 4; + constexpr uint32_t kHeight = 4; + + const std::vector kExpectedData = { + utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + utils::RGBA8::kBlack, utils::RGBA8::kBlack, utils::RGBA8::kGreen, utils::RGBA8::kBlack, // + utils::RGBA8::kRed, utils::RGBA8::kRed, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + utils::RGBA8::kRed, utils::RGBA8::kBlue, utils::RGBA8::kBlack, utils::RGBA8::kBlack, // + }; + + const size_t kExpectedDataSize = kExpectedData.size() * sizeof(kExpectedData[0]); + + LockStep lockStep(Step::Begin); + + wgpu::Texture srcTexture; + std::thread writeThread([&] { + srcTexture = + CreateAndWriteTexture(kWidth, kHeight, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::TextureBinding, + kExpectedData.data(), kExpectedDataSize); + + lockStep.Signal(Step::WriteTexture); + + // Verify the initial data + EXPECT_TEXTURE_EQ(kExpectedData.data(), srcTexture, {0, 0}, {kWidth, kHeight}); + }); + + std::thread copyThread([&] { + wgpu::Texture invalidSrcTexture; + invalidSrcTexture = CreateTexture(kWidth, kHeight, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::CopySrc); + auto destTexture = + CreateTexture(kWidth, kHeight, wgpu::TextureFormat::RGBA8Unorm, + wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopyDst | + wgpu::TextureUsage::CopySrc); + + // Copy from srcTexture to destTexture. + const wgpu::Extent3D dstSize = {kWidth, kHeight, 1}; + wgpu::ImageCopyTexture dest = utils::CreateImageCopyTexture( + destTexture, /*dstMipLevel=*/0, {0, 0, 0}, wgpu::TextureAspect::All); + wgpu::CopyTextureForBrowserOptions options = {}; + + device.PushErrorScope(wgpu::ErrorFilter::Validation); + + // The first copy should be an error because of missing TextureBinding from src texture. + lockStep.Wait(Step::WriteTexture); + CopyTextureToTextureHelper(invalidSrcTexture, dest, dstSize, nullptr, &options); + + std::atomic errorThrown(false); + device.PopErrorScope( + [](WGPUErrorType type, char const* message, void* userdata) { + EXPECT_EQ(type, WGPUErrorType_Validation); + auto error = static_cast*>(userdata); + *error = true; + }, + &errorThrown); + device.Tick(); + EXPECT_TRUE(errorThrown.load()); + + // Second copy is valid. + CopyTextureToTextureHelper(srcTexture, dest, dstSize, nullptr, &options); + + // Verify the copied data + EXPECT_TEXTURE_EQ(kExpectedData.data(), destTexture, {0, 0}, {kWidth, kHeight}); + }); + + writeThread.join(); + copyThread.join(); +} + class MultithreadDrawIndexedIndirectTests : public MultithreadTests { protected: void SetUp() override { @@ -376,8 +1174,8 @@ class MultithreadDrawIndexedIndirectTests : public MultithreadTests { wgpu::CommandBuffer commands) { queue.Submit(1, &commands); - LOCKED_CMD(EXPECT_PIXEL_RGBA8_EQ(bottomLeftExpected, renderPass.color, 1, 3)); - LOCKED_CMD(EXPECT_PIXEL_RGBA8_EQ(topRightExpected, renderPass.color, 3, 1)); + EXPECT_PIXEL_RGBA8_EQ(bottomLeftExpected, renderPass.color, 1, 3); + EXPECT_PIXEL_RGBA8_EQ(topRightExpected, renderPass.color, 3, 1); } wgpu::RenderPipeline pipeline; @@ -397,7 +1195,7 @@ TEST_P(MultithreadDrawIndexedIndirectTests, IndirectOffsetInParallel) { utils::RGBA8 filled(0, 255, 0, 255); utils::RGBA8 notFilled(0, 0, 0, 0); - RunInParallel(10, [=](uint32_t) { + utils::RunInParallel(10, [=](uint32_t) { // Test an offset draw call, with indirect buffer containing 2 calls: // 1) first 3 indices of the second quad (top right triangle) // 2) last 3 indices of the second quad @@ -473,7 +1271,7 @@ TEST_P(MultithreadTimestampQueryTests, ResolveQuerySets_InParallel) { destinations[i] = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); } - RunInParallel(kNumThreads, [&](uint32_t index) { + utils::RunInParallel(kNumThreads, [&](uint32_t index) { const auto& querySet = querySets[index]; const auto& destination = destinations[index]; wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); @@ -483,13 +1281,19 @@ TEST_P(MultithreadTimestampQueryTests, ResolveQuerySets_InParallel) { wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); - LOCKED_CMD(EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), - new TimestampExpectation)); + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); }); } } // namespace +DAWN_INSTANTIATE_TEST(MultithreadTests, + D3D12Backend(), + MetalBackend(), + OpenGLBackend(), + OpenGLESBackend(), + VulkanBackend()); + DAWN_INSTANTIATE_TEST(MultithreadCachingTests, D3D12Backend(), MetalBackend(), @@ -504,6 +1308,17 @@ DAWN_INSTANTIATE_TEST(MultithreadEncodingTests, OpenGLESBackend(), VulkanBackend()); +DAWN_INSTANTIATE_TEST( + MultithreadTextureCopyTests, + D3D12Backend(), + MetalBackend(), + MetalBackend({"use_blit_for_buffer_to_depth_texture_copy", + "use_blit_for_depth_texture_to_texture_copy_to_nonzero_subresource"}), + MetalBackend({"use_blit_for_buffer_to_stencil_texture_copy"}), + OpenGLBackend(), + OpenGLESBackend(), + VulkanBackend()); + DAWN_INSTANTIATE_TEST(MultithreadDrawIndexedIndirectTests, D3D12Backend(), MetalBackend(), diff --git a/src/dawn/utils/TestUtils.cpp b/src/dawn/utils/TestUtils.cpp index 3216402abf..bf7b247802 100644 --- a/src/dawn/utils/TestUtils.cpp +++ b/src/dawn/utils/TestUtils.cpp @@ -13,7 +13,9 @@ // limitations under the License. #include +#include #include +#include #include #include "dawn/common/Assert.h" @@ -190,4 +192,22 @@ uint32_t VertexFormatSize(wgpu::VertexFormat format) { UNREACHABLE(); } +void RunInParallel(uint32_t numThreads, + const std::function& workerFunc, + const std::function& mainThreadFunc) { + std::vector> threads(numThreads); + + for (uint32_t i = 0; i < threads.size(); ++i) { + threads[i] = std::make_unique([i, workerFunc] { workerFunc(i); }); + } + + if (mainThreadFunc != nullptr) { + mainThreadFunc(); + } + + for (auto& thread : threads) { + thread->join(); + } +} + } // namespace utils diff --git a/src/dawn/utils/TestUtils.h b/src/dawn/utils/TestUtils.h index 5c359b75b7..9654b72116 100644 --- a/src/dawn/utils/TestUtils.h +++ b/src/dawn/utils/TestUtils.h @@ -15,6 +15,7 @@ #ifndef SRC_DAWN_UTILS_TESTUTILS_H_ #define SRC_DAWN_UTILS_TESTUTILS_H_ +#include #include #include "dawn/webgpu_cpp.h" @@ -84,6 +85,10 @@ void UnalignDynamicUploader(wgpu::Device device); uint32_t VertexFormatSize(wgpu::VertexFormat format); +void RunInParallel(uint32_t numThreads, + const std::function& workerFunc, + const std::function& mainThreadFunc = nullptr); + } // namespace utils #endif // SRC_DAWN_UTILS_TESTUTILS_H_