Vulkan: Check for device loss in CheckAndUpdateCompletedSerials

Bug: chromium:1195645
Bug: chromium:1195693
Change-Id: I3c25a64af87a60f40030094dd73b13a035a7876c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/46625
Auto-Submit: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
Corentin Wallez 2021-04-07 18:09:21 +00:00 committed by Commit Bot service account
parent 44771b3567
commit 6870e6d78d
13 changed files with 36 additions and 32 deletions

View File

@ -402,8 +402,9 @@ namespace dawn_native {
}
}
void DeviceBase::CheckPassedSerials() {
ExecutionSerial completedSerial = CheckAndUpdateCompletedSerials();
MaybeError DeviceBase::CheckPassedSerials() {
ExecutionSerial completedSerial;
DAWN_TRY_ASSIGN(completedSerial, CheckAndUpdateCompletedSerials());
ASSERT(completedSerial <= mLastSubmittedSerial);
// completedSerial should not be less than mCompletedSerial unless it is 0.
@ -413,6 +414,8 @@ namespace dawn_native {
if (completedSerial > mCompletedSerial) {
mCompletedSerial = completedSerial;
}
return {};
}
ResultOrError<const Format*> DeviceBase::GetInternalFormat(wgpu::TextureFormat format) const {
@ -932,8 +935,7 @@ namespace dawn_native {
// 1. the last submitted serial has moved beyond the completed serial
// 2. or the completed serial has not reached the future serial set by the trackers
if (mLastSubmittedSerial > mCompletedSerial || mCompletedSerial < mFutureSerial) {
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
DAWN_TRY(TickImpl());
// There is no GPU work in flight, we need to move the serials forward so that

View File

@ -243,7 +243,7 @@ namespace dawn_native {
// reaching the serial the work will be executed on.
void AddFutureSerial(ExecutionSerial serial);
// Check for passed fences and set the new completed serial
void CheckPassedSerials();
MaybeError CheckPassedSerials();
MaybeError Tick();
@ -336,7 +336,7 @@ namespace dawn_native {
// Each backend should implement this to check its passed fences, if there are any, and return
// a completed serial. Returning 0 should indicate that there are no fences to check.
virtual ExecutionSerial CheckAndUpdateCompletedSerials() = 0;
virtual ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() = 0;
// During shutdown of the device, some operations might have been started since the last
// submit and be waiting on a serial that doesn't have a corresponding fence enqueued. Fake
// serials to make all commands look completed.

View File

@ -269,17 +269,17 @@ namespace dawn_native { namespace d3d12 {
}
MaybeError Device::WaitForSerial(ExecutionSerial serial) {
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
if (GetCompletedCommandSerial() < serial) {
DAWN_TRY(CheckHRESULT(mFence->SetEventOnCompletion(uint64_t(serial), mFenceEvent),
"D3D12 set event on completion"));
WaitForSingleObject(mFenceEvent, INFINITE);
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
}
return {};
}
ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
ExecutionSerial completeSerial = ExecutionSerial(mFence->GetCompletedValue());
if (completeSerial <= GetCompletedCommandSerial()) {

View File

@ -181,7 +181,7 @@ namespace dawn_native { namespace d3d12 {
ComPtr<ID3D12Fence> mFence;
HANDLE mFenceEvent = nullptr;
ExecutionSerial CheckAndUpdateCompletedSerials() override;
ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
ComPtr<ID3D12Device> mD3d12Device; // Device is owned by adapter and will not be outlived.
ComPtr<ID3D12CommandQueue> mCommandQueue;

View File

@ -112,7 +112,7 @@ namespace dawn_native { namespace metal {
void InitTogglesFromDriver();
void ShutDownImpl() override;
MaybeError WaitForIdleForDestruction() override;
ExecutionSerial CheckAndUpdateCompletedSerials() override;
ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
NSPRef<id<MTLDevice>> mMtlDevice;
NSPRef<id<MTLCommandQueue>> mCommandQueue;

View File

@ -176,7 +176,7 @@ namespace dawn_native { namespace metal {
return TextureView::Create(texture, descriptor);
}
ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
uint64_t frontendCompletedSerial{GetCompletedCommandSerial()};
if (frontendCompletedSerial > mCompletedSerial) {
// sometimes we increase the serials, in which case the completed serial in
@ -375,12 +375,12 @@ namespace dawn_native { namespace metal {
MaybeError Device::WaitForIdleForDestruction() {
// Forget all pending commands.
mCommandContext.AcquireCommands();
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
// Wait for all commands to be finished so we can free resources
while (GetCompletedCommandSerial() != GetLastSubmittedCommandSerial()) {
usleep(100);
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
}
return {};

View File

@ -218,25 +218,27 @@ namespace dawn_native { namespace null {
}
MaybeError Device::TickImpl() {
SubmitPendingOperations();
return {};
return SubmitPendingOperations();
}
ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
return GetLastSubmittedCommandSerial();
}
// Queues `operation` by moving ownership of it onto the end of mPendingOperations.
void Device::AddPendingOperation(std::unique_ptr<PendingOperation> operation) {
mPendingOperations.emplace_back(std::move(operation));
}
void Device::SubmitPendingOperations() {
MaybeError Device::SubmitPendingOperations() {
for (auto& operation : mPendingOperations) {
operation->Execute();
}
mPendingOperations.clear();
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
IncrementLastSubmittedCommandSerial();
return {};
}
// BindGroupDataHolder
@ -342,8 +344,7 @@ namespace dawn_native { namespace null {
// for testing purposes we should also tick in the null implementation.
DAWN_TRY(device->Tick());
device->SubmitPendingOperations();
return {};
return device->SubmitPendingOperations();
}
MaybeError Queue::WriteBufferImpl(BufferBase* buffer,

View File

@ -98,7 +98,7 @@ namespace dawn_native { namespace null {
MaybeError TickImpl() override;
void AddPendingOperation(std::unique_ptr<PendingOperation> operation);
void SubmitPendingOperations();
MaybeError SubmitPendingOperations();
ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
@ -153,7 +153,7 @@ namespace dawn_native { namespace null {
TextureBase* texture,
const TextureViewDescriptor* descriptor) override;
ExecutionSerial CheckAndUpdateCompletedSerials() override;
ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
void ShutDownImpl() override;
MaybeError WaitForIdleForDestruction() override;

View File

@ -180,7 +180,7 @@ namespace dawn_native { namespace opengl {
return {};
}
ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
ExecutionSerial fenceSerial{0};
while (!mFencesInFlight.empty()) {
GLsync sync = mFencesInFlight.front().first;
@ -234,7 +234,7 @@ namespace dawn_native { namespace opengl {
MaybeError Device::WaitForIdleForDestruction() {
gl.Finish();
CheckPassedSerials();
DAWN_TRY(CheckPassedSerials());
ASSERT(mFencesInFlight.empty());
return {};

View File

@ -109,7 +109,7 @@ namespace dawn_native { namespace opengl {
const TextureViewDescriptor* descriptor) override;
void InitTogglesFromDriver();
ExecutionSerial CheckAndUpdateCompletedSerials() override;
ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
void ShutDownImpl() override;
MaybeError WaitForIdleForDestruction() override;

View File

@ -513,21 +513,22 @@ namespace dawn_native { namespace vulkan {
return fence;
}
ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
ExecutionSerial fenceSerial(0);
while (!mFencesInFlight.empty()) {
VkFence fence = mFencesInFlight.front().first;
ExecutionSerial tentativeSerial = mFencesInFlight.front().second;
VkResult result = VkResult::WrapUnsafe(
INJECT_ERROR_OR_RUN(fn.GetFenceStatus(mVkDevice, fence), VK_ERROR_DEVICE_LOST));
// TODO: Handle DeviceLost error.
ASSERT(result == VK_SUCCESS || result == VK_NOT_READY);
// Fences are added in order, so we can stop searching as soon
// as we see one that's not ready.
if (result == VK_NOT_READY) {
return fenceSerial;
} else {
DAWN_TRY(CheckVkSuccess(::VkResult(result), "GetFenceStatus"));
}
// Update fenceSerial since fence is ready.
fenceSerial = tentativeSerial;

View File

@ -174,7 +174,7 @@ namespace dawn_native { namespace vulkan {
std::unique_ptr<external_semaphore::Service> mExternalSemaphoreService;
ResultOrError<VkFence> GetUnusedFence();
ExecutionSerial CheckAndUpdateCompletedSerials() override;
ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
// We track which operations are in flight on the GPU with an increasing serial.
// This works only because we have a single queue. Each submit to a queue is associated

View File

@ -245,7 +245,7 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
heaps.push_back(heap);
// CheckPassedSerials() will update the last internally completed serial.
mD3DDevice->CheckPassedSerials();
EXPECT_TRUE(mD3DDevice->CheckPassedSerials().IsSuccess());
// NextSerial() will increment the last internally submitted serial.
EXPECT_TRUE(mD3DDevice->NextSerial().IsSuccess());
}
@ -258,7 +258,7 @@ TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
EXPECT_TRUE(heaps.front() == heap);
heaps.pop_front();
mD3DDevice->CheckPassedSerials();
EXPECT_TRUE(mD3DDevice->CheckPassedSerials().IsSuccess());
EXPECT_TRUE(mD3DDevice->NextSerial().IsSuccess());
}