From 8dbe22c65ac2db6ca353a5947f01aed9476339a6 Mon Sep 17 00:00:00 2001 From: James Price Date: Thu, 5 May 2022 22:14:20 +0000 Subject: [PATCH] Surface Vulkan validation messages in errors This change ensures that when errors are raised from WebGPU, including lost context errors, they can include any associated Vulkan validation layer messages in the message text if backend validation is enabled. This will allow these messages to be surfaced in places like when running CTS tests on browser build infrastructure. Because of how Vulkan messages are raised at the instance level, some routing of the messages to the appropriate device needed to be added. This is based on appending message IDs to object labels and then parsing them out of the message when it comes in. This change also makes Vulkan validation messages cause a device loss, which is inline with the behavior of the D3D12 backend. Bug: dawn:1396 Change-Id: I2b309ba5475a338e7e99d4b4daaa7c3d5de61011 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/88460 Reviewed-by: Kai Ninomiya Commit-Queue: Brandon Jones --- src/dawn/native/vulkan/BackendVk.cpp | 52 ++++++++++++++++++++++++-- src/dawn/native/vulkan/BackendVk.h | 16 ++++++++ src/dawn/native/vulkan/DeviceVk.cpp | 37 +++++++++++++++++- src/dawn/native/vulkan/DeviceVk.h | 13 +++++++ src/dawn/native/vulkan/UtilsVulkan.cpp | 43 +++++++++++++++++---- src/dawn/native/vulkan/UtilsVulkan.h | 3 ++ 6 files changed, 151 insertions(+), 13 deletions(-) diff --git a/src/dawn/native/vulkan/BackendVk.cpp b/src/dawn/native/vulkan/BackendVk.cpp index 1f34bcb258..bc8efa9756 100644 --- a/src/dawn/native/vulkan/BackendVk.cpp +++ b/src/dawn/native/vulkan/BackendVk.cpp @@ -23,6 +23,7 @@ #include "dawn/native/Instance.h" #include "dawn/native/VulkanBackend.h" #include "dawn/native/vulkan/AdapterVk.h" +#include "dawn/native/vulkan/DeviceVk.h" #include "dawn/native/vulkan/UtilsVulkan.h" #include "dawn/native/vulkan/VulkanError.h" @@ -108,10 +109,33 @@ VKAPI_ATTR VkBool32 VKAPI_CALL OnDebugUtilsCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT /* messageTypes */, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, - void* /* pUserData */) { + void* pUserData) { if (ShouldReportDebugMessage(pCallbackData->pMessageIdName, pCallbackData->pMessage)) { - dawn::WarningLog() << pCallbackData->pMessage; - ASSERT((messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) == 0); + if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { + dawn::ErrorLog() << pCallbackData->pMessage; + + if (pUserData != nullptr) { + // Look through all the object labels attached to the debug message and try to parse + // a device debug prefix out of one of them. If a debug prefix is found and matches + // a registered device, forward the message on to it. + for (uint32_t i = 0; i < pCallbackData->objectCount; ++i) { + const VkDebugUtilsObjectNameInfoEXT& object = pCallbackData->pObjects[i]; + std::string deviceDebugPrefix = + GetDeviceDebugPrefixFromDebugName(object.pObjectName); + if (deviceDebugPrefix.empty()) { + continue; + } + + VulkanInstance* instance = reinterpret_cast(pUserData); + if (instance->HandleDeviceMessage(std::move(deviceDebugPrefix), + pCallbackData->pMessage)) { + return VK_FALSE; + } + } + } + } else { + dawn::WarningLog() << pCallbackData->pMessage; + } } return VK_FALSE; } @@ -133,6 +157,8 @@ OnInstanceCreationDebugUtilsCallback(VkDebugUtilsMessageSeverityFlagBitsEXT mess VulkanInstance::VulkanInstance() = default; VulkanInstance::~VulkanInstance() { + ASSERT(mMessageListenerDevices.empty()); + if (mDebugUtilsMessenger != VK_NULL_HANDLE) { mFunctions.DestroyDebugUtilsMessengerEXT(mInstance, mDebugUtilsMessenger, nullptr); mDebugUtilsMessenger = VK_NULL_HANDLE; @@ -373,13 +399,31 @@ MaybeError VulkanInstance::RegisterDebugUtils() { createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; createInfo.pfnUserCallback = OnDebugUtilsCallback; - createInfo.pUserData = nullptr; + createInfo.pUserData = this; return CheckVkSuccess(mFunctions.CreateDebugUtilsMessengerEXT(mInstance, &createInfo, nullptr, &*mDebugUtilsMessenger), "vkCreateDebugUtilsMessengerEXT"); } +void VulkanInstance::StartListeningForDeviceMessages(Device* device) { + std::lock_guard lock(mMessageListenerDevicesMutex); + mMessageListenerDevices.insert({device->GetDebugPrefix(), device}); +} +void VulkanInstance::StopListeningForDeviceMessages(Device* device) { + std::lock_guard lock(mMessageListenerDevicesMutex); + mMessageListenerDevices.erase(device->GetDebugPrefix()); +} +bool VulkanInstance::HandleDeviceMessage(std::string deviceDebugPrefix, std::string message) { + std::lock_guard lock(mMessageListenerDevicesMutex); + auto it = mMessageListenerDevices.find(deviceDebugPrefix); + if (it != mMessageListenerDevices.end()) { + it->second->OnDebugMessage(std::move(message)); + return true; + } + return false; +} + Backend::Backend(InstanceBase* instance) : BackendConnection(instance, wgpu::BackendType::Vulkan) {} Backend::~Backend() = default; diff --git a/src/dawn/native/vulkan/BackendVk.h b/src/dawn/native/vulkan/BackendVk.h index 8567d3a793..7b3e430c03 100644 --- a/src/dawn/native/vulkan/BackendVk.h +++ b/src/dawn/native/vulkan/BackendVk.h @@ -15,6 +15,9 @@ #ifndef SRC_DAWN_NATIVE_VULKAN_BACKENDVK_H_ #define SRC_DAWN_NATIVE_VULKAN_BACKENDVK_H_ +#include +#include +#include #include #include "dawn/native/BackendConnection.h" @@ -32,6 +35,8 @@ enum class ICD { SwiftShader, }; +class Device; + // VulkanInstance holds the reference to the Vulkan library, the VkInstance, VkPhysicalDevices // on that instance, Vulkan functions loaded from the library, and global information // gathered from the instance. VkPhysicalDevices bound to the VkInstance are bound to the GPU @@ -50,6 +55,12 @@ class VulkanInstance : public RefCounted { const VulkanGlobalInfo& GetGlobalInfo() const; const std::vector& GetPhysicalDevices() const; + // TODO(dawn:831): This set of functions guards may need to be adjusted when Dawn is updated + // to support multithreading. + void StartListeningForDeviceMessages(Device* device); + void StopListeningForDeviceMessages(Device* device); + bool HandleDeviceMessage(std::string deviceDebugPrefix, std::string message); + private: VulkanInstance(); @@ -66,6 +77,11 @@ class VulkanInstance : public RefCounted { VkDebugUtilsMessengerEXT mDebugUtilsMessenger = VK_NULL_HANDLE; std::vector mPhysicalDevices; + + // Devices keep the VulkanInstance alive, so as long as devices remove themselves from this + // map on destruction the pointers it contains should remain valid. + std::unordered_map mMessageListenerDevices; + std::mutex mMessageListenerDevicesMutex; }; class Backend : public BackendConnection { diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp index 00cc9d4cb5..8c0ed3b3b3 100644 --- a/src/dawn/native/vulkan/DeviceVk.cpp +++ b/src/dawn/native/vulkan/DeviceVk.cpp @@ -14,11 +14,13 @@ #include "dawn/native/vulkan/DeviceVk.h" +#include "dawn/common/Log.h" #include "dawn/common/Platform.h" #include "dawn/native/BackendConnection.h" #include "dawn/native/ChainUtils_autogen.h" #include "dawn/native/Error.h" #include "dawn/native/ErrorData.h" +#include "dawn/native/Instance.h" #include "dawn/native/VulkanBackend.h" #include "dawn/native/vulkan/AdapterVk.h" #include "dawn/native/vulkan/BackendVk.h" @@ -53,7 +55,7 @@ ResultOrError> Device::Create(Adapter* adapter, const DeviceDescript } Device::Device(Adapter* adapter, const DeviceDescriptor* descriptor) - : DeviceBase(adapter, descriptor) { + : DeviceBase(adapter, descriptor), mDebugPrefix(GetNextDeviceDebugPrefix()) { InitTogglesFromDriver(); } @@ -103,6 +105,8 @@ MaybeError Device::Initialize(const DeviceDescriptor* descriptor) { SetLabelImpl(); + ToBackend(GetAdapter())->GetVulkanInstance()->StartListeningForDeviceMessages(this); + return DeviceBase::Initialize(Queue::Create(this, &descriptor->defaultQueue)); } @@ -200,6 +204,8 @@ MaybeError Device::TickImpl() { DAWN_TRY(SubmitPendingCommands()); } + DAWN_TRY(CheckDebugLayerAndGenerateErrors()); + return {}; } @@ -876,6 +882,33 @@ uint32_t Device::GetComputeSubgroupSize() const { return mComputeSubgroupSize; } +void Device::OnDebugMessage(std::string message) { + mDebugMessages.push_back(std::move(message)); +} + +MaybeError Device::CheckDebugLayerAndGenerateErrors() { + if (!GetAdapter()->GetInstance()->IsBackendValidationEnabled() || mDebugMessages.empty()) { + return {}; + } + + auto error = DAWN_INTERNAL_ERROR("The Vulkan validation layer reported uncaught errors."); + + AppendDebugLayerMessages(error.get()); + + return error; +} + +void Device::AppendDebugLayerMessages(ErrorData* error) { + if (!GetAdapter()->GetInstance()->IsBackendValidationEnabled()) { + return; + } + + while (!mDebugMessages.empty()) { + error->AppendBackendMessage(std::move(mDebugMessages.back())); + mDebugMessages.pop_back(); + } +} + MaybeError Device::WaitForIdleForDestruction() { // Immediately tag the recording context as unused so we don't try to submit it in Tick. // Move the mRecordingContext.used to mUnusedCommands so it can be cleaned up in @@ -947,6 +980,8 @@ void Device::DestroyImpl() { // Enough of the Device's initialization happened that we can now do regular robust // deinitialization. + ToBackend(GetAdapter())->GetVulkanInstance()->StopListeningForDeviceMessages(this); + // Immediately tag the recording context as unused so we don't try to submit it in Tick. mRecordingContext.used = false; if (mRecordingContext.commandPool != VK_NULL_HANDLE) { diff --git a/src/dawn/native/vulkan/DeviceVk.h b/src/dawn/native/vulkan/DeviceVk.h index 8473ecf893..db23623477 100644 --- a/src/dawn/native/vulkan/DeviceVk.h +++ b/src/dawn/native/vulkan/DeviceVk.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -105,6 +106,11 @@ class Device final : public DeviceBase { void SetLabelImpl() override; + void OnDebugMessage(std::string message); + + // Used to associate this device with validation layer messages. + const char* GetDebugPrefix() { return mDebugPrefix.c_str(); } + private: Device(Adapter* adapter, const DeviceDescriptor* descriptor); @@ -152,6 +158,9 @@ class Device final : public DeviceBase { void ApplyDepthStencilFormatToggles(); void ApplyUseZeroInitializeWorkgroupMemoryExtensionToggle(); + MaybeError CheckDebugLayerAndGenerateErrors(); + void AppendDebugLayerMessages(ErrorData* error) override; + void DestroyImpl() override; MaybeError WaitForIdleForDestruction() override; @@ -185,6 +194,10 @@ class Device final : public DeviceBase { // Fences in the unused list aren't reset yet. std::vector mUnusedFences; + // For capturing messages generated by the Vulkan debug layer. + const std::string mDebugPrefix; + std::vector mDebugMessages; + MaybeError PrepareRecordingContext(); void RecycleCompletedCommands(); diff --git a/src/dawn/native/vulkan/UtilsVulkan.cpp b/src/dawn/native/vulkan/UtilsVulkan.cpp index a64e659b26..fe034244fd 100644 --- a/src/dawn/native/vulkan/UtilsVulkan.cpp +++ b/src/dawn/native/vulkan/UtilsVulkan.cpp @@ -26,6 +26,9 @@ namespace dawn::native::vulkan { +constexpr char kDeviceDebugPrefix[] = "DawnDbg="; +constexpr char kDeviceDebugSeparator[] = ";"; + #define VK_OBJECT_TYPE_GETTER(object, objectType) \ template <> \ VkObjectType GetVkObjectType(object handle) { \ @@ -216,20 +219,44 @@ void SetDebugNameInternal(Device* device, objectNameInfo.objectType = objectType; objectNameInfo.objectHandle = objectHandle; - if (label.empty() || !device->IsToggleEnabled(Toggle::UseUserDefinedLabelsInBackend)) { - objectNameInfo.pObjectName = prefix; - device->fn.SetDebugUtilsObjectNameEXT(device->GetVkDevice(), &objectNameInfo); - return; + std::ostringstream objectNameStream; + // Prefix with the device's message ID so that if this label appears in a validation + // message it can be parsed out and the message can be associated with the right device. + objectNameStream << device->GetDebugPrefix() << kDeviceDebugSeparator << prefix; + if (!label.empty() && device->IsToggleEnabled(Toggle::UseUserDefinedLabelsInBackend)) { + objectNameStream << "_" << label; } - - std::string objectName = prefix; - objectName += "_"; - objectName += label; + std::string objectName = objectNameStream.str(); objectNameInfo.pObjectName = objectName.c_str(); device->fn.SetDebugUtilsObjectNameEXT(device->GetVkDevice(), &objectNameInfo); } } +std::string GetNextDeviceDebugPrefix() { + static uint64_t nextDeviceDebugId = 1; + std::ostringstream objectName; + objectName << kDeviceDebugPrefix << nextDeviceDebugId++; + return objectName.str(); +} + +std::string GetDeviceDebugPrefixFromDebugName(const char* debugName) { + if (debugName == nullptr) { + return {}; + } + + if (strncmp(debugName, kDeviceDebugPrefix, sizeof(kDeviceDebugPrefix) - 1) != 0) { + return {}; + } + + const char* separator = strstr(debugName + sizeof(kDeviceDebugPrefix), kDeviceDebugSeparator); + if (separator == nullptr) { + return {}; + } + + size_t length = separator - debugName; + return std::string(debugName, length); +} + VkSpecializationInfo* GetVkSpecializationInfo( const ProgrammableStage& programmableStage, VkSpecializationInfo* specializationInfo, diff --git a/src/dawn/native/vulkan/UtilsVulkan.h b/src/dawn/native/vulkan/UtilsVulkan.h index 572b2a7089..7c63b1dfcd 100644 --- a/src/dawn/native/vulkan/UtilsVulkan.h +++ b/src/dawn/native/vulkan/UtilsVulkan.h @@ -141,6 +141,9 @@ void SetDebugName(Device* device, label); } +std::string GetNextDeviceDebugPrefix(); +std::string GetDeviceDebugPrefixFromDebugName(const char* debugName); + // Returns nullptr or &specializationInfo // specializationInfo, specializationDataEntries, specializationMapEntries needs to // be alive at least until VkSpecializationInfo is passed into Vulkan Create*Pipelines