// Copyright 2017 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dawn_native/vulkan/DeviceVk.h"

#include "common/Platform.h"
#include "dawn_native/BackendConnection.h"
#include "dawn_native/ChainUtils_autogen.h"
#include "dawn_native/Error.h"
#include "dawn_native/ErrorData.h"
#include "dawn_native/VulkanBackend.h"
#include "dawn_native/vulkan/AdapterVk.h"
#include "dawn_native/vulkan/BackendVk.h"
#include "dawn_native/vulkan/BindGroupLayoutVk.h"
#include "dawn_native/vulkan/BindGroupVk.h"
#include "dawn_native/vulkan/BufferVk.h"
#include "dawn_native/vulkan/CommandBufferVk.h"
#include "dawn_native/vulkan/ComputePipelineVk.h"
#include "dawn_native/vulkan/FencedDeleter.h"
#include "dawn_native/vulkan/PipelineLayoutVk.h"
#include "dawn_native/vulkan/QuerySetVk.h"
#include "dawn_native/vulkan/QueueVk.h"
#include "dawn_native/vulkan/RenderPassCache.h"
#include "dawn_native/vulkan/RenderPipelineVk.h"
#include "dawn_native/vulkan/ResourceMemoryAllocatorVk.h"
#include "dawn_native/vulkan/SamplerVk.h"
#include "dawn_native/vulkan/ShaderModuleVk.h"
#include "dawn_native/vulkan/StagingBufferVk.h"
#include "dawn_native/vulkan/SwapChainVk.h"
#include "dawn_native/vulkan/TextureVk.h"
#include "dawn_native/vulkan/UtilsVulkan.h"
#include "dawn_native/vulkan/VulkanError.h"

namespace dawn_native { namespace vulkan {

    // static
    ResultOrError<Device*> Device::Create(Adapter* adapter, const DeviceDescriptor* descriptor) {
        Ref<Device> device = AcquireRef(new Device(adapter, descriptor));
        DAWN_TRY(device->Initialize());
        return device.Detach();
    }

    Device::Device(Adapter* adapter, const DeviceDescriptor* descriptor)
        : DeviceBase(adapter, descriptor) {
        InitTogglesFromDriver();
    }
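    // Initialization happens in two phases: the constructor above only records driver-dependent
    // toggle defaults, while Initialize() creates the VkDevice, loads the device-level function
    // pointers, and sets up the allocators, caches, and services used by the rest of the backend.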
    MaybeError Device::Initialize() {
        // Copy the adapter's device info to the device so that we can change the "knobs"
        mDeviceInfo = ToBackend(GetAdapter())->GetDeviceInfo();

        // Initialize the "instance" procs of our local function table.
        VulkanFunctions* functions = GetMutableFunctions();
        *functions = ToBackend(GetAdapter())->GetBackend()->GetFunctions();

        // Two things are crucial if device initialization fails: the function pointers to destroy
        // objects, and the fence deleter that calls these functions. Do not do anything before
        // these two are set up, so that a failed initialization doesn't cause a crash in
        // ShutDownImpl()
        {
            VkPhysicalDevice physicalDevice = ToBackend(GetAdapter())->GetPhysicalDevice();

            VulkanDeviceKnobs usedDeviceKnobs = {};
            DAWN_TRY_ASSIGN(usedDeviceKnobs, CreateDevice(physicalDevice));
            *static_cast<VulkanDeviceKnobs*>(&mDeviceInfo) = usedDeviceKnobs;

            DAWN_TRY(functions->LoadDeviceProcs(mVkDevice, mDeviceInfo));

            // The queue can be loaded before the fenced deleter because their lifetime is tied to
            // the device.
            GatherQueueFromDevice();

            mDeleter = std::make_unique<FencedDeleter>(this);
        }

        mRenderPassCache = std::make_unique<RenderPassCache>(this);
        mResourceMemoryAllocator = std::make_unique<ResourceMemoryAllocator>(this);

        mExternalMemoryService = std::make_unique<external_memory::Service>(this);
        mExternalSemaphoreService = std::make_unique<external_semaphore::Service>(this);

        DAWN_TRY(PrepareRecordingContext());

        // The environment can request to use D32S8 or D24S8 when it's not available. Override
        // the decision if it is not applicable.
        ApplyDepth24PlusS8Toggle();

        return DeviceBase::Initialize(Queue::Create(this));
    }

    Device::~Device() {
        ShutDownBase();
    }

    ResultOrError<Ref<BindGroupBase>> Device::CreateBindGroupImpl(
        const BindGroupDescriptor* descriptor) {
        return BindGroup::Create(this, descriptor);
    }
    ResultOrError<Ref<BindGroupLayoutBase>> Device::CreateBindGroupLayoutImpl(
        const BindGroupLayoutDescriptor* descriptor,
        PipelineCompatibilityToken pipelineCompatibilityToken) {
        return BindGroupLayout::Create(this, descriptor, pipelineCompatibilityToken);
    }
    ResultOrError<Ref<BufferBase>> Device::CreateBufferImpl(const BufferDescriptor* descriptor) {
        return Buffer::Create(this, descriptor);
    }
    ResultOrError<Ref<CommandBufferBase>> Device::CreateCommandBuffer(
        CommandEncoder* encoder,
        const CommandBufferDescriptor* descriptor) {
        return CommandBuffer::Create(encoder, descriptor);
    }
    ResultOrError<Ref<ComputePipelineBase>> Device::CreateComputePipelineImpl(
        const ComputePipelineDescriptor* descriptor) {
        return ComputePipeline::Create(this, descriptor);
    }
    ResultOrError<Ref<PipelineLayoutBase>> Device::CreatePipelineLayoutImpl(
        const PipelineLayoutDescriptor* descriptor) {
        return PipelineLayout::Create(this, descriptor);
    }
    ResultOrError<Ref<QuerySetBase>> Device::CreateQuerySetImpl(
        const QuerySetDescriptor* descriptor) {
        return QuerySet::Create(this, descriptor);
    }
    ResultOrError<Ref<RenderPipelineBase>> Device::CreateRenderPipelineImpl(
        const RenderPipelineDescriptor* descriptor) {
        return RenderPipeline::Create(this, descriptor);
    }
    ResultOrError<Ref<SamplerBase>> Device::CreateSamplerImpl(const SamplerDescriptor* descriptor) {
        return Sampler::Create(this, descriptor);
    }
    ResultOrError<Ref<ShaderModuleBase>> Device::CreateShaderModuleImpl(
        const ShaderModuleDescriptor* descriptor,
        ShaderModuleParseResult* parseResult) {
        return ShaderModule::Create(this, descriptor, parseResult);
    }
    ResultOrError<Ref<SwapChainBase>> Device::CreateSwapChainImpl(
        const SwapChainDescriptor* descriptor) {
        return OldSwapChain::Create(this, descriptor);
    }
    ResultOrError<Ref<NewSwapChainBase>> Device::CreateSwapChainImpl(
        Surface* surface,
        NewSwapChainBase* previousSwapChain,
        const SwapChainDescriptor* descriptor) {
        return SwapChain::Create(this, surface, previousSwapChain, descriptor);
    }
    ResultOrError<Ref<TextureBase>> Device::CreateTextureImpl(const TextureDescriptor* descriptor) {
        return Texture::Create(this, descriptor);
    }
    ResultOrError<Ref<TextureViewBase>> Device::CreateTextureViewImpl(
        TextureBase* texture,
        const TextureViewDescriptor* descriptor) {
        return TextureView::Create(texture, descriptor);
    }
    void Device::CreateComputePipelineAsyncImpl(const ComputePipelineDescriptor* descriptor,
                                                size_t blueprintHash,
                                                WGPUCreateComputePipelineAsyncCallback callback,
                                                void* userdata) {
        ComputePipeline::CreateAsync(this, descriptor, blueprintHash, callback, userdata);
    }
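    // Per-tick maintenance: recycle command pools whose submissions have completed, finish the
    // deferred deallocation of bind group layouts, let the memory allocator and fenced deleter
    // release resources that are no longer in flight, and finally submit any pending commands.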
    MaybeError Device::TickImpl() {
        RecycleCompletedCommands();

        ExecutionSerial completedSerial = GetCompletedCommandSerial();

        for (Ref<BindGroupLayout>& bgl :
             mBindGroupLayoutsPendingDeallocation.IterateUpTo(completedSerial)) {
            bgl->FinishDeallocation(completedSerial);
        }
        mBindGroupLayoutsPendingDeallocation.ClearUpTo(completedSerial);

        mResourceMemoryAllocator->Tick(completedSerial);
        mDeleter->Tick(completedSerial);

        if (mRecordingContext.used) {
            DAWN_TRY(SubmitPendingCommands());
        }

        return {};
    }

    VkInstance Device::GetVkInstance() const {
        return ToBackend(GetAdapter())->GetBackend()->GetVkInstance();
    }
    const VulkanDeviceInfo& Device::GetDeviceInfo() const {
        return mDeviceInfo;
    }
    const VulkanGlobalInfo& Device::GetGlobalInfo() const {
        return ToBackend(GetAdapter())->GetBackend()->GetGlobalInfo();
    }
    VkDevice Device::GetVkDevice() const {
        return mVkDevice;
    }
    uint32_t Device::GetGraphicsQueueFamily() const {
        return mQueueFamily;
    }
    VkQueue Device::GetQueue() const {
        return mQueue;
    }
    FencedDeleter* Device::GetFencedDeleter() const {
        return mDeleter.get();
    }
    RenderPassCache* Device::GetRenderPassCache() const {
        return mRenderPassCache.get();
    }
    ResourceMemoryAllocator* Device::GetResourceMemoryAllocator() const {
        return mResourceMemoryAllocator.get();
    }

    void Device::EnqueueDeferredDeallocation(BindGroupLayout* bindGroupLayout) {
        mBindGroupLayoutsPendingDeallocation.Enqueue(bindGroupLayout, GetPendingCommandSerial());
    }

    CommandRecordingContext* Device::GetPendingRecordingContext() {
        ASSERT(mRecordingContext.commandBuffer != VK_NULL_HANDLE);
        mRecordingContext.used = true;
        return &mRecordingContext;
    }

    MaybeError Device::SubmitPendingCommands() {
        if (!mRecordingContext.used) {
            return {};
        }

        DAWN_TRY(CheckVkSuccess(fn.EndCommandBuffer(mRecordingContext.commandBuffer),
                                "vkEndCommandBuffer"));

        std::vector<VkPipelineStageFlags> dstStageMasks(mRecordingContext.waitSemaphores.size(),
                                                        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);

        VkSubmitInfo submitInfo;
        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
        submitInfo.pNext = nullptr;
        submitInfo.waitSemaphoreCount =
            static_cast<uint32_t>(mRecordingContext.waitSemaphores.size());
        submitInfo.pWaitSemaphores = AsVkArray(mRecordingContext.waitSemaphores.data());
        submitInfo.pWaitDstStageMask = dstStageMasks.data();
        submitInfo.commandBufferCount = 1;
        submitInfo.pCommandBuffers = &mRecordingContext.commandBuffer;
        submitInfo.signalSemaphoreCount =
            static_cast<uint32_t>(mRecordingContext.signalSemaphores.size());
        submitInfo.pSignalSemaphores = AsVkArray(mRecordingContext.signalSemaphores.data());

        VkFence fence = VK_NULL_HANDLE;
        DAWN_TRY_ASSIGN(fence, GetUnusedFence());
        DAWN_TRY_WITH_CLEANUP(
            CheckVkSuccess(fn.QueueSubmit(mQueue, 1, &submitInfo, fence), "vkQueueSubmit"), {
                // If submitting to the queue fails, move the fence back into the unused fence
                // list, as if it were never acquired. Not doing so would leak the fence since
                // it would be neither in the unused list nor in the in-flight list.
                mUnusedFences.push_back(fence);
            });

        // Enqueue the semaphores before incrementing the serial, so that they can be deleted as
        // soon as the current submission is finished.
        for (VkSemaphore semaphore : mRecordingContext.waitSemaphores) {
            mDeleter->DeleteWhenUnused(semaphore);
        }
        for (VkSemaphore semaphore : mRecordingContext.signalSemaphores) {
            mDeleter->DeleteWhenUnused(semaphore);
        }

        IncrementLastSubmittedCommandSerial();
        ExecutionSerial lastSubmittedSerial = GetLastSubmittedCommandSerial();
        mFencesInFlight.emplace(fence, lastSubmittedSerial);

        CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPool,
                                                  mRecordingContext.commandBuffer};
        mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
        mRecordingContext = CommandRecordingContext();
        DAWN_TRY(PrepareRecordingContext());

        return {};
    }
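    // CreateDevice selects the "knobs" used to create the VkDevice: which device extensions to
    // request, which VkPhysicalDeviceFeatures (and feature structs chained through
    // VkPhysicalDeviceFeatures2) to enable, and which universal queue family to use. The knobs
    // that were actually used are returned so they can be written back into mDeviceInfo.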
    ResultOrError<VulkanDeviceKnobs> Device::CreateDevice(VkPhysicalDevice physicalDevice) {
        VulkanDeviceKnobs usedKnobs = {};

        // Default to asking for all available known extensions.
        usedKnobs.extensions = mDeviceInfo.extensions;

        // However, only request the extensions that haven't been promoted in the device's
        // apiVersion.
        std::vector<const char*> extensionNames;
        for (DeviceExt ext : IterateBitSet(usedKnobs.extensions)) {
            const DeviceExtInfo& info = GetDeviceExtInfo(ext);

            if (info.versionPromoted > mDeviceInfo.properties.apiVersion) {
                extensionNames.push_back(info.name);
            }
        }

        // Some device features can only be enabled using a VkPhysicalDeviceFeatures2 struct,
        // which is supported by the VK_KHR_get_physical_device_properties2 instance extension,
        // promoted to the core API in Vulkan 1.1.
        //
        // Prepare a VkPhysicalDeviceFeatures2 struct for this use case, it will only be populated
        // if HasExt(DeviceExt::GetPhysicalDeviceProperties2) is true.
        VkPhysicalDeviceFeatures2 features2 = {};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        PNextChainBuilder featuresChain(&features2);

        // Required for core WebGPU features.
        usedKnobs.features.depthBiasClamp = VK_TRUE;
        usedKnobs.features.fragmentStoresAndAtomics = VK_TRUE;
        usedKnobs.features.fullDrawIndexUint32 = VK_TRUE;
        usedKnobs.features.imageCubeArray = VK_TRUE;
        usedKnobs.features.independentBlend = VK_TRUE;
        usedKnobs.features.sampleRateShading = VK_TRUE;

        if (IsRobustnessEnabled()) {
            usedKnobs.features.robustBufferAccess = VK_TRUE;
        }

        if (mDeviceInfo.HasExt(DeviceExt::SubgroupSizeControl)) {
            ASSERT(usedKnobs.HasExt(DeviceExt::SubgroupSizeControl));

            // Always request all the features from VK_EXT_subgroup_size_control when available.
            usedKnobs.subgroupSizeControlFeatures = mDeviceInfo.subgroupSizeControlFeatures;
            featuresChain.Add(&usedKnobs.subgroupSizeControlFeatures);

            mComputeSubgroupSize = FindComputeSubgroupSize();
        }

        if (mDeviceInfo.features.samplerAnisotropy == VK_TRUE) {
            usedKnobs.features.samplerAnisotropy = VK_TRUE;
        }

        if (IsExtensionEnabled(Extension::TextureCompressionBC)) {
            ASSERT(ToBackend(GetAdapter())->GetDeviceInfo().features.textureCompressionBC ==
                   VK_TRUE);
            usedKnobs.features.textureCompressionBC = VK_TRUE;
        }

        if (IsExtensionEnabled(Extension::TextureCompressionETC2)) {
            ASSERT(ToBackend(GetAdapter())->GetDeviceInfo().features.textureCompressionETC2 ==
                   VK_TRUE);
            usedKnobs.features.textureCompressionETC2 = VK_TRUE;
        }

        if (IsExtensionEnabled(Extension::TextureCompressionASTC)) {
            ASSERT(ToBackend(GetAdapter())->GetDeviceInfo().features.textureCompressionASTC_LDR ==
                   VK_TRUE);
            usedKnobs.features.textureCompressionASTC_LDR = VK_TRUE;
        }

        if (IsExtensionEnabled(Extension::PipelineStatisticsQuery)) {
            ASSERT(ToBackend(GetAdapter())->GetDeviceInfo().features.pipelineStatisticsQuery ==
                   VK_TRUE);
            usedKnobs.features.pipelineStatisticsQuery = VK_TRUE;
        }

        if (IsExtensionEnabled(Extension::ShaderFloat16)) {
            const VulkanDeviceInfo& deviceInfo = ToBackend(GetAdapter())->GetDeviceInfo();
            ASSERT(deviceInfo.HasExt(DeviceExt::ShaderFloat16Int8) &&
                   deviceInfo.shaderFloat16Int8Features.shaderFloat16 == VK_TRUE &&
                   deviceInfo.HasExt(DeviceExt::_16BitStorage) &&
                   deviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE &&
                   deviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess == VK_TRUE);

            usedKnobs.shaderFloat16Int8Features.shaderFloat16 = VK_TRUE;
            usedKnobs._16BitStorageFeatures.storageBuffer16BitAccess = VK_TRUE;
            usedKnobs._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess = VK_TRUE;

            featuresChain.Add(&usedKnobs.shaderFloat16Int8Features,
                              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR);
            featuresChain.Add(&usedKnobs._16BitStorageFeatures,
                              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES);
        }

        // Find a universal queue family
        {
            // Note that GRAPHICS and COMPUTE imply TRANSFER so we don't need to check for it.
            constexpr uint32_t kUniversalFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
            int universalQueueFamily = -1;
            for (unsigned int i = 0; i < mDeviceInfo.queueFamilies.size(); ++i) {
                if ((mDeviceInfo.queueFamilies[i].queueFlags & kUniversalFlags) ==
                    kUniversalFlags) {
                    universalQueueFamily = i;
                    break;
                }
            }

            if (universalQueueFamily == -1) {
                return DAWN_INTERNAL_ERROR("No universal queue family");
            }
            mQueueFamily = static_cast<uint32_t>(universalQueueFamily);
        }

        // Choose to create a single universal queue
        std::vector<VkDeviceQueueCreateInfo> queuesToRequest;
        float zero = 0.0f;
        {
            VkDeviceQueueCreateInfo queueCreateInfo;
            queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
            queueCreateInfo.pNext = nullptr;
            queueCreateInfo.flags = 0;
            queueCreateInfo.queueFamilyIndex = static_cast<uint32_t>(mQueueFamily);
            queueCreateInfo.queueCount = 1;
            queueCreateInfo.pQueuePriorities = &zero;

            queuesToRequest.push_back(queueCreateInfo);
        }

        VkDeviceCreateInfo createInfo;
        createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
        createInfo.pNext = nullptr;
        createInfo.flags = 0;
        createInfo.queueCreateInfoCount = static_cast<uint32_t>(queuesToRequest.size());
        createInfo.pQueueCreateInfos = queuesToRequest.data();
        createInfo.enabledLayerCount = 0;
        createInfo.ppEnabledLayerNames = nullptr;
        createInfo.enabledExtensionCount = static_cast<uint32_t>(extensionNames.size());
        createInfo.ppEnabledExtensionNames = extensionNames.data();

        // When we have DeviceExt::GetPhysicalDeviceProperties2, use features2 so that features not
        // covered by VkPhysicalDeviceFeatures can be enabled.
        if (mDeviceInfo.HasExt(DeviceExt::GetPhysicalDeviceProperties2)) {
            features2.features = usedKnobs.features;
            createInfo.pNext = &features2;
            createInfo.pEnabledFeatures = nullptr;
        } else {
            ASSERT(features2.pNext == nullptr);
            createInfo.pEnabledFeatures = &usedKnobs.features;
        }

        DAWN_TRY(CheckVkSuccess(fn.CreateDevice(physicalDevice, &createInfo, nullptr, &mVkDevice),
                                "vkCreateDevice"));

        return usedKnobs;
    }
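    // Heuristic for the compute subgroup size requested when VK_EXT_subgroup_size_control is
    // available: prefer twice the minimum subgroup size when that still fits under the maximum
    // (for example, a minimum of 8 and a maximum of 32 yields 16), and fall back to the minimum
    // otherwise. A return value of 0 means no specific subgroup size is requested.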
    uint32_t Device::FindComputeSubgroupSize() const {
        if (!mDeviceInfo.HasExt(DeviceExt::SubgroupSizeControl)) {
            return 0;
        }

        const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& ext =
            mDeviceInfo.subgroupSizeControlProperties;

        if (ext.minSubgroupSize == ext.maxSubgroupSize) {
            return 0;
        }

        // At the moment, only Intel devices support varying subgroup sizes and 16, which is the
        // next value after the minimum of 8, is the sweet spot according to [1]. Hence the
        // following heuristics, which may need to be adjusted in the future for other
        // architectures, or if a specific API is added to let client code select the size.
        //
        // [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
        uint32_t subgroupSize = ext.minSubgroupSize * 2;
        if (subgroupSize <= ext.maxSubgroupSize) {
            return subgroupSize;
        } else {
            return ext.minSubgroupSize;
        }
    }

    void Device::GatherQueueFromDevice() {
        fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue);
    }
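    // InitTogglesFromDriver runs in the constructor and only sets defaults; the D32S8 default in
    // particular may be overridden later by ApplyDepth24PlusS8Toggle() once the physical device's
    // actual depth-stencil format support has been queried.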
    void Device::InitTogglesFromDriver() {
        // TODO(crbug.com/dawn/857): tighten this workaround when this issue is fixed in both
        // Vulkan SPEC and drivers.
        SetToggle(Toggle::UseTemporaryBufferInCompressedTextureToTextureCopy, true);

        // By default try to use D32S8 for Depth24PlusStencil8
        SetToggle(Toggle::VulkanUseD32S8, true);
    }

    void Device::ApplyDepth24PlusS8Toggle() {
        VkPhysicalDevice physicalDevice = ToBackend(GetAdapter())->GetPhysicalDevice();

        bool supportsD32s8 = false;
        {
            VkFormatProperties properties;
            fn.GetPhysicalDeviceFormatProperties(physicalDevice, VK_FORMAT_D32_SFLOAT_S8_UINT,
                                                 &properties);
            supportsD32s8 =
                properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
        }

        bool supportsD24s8 = false;
        {
            VkFormatProperties properties;
            fn.GetPhysicalDeviceFormatProperties(physicalDevice, VK_FORMAT_D24_UNORM_S8_UINT,
                                                 &properties);
            supportsD24s8 =
                properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
        }

        ASSERT(supportsD32s8 || supportsD24s8);

        if (!supportsD24s8) {
            ForceSetToggle(Toggle::VulkanUseD32S8, true);
        }
        if (!supportsD32s8) {
            ForceSetToggle(Toggle::VulkanUseD32S8, false);
        }
    }

    VulkanFunctions* Device::GetMutableFunctions() {
        return const_cast<VulkanFunctions*>(&fn);
    }

    ResultOrError<VkFence> Device::GetUnusedFence() {
        if (!mUnusedFences.empty()) {
            VkFence fence = mUnusedFences.back();
            DAWN_TRY(CheckVkSuccess(fn.ResetFences(mVkDevice, 1, &*fence), "vkResetFences"));

            mUnusedFences.pop_back();
            return fence;
        }

        VkFenceCreateInfo createInfo;
        createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
        createInfo.pNext = nullptr;
        createInfo.flags = 0;

        VkFence fence = VK_NULL_HANDLE;
        DAWN_TRY(CheckVkSuccess(fn.CreateFence(mVkDevice, &createInfo, nullptr, &*fence),
                                "vkCreateFence"));

        return fence;
    }

    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
        ExecutionSerial fenceSerial(0);
        while (!mFencesInFlight.empty()) {
            VkFence fence = mFencesInFlight.front().first;
            ExecutionSerial tentativeSerial = mFencesInFlight.front().second;
            VkResult result = VkResult::WrapUnsafe(
                INJECT_ERROR_OR_RUN(fn.GetFenceStatus(mVkDevice, fence), VK_ERROR_DEVICE_LOST));

            // Fences are added in order, so we can stop searching as soon
            // as we see one that's not ready.
            if (result == VK_NOT_READY) {
                return fenceSerial;
            } else {
                DAWN_TRY(CheckVkSuccess(::VkResult(result), "GetFenceStatus"));
            }

            // Update fenceSerial since fence is ready.
            fenceSerial = tentativeSerial;

            mUnusedFences.push_back(fence);

            ASSERT(fenceSerial > GetCompletedCommandSerial());
            mFencesInFlight.pop();
        }
        return fenceSerial;
    }
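    // PrepareRecordingContext makes a command buffer available for recording: it recycles a
    // completed command pool when one is available and otherwise creates a fresh pool with a
    // single primary command buffer, then begins recording with ONE_TIME_SUBMIT usage.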
    MaybeError Device::PrepareRecordingContext() {
        ASSERT(!mRecordingContext.used);
        ASSERT(mRecordingContext.commandBuffer == VK_NULL_HANDLE);
        ASSERT(mRecordingContext.commandPool == VK_NULL_HANDLE);

        // First try to recycle unused command pools.
        if (!mUnusedCommands.empty()) {
            CommandPoolAndBuffer commands = mUnusedCommands.back();
            mUnusedCommands.pop_back();
            DAWN_TRY_WITH_CLEANUP(CheckVkSuccess(fn.ResetCommandPool(mVkDevice, commands.pool, 0),
                                                 "vkResetCommandPool"),
                                  {
                                      // vkResetCommandPool failed (it may return out-of-memory).
                                      // Free the commands in the cleanup step before returning to
                                      // reclaim memory.

                                      // The VkCommandBuffer memory should be wholly owned by the
                                      // pool and freed when it is destroyed, but that's not the
                                      // case in some drivers and they leak memory. So we call
                                      // FreeCommandBuffers before DestroyCommandPool to be safe.
                                      // TODO(enga): Only do this on a known list of bad drivers.
                                      fn.FreeCommandBuffers(mVkDevice, commands.pool, 1,
                                                            &commands.commandBuffer);
                                      fn.DestroyCommandPool(mVkDevice, commands.pool, nullptr);
                                  });

            mRecordingContext.commandBuffer = commands.commandBuffer;
            mRecordingContext.commandPool = commands.pool;
        } else {
            // Create a new command pool for our commands and allocate the command buffer.
            VkCommandPoolCreateInfo createInfo;
            createInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
            createInfo.pNext = nullptr;
            createInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
            createInfo.queueFamilyIndex = mQueueFamily;

            DAWN_TRY(CheckVkSuccess(fn.CreateCommandPool(mVkDevice, &createInfo, nullptr,
                                                         &*mRecordingContext.commandPool),
                                    "vkCreateCommandPool"));

            VkCommandBufferAllocateInfo allocateInfo;
            allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
            allocateInfo.pNext = nullptr;
            allocateInfo.commandPool = mRecordingContext.commandPool;
            allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
            allocateInfo.commandBufferCount = 1;

            DAWN_TRY(CheckVkSuccess(fn.AllocateCommandBuffers(mVkDevice, &allocateInfo,
                                                              &mRecordingContext.commandBuffer),
                                    "vkAllocateCommandBuffers"));
        }

        // Start the recording of commands in the command buffer.
        VkCommandBufferBeginInfo beginInfo;
        beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
        beginInfo.pNext = nullptr;
        beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
        beginInfo.pInheritanceInfo = nullptr;

        return CheckVkSuccess(fn.BeginCommandBuffer(mRecordingContext.commandBuffer, &beginInfo),
                              "vkBeginCommandBuffer");
    }

    void Device::RecycleCompletedCommands() {
        for (auto& commands : mCommandsInFlight.IterateUpTo(GetCompletedCommandSerial())) {
            mUnusedCommands.push_back(commands);
        }
        mCommandsInFlight.ClearUpTo(GetCompletedCommandSerial());
    }

    ResultOrError<std::unique_ptr<StagingBufferBase>> Device::CreateStagingBuffer(size_t size) {
        std::unique_ptr<StagingBufferBase> stagingBuffer =
            std::make_unique<StagingBuffer>(size, this);
        DAWN_TRY(stagingBuffer->Initialize());
        return std::move(stagingBuffer);
    }

    MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
                                               uint64_t sourceOffset,
                                               BufferBase* destination,
                                               uint64_t destinationOffset,
                                               uint64_t size) {
        // It is a validation error to do a 0-sized copy in Vulkan, check it is skipped prior to
        // calling this function.
        ASSERT(size != 0);

        CommandRecordingContext* recordingContext = GetPendingRecordingContext();

        ToBackend(destination)
            ->EnsureDataInitializedAsDestination(recordingContext, destinationOffset, size);

        // There is no need for a barrier to make host writes available and visible to the copy
        // operation for HOST_COHERENT memory. The Vulkan spec for vkQueueSubmit describes that it
        // does an implicit availability, visibility and domain operation.

        // Insert pipeline barrier to ensure correct ordering with previous memory operations on
        // the buffer.
        ToBackend(destination)->TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst);

        VkBufferCopy copy;
        copy.srcOffset = sourceOffset;
        copy.dstOffset = destinationOffset;
        copy.size = size;

        this->fn.CmdCopyBuffer(recordingContext->commandBuffer,
                               ToBackend(source)->GetBufferHandle(),
                               ToBackend(destination)->GetHandle(), 1, &copy);

        return {};
    }
    MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
                                                const TextureDataLayout& src,
                                                TextureCopy* dst,
                                                const Extent3D& copySizePixels) {
        // There is no need for a barrier to make host writes available and visible to the copy
        // operation for HOST_COHERENT memory. The Vulkan spec for vkQueueSubmit describes that it
        // does an implicit availability, visibility and domain operation.
        CommandRecordingContext* recordingContext = GetPendingRecordingContext();

        VkBufferImageCopy region = ComputeBufferImageCopyRegion(src, *dst, copySizePixels);
        VkImageSubresourceLayers subresource = region.imageSubresource;

        ASSERT(dst->texture->GetDimension() != wgpu::TextureDimension::e1D);
        SubresourceRange range = GetSubresourcesAffectedByCopy(*dst, copySizePixels);

        if (IsCompleteSubresourceCopiedTo(dst->texture.Get(), copySizePixels,
                                          subresource.mipLevel)) {
            // Since the texture has been overwritten, it has been "initialized"
            dst->texture->SetIsSubresourceContentInitialized(true, range);
        } else {
            ToBackend(dst->texture)->EnsureSubresourceContentInitialized(recordingContext, range);
        }
        // Insert pipeline barrier to ensure correct ordering with previous memory operations on
        // the texture.
        ToBackend(dst->texture)
            ->TransitionUsageNow(recordingContext, wgpu::TextureUsage::CopyDst, range);
        VkImage dstImage = ToBackend(dst->texture)->GetHandle();

        // Dawn guarantees dstImage is in the TRANSFER_DST_OPTIMAL layout after the
        // copy command.
        this->fn.CmdCopyBufferToImage(recordingContext->commandBuffer,
                                      ToBackend(source)->GetBufferHandle(), dstImage,
                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
        return {};
    }

    MaybeError Device::ImportExternalImage(const ExternalImageDescriptorVk* descriptor,
                                           ExternalMemoryHandle memoryHandle,
                                           VkImage image,
                                           const std::vector<ExternalSemaphoreHandle>& waitHandles,
                                           VkSemaphore* outSignalSemaphore,
                                           VkDeviceMemory* outAllocation,
                                           std::vector<VkSemaphore>* outWaitSemaphores) {
        const TextureDescriptor* textureDescriptor =
            reinterpret_cast<const TextureDescriptor*>(descriptor->cTextureDescriptor);

        const DawnTextureInternalUsageDescriptor* internalUsageDesc = nullptr;
        FindInChain(textureDescriptor->nextInChain, &internalUsageDesc);
        wgpu::TextureUsage usage = textureDescriptor->usage;
        if (internalUsageDesc != nullptr) {
            usage |= internalUsageDesc->internalUsage;
        }

        // Check that the services support this combination of handle type / image info
        if (!mExternalSemaphoreService->Supported()) {
            return DAWN_VALIDATION_ERROR("External semaphore usage not supported");
        }
        if (!mExternalMemoryService->SupportsImportMemory(
                VulkanImageFormat(this, textureDescriptor->format), VK_IMAGE_TYPE_2D,
                VK_IMAGE_TILING_OPTIMAL,
                VulkanImageUsage(usage, GetValidInternalFormat(textureDescriptor->format)),
                VK_IMAGE_CREATE_ALIAS_BIT_KHR)) {
            return DAWN_VALIDATION_ERROR("External memory usage not supported");
        }

        // Create an external semaphore to signal when the texture is done being used
        DAWN_TRY_ASSIGN(*outSignalSemaphore,
                        mExternalSemaphoreService->CreateExportableSemaphore());

        // Import the external image's memory
        external_memory::MemoryImportParams importParams;
        DAWN_TRY_ASSIGN(importParams,
                        mExternalMemoryService->GetMemoryImportParams(descriptor, image));
        DAWN_TRY_ASSIGN(*outAllocation,
                        mExternalMemoryService->ImportMemory(memoryHandle, importParams, image));

        // Import semaphores we have to wait on before using the texture
        for (const ExternalSemaphoreHandle& handle : waitHandles) {
            VkSemaphore semaphore = VK_NULL_HANDLE;
            DAWN_TRY_ASSIGN(semaphore, mExternalSemaphoreService->ImportSemaphore(handle));
            outWaitSemaphores->push_back(semaphore);
        }

        return {};
    }
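    // SignalAndExportExternalTexture transitions the wrapped texture to the layout requested by
    // the external consumer, exports a semaphore handle the consumer must wait on before reusing
    // the image, and reports the released layouts and initialization state through |info|.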
    bool Device::SignalAndExportExternalTexture(
        Texture* texture,
        VkImageLayout desiredLayout,
        ExternalImageExportInfoVk* info,
        std::vector<ExternalSemaphoreHandle>* semaphoreHandles) {
        return !ConsumedError([&]() -> MaybeError {
            DAWN_TRY(ValidateObject(texture));

            VkSemaphore signalSemaphore;
            VkImageLayout releasedOldLayout;
            VkImageLayout releasedNewLayout;
            DAWN_TRY(texture->ExportExternalTexture(desiredLayout, &signalSemaphore,
                                                    &releasedOldLayout, &releasedNewLayout));

            ExternalSemaphoreHandle semaphoreHandle;
            DAWN_TRY_ASSIGN(semaphoreHandle,
                            mExternalSemaphoreService->ExportSemaphore(signalSemaphore));
            semaphoreHandles->push_back(semaphoreHandle);
            info->releasedOldLayout = releasedOldLayout;
            info->releasedNewLayout = releasedNewLayout;
            info->isInitialized =
                texture->IsSubresourceContentInitialized(texture->GetAllSubresources());

            return {};
        }());
    }

    TextureBase* Device::CreateTextureWrappingVulkanImage(
        const ExternalImageDescriptorVk* descriptor,
        ExternalMemoryHandle memoryHandle,
        const std::vector<ExternalSemaphoreHandle>& waitHandles) {
        const TextureDescriptor* textureDescriptor =
            reinterpret_cast<const TextureDescriptor*>(descriptor->cTextureDescriptor);

        // Initial validation
        if (ConsumedError(ValidateTextureDescriptor(this, textureDescriptor))) {
            return nullptr;
        }
        if (ConsumedError(ValidateVulkanImageCanBeWrapped(this, textureDescriptor))) {
            return nullptr;
        }

        VkSemaphore signalSemaphore = VK_NULL_HANDLE;
        VkDeviceMemory allocation = VK_NULL_HANDLE;
        std::vector<VkSemaphore> waitSemaphores;
        waitSemaphores.reserve(waitHandles.size());

        // Clean up in case of a failure: the image creation doesn't acquire the external objects
        // if a failure happens.
        Texture* result = nullptr;
        // TODO(crbug.com/1026480): Consolidate this into a single CreateFromExternal call.
        if (ConsumedError(Texture::CreateFromExternal(this, descriptor, textureDescriptor,
                                                      mExternalMemoryService.get()),
                          &result) ||
            ConsumedError(ImportExternalImage(descriptor, memoryHandle, result->GetHandle(),
                                              waitHandles, &signalSemaphore, &allocation,
                                              &waitSemaphores)) ||
            ConsumedError(result->BindExternalMemory(descriptor, signalSemaphore, allocation,
                                                     waitSemaphores))) {
            // Delete the Texture if it was created
            if (result != nullptr) {
                result->Release();
            }

            // Clear the signal semaphore
            fn.DestroySemaphore(GetVkDevice(), signalSemaphore, nullptr);

            // Clear image memory
            fn.FreeMemory(GetVkDevice(), allocation, nullptr);

            // Clear any wait semaphores we were able to import
            for (VkSemaphore semaphore : waitSemaphores) {
                fn.DestroySemaphore(GetVkDevice(), semaphore, nullptr);
            }
            return nullptr;
        }

        return result;
    }

    uint32_t Device::GetComputeSubgroupSize() const {
        return mComputeSubgroupSize;
    }
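    // WaitForIdleForDestruction drains the GPU before teardown: the pending recording context is
    // parked in mUnusedCommands, the queue is waited on, and every in-flight fence is waited on
    // and destroyed, so that ShutDownImpl() can free the remaining objects unconditionally.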
    MaybeError Device::WaitForIdleForDestruction() {
        // Immediately tag the recording context as unused so we don't try to submit it in Tick.
        // Move the recording context's commands to mUnusedCommands so that they can be cleaned up
        // in ShutDownImpl().
        if (mRecordingContext.used) {
            CommandPoolAndBuffer commands = {mRecordingContext.commandPool,
                                             mRecordingContext.commandBuffer};
            mUnusedCommands.push_back(commands);
            mRecordingContext = CommandRecordingContext();
        }

        VkResult waitIdleResult = VkResult::WrapUnsafe(fn.QueueWaitIdle(mQueue));
        // Ignore the result of QueueWaitIdle: it can return OOM which we can't really do anything
        // about, device loss, which means workloads running on the GPU are no longer accessible
        // (so they are as good as waited on), or success.
        DAWN_UNUSED(waitIdleResult);

        // Make sure all fences are complete by explicitly waiting on them all
        while (!mFencesInFlight.empty()) {
            VkFence fence = mFencesInFlight.front().first;
            ExecutionSerial fenceSerial = mFencesInFlight.front().second;
            ASSERT(fenceSerial > GetCompletedCommandSerial());

            VkResult result = VkResult::WrapUnsafe(VK_TIMEOUT);
            do {
                result = VkResult::WrapUnsafe(
                    INJECT_ERROR_OR_RUN(fn.WaitForFences(mVkDevice, 1, &*fence, true, UINT64_MAX),
                                        VK_ERROR_DEVICE_LOST));
            } while (result == VK_TIMEOUT);
            // Ignore errors from vkWaitForFences: it can be either OOM which we can't do anything
            // about (and we need to keep going with the destruction of all fences), or device
            // loss, which means the workload on the GPU is no longer accessible and we can
            // safely destroy the fence.

            fn.DestroyFence(mVkDevice, fence, nullptr);
            mFencesInFlight.pop();
        }
        return {};
    }
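    // ShutDownImpl tears the backend down in reverse dependency order and is written to tolerate
    // partially failed initialization: it returns early when there is no VkDevice, and only
    // destroys the VkDevice when the fenced deleter was never created.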
    void Device::ShutDownImpl() {
        ASSERT(GetState() == State::Disconnected);

        // We failed during initialization so early that we don't even have a VkDevice. There is
        // nothing to do.
        if (mVkDevice == VK_NULL_HANDLE) {
            return;
        }

        // The deleter is the second thing we initialize. If it is not present, it means that
        // only the VkDevice was created and nothing else. Destroy the device and do nothing else
        // because the function pointers might not have been loaded (and there is nothing to
        // destroy anyway).
        if (mDeleter == nullptr) {
            fn.DestroyDevice(mVkDevice, nullptr);
            mVkDevice = VK_NULL_HANDLE;
            return;
        }

        // Enough of the Device's initialization happened that we can now do regular robust
        // deinitialization.

        // Immediately tag the recording context as unused so we don't try to submit it in Tick.
        mRecordingContext.used = false;
        if (mRecordingContext.commandPool != VK_NULL_HANDLE) {
            // The VkCommandBuffer memory should be wholly owned by the pool and freed when it is
            // destroyed, but that's not the case in some drivers and they leak memory.
            // So we call FreeCommandBuffers before DestroyCommandPool to be safe.
            // TODO(enga): Only do this on a known list of bad drivers.
            fn.FreeCommandBuffers(mVkDevice, mRecordingContext.commandPool, 1,
                                  &mRecordingContext.commandBuffer);
            fn.DestroyCommandPool(mVkDevice, mRecordingContext.commandPool, nullptr);
        }

        for (VkSemaphore semaphore : mRecordingContext.waitSemaphores) {
            fn.DestroySemaphore(mVkDevice, semaphore, nullptr);
        }
        mRecordingContext.waitSemaphores.clear();

        for (VkSemaphore semaphore : mRecordingContext.signalSemaphores) {
            fn.DestroySemaphore(mVkDevice, semaphore, nullptr);
        }
        mRecordingContext.signalSemaphores.clear();

        // Some commands might still be marked as in-flight if we shut down because of a device
        // loss. Recycle them as unused so that we free them below.
        RecycleCompletedCommands();
        ASSERT(mCommandsInFlight.Empty());

        for (const CommandPoolAndBuffer& commands : mUnusedCommands) {
            // The VkCommandBuffer memory should be wholly owned by the pool and freed when it is
            // destroyed, but that's not the case in some drivers and they leak memory.
            // So we call FreeCommandBuffers before DestroyCommandPool to be safe.
            // TODO(enga): Only do this on a known list of bad drivers.
            fn.FreeCommandBuffers(mVkDevice, commands.pool, 1, &commands.commandBuffer);
            fn.DestroyCommandPool(mVkDevice, commands.pool, nullptr);
        }
        mUnusedCommands.clear();

        // Some fences might still be marked as in-flight if we shut down because of a device
        // loss. Delete them since at this point all commands are complete.
        while (!mFencesInFlight.empty()) {
            fn.DestroyFence(mVkDevice, *mFencesInFlight.front().first, nullptr);
            mFencesInFlight.pop();
        }

        for (VkFence fence : mUnusedFences) {
            fn.DestroyFence(mVkDevice, fence, nullptr);
        }
        mUnusedFences.clear();

        ExecutionSerial completedSerial = GetCompletedCommandSerial();
        for (Ref<BindGroupLayout>& bgl :
             mBindGroupLayoutsPendingDeallocation.IterateUpTo(completedSerial)) {
            bgl->FinishDeallocation(completedSerial);
        }
        mBindGroupLayoutsPendingDeallocation.ClearUpTo(completedSerial);

        // Releasing the uploader enqueues buffers to be released.
        // Call Tick() again to clear them before releasing the deleter.
        mResourceMemoryAllocator->Tick(completedSerial);
        mDeleter->Tick(completedSerial);

        // Allow recycled memory to be deleted.
        mResourceMemoryAllocator->DestroyPool();

        // The VkRenderPasses in the cache can be destroyed immediately since all commands
        // referring to them are guaranteed to be finished executing.
        mRenderPassCache = nullptr;

        // We need to handle deleting all child objects by calling Tick() again with a large
        // serial to force all operations to look as if they were completed, and delete all
        // objects before destroying the Deleter and vkDevice.
        ASSERT(mDeleter != nullptr);
        mDeleter->Tick(kMaxExecutionSerial);
        mDeleter = nullptr;

        // VkQueues are destroyed when the VkDevice is destroyed
        // The VkDevice is needed to destroy child objects, so it must be destroyed last after all
        // child objects have been deleted.
        ASSERT(mVkDevice != VK_NULL_HANDLE);
        fn.DestroyDevice(mVkDevice, nullptr);
        mVkDevice = VK_NULL_HANDLE;
    }

    uint32_t Device::GetOptimalBytesPerRowAlignment() const {
        return mDeviceInfo.properties.limits.optimalBufferCopyRowPitchAlignment;
    }

    uint64_t Device::GetOptimalBufferToTextureCopyOffsetAlignment() const {
        return mDeviceInfo.properties.limits.optimalBufferCopyOffsetAlignment;
    }

    float Device::GetTimestampPeriodInNS() const {
        return mDeviceInfo.properties.limits.timestampPeriod;
    }

}}  // namespace dawn_native::vulkan