// Copyright 2017 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dawn/native/vulkan/BufferVk.h"

#include "dawn/native/CommandBuffer.h"
#include "dawn/native/vulkan/DeviceVk.h"
#include "dawn/native/vulkan/FencedDeleter.h"
#include "dawn/native/vulkan/ResourceHeapVk.h"
#include "dawn/native/vulkan/ResourceMemoryAllocatorVk.h"
#include "dawn/native/vulkan/UtilsVulkan.h"
#include "dawn/native/vulkan/VulkanError.h"

#include <algorithm>
#include <cstring>
#include <limits>
#include <utility>

namespace dawn::native::vulkan {

namespace {

VkBufferUsageFlags VulkanBufferUsage(wgpu::BufferUsage usage) {
    VkBufferUsageFlags flags = 0;

    if (usage & wgpu::BufferUsage::CopySrc) {
        flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    }
    if (usage & wgpu::BufferUsage::CopyDst) {
        flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    }
    if (usage & wgpu::BufferUsage::Index) {
        flags |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
    }
    if (usage & wgpu::BufferUsage::Vertex) {
        flags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
    }
    if (usage & wgpu::BufferUsage::Uniform) {
        flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
    }
    if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer)) {
        flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
    }
    if (usage & wgpu::BufferUsage::Indirect) {
        flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    }
    if (usage & wgpu::BufferUsage::QueryResolve) {
        flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    }

    return flags;
}

VkPipelineStageFlags VulkanPipelineStage(wgpu::BufferUsage usage) {
    VkPipelineStageFlags flags = 0;

    if (usage & kMappableBufferUsages) {
        flags |= VK_PIPELINE_STAGE_HOST_BIT;
    }
    if (usage & (wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst)) {
        flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
    }
    if (usage & (wgpu::BufferUsage::Index | wgpu::BufferUsage::Vertex)) {
        flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
    }
    if (usage & (wgpu::BufferUsage::Uniform | wgpu::BufferUsage::Storage |
                 kInternalStorageBuffer | kReadOnlyStorageBuffer)) {
        flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    }
    if (usage & wgpu::BufferUsage::Indirect) {
        flags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
    }
    if (usage & wgpu::BufferUsage::QueryResolve) {
        flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
    }

    return flags;
}
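// Example (illustration only, hypothetical buffer): for a buffer created with
// usage CopyDst | Storage, the helpers above would compose as
//
//   VulkanBufferUsage(wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::Storage)
//       == VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
//
//   VulkanPipelineStage(wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::Storage)
//       == VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
//          VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
//
// i.e. a storage buffer is assumed reachable from vertex, fragment, and compute
// shaders, since the WebGPU usage flag does not say which stage will touch it.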
VkAccessFlags VulkanAccessFlags(wgpu::BufferUsage usage) {
    VkAccessFlags flags = 0;

    if (usage & wgpu::BufferUsage::MapRead) {
        flags |= VK_ACCESS_HOST_READ_BIT;
    }
    if (usage & wgpu::BufferUsage::MapWrite) {
        flags |= VK_ACCESS_HOST_WRITE_BIT;
    }
    if (usage & wgpu::BufferUsage::CopySrc) {
        flags |= VK_ACCESS_TRANSFER_READ_BIT;
    }
    if (usage & wgpu::BufferUsage::CopyDst) {
        flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
    }
    if (usage & wgpu::BufferUsage::Index) {
        flags |= VK_ACCESS_INDEX_READ_BIT;
    }
    if (usage & wgpu::BufferUsage::Vertex) {
        flags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
    }
    if (usage & wgpu::BufferUsage::Uniform) {
        flags |= VK_ACCESS_UNIFORM_READ_BIT;
    }
    if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer)) {
        flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    }
    if (usage & kReadOnlyStorageBuffer) {
        flags |= VK_ACCESS_SHADER_READ_BIT;
    }
    if (usage & wgpu::BufferUsage::Indirect) {
        flags |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
    }
    if (usage & wgpu::BufferUsage::QueryResolve) {
        flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
    }

    return flags;
}

}  // namespace

// static
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
    Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
    DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation));
    return std::move(buffer);
}
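// Example (hypothetical call site): creation is split into a constructor plus
// Initialize() because a constructor cannot return a MaybeError. A caller such
// as the device's buffer factory would typically propagate failure like so:
//
//   Ref<Buffer> buffer;
//   DAWN_TRY_ASSIGN(buffer, Buffer::Create(device, descriptor));
//
// so that an out-of-memory raised inside Initialize() surfaces as an error
// instead of a half-constructed object.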
MaybeError Buffer::Initialize(bool mappedAtCreation) {
    // vkCmdFillBuffer requires the size to be a multiple of 4.
    constexpr size_t kAlignment = 4u;

    uint32_t extraBytes = 0u;
    if (GetUsage() & (wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Index)) {
        // vkCmdBindIndexBuffer and vkCmdBindVertexBuffers are invalid if the offset
        // is equal to the whole buffer size. Allocate at least one more byte so it
        // is valid to bind a vertex/index buffer with a zero-sized range at the end
        // of the buffer with (offset=buffer.size, size=0).
        extraBytes = 1u;
    }

    uint64_t size = GetSize();
    if (size > std::numeric_limits<uint64_t>::max() - extraBytes) {
        return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
    }
    size += extraBytes;

    // Allocate at least 4 bytes so clamped accesses are always in bounds.
    // Also, Vulkan requires the size to be non-zero.
    size = std::max(size, uint64_t(4u));

    if (size > std::numeric_limits<uint64_t>::max() - kAlignment) {
        // Alignment would overflow.
        return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
    }
    mAllocatedSize = Align(size, kAlignment);

    // Avoid passing ludicrously large sizes to drivers because it causes issues: drivers add
    // some constants to the size passed and align it, but for values close to the maximum
    // VkDeviceSize this can cause overflows and makes drivers crash or return bad sizes in
    // the VkMemoryRequirements. See https://gitlab.khronos.org/vulkan/vulkan/issues/1904
    // Any size with one of the two top bits of VkDeviceSize set is a HUGE allocation and we
    // can safely return an OOM error.
    if (mAllocatedSize & (uint64_t(3) << uint64_t(62))) {
        return DAWN_OUT_OF_MEMORY_ERROR("Buffer size is HUGE and could cause overflows");
    }

    VkBufferCreateInfo createInfo;
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.pNext = nullptr;
    createInfo.flags = 0;
    createInfo.size = mAllocatedSize;
    // Add CopyDst for non-mappable buffer initialization with mappedAtCreation
    // and robust resource initialization.
    createInfo.usage = VulkanBufferUsage(GetUsage() | wgpu::BufferUsage::CopyDst);
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    Device* device = ToBackend(GetDevice());
    DAWN_TRY(CheckVkOOMThenSuccess(
        device->fn.CreateBuffer(device->GetVkDevice(), &createInfo, nullptr, &*mHandle),
        "vkCreateBuffer"));

    // Gather requirements for the buffer's memory and allocate it.
    VkMemoryRequirements requirements;
    device->fn.GetBufferMemoryRequirements(device->GetVkDevice(), mHandle, &requirements);

    MemoryKind requestKind = MemoryKind::Linear;
    if (GetUsage() & kMappableBufferUsages) {
        requestKind = MemoryKind::LinearMappable;
    }
    DAWN_TRY_ASSIGN(mMemoryAllocation,
                    device->GetResourceMemoryAllocator()->Allocate(requirements, requestKind));

    // Finally associate it with the buffer.
    DAWN_TRY(CheckVkSuccess(
        device->fn.BindBufferMemory(device->GetVkDevice(), mHandle,
                                    ToBackend(mMemoryAllocation.GetResourceHeap())->GetMemory(),
                                    mMemoryAllocation.GetOffset()),
        "vkBindBufferMemory"));

    // The buffers with mappedAtCreation == true will be initialized in
    // BufferBase::MapAtCreation().
    if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) &&
        !mappedAtCreation) {
        ClearBuffer(device->GetPendingRecordingContext(), 0x01010101);
    }

    // Initialize the padding bytes to zero.
    if (device->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) && !mappedAtCreation) {
        uint32_t paddingBytes = GetAllocatedSize() - GetSize();
        if (paddingBytes > 0) {
            uint32_t clearSize = Align(paddingBytes, 4);
            uint64_t clearOffset = GetAllocatedSize() - clearSize;

            CommandRecordingContext* recordingContext = device->GetPendingRecordingContext();
            ClearBuffer(recordingContext, 0, clearOffset, clearSize);
        }
    }

    SetLabelImpl();

    return {};
}

Buffer::~Buffer() = default;

VkBuffer Buffer::GetHandle() const {
    return mHandle;
}

void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
                                wgpu::BufferUsage usage) {
    VkBufferMemoryBarrier barrier;
    VkPipelineStageFlags srcStages = 0;
    VkPipelineStageFlags dstStages = 0;

    if (TransitionUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) {
        ASSERT(srcStages != 0 && dstStages != 0);
        ToBackend(GetDevice())
            ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
                                    nullptr, 1u, &barrier, 0, nullptr);
    }
}

bool Buffer::TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
                                                  VkBufferMemoryBarrier* barrier,
                                                  VkPipelineStageFlags* srcStages,
                                                  VkPipelineStageFlags* dstStages) {
    bool lastIncludesTarget = IsSubset(usage, mLastUsage);
    bool lastReadOnly = IsSubset(mLastUsage, kReadOnlyBufferUsages);

    // We can skip transitions to already current read-only usages.
    if (lastIncludesTarget && lastReadOnly) {
        return false;
    }

    // Special-case for the initial transition: Vulkan doesn't allow access flags to be 0.
    if (mLastUsage == wgpu::BufferUsage::None) {
        mLastUsage = usage;
        return false;
    }

    *srcStages |= VulkanPipelineStage(mLastUsage);
    *dstStages |= VulkanPipelineStage(usage);

    barrier->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier->pNext = nullptr;
    barrier->srcAccessMask = VulkanAccessFlags(mLastUsage);
    barrier->dstAccessMask = VulkanAccessFlags(usage);
    barrier->srcQueueFamilyIndex = 0;
    barrier->dstQueueFamilyIndex = 0;
    barrier->buffer = mHandle;
    barrier->offset = 0;
    // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it.
    barrier->size = GetAllocatedSize();

    mLastUsage = usage;

    return true;
}
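// Example (hypothetical sequence): if a buffer was last written as CopyDst and
// is next read as Vertex, the transition above would fill the barrier with
// roughly
//
//   srcStages     == VK_PIPELINE_STAGE_TRANSFER_BIT
//   dstStages     == VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
//   srcAccessMask == VK_ACCESS_TRANSFER_WRITE_BIT
//   dstAccessMask == VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
//
// A following Vertex -> Vertex use is skipped entirely: the last usage already
// includes the target and is a subset of kReadOnlyBufferUsages, so no
// read-after-read barrier is recorded.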
bool Buffer::IsCPUWritableAtCreation() const {
    // TODO(enga): Handle CPU-visible memory on UMA
    return mMemoryAllocation.GetMappedPointer() != nullptr;
}

MaybeError Buffer::MapAtCreationImpl() {
    return {};
}

MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
    Device* device = ToBackend(GetDevice());

    CommandRecordingContext* recordingContext = device->GetPendingRecordingContext();

    // TODO(crbug.com/dawn/852): initialize mapped buffer in CPU side.
    EnsureDataInitialized(recordingContext);

    if (mode & wgpu::MapMode::Read) {
        TransitionUsageNow(recordingContext, wgpu::BufferUsage::MapRead);
    } else {
        ASSERT(mode & wgpu::MapMode::Write);
        TransitionUsageNow(recordingContext, wgpu::BufferUsage::MapWrite);
    }
    return {};
}

void Buffer::UnmapImpl() {
    // No need to do anything; we keep CPU-visible memory mapped at all times.
}

void* Buffer::GetMappedPointerImpl() {
    uint8_t* memory = mMemoryAllocation.GetMappedPointer();
    ASSERT(memory != nullptr);
    return memory;
}

void Buffer::DestroyImpl() {
    BufferBase::DestroyImpl();

    ToBackend(GetDevice())->GetResourceMemoryAllocator()->Deallocate(&mMemoryAllocation);

    if (mHandle != VK_NULL_HANDLE) {
        ToBackend(GetDevice())->GetFencedDeleter()->DeleteWhenUnused(mHandle);
        mHandle = VK_NULL_HANDLE;
    }
}

bool Buffer::EnsureDataInitialized(CommandRecordingContext* recordingContext) {
    if (!NeedsInitialization()) {
        return false;
    }

    InitializeToZero(recordingContext);
    return true;
}

bool Buffer::EnsureDataInitializedAsDestination(CommandRecordingContext* recordingContext,
                                                uint64_t offset,
                                                uint64_t size) {
    if (!NeedsInitialization()) {
        return false;
    }

    if (IsFullBufferRange(offset, size)) {
        SetIsDataInitialized();
        return false;
    }

    InitializeToZero(recordingContext);
    return true;
}

bool Buffer::EnsureDataInitializedAsDestination(CommandRecordingContext* recordingContext,
                                                const CopyTextureToBufferCmd* copy) {
    if (!NeedsInitialization()) {
        return false;
    }

    if (IsFullBufferOverwrittenInTextureToBufferCopy(copy)) {
        SetIsDataInitialized();
        return false;
    }

    InitializeToZero(recordingContext);
    return true;
}

void Buffer::SetLabelImpl() {
    SetDebugName(ToBackend(GetDevice()), mHandle, "Dawn_Buffer", GetLabel());
}

void Buffer::InitializeToZero(CommandRecordingContext* recordingContext) {
    ASSERT(NeedsInitialization());

    ClearBuffer(recordingContext, 0u);
    GetDevice()->IncrementLazyClearCountForTesting();
    SetIsDataInitialized();
}

void Buffer::ClearBuffer(CommandRecordingContext* recordingContext,
                         uint32_t clearValue,
                         uint64_t offset,
                         uint64_t size) {
    ASSERT(recordingContext != nullptr);
    size = size > 0 ? size : GetAllocatedSize();
    ASSERT(size > 0);

    TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst);

    Device* device = ToBackend(GetDevice());
    // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it.
    // Note: Allocated size must be a multiple of 4.
    ASSERT(size % 4 == 0);
    device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, offset, size, clearValue);
}

}  // namespace dawn::native::vulkan