From 80f927d763211ea8e6a6377f86282809c86dc107 Mon Sep 17 00:00:00 2001 From: Jiawei Shao Date: Mon, 6 Jul 2020 08:24:30 +0000 Subject: [PATCH] Implement buffer lazy initialization before writeBuffer This patch implements buffer lazy initialization before writeBuffer(): if the buffer is not initialized and writeBuffer() doesn't cover the whole buffer, the buffer will be cleared to 0; otherwise the buffer won't be cleared. This patch also introduces a toggle LazyClearBufferOnFirstUse for the development of buffer lazy initialization: until buffer lazy initialization is completely supported, all the related code will only be enabled behind this toggle to prevent buffers with valid content from being unexpectedly cleared. BUG=dawn:414 TEST=dawn_end2end_tests Change-Id: I99a2aa98ca4b9b21d69c6b32080afb525e2c4ad3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24041 Commit-Queue: Jiawei Shao Reviewed-by: Corentin Wallez --- src/dawn_native/Buffer.cpp | 16 ++ src/dawn_native/Buffer.h | 10 +- src/dawn_native/Toggles.cpp | 215 +++++++++++----------- src/dawn_native/Toggles.h | 1 + src/dawn_native/d3d12/BufferD3D12.cpp | 29 ++- src/dawn_native/d3d12/BufferD3D12.h | 4 +- src/dawn_native/d3d12/DeviceD3D12.cpp | 33 +++- src/dawn_native/d3d12/DeviceD3D12.h | 7 + src/dawn_native/metal/BufferMTL.h | 5 +- src/dawn_native/metal/BufferMTL.mm | 24 ++- src/dawn_native/metal/DeviceMTL.mm | 11 ++ src/dawn_native/null/DeviceNull.cpp | 4 + src/dawn_native/opengl/BufferGL.cpp | 23 ++- src/dawn_native/opengl/BufferGL.h | 3 + src/dawn_native/opengl/QueueGL.cpp | 11 ++ src/dawn_native/vulkan/BufferVk.cpp | 21 ++- src/dawn_native/vulkan/BufferVk.h | 4 +- src/dawn_native/vulkan/DeviceVk.cpp | 14 +- src/tests/BUILD.gn | 1 + src/tests/end2end/BufferZeroInitTests.cpp | 98 ++++++++++ 20 files changed, 392 insertions(+), 142 deletions(-) create mode 100644 src/tests/end2end/BufferZeroInitTests.cpp diff --git a/src/dawn_native/Buffer.cpp b/src/dawn_native/Buffer.cpp index 
0e4c384c76..0414159f1f 100644 --- a/src/dawn_native/Buffer.cpp +++ b/src/dawn_native/Buffer.cpp @@ -488,6 +488,10 @@ namespace dawn_native { mState = BufferState::Destroyed; } + bool BufferBase::IsMapped() const { + return mState == BufferState::Mapped; + } + void BufferBase::OnMapCommandSerialFinished(uint32_t mapSerial, bool isWrite) { void* data = GetMappedPointerImpl(); if (isWrite) { @@ -497,4 +501,16 @@ namespace dawn_native { } } + bool BufferBase::IsDataInitialized() const { + return mIsDataInitialized; + } + + void BufferBase::SetIsDataInitialized() { + mIsDataInitialized = true; + } + + bool BufferBase::IsFullBufferRange(uint64_t offset, uint64_t size) const { + return offset == 0 && size == GetSize(); + } + } // namespace dawn_native diff --git a/src/dawn_native/Buffer.h b/src/dawn_native/Buffer.h index c4f1d3297d..ea67fab4bc 100644 --- a/src/dawn_native/Buffer.h +++ b/src/dawn_native/Buffer.h @@ -40,8 +40,6 @@ namespace dawn_native { }; public: - enum class ClearValue { Zero, NonZero }; - BufferBase(DeviceBase* device, const BufferDescriptor* descriptor); static BufferBase* MakeError(DeviceBase* device); @@ -57,6 +55,10 @@ namespace dawn_native { MaybeError ValidateCanUseOnQueueNow() const; + bool IsFullBufferRange(uint64_t offset, uint64_t size) const; + bool IsDataInitialized() const; + void SetIsDataInitialized(); + // Dawn API void SetSubData(uint64_t start, uint64_t count, const void* data); void MapReadAsync(WGPUBufferMapReadCallback callback, void* userdata); @@ -81,6 +83,8 @@ namespace dawn_native { void DestroyInternal(); + bool IsMapped() const; + private: virtual MaybeError MapAtCreationImpl(uint8_t** mappedPointer) = 0; virtual MaybeError MapReadAsyncImpl(uint32_t serial) = 0; @@ -109,6 +113,8 @@ namespace dawn_native { std::unique_ptr mStagingBuffer; BufferState mState; + + bool mIsDataInitialized = false; }; } // namespace dawn_native diff --git a/src/dawn_native/Toggles.cpp b/src/dawn_native/Toggles.cpp index 9eb526a2ec..7f40ebd0c4 100644 
--- a/src/dawn_native/Toggles.cpp +++ b/src/dawn_native/Toggles.cpp @@ -29,111 +29,116 @@ namespace dawn_native { using ToggleEnumAndInfoList = std::array(Toggle::EnumCount)>; - static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{ - {Toggle::EmulateStoreAndMSAAResolve, - {"emulate_store_and_msaa_resolve", - "Emulate storing into multisampled color attachments and doing MSAA resolve " - "simultaneously. This workaround is enabled by default on the Metal drivers that do " - "not support MTLStoreActionStoreAndMultisampleResolve. To support StoreOp::Store on " - "those platforms, we should do MSAA resolve in another render pass after ending the " - "previous one.", - "https://crbug.com/dawn/56"}}, - {Toggle::NonzeroClearResourcesOnCreationForTesting, - {"nonzero_clear_resources_on_creation_for_testing", - "Clears texture to full 1 bits as soon as they are created, but doesn't update " - "the tracking state of the texture. This way we can test the logic of clearing " - "textures that use recycled memory.", - "https://crbug.com/dawn/145"}}, - {Toggle::AlwaysResolveIntoZeroLevelAndLayer, - {"always_resolve_into_zero_level_and_layer", - "When the resolve target is a texture view that is created on the non-zero level or " - "layer of a texture, we first resolve into a temporarily 2D texture with only one " - "mipmap level and one array layer, and copy the result of MSAA resolve into the " - "true resolve target. This workaround is enabled by default on the Metal drivers " - "that have bugs when setting non-zero resolveLevel or resolveSlice.", - "https://crbug.com/dawn/56"}}, - {Toggle::LazyClearResourceOnFirstUse, - {"lazy_clear_resource_on_first_use", - "Clears resource to zero on first usage. This initializes the resource " - "so that no dirty bits from recycled memory is present in the new resource.", - "https://crbug.com/dawn/145"}}, - {Toggle::TurnOffVsync, - {"turn_off_vsync", - "Turn off vsync when rendering. 
In order to do performance test or run perf tests, " - "turn off vsync so that the fps can exeed 60.", - "https://crbug.com/dawn/237"}}, - {Toggle::UseTemporaryBufferInCompressedTextureToTextureCopy, - {"use_temporary_buffer_in_texture_to_texture_copy", - "Split texture-to-texture copy into two copies: copy from source texture into a " - "temporary buffer, and copy from the temporary buffer into the destination texture " - "when copying between compressed textures that don't have block-aligned sizes. This " - "workaround is enabled by default on all Vulkan drivers to solve an issue in the " - "Vulkan SPEC about the texture-to-texture copies with compressed formats. See #1005 " - "(https://github.com/KhronosGroup/Vulkan-Docs/issues/1005) for more details.", - "https://crbug.com/dawn/42"}}, - {Toggle::UseD3D12ResourceHeapTier2, - {"use_d3d12_resource_heap_tier2", - "Enable support for resource heap tier 2. Resource heap tier 2 allows mixing of " - "texture and buffers in the same heap. This allows better heap re-use and reduces " - "fragmentation.", - "https://crbug.com/dawn/27"}}, - {Toggle::UseD3D12RenderPass, - {"use_d3d12_render_pass", - "Use the D3D12 render pass API introduced in Windows build 1809 by default. On " - "versions of Windows prior to build 1809, or when this toggle is turned off, Dawn " - "will emulate a render pass.", - "https://crbug.com/dawn/36"}}, - {Toggle::UseD3D12ResidencyManagement, - {"use_d3d12_residency_management", - "Enable residency management. This allows page-in and page-out of resource heaps in " - "GPU memory. This component improves overcommitted performance by keeping the most " - "recently used resources local to the GPU. 
Turning this component off can cause " - "allocation failures when application memory exceeds physical device memory.", - "https://crbug.com/dawn/193"}}, - {Toggle::SkipValidation, - {"skip_validation", "Skip expensive validation of Dawn commands.", - "https://crbug.com/dawn/271"}}, - {Toggle::UseSpvc, - {"use_spvc", - "Enable use of spvc for shader compilation, instead of accessing spirv_cross " - "directly.", - "https://crbug.com/dawn/288"}}, - {Toggle::UseSpvcParser, - {"use_spvc_parser", - "Enable usage of spvc's internal parsing and IR generation code, instead of " - "spirv_cross's.", - "https://crbug.com/dawn/288"}}, - {Toggle::VulkanUseD32S8, - {"vulkan_use_d32s8", - "Vulkan mandates support of either D32_FLOAT_S8 or D24_UNORM_S8. When available the " - "backend will use D32S8 (toggle to on) but setting the toggle to off will make it" - "use the D24S8 format when possible.", - "https://crbug.com/dawn/286"}}, - {Toggle::MetalDisableSamplerCompare, - {"metal_disable_sampler_compare", - "Disables the use of sampler compare on Metal. This is unsupported before A9 " - "processors.", - "https://crbug.com/dawn/342"}}, - {Toggle::DisableBaseVertex, - {"disable_base_vertex", - "Disables the use of non-zero base vertex which is unsupported on some platforms.", - "https://crbug.com/dawn/343"}}, - {Toggle::DisableBaseInstance, - {"disable_base_instance", - "Disables the use of non-zero base instance which is unsupported on some " - "platforms.", - "https://crbug.com/dawn/343"}}, - {Toggle::UseD3D12SmallShaderVisibleHeapForTesting, - {"use_d3d12_small_shader_visible_heap", - "Enable use of a small D3D12 shader visible heap, instead of using a large one by " - "default. 
This setting is used to test bindgroup encoding.", - "https://crbug.com/dawn/155"}}, - {Toggle::UseDXC, - {"use_dxc", "Use DXC instead of FXC for compiling HLSL", - "https://crbug.com/dawn/402"}}, - {Toggle::DisableRobustness, - {"disable_robustness", "Disable robust buffer access", "https://crbug.com/dawn/480"}}, - }}; + static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = { + {{Toggle::EmulateStoreAndMSAAResolve, + {"emulate_store_and_msaa_resolve", + "Emulate storing into multisampled color attachments and doing MSAA resolve " + "simultaneously. This workaround is enabled by default on the Metal drivers that do " + "not support MTLStoreActionStoreAndMultisampleResolve. To support StoreOp::Store on " + "those platforms, we should do MSAA resolve in another render pass after ending the " + "previous one.", + "https://crbug.com/dawn/56"}}, + {Toggle::NonzeroClearResourcesOnCreationForTesting, + {"nonzero_clear_resources_on_creation_for_testing", + "Clears texture to full 1 bits as soon as they are created, but doesn't update " + "the tracking state of the texture. This way we can test the logic of clearing " + "textures that use recycled memory.", + "https://crbug.com/dawn/145"}}, + {Toggle::AlwaysResolveIntoZeroLevelAndLayer, + {"always_resolve_into_zero_level_and_layer", + "When the resolve target is a texture view that is created on the non-zero level or " + "layer of a texture, we first resolve into a temporarily 2D texture with only one " + "mipmap level and one array layer, and copy the result of MSAA resolve into the " + "true resolve target. This workaround is enabled by default on the Metal drivers " + "that have bugs when setting non-zero resolveLevel or resolveSlice.", + "https://crbug.com/dawn/56"}}, + {Toggle::LazyClearResourceOnFirstUse, + {"lazy_clear_resource_on_first_use", + "Clears resource to zero on first usage. 
This initializes the resource " + "so that no dirty bits from recycled memory is present in the new resource.", + "https://crbug.com/dawn/145"}}, + {Toggle::TurnOffVsync, + {"turn_off_vsync", + "Turn off vsync when rendering. In order to do performance test or run perf tests, " + "turn off vsync so that the fps can exeed 60.", + "https://crbug.com/dawn/237"}}, + {Toggle::UseTemporaryBufferInCompressedTextureToTextureCopy, + {"use_temporary_buffer_in_texture_to_texture_copy", + "Split texture-to-texture copy into two copies: copy from source texture into a " + "temporary buffer, and copy from the temporary buffer into the destination texture " + "when copying between compressed textures that don't have block-aligned sizes. This " + "workaround is enabled by default on all Vulkan drivers to solve an issue in the " + "Vulkan SPEC about the texture-to-texture copies with compressed formats. See #1005 " + "(https://github.com/KhronosGroup/Vulkan-Docs/issues/1005) for more details.", + "https://crbug.com/dawn/42"}}, + {Toggle::UseD3D12ResourceHeapTier2, + {"use_d3d12_resource_heap_tier2", + "Enable support for resource heap tier 2. Resource heap tier 2 allows mixing of " + "texture and buffers in the same heap. This allows better heap re-use and reduces " + "fragmentation.", + "https://crbug.com/dawn/27"}}, + {Toggle::UseD3D12RenderPass, + {"use_d3d12_render_pass", + "Use the D3D12 render pass API introduced in Windows build 1809 by default. On " + "versions of Windows prior to build 1809, or when this toggle is turned off, Dawn " + "will emulate a render pass.", + "https://crbug.com/dawn/36"}}, + {Toggle::UseD3D12ResidencyManagement, + {"use_d3d12_residency_management", + "Enable residency management. This allows page-in and page-out of resource heaps in " + "GPU memory. This component improves overcommitted performance by keeping the most " + "recently used resources local to the GPU. 
Turning this component off can cause " + "allocation failures when application memory exceeds physical device memory.", + "https://crbug.com/dawn/193"}}, + {Toggle::SkipValidation, + {"skip_validation", "Skip expensive validation of Dawn commands.", + "https://crbug.com/dawn/271"}}, + {Toggle::UseSpvc, + {"use_spvc", + "Enable use of spvc for shader compilation, instead of accessing spirv_cross " + "directly.", + "https://crbug.com/dawn/288"}}, + {Toggle::UseSpvcParser, + {"use_spvc_parser", + "Enable usage of spvc's internal parsing and IR generation code, instead of " + "spirv_cross's.", + "https://crbug.com/dawn/288"}}, + {Toggle::VulkanUseD32S8, + {"vulkan_use_d32s8", + "Vulkan mandates support of either D32_FLOAT_S8 or D24_UNORM_S8. When available the " + "backend will use D32S8 (toggle to on) but setting the toggle to off will make it" + "use the D24S8 format when possible.", + "https://crbug.com/dawn/286"}}, + {Toggle::MetalDisableSamplerCompare, + {"metal_disable_sampler_compare", + "Disables the use of sampler compare on Metal. This is unsupported before A9 " + "processors.", + "https://crbug.com/dawn/342"}}, + {Toggle::DisableBaseVertex, + {"disable_base_vertex", + "Disables the use of non-zero base vertex which is unsupported on some platforms.", + "https://crbug.com/dawn/343"}}, + {Toggle::DisableBaseInstance, + {"disable_base_instance", + "Disables the use of non-zero base instance which is unsupported on some " + "platforms.", + "https://crbug.com/dawn/343"}}, + {Toggle::UseD3D12SmallShaderVisibleHeapForTesting, + {"use_d3d12_small_shader_visible_heap", + "Enable use of a small D3D12 shader visible heap, instead of using a large one by " + "default. 
This setting is used to test bindgroup encoding.", + "https://crbug.com/dawn/155"}}, + {Toggle::UseDXC, + {"use_dxc", "Use DXC instead of FXC for compiling HLSL", + "https://crbug.com/dawn/402"}}, + {Toggle::DisableRobustness, + {"disable_robustness", "Disable robust buffer access", "https://crbug.com/dawn/480"}}, + {Toggle::LazyClearBufferOnFirstUse, + {"lazy_clear_buffer_on_first_use", + "Clear buffers on their first use. This is a temporary toggle only for the " + "development of buffer lazy initialization and will be removed after buffer lazy " + "initialization is completely implemented.", + "https://crbug.com/dawn/414"}}}}; } // anonymous namespace diff --git a/src/dawn_native/Toggles.h b/src/dawn_native/Toggles.h index 1e7e3e3b7a..f9d66a2d0f 100644 --- a/src/dawn_native/Toggles.h +++ b/src/dawn_native/Toggles.h @@ -43,6 +43,7 @@ namespace dawn_native { UseD3D12SmallShaderVisibleHeapForTesting, UseDXC, DisableRobustness, + LazyClearBufferOnFirstUse, EnumCount, InvalidEnum = EnumCount, diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp index 65adaf0753..a7b8fd8b6c 100644 --- a/src/dawn_native/d3d12/BufferD3D12.cpp +++ b/src/dawn_native/d3d12/BufferD3D12.cpp @@ -123,7 +123,11 @@ namespace dawn_native { namespace d3d12 { ToBackend(GetDevice())->AllocateMemory(heapType, resourceDescriptor, bufferUsage)); if (GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) { - DAWN_TRY(ClearBuffer(ClearValue::NonZero)); + CommandRecordingContext* commandRecordingContext; + DAWN_TRY_ASSIGN(commandRecordingContext, + ToBackend(GetDevice())->GetPendingCommandContext()); + + DAWN_TRY(ClearBuffer(commandRecordingContext, uint8_t(1u))); } return {}; @@ -310,18 +314,25 @@ namespace dawn_native { namespace d3d12 { return mResourceAllocation.GetInfo().mMethod == allocationMethod; } - MaybeError Buffer::ClearBuffer(ClearValue clearValue) { - // TODO(jiawei.shao@intel.com): support buffer lazy-initialization to 0. 
- ASSERT(clearValue == BufferBase::ClearValue::NonZero); - constexpr uint8_t kClearBufferValue = 1u; + MaybeError Buffer::ClearBufferContentsToZero(CommandRecordingContext* commandContext) { + ASSERT(GetDevice()->IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse)); + ASSERT(!IsDataInitialized()); + DAWN_TRY(ClearBuffer(commandContext, uint8_t(0u))); + SetIsDataInitialized(); + GetDevice()->IncrementLazyClearCountForTesting(); + + return {}; + } + + MaybeError Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) { Device* device = ToBackend(GetDevice()); // The state of the buffers on UPLOAD heap must always be GENERIC_READ and cannot be // changed away, so we can only clear such buffer with buffer mapping. if (D3D12HeapType(GetUsage()) == D3D12_HEAP_TYPE_UPLOAD) { DAWN_TRY(MapInternal(true, "D3D12 map at clear buffer")); - memset(mMappedData, kClearBufferValue, GetSize()); + memset(mMappedData, clearValue, GetSize()); UnmapImpl(); } else { // TODO(jiawei.shao@intel.com): use ClearUnorderedAccessView*() when the buffer usage @@ -331,10 +342,10 @@ namespace dawn_native { namespace d3d12 { DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(GetSize(), device->GetPendingCommandSerial())); - memset(uploadHandle.mappedBuffer, kClearBufferValue, GetSize()); + memset(uploadHandle.mappedBuffer, clearValue, GetSize()); - DAWN_TRY(device->CopyFromStagingToBuffer(uploadHandle.stagingBuffer, - uploadHandle.startOffset, this, 0, GetSize())); + device->CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer, + uploadHandle.startOffset, this, 0, GetSize()); } return {}; diff --git a/src/dawn_native/d3d12/BufferD3D12.h b/src/dawn_native/d3d12/BufferD3D12.h index a081986244..a364250bde 100644 --- a/src/dawn_native/d3d12/BufferD3D12.h +++ b/src/dawn_native/d3d12/BufferD3D12.h @@ -44,6 +44,8 @@ namespace dawn_native { namespace d3d12 { bool CheckAllocationMethodForTesting(AllocationMethod allocationMethod) const; bool CheckIsResidentForTesting() 
const; + MaybeError ClearBufferContentsToZero(CommandRecordingContext* commandContext); + private: ~Buffer() override; // Dawn API @@ -61,7 +63,7 @@ namespace dawn_native { namespace d3d12 { D3D12_RESOURCE_BARRIER* barrier, wgpu::BufferUsage newUsage); - MaybeError ClearBuffer(ClearValue clearValue); + MaybeError ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue); ResourceHeapAllocation mResourceAllocation; bool mFixedResourceState = false; diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp index d4b9b51a7e..7ecb984055 100644 --- a/src/dawn_native/d3d12/DeviceD3D12.cpp +++ b/src/dawn_native/d3d12/DeviceD3D12.cpp @@ -336,16 +336,39 @@ namespace dawn_native { namespace d3d12 { DAWN_TRY_ASSIGN(commandRecordingContext, GetPendingCommandContext()); Buffer* dstBuffer = ToBackend(destination); - StagingBuffer* srcBuffer = ToBackend(source); - dstBuffer->TrackUsageAndTransitionNow(commandRecordingContext, wgpu::BufferUsage::CopyDst); - commandRecordingContext->GetCommandList()->CopyBufferRegion( - dstBuffer->GetD3D12Resource(), destinationOffset, srcBuffer->GetResource(), - sourceOffset, size); + // TODO(jiawei.shao@intel.com): use Toggle::LazyClearResourceOnFirstUse when the support of + // buffer lazy initialization is completed. 
+ if (IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse) && !dstBuffer->IsDataInitialized()) { + if (dstBuffer->IsFullBufferRange(destinationOffset, size)) { + dstBuffer->SetIsDataInitialized(); + } else { + DAWN_TRY(dstBuffer->ClearBufferContentsToZero(commandRecordingContext)); + } + } + + CopyFromStagingToBufferImpl(commandRecordingContext, source, sourceOffset, destination, + destinationOffset, size); return {}; } + void Device::CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext, + StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size) { + ASSERT(commandContext != nullptr); + Buffer* dstBuffer = ToBackend(destination); + StagingBuffer* srcBuffer = ToBackend(source); + dstBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst); + + commandContext->GetCommandList()->CopyBufferRegion( + dstBuffer->GetD3D12Resource(), destinationOffset, srcBuffer->GetResource(), + sourceOffset, size); + } + void Device::DeallocateMemory(ResourceHeapAllocation& allocation) { mResourceAllocatorManager->DeallocateMemory(allocation); } diff --git a/src/dawn_native/d3d12/DeviceD3D12.h b/src/dawn_native/d3d12/DeviceD3D12.h index 1ee40927df..983ed67fee 100644 --- a/src/dawn_native/d3d12/DeviceD3D12.h +++ b/src/dawn_native/d3d12/DeviceD3D12.h @@ -91,6 +91,13 @@ namespace dawn_native { namespace d3d12 { uint64_t destinationOffset, uint64_t size) override; + void CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext, + StagingBufferBase* source, + uint64_t sourceOffset, + BufferBase* destination, + uint64_t destinationOffset, + uint64_t size); + ResultOrError AllocateMemory( D3D12_HEAP_TYPE heapType, const D3D12_RESOURCE_DESC& resourceDescriptor, diff --git a/src/dawn_native/metal/BufferMTL.h b/src/dawn_native/metal/BufferMTL.h index 98bab96244..f204e53551 100644 --- a/src/dawn_native/metal/BufferMTL.h +++ b/src/dawn_native/metal/BufferMTL.h @@ -22,6 +22,7 @@ namespace 
dawn_native { namespace metal { + class CommandRecordingContext; class Device; class Buffer : public BufferBase { @@ -29,6 +30,8 @@ namespace dawn_native { namespace metal { static ResultOrError Create(Device* device, const BufferDescriptor* descriptor); id GetMTLBuffer() const; + void ClearBufferContentsToZero(CommandRecordingContext* commandContext); + private: using BufferBase::BufferBase; MaybeError Initialize(); @@ -43,7 +46,7 @@ namespace dawn_native { namespace metal { bool IsMapWritable() const override; MaybeError MapAtCreationImpl(uint8_t** mappedPointer) override; - void ClearBuffer(BufferBase::ClearValue clearValue); + void ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue); id mMtlBuffer = nil; }; diff --git a/src/dawn_native/metal/BufferMTL.mm b/src/dawn_native/metal/BufferMTL.mm index 46724bfd21..858eab600d 100644 --- a/src/dawn_native/metal/BufferMTL.mm +++ b/src/dawn_native/metal/BufferMTL.mm @@ -15,6 +15,7 @@ #include "dawn_native/metal/BufferMTL.h" #include "common/Math.h" +#include "dawn_native/metal/CommandRecordingContext.h" #include "dawn_native/metal/DeviceMTL.h" #include @@ -87,7 +88,9 @@ namespace dawn_native { namespace metal { } if (GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) { - ClearBuffer(BufferBase::ClearValue::NonZero); + CommandRecordingContext* commandContext = + ToBackend(GetDevice())->GetPendingCommandContext(); + ClearBuffer(commandContext, uint8_t(1u)); } return {}; @@ -132,16 +135,21 @@ namespace dawn_native { namespace metal { mMtlBuffer = nil; } - void Buffer::ClearBuffer(BufferBase::ClearValue clearValue) { - // TODO(jiawei.shao@intel.com): support buffer lazy-initialization to 0. 
- ASSERT(clearValue == BufferBase::ClearValue::NonZero); - const uint8_t clearBufferValue = 1; + void Buffer::ClearBufferContentsToZero(CommandRecordingContext* commandContext) { + ASSERT(GetDevice()->IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse)); + ASSERT(!IsDataInitialized()); - Device* device = ToBackend(GetDevice()); - CommandRecordingContext* commandContext = device->GetPendingCommandContext(); + ClearBuffer(commandContext, uint8_t(0u)); + + SetIsDataInitialized(); + GetDevice()->IncrementLazyClearCountForTesting(); + } + + void Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) { + ASSERT(commandContext != nullptr); [commandContext->EnsureBlit() fillBuffer:mMtlBuffer range:NSMakeRange(0, GetSize()) - value:clearBufferValue]; + value:clearValue]; } }} // namespace dawn_native::metal diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm index d8d0feb6a1..40875c551c 100644 --- a/src/dawn_native/metal/DeviceMTL.mm +++ b/src/dawn_native/metal/DeviceMTL.mm @@ -254,6 +254,17 @@ namespace dawn_native { namespace metal { // this function. ASSERT(size != 0); + // TODO(jiawei.shao@intel.com): use Toggle::LazyClearResourceOnFirstUse when the support of + // buffer lazy initialization is completed. 
+ if (IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse) && + !destination->IsDataInitialized()) { + if (destination->IsFullBufferRange(destinationOffset, size)) { + destination->SetIsDataInitialized(); + } else { + ToBackend(destination)->ClearBufferContentsToZero(GetPendingCommandContext()); + } + } + id uploadBuffer = ToBackend(source)->GetBufferHandle(); id buffer = ToBackend(destination)->GetMTLBuffer(); [GetPendingCommandContext()->EnsureBlit() copyFromBuffer:uploadBuffer diff --git a/src/dawn_native/null/DeviceNull.cpp b/src/dawn_native/null/DeviceNull.cpp index fd91585faf..b65434721d 100644 --- a/src/dawn_native/null/DeviceNull.cpp +++ b/src/dawn_native/null/DeviceNull.cpp @@ -197,6 +197,10 @@ namespace dawn_native { namespace null { BufferBase* destination, uint64_t destinationOffset, uint64_t size) { + if (IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse)) { + destination->SetIsDataInitialized(); + } + auto operation = std::make_unique(); operation->staging = source; operation->destination = ToBackend(destination); diff --git a/src/dawn_native/opengl/BufferGL.cpp b/src/dawn_native/opengl/BufferGL.cpp index 7e91a4940c..f9839bd192 100644 --- a/src/dawn_native/opengl/BufferGL.cpp +++ b/src/dawn_native/opengl/BufferGL.cpp @@ -24,7 +24,7 @@ namespace dawn_native { namespace opengl { : BufferBase(device, descriptor) { // TODO(cwallez@chromium.org): Have a global "zero" buffer instead of creating a new 4-byte // buffer? - uint64_t size = std::max(GetSize(), uint64_t(4u)); + uint64_t size = GetAppliedSize(); device->gl.GenBuffers(1, &mBuffer); device->gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer); @@ -45,6 +45,27 @@ namespace dawn_native { namespace opengl { return mBuffer; } + uint64_t Buffer::GetAppliedSize() const { + // TODO(cwallez@chromium.org): Have a global "zero" buffer instead of creating a new 4-byte + // buffer? 
+ return std::max(GetSize(), uint64_t(4u)); + } + + void Buffer::ClearBufferContentsToZero() { + ASSERT(GetDevice()->IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse)); + ASSERT(!IsDataInitialized()); + + const uint64_t size = GetAppliedSize(); + Device* device = ToBackend(GetDevice()); + + const std::vector clearValues(size, 0u); + device->gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer); + device->gl.BufferSubData(GL_ARRAY_BUFFER, 0, size, clearValues.data()); + + SetIsDataInitialized(); + device->IncrementLazyClearCountForTesting(); + } + bool Buffer::IsMapWritable() const { // TODO(enga): All buffers in GL can be mapped. Investigate if mapping them will cause the // driver to migrate it to shared memory. diff --git a/src/dawn_native/opengl/BufferGL.h b/src/dawn_native/opengl/BufferGL.h index 9949829a4b..272af036c6 100644 --- a/src/dawn_native/opengl/BufferGL.h +++ b/src/dawn_native/opengl/BufferGL.h @@ -29,6 +29,8 @@ namespace dawn_native { namespace opengl { GLuint GetHandle() const; + void ClearBufferContentsToZero(); + private: ~Buffer() override; // Dawn API @@ -40,6 +42,7 @@ namespace dawn_native { namespace opengl { bool IsMapWritable() const override; MaybeError MapAtCreationImpl(uint8_t** mappedPointer) override; void* GetMappedPointerImpl() override; + uint64_t GetAppliedSize() const; GLuint mBuffer = 0; void* mMappedData = nullptr; diff --git a/src/dawn_native/opengl/QueueGL.cpp b/src/dawn_native/opengl/QueueGL.cpp index a33cbd0da5..2547f17950 100644 --- a/src/dawn_native/opengl/QueueGL.cpp +++ b/src/dawn_native/opengl/QueueGL.cpp @@ -44,6 +44,17 @@ namespace dawn_native { namespace opengl { size_t size) { const OpenGLFunctions& gl = ToBackend(GetDevice())->gl; + // TODO(jiawei.shao@intel.com): use Toggle::LazyClearResourceOnFirstUse when the support of + // buffer lazy initialization is completed. 
+ if (GetDevice()->IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse) && + !buffer->IsDataInitialized()) { + if (buffer->IsFullBufferRange(bufferOffset, size)) { + buffer->SetIsDataInitialized(); + } else { + ToBackend(buffer)->ClearBufferContentsToZero(); + } + } + gl.BindBuffer(GL_ARRAY_BUFFER, ToBackend(buffer)->GetHandle()); gl.BufferSubData(GL_ARRAY_BUFFER, bufferOffset, size, data); return {}; diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp index 0b9385e939..74e84690e2 100644 --- a/src/dawn_native/vulkan/BufferVk.cpp +++ b/src/dawn_native/vulkan/BufferVk.cpp @@ -166,7 +166,7 @@ namespace dawn_native { namespace vulkan { "vkBindBufferMemory")); if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) { - ClearBuffer(device->GetPendingRecordingContext(), ClearValue::NonZero); + ClearBuffer(device->GetPendingRecordingContext(), 0x01010101); } return {}; @@ -287,20 +287,25 @@ namespace dawn_native { namespace vulkan { } } - void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, ClearValue clearValue) { + void Buffer::ClearBufferContentsToZero(CommandRecordingContext* recordingContext) { + ASSERT(GetDevice()->IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse)); + ASSERT(!IsDataInitialized()); + + ClearBuffer(recordingContext, 0u); + + SetIsDataInitialized(); + GetDevice()->IncrementLazyClearCountForTesting(); + } + + void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue) { ASSERT(recordingContext != nullptr); - // TODO(jiawei.shao@intel.com): support buffer lazy-initialization to 0. - ASSERT(clearValue == BufferBase::ClearValue::NonZero); - - constexpr uint32_t kClearBufferValue = 0x01010101; - TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst); Device* device = ToBackend(GetDevice()); // TODO(jiawei.shao@intel.com): find out why VK_WHOLE_SIZE doesn't work on old Windows Intel // Vulkan drivers. 
device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, 0, GetSize(), - kClearBufferValue); + clearValue); } }} // namespace dawn_native::vulkan diff --git a/src/dawn_native/vulkan/BufferVk.h b/src/dawn_native/vulkan/BufferVk.h index 1c04870376..ace13a90a6 100644 --- a/src/dawn_native/vulkan/BufferVk.h +++ b/src/dawn_native/vulkan/BufferVk.h @@ -45,11 +45,13 @@ namespace dawn_native { namespace vulkan { VkPipelineStageFlags* srcStages, VkPipelineStageFlags* dstStages); + void ClearBufferContentsToZero(CommandRecordingContext* recordingContext); + private: ~Buffer() override; using BufferBase::BufferBase; MaybeError Initialize(); - void ClearBuffer(CommandRecordingContext* recordingContext, ClearValue clearValue); + void ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue); // Dawn API MaybeError MapReadAsyncImpl(uint32_t serial) override; diff --git a/src/dawn_native/vulkan/DeviceVk.cpp b/src/dawn_native/vulkan/DeviceVk.cpp index 2415b49bc9..5a2a905ff0 100644 --- a/src/dawn_native/vulkan/DeviceVk.cpp +++ b/src/dawn_native/vulkan/DeviceVk.cpp @@ -590,6 +590,19 @@ namespace dawn_native { namespace vulkan { // calling this function. ASSERT(size != 0); + CommandRecordingContext* recordingContext = GetPendingRecordingContext(); + + // TODO(jiawei.shao@intel.com): use Toggle::LazyClearResourceOnFirstUse when the support of + // buffer lazy initialization is completed. + if (IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse) && + !destination->IsDataInitialized()) { + if (destination->IsFullBufferRange(destinationOffset, size)) { + destination->SetIsDataInitialized(); + } else { + ToBackend(destination)->ClearBufferContentsToZero(recordingContext); + } + } + // Insert memory barrier to ensure host write operations are made visible before // copying from the staging buffer. However, this barrier can be removed (see note below). 
// @@ -599,7 +612,6 @@ namespace dawn_native { namespace vulkan { // Insert pipeline barrier to ensure correct ordering with previous memory operations on the // buffer. - CommandRecordingContext* recordingContext = GetPendingRecordingContext(); ToBackend(destination)->TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst); VkBufferCopy copy; diff --git a/src/tests/BUILD.gn b/src/tests/BUILD.gn index 201ff08cae..853e1f05fb 100644 --- a/src/tests/BUILD.gn +++ b/src/tests/BUILD.gn @@ -259,6 +259,7 @@ source_set("dawn_end2end_tests_sources") { "end2end/BasicTests.cpp", "end2end/BindGroupTests.cpp", "end2end/BufferTests.cpp", + "end2end/BufferZeroInitTests.cpp", "end2end/ClipSpaceTests.cpp", "end2end/ColorStateTests.cpp", "end2end/CompressedTextureFormatTests.cpp", diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp new file mode 100644 index 0000000000..5392e49364 --- /dev/null +++ b/src/tests/end2end/BufferZeroInitTests.cpp @@ -0,0 +1,98 @@ +// Copyright 2020 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tests/DawnTest.h" + +#include "utils/WGPUHelpers.h" + +#define EXPECT_LAZY_CLEAR(N, statement) \ + do { \ + if (UsesWire()) { \ + statement; \ + } else { \ + size_t lazyClearsBefore = dawn_native::GetLazyClearCountForTesting(device.Get()); \ + statement; \ + size_t lazyClearsAfter = dawn_native::GetLazyClearCountForTesting(device.Get()); \ + EXPECT_EQ(N, lazyClearsAfter - lazyClearsBefore); \ + } \ + } while (0) + +class BufferZeroInitTest : public DawnTest { + public: + wgpu::Buffer CreateBuffer(uint64_t size, wgpu::BufferUsage usage) { + wgpu::BufferDescriptor descriptor; + descriptor.size = size; + descriptor.usage = usage; + return device.CreateBuffer(&descriptor); + } +}; + +// Test that calling writeBuffer to overwrite the entire buffer doesn't need to lazily initialize +// the destination buffer. +TEST_P(BufferZeroInitTest, WriteBufferToEntireBuffer) { + constexpr uint32_t kBufferSize = 8u; + constexpr wgpu::BufferUsage kBufferUsage = + wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; + wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage); + + constexpr std::array<uint32_t, kBufferSize / sizeof(uint32_t)> kExpectedData = { + {0x02020202u, 0x02020202u}}; + EXPECT_LAZY_CLEAR(0u, queue.WriteBuffer(buffer, 0, kExpectedData.data(), kBufferSize)); + + EXPECT_BUFFER_U32_RANGE_EQ(kExpectedData.data(), buffer, 0, kBufferSize / sizeof(uint32_t)); +} + +// Test that calling writeBuffer to overwrite a part of buffer needs to lazily initialize the +// destination buffer. 
+TEST_P(BufferZeroInitTest, WriteBufferToSubBuffer) { + constexpr uint32_t kBufferSize = 8u; + constexpr wgpu::BufferUsage kBufferUsage = + wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; + + constexpr uint32_t kCopyValue = 0x02020202u; + + // offset == 0 + { + wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage); + + constexpr uint32_t kCopyOffset = 0u; + EXPECT_LAZY_CLEAR(1u, + queue.WriteBuffer(buffer, kCopyOffset, &kCopyValue, sizeof(kCopyValue))); + + EXPECT_BUFFER_U32_EQ(kCopyValue, buffer, kCopyOffset); + EXPECT_BUFFER_U32_EQ(0, buffer, kBufferSize - sizeof(kCopyValue)); + } + + // offset > 0 + { + wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage); + + constexpr uint32_t kCopyOffset = 4u; + EXPECT_LAZY_CLEAR(1u, + queue.WriteBuffer(buffer, kCopyOffset, &kCopyValue, sizeof(kCopyValue))); + + EXPECT_BUFFER_U32_EQ(0, buffer, 0); + EXPECT_BUFFER_U32_EQ(kCopyValue, buffer, kCopyOffset); + } +} + +DAWN_INSTANTIATE_TEST(BufferZeroInitTest, + D3D12Backend({"nonzero_clear_resources_on_creation_for_testing", + "lazy_clear_buffer_on_first_use"}), + MetalBackend({"nonzero_clear_resources_on_creation_for_testing", + "lazy_clear_buffer_on_first_use"}), + OpenGLBackend({"nonzero_clear_resources_on_creation_for_testing", + "lazy_clear_buffer_on_first_use"}), + VulkanBackend({"nonzero_clear_resources_on_creation_for_testing", + "lazy_clear_buffer_on_first_use"}));