From 4313dba514e2b05c5941c65b92f489cda3655fb0 Mon Sep 17 00:00:00 2001 From: shrekshao Date: Thu, 2 Jun 2022 15:16:20 +0000 Subject: [PATCH] Pipeline cache D3D12 backend impl Add D3D12 pipeline caching impl: store cachedPSO blob in cached blob. Record root signature ID3DBlob in cache key together with D3D_SHADER_BYTECODE, D3D12_GRAPHICS_PIPELINE_STATE_DESC or D3D12_COMPUTE_PIPELINE_STATE_DESC. Shader caching is not added. Add some pipeline caching negative tests. Bug: dawn:549 Change-Id: Id1cb560b49f1cf495860e2e0bcf92d8d988c5379 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/91180 Auto-Submit: Shrek Shao Kokoro: Kokoro Reviewed-by: Austin Eng Reviewed-by: Loko Kung Commit-Queue: Austin Eng --- .gitattributes | 2 + src/dawn/native/BUILD.gn | 2 + src/dawn/native/BlobCache.cpp | 41 ++- src/dawn/native/BlobCache.h | 30 +- src/dawn/native/CMakeLists.txt | 2 + src/dawn/native/Device.cpp | 17 + src/dawn/native/Device.h | 4 +- src/dawn/native/PipelineCache.cpp | 2 +- src/dawn/native/PipelineCache.h | 7 +- src/dawn/native/d3d12/CacheKeyD3D12.cpp | 139 ++++++++ .../native/d3d12/ComputePipelineD3D12.cpp | 21 ++ src/dawn/native/d3d12/DeviceD3D12.cpp | 3 + src/dawn/native/d3d12/DeviceD3D12.h | 1 + src/dawn/native/d3d12/Forward.h | 2 + src/dawn/native/d3d12/PipelineCacheD3D12.cpp | 33 ++ src/dawn/native/d3d12/PipelineLayoutD3D12.cpp | 12 +- src/dawn/native/d3d12/PipelineLayoutD3D12.h | 4 + src/dawn/native/d3d12/RenderPipelineD3D12.cpp | 19 ++ src/dawn/native/metal/BufferMTL.mm | 2 + src/dawn/native/vulkan/PipelineCacheVk.cpp | 19 +- src/dawn/native/vulkan/PipelineCacheVk.h | 2 +- .../tests/end2end/PipelineCachingTests.cpp | 310 ++++++++++++++++-- 22 files changed, 601 insertions(+), 73 deletions(-) create mode 100644 src/dawn/native/d3d12/CacheKeyD3D12.cpp create mode 100644 src/dawn/native/d3d12/PipelineCacheD3D12.cpp diff --git a/.gitattributes b/.gitattributes index 5333936d7e..a0a0cf37d3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,3 +7,5 @@ *.sh eol=lf *.spvasm eol=lf *.wgsl eol=lf +*.h eol=lf +*.cpp eol=lf \ No newline at end of file diff --git a/src/dawn/native/BUILD.gn b/src/dawn/native/BUILD.gn index b470336f9b..4ac4e302e6 100644 --- a/src/dawn/native/BUILD.gn +++ b/src/dawn/native/BUILD.gn @@ -378,6 +378,7 @@ source_set("sources") { "d3d12/BufferD3D12.h", "d3d12/CPUDescriptorHeapAllocationD3D12.cpp", "d3d12/CPUDescriptorHeapAllocationD3D12.h", + "d3d12/CacheKeyD3D12.cpp", "d3d12/CommandAllocatorManager.cpp", "d3d12/CommandAllocatorManager.h", "d3d12/CommandBufferD3D12.cpp", @@ -406,6 +407,7 @@ source_set("sources") { "d3d12/NativeSwapChainImplD3D12.h", "d3d12/PageableD3D12.cpp", "d3d12/PageableD3D12.h", + "d3d12/PipelineCacheD3D12.cpp", "d3d12/PipelineLayoutD3D12.cpp", "d3d12/PipelineLayoutD3D12.h", "d3d12/PlatformFunctions.cpp", diff --git a/src/dawn/native/BlobCache.cpp b/src/dawn/native/BlobCache.cpp index 878ded69f3..219fb1e988 100644 --- a/src/dawn/native/BlobCache.cpp +++ b/src/dawn/native/BlobCache.cpp @@ -21,39 +21,47 @@ namespace dawn::native { -CachedBlob::CachedBlob(size_t size) { - if (size != 0) { - Reset(size); +// static +CachedBlob CachedBlob::Create(size_t size) { + if (size > 0) { + uint8_t* data = new uint8_t[size]; + return CachedBlob(data, size, [=]() { delete[] data; }); + } else { + return CachedBlob(); } } +CachedBlob::CachedBlob() : mData(nullptr), mSize(0), mDeleter({}) {} + +CachedBlob::CachedBlob(uint8_t* data, size_t size, std::function deleter) + : mData(data), mSize(size), mDeleter(deleter) {} + CachedBlob::CachedBlob(CachedBlob&&) = default; -CachedBlob::~CachedBlob() = default; - CachedBlob& CachedBlob::operator=(CachedBlob&&) = default; +CachedBlob::~CachedBlob() { + if (mDeleter) { + mDeleter(); + } +} + bool CachedBlob::Empty() const { return mSize == 0; } const uint8_t* CachedBlob::Data() const { - return mData.get(); + return mData; } uint8_t* CachedBlob::Data() { - return mData.get(); + return mData; } size_t CachedBlob::Size() const { return mSize; } -void CachedBlob::Reset(size_t size) { - mSize = size; - mData = std::make_unique(size); -} - BlobCache::BlobCache(dawn::platform::CachingInterface* cachingInterface) : mCache(cachingInterface) {} @@ -72,18 +80,19 @@ void BlobCache::Store(const CacheKey& key, const CachedBlob& value) { } CachedBlob BlobCache::LoadInternal(const CacheKey& key) { - CachedBlob result; if (mCache == nullptr) { - return result; + return CachedBlob(); } const size_t expectedSize = mCache->LoadData(key.data(), key.size(), nullptr, 0); if (expectedSize > 0) { - result.Reset(expectedSize); + // Need to put this inside to trigger copy elision. + CachedBlob result = CachedBlob::Create(expectedSize); const size_t actualSize = mCache->LoadData(key.data(), key.size(), result.Data(), expectedSize); ASSERT(expectedSize == actualSize); + return result; } - return result; + return CachedBlob(); } void BlobCache::StoreInternal(const CacheKey& key, size_t valueSize, const void* value) { diff --git a/src/dawn/native/BlobCache.h b/src/dawn/native/BlobCache.h index b1c0a6375a..d491858bf1 100644 --- a/src/dawn/native/BlobCache.h +++ b/src/dawn/native/BlobCache.h @@ -15,9 +15,16 @@ #ifndef SRC_DAWN_NATIVE_BLOBCACHE_H_ #define SRC_DAWN_NATIVE_BLOBCACHE_H_ +#include #include #include +#include "dawn/common/Platform.h" + +#if defined(DAWN_PLATFORM_WINDOWS) +#include "dawn/native/d3d12/d3d12_platform.h" +#endif // DAWN_PLATFORM_WINDOWS + namespace dawn::platform { class CachingInterface; } @@ -30,21 +37,34 @@ class InstanceBase; class CachedBlob { public: - explicit CachedBlob(size_t size = 0); - CachedBlob(CachedBlob&&); - ~CachedBlob(); + static CachedBlob Create(size_t size); +#if defined(DAWN_PLATFORM_WINDOWS) + static CachedBlob Create(Microsoft::WRL::ComPtr blob); +#endif // DAWN_PLATFORM_WINDOWS + + CachedBlob(const CachedBlob&) = delete; + CachedBlob& operator=(const CachedBlob&) = delete; + + CachedBlob(CachedBlob&&); CachedBlob& operator=(CachedBlob&&); + ~CachedBlob(); + bool Empty() const; const uint8_t* Data() const; uint8_t* Data(); size_t Size() const; void Reset(size_t size); + CachedBlob(); + private: - std::unique_ptr mData = nullptr; - size_t mSize = 0; + explicit CachedBlob(uint8_t* data, size_t size, std::function deleter); + + uint8_t* mData; + size_t mSize; + std::function mDeleter; }; // This class should always be thread-safe because it may be called asynchronously. Its purpose diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt index 2a03a182e5..0911e9d1ff 100644 --- a/src/dawn/native/CMakeLists.txt +++ b/src/dawn/native/CMakeLists.txt @@ -247,6 +247,7 @@ if (DAWN_ENABLE_D3D12) "d3d12/CPUDescriptorHeapAllocationD3D12.h" "d3d12/CommandAllocatorManager.cpp" "d3d12/CommandAllocatorManager.h" + "d3d12/CacheKeyD3D12.cpp" "d3d12/CommandBufferD3D12.cpp" "d3d12/CommandBufferD3D12.h" "d3d12/CommandRecordingContext.cpp" @@ -273,6 +274,7 @@ if (DAWN_ENABLE_D3D12) "d3d12/NativeSwapChainImplD3D12.h" "d3d12/PageableD3D12.cpp" "d3d12/PageableD3D12.h" + "d3d12/PipelineCacheD3D12.cpp" "d3d12/PipelineLayoutD3D12.cpp" "d3d12/PipelineLayoutD3D12.h" "d3d12/PlatformFunctions.cpp" diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp index 79b94a4fb3..dba1ed0c30 100644 --- a/src/dawn/native/Device.cpp +++ b/src/dawn/native/Device.cpp @@ -628,6 +628,23 @@ BlobCache* DeviceBase::GetBlobCache() { return nullptr; } +CachedBlob DeviceBase::LoadCachedBlob(const CacheKey& key) { + BlobCache* blobCache = GetBlobCache(); + if (!blobCache) { + return CachedBlob(); + } + return blobCache->Load(key); +} + +void DeviceBase::StoreCachedBlob(const CacheKey& key, const CachedBlob& blob) { + if (!blob.Empty()) { + BlobCache* blobCache = GetBlobCache(); + if (blobCache) { + blobCache->Store(key, blob); + } + } +} + MaybeError DeviceBase::ValidateObject(const ApiObjectBase* object) const { ASSERT(object != nullptr); DAWN_INVALID_IF(object->GetDevice() != this, diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h index fd9ba57317..ee791a4e55 100644 --- a/src/dawn/native/Device.h +++ b/src/dawn/native/Device.h @@ -22,6 +22,7 @@ #include #include +#include "dawn/native/BlobCache.h" #include "dawn/native/CacheKey.h" #include "dawn/native/Commands.h" #include "dawn/native/ComputePipeline.h" @@ -47,7 +48,6 @@ namespace dawn::native { class AsyncTaskManager; class AttachmentState; class AttachmentStateBlueprint; -class BlobCache; class CallbackTaskManager; class DynamicUploader; class ErrorScopeStack; @@ -284,6 +284,8 @@ class DeviceBase : public RefCountedWithExternalCount { MaybeError ValidateIsAlive() const; BlobCache* GetBlobCache(); + CachedBlob LoadCachedBlob(const CacheKey& key); + void StoreCachedBlob(const CacheKey& key, const CachedBlob& blob); virtual ResultOrError> CreateStagingBuffer(size_t size) = 0; virtual MaybeError CopyFromStagingToBuffer(StagingBufferBase* source, diff --git a/src/dawn/native/PipelineCache.cpp b/src/dawn/native/PipelineCache.cpp index 19b8b7bd75..7cb9081a81 100644 --- a/src/dawn/native/PipelineCache.cpp +++ b/src/dawn/native/PipelineCache.cpp @@ -38,7 +38,7 @@ MaybeError PipelineCacheBase::Flush() { } // Try to write the data out to the persistent cache. CachedBlob blob; - DAWN_TRY_ASSIGN(blob, SerializeToBlobImpl()); + DAWN_TRY(SerializeToBlobImpl(&blob)); if (blob.Size() > 0) { // Using a simple heuristic to decide whether to write out the blob right now. May need // smarter tracking when we are dealing with monolithic caches. diff --git a/src/dawn/native/PipelineCache.h b/src/dawn/native/PipelineCache.h index e69386e0d6..9b3011e650 100644 --- a/src/dawn/native/PipelineCache.h +++ b/src/dawn/native/PipelineCache.h @@ -45,9 +45,10 @@ class PipelineCacheBase : public RefCounted { CachedBlob Initialize(); private: - // Backend implementation of serialization of the cache into a blob. Note that an empty - // blob may be returned. - virtual ResultOrError SerializeToBlobImpl() = 0; + // Backend implementation of serialization of the cache into a blob. + // Note: given that no local cached blob should be destructed and copy elision has strict + // requirement cached blob is passed in as a pointer to be assigned. + virtual MaybeError SerializeToBlobImpl(CachedBlob* blob) = 0; // The blob cache is owned by the Adapter and pipeline caches are owned/created by devices // or adapters. Since the device owns a reference to the Instance which owns the Adapter, diff --git a/src/dawn/native/d3d12/CacheKeyD3D12.cpp b/src/dawn/native/d3d12/CacheKeyD3D12.cpp new file mode 100644 index 0000000000..0daf5264dd --- /dev/null +++ b/src/dawn/native/d3d12/CacheKeyD3D12.cpp @@ -0,0 +1,139 @@ +// Copyright 2022 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dawn/common/Assert.h" +#include "dawn/common/Constants.h" +#include "dawn/native/CacheKey.h" +#include "dawn/native/d3d12/d3d12_platform.h" + +namespace dawn::native { + +template <> +void CacheKeySerializer::Serialize( + CacheKey* key, + const D3D12_COMPUTE_PIPELINE_STATE_DESC& t) { + // Don't record pRootSignature as we already record the signature blob in pipline layout. + key->Record(t.CS).Record(t.NodeMask).Record(t.Flags); +} + +template <> +void CacheKeySerializer::Serialize( + CacheKey* key, + const D3D12_RENDER_TARGET_BLEND_DESC& t) { + key->Record(t.BlendEnable, t.LogicOpEnable, t.SrcBlend, t.DestBlend, t.BlendOp, t.SrcBlendAlpha, + t.DestBlendAlpha, t.BlendOpAlpha, t.LogicOp, t.RenderTargetWriteMask); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, const D3D12_BLEND_DESC& t) { + key->Record(t.AlphaToCoverageEnable, t.IndependentBlendEnable).Record(t.RenderTarget); +} + +template <> +void CacheKeySerializer::Serialize( + CacheKey* key, + const D3D12_DEPTH_STENCILOP_DESC& t) { + key->Record(t.StencilFailOp, t.StencilDepthFailOp, t.StencilPassOp, t.StencilFunc); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, + const D3D12_DEPTH_STENCIL_DESC& t) { + key->Record(t.DepthEnable, t.DepthWriteMask, t.DepthFunc, t.StencilEnable, t.StencilReadMask, + t.StencilWriteMask, t.FrontFace, t.BackFace); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, + const D3D12_RASTERIZER_DESC& t) { + key->Record(t.FillMode, t.CullMode, t.FrontCounterClockwise, t.DepthBias, t.DepthBiasClamp, + t.SlopeScaledDepthBias, t.DepthClipEnable, t.MultisampleEnable, + t.AntialiasedLineEnable, t.ForcedSampleCount, t.ConservativeRaster); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, + const D3D12_INPUT_ELEMENT_DESC& t) { + key->Record(t.SemanticName, t.SemanticIndex, t.Format, t.InputSlot, t.AlignedByteOffset, + t.InputSlotClass, t.InstanceDataStepRate); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, + const D3D12_INPUT_LAYOUT_DESC& t) { + key->RecordIterable(t.pInputElementDescs, t.NumElements); +} + +template <> +void CacheKeySerializer::Serialize( + CacheKey* key, + const D3D12_SO_DECLARATION_ENTRY& t) { + key->Record(t.Stream, t.SemanticName, t.SemanticIndex, t.StartComponent, t.ComponentCount, + t.OutputSlot); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, + const D3D12_STREAM_OUTPUT_DESC& t) { + key->RecordIterable(t.pSODeclaration, t.NumEntries) + .RecordIterable(t.pBufferStrides, t.NumStrides) + .Record(t.RasterizedStream); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, const DXGI_SAMPLE_DESC& t) { + key->Record(t.Count, t.Quality); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, + const D3D12_SHADER_BYTECODE& t) { + key->RecordIterable(reinterpret_cast(t.pShaderBytecode), t.BytecodeLength); +} + +template <> +void CacheKeySerializer::Serialize( + CacheKey* key, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC& t) { + // Don't record pRootSignature as we already record the signature blob in pipline layout. + // Don't record CachedPSO as it is in the cached blob. + key->Record(t.VS) + .Record(t.PS) + .Record(t.DS) + .Record(t.HS) + .Record(t.GS) + .Record(t.StreamOutput) + .Record(t.BlendState) + .Record(t.SampleMask) + .Record(t.RasterizerState) + .Record(t.DepthStencilState) + .Record(t.InputLayout) + .Record(t.IBStripCutValue) + .Record(t.PrimitiveTopologyType) + .RecordIterable(t.RTVFormats, t.NumRenderTargets) + .Record(t.DSVFormat) + .Record(t.SampleDesc) + .Record(t.NodeMask) + .Record(t.Flags); +} + +template <> +void CacheKeySerializer::Serialize(CacheKey* key, const ID3DBlob& t) { + // Workaround: GetBufferPointer and GetbufferSize are not marked as const + ID3DBlob* pBlob = const_cast(&t); + key->RecordIterable(reinterpret_cast(pBlob->GetBufferPointer()), + pBlob->GetBufferSize()); +} + +} // namespace dawn::native diff --git a/src/dawn/native/d3d12/ComputePipelineD3D12.cpp b/src/dawn/native/d3d12/ComputePipelineD3D12.cpp index cad0ce5352..2f34338625 100644 --- a/src/dawn/native/d3d12/ComputePipelineD3D12.cpp +++ b/src/dawn/native/d3d12/ComputePipelineD3D12.cpp @@ -55,15 +55,36 @@ MaybeError ComputePipeline::Initialize() { D3D12_COMPUTE_PIPELINE_STATE_DESC d3dDesc = {}; d3dDesc.pRootSignature = ToBackend(GetLayout())->GetRootSignature(); + // TODO(dawn:549): Compile shader everytime before we implement compiled shader cache CompiledShader compiledShader; DAWN_TRY_ASSIGN(compiledShader, module->Compile(computeStage, SingleShaderStage::Compute, ToBackend(GetLayout()), compileFlags)); d3dDesc.CS = compiledShader.GetD3D12ShaderBytecode(); + + mCacheKey.Record(d3dDesc, ToBackend(GetLayout())->GetRootSignatureBlob()); + + // Try to see if we have anything in the blob cache. + CachedBlob blob = device->LoadCachedBlob(GetCacheKey()); + const bool cacheHit = !blob.Empty(); + if (cacheHit) { + // Cache hits, attach cached blob to descriptor. + d3dDesc.CachedPSO.pCachedBlob = blob.Data(); + d3dDesc.CachedPSO.CachedBlobSizeInBytes = blob.Size(); + } + auto* d3d12Device = device->GetD3D12Device(); DAWN_TRY(CheckHRESULT( d3d12Device->CreateComputePipelineState(&d3dDesc, IID_PPV_ARGS(&mPipelineState)), "D3D12 creating pipeline state")); + if (!cacheHit) { + // Cache misses, need to get pipeline cached blob and store. + ComPtr d3dBlob; + DAWN_TRY(CheckHRESULT(GetPipelineState()->GetCachedBlob(&d3dBlob), + "D3D12 compute pipeline state get cached blob")); + device->StoreCachedBlob(GetCacheKey(), CachedBlob::Create(std::move(d3dBlob))); + } + SetLabelImpl(); return {}; diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp index 90ddc3041e..1e00fe1fa7 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.cpp +++ b/src/dawn/native/d3d12/DeviceD3D12.cpp @@ -441,6 +441,9 @@ ResultOrError> Device::CreateTextureViewImpl( const TextureViewDescriptor* descriptor) { return TextureView::Create(texture, descriptor); } +Ref Device::GetOrCreatePipelineCacheImpl(const CacheKey& key) { + UNREACHABLE(); +} void Device::InitializeComputePipelineAsyncImpl(Ref computePipeline, WGPUCreateComputePipelineAsyncCallback callback, void* userdata) { diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h index 99b03e3964..776fce2dfc 100644 --- a/src/dawn/native/d3d12/DeviceD3D12.h +++ b/src/dawn/native/d3d12/DeviceD3D12.h @@ -188,6 +188,7 @@ class Device final : public DeviceBase { const ComputePipelineDescriptor* descriptor) override; Ref CreateUninitializedRenderPipelineImpl( const RenderPipelineDescriptor* descriptor) override; + Ref GetOrCreatePipelineCacheImpl(const CacheKey& key) override; void InitializeComputePipelineAsyncImpl(Ref computePipeline, WGPUCreateComputePipelineAsyncCallback callback, void* userdata) override; diff --git a/src/dawn/native/d3d12/Forward.h b/src/dawn/native/d3d12/Forward.h index ecb10aa614..13f7c81e4a 100644 --- a/src/dawn/native/d3d12/Forward.h +++ b/src/dawn/native/d3d12/Forward.h @@ -27,6 +27,7 @@ class CommandBuffer; class ComputePipeline; class Device; class Heap; +class PipelineCache; class PipelineLayout; class QuerySet; class Queue; @@ -46,6 +47,7 @@ struct D3D12BackendTraits { using CommandBufferType = CommandBuffer; using ComputePipelineType = ComputePipeline; using DeviceType = Device; + using PipelineCacheType = PipelineCache; using PipelineLayoutType = PipelineLayout; using QuerySetType = QuerySet; using QueueType = Queue; diff --git a/src/dawn/native/d3d12/PipelineCacheD3D12.cpp b/src/dawn/native/d3d12/PipelineCacheD3D12.cpp new file mode 100644 index 0000000000..947f92222c --- /dev/null +++ b/src/dawn/native/d3d12/PipelineCacheD3D12.cpp @@ -0,0 +1,33 @@ +// Copyright 2022 The Dawn Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dawn/native/BlobCache.h" +#include "dawn/native/d3d12/d3d12_platform.h" + +namespace dawn::native { + +// static +CachedBlob CachedBlob::Create(ComPtr blob) { + // Detach so the deleter callback can "own" the reference + ID3DBlob* ptr = blob.Detach(); + return CachedBlob(reinterpret_cast(ptr->GetBufferPointer()), ptr->GetBufferSize(), + [=]() { + // Reattach and drop to delete it. + ComPtr b; + b.Attach(ptr); + b = nullptr; + }); +} + +} // namespace dawn::native diff --git a/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp b/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp index 91505c1c6e..636fae233d 100644 --- a/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp +++ b/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp @@ -252,10 +252,9 @@ MaybeError PipelineLayout::Initialize() { rootSignatureDescriptor.pStaticSamplers = nullptr; rootSignatureDescriptor.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - ComPtr signature; ComPtr error; HRESULT hr = device->GetFunctions()->d3d12SerializeRootSignature( - &rootSignatureDescriptor, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error); + &rootSignatureDescriptor, D3D_ROOT_SIGNATURE_VERSION_1, &mRootSignatureBlob, &error); if (DAWN_UNLIKELY(FAILED(hr))) { std::ostringstream messageStream; if (error) { @@ -269,9 +268,10 @@ MaybeError PipelineLayout::Initialize() { DAWN_TRY(CheckHRESULT(hr, messageStream.str().c_str())); } DAWN_TRY(CheckHRESULT(device->GetD3D12Device()->CreateRootSignature( - 0, signature->GetBufferPointer(), signature->GetBufferSize(), - IID_PPV_ARGS(&mRootSignature)), + 0, mRootSignatureBlob->GetBufferPointer(), + mRootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&mRootSignature)), "D3D12 create root signature")); + mCacheKey.Record(mRootSignatureBlob.Get()); return {}; } @@ -310,6 +310,10 @@ ID3D12RootSignature* PipelineLayout::GetRootSignature() const { return mRootSignature.Get(); } +ID3DBlob* PipelineLayout::GetRootSignatureBlob() const { + return mRootSignatureBlob.Get(); +} + const PipelineLayout::DynamicStorageBufferLengthInfo& PipelineLayout::GetDynamicStorageBufferLengthInfo() const { return mDynamicStorageBufferLengthInfo; diff --git a/src/dawn/native/d3d12/PipelineLayoutD3D12.h b/src/dawn/native/d3d12/PipelineLayoutD3D12.h index 5e5360e64b..204741265c 100644 --- a/src/dawn/native/d3d12/PipelineLayoutD3D12.h +++ b/src/dawn/native/d3d12/PipelineLayoutD3D12.h @@ -52,6 +52,8 @@ class PipelineLayout final : public PipelineLayoutBase { ID3D12RootSignature* GetRootSignature() const; + ID3DBlob* GetRootSignatureBlob() const; + ID3D12CommandSignature* GetDispatchIndirectCommandSignatureWithNumWorkgroups(); ID3D12CommandSignature* GetDrawIndirectCommandSignatureWithInstanceVertexOffsets(); @@ -98,6 +100,8 @@ class PipelineLayout final : public PipelineLayoutBase { uint32_t mNumWorkgroupsParameterIndex; uint32_t mDynamicStorageBufferLengthsParameterIndex; ComPtr mRootSignature; + // Store the root signature blob to put in pipeline cachekey + ComPtr mRootSignatureBlob; ComPtr mDispatchIndirectCommandSignatureWithNumWorkgroups; ComPtr mDrawIndirectCommandSignatureWithInstanceVertexOffsets; ComPtr mDrawIndexedIndirectCommandSignatureWithInstanceVertexOffsets; diff --git a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp index 8980b30a81..ce7b347e3a 100644 --- a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp +++ b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp @@ -429,10 +429,29 @@ MaybeError RenderPipeline::Initialize() { mD3d12PrimitiveTopology = D3D12PrimitiveTopology(GetPrimitiveTopology()); + mCacheKey.Record(descriptorD3D12, *layout->GetRootSignatureBlob()); + + // Try to see if we have anything in the blob cache. + CachedBlob blob = device->LoadCachedBlob(GetCacheKey()); + const bool cacheHit = !blob.Empty(); + if (cacheHit) { + // Cache hits, attach cached blob to descriptor. + descriptorD3D12.CachedPSO.pCachedBlob = blob.Data(); + descriptorD3D12.CachedPSO.CachedBlobSizeInBytes = blob.Size(); + } + DAWN_TRY(CheckHRESULT(device->GetD3D12Device()->CreateGraphicsPipelineState( &descriptorD3D12, IID_PPV_ARGS(&mPipelineState)), "D3D12 create graphics pipeline state")); + if (!cacheHit) { + // Cache misses, need to get pipeline cached blob and store. + ComPtr d3dBlob; + DAWN_TRY(CheckHRESULT(GetPipelineState()->GetCachedBlob(&d3dBlob), + "D3D12 render pipeline state get cached blob")); + device->StoreCachedBlob(GetCacheKey(), CachedBlob::Create(std::move(d3dBlob))); + } + SetLabelImpl(); return {}; diff --git a/src/dawn/native/metal/BufferMTL.mm b/src/dawn/native/metal/BufferMTL.mm index 42f8b7ee7e..e49fc2e242 100644 --- a/src/dawn/native/metal/BufferMTL.mm +++ b/src/dawn/native/metal/BufferMTL.mm @@ -53,6 +53,8 @@ uint64_t Buffer::QueryMaxBufferLength(id mtlDevice) { if (@available(macOS 10.11, *)) { return 256 * 1024 * 1024; } + // 256Mb for other platform if any. (Need to have a return for all branches). + return 256 * 1024 * 1024; #else // macOS / tvOS: 256Mb limit in versions without [MTLDevice maxBufferLength] return 256 * 1024 * 1024; diff --git a/src/dawn/native/vulkan/PipelineCacheVk.cpp b/src/dawn/native/vulkan/PipelineCacheVk.cpp index 60617ec17e..2bb1039466 100644 --- a/src/dawn/native/vulkan/PipelineCacheVk.cpp +++ b/src/dawn/native/vulkan/PipelineCacheVk.cpp @@ -50,10 +50,10 @@ VkPipelineCache PipelineCache::GetHandle() const { return mHandle; } -ResultOrError PipelineCache::SerializeToBlobImpl() { - CachedBlob emptyBlob; +MaybeError PipelineCache::SerializeToBlobImpl(CachedBlob* blob) { if (mHandle == VK_NULL_HANDLE) { - return emptyBlob; + // Pipeline cache isn't created successfully + return {}; } size_t bufferSize; @@ -61,12 +61,13 @@ ResultOrError PipelineCache::SerializeToBlobImpl() { DAWN_TRY(CheckVkSuccess( device->fn.GetPipelineCacheData(device->GetVkDevice(), mHandle, &bufferSize, nullptr), "GetPipelineCacheData")); - - CachedBlob blob(bufferSize); - DAWN_TRY(CheckVkSuccess( - device->fn.GetPipelineCacheData(device->GetVkDevice(), mHandle, &bufferSize, blob.Data()), - "GetPipelineCacheData")); - return blob; + if (bufferSize > 0) { + *blob = CachedBlob::Create(bufferSize); + DAWN_TRY(CheckVkSuccess(device->fn.GetPipelineCacheData(device->GetVkDevice(), mHandle, + &bufferSize, blob->Data()), + "GetPipelineCacheData")); + } + return {}; } void PipelineCache::Initialize() { diff --git a/src/dawn/native/vulkan/PipelineCacheVk.h b/src/dawn/native/vulkan/PipelineCacheVk.h index 7e56175a96..85a889171e 100644 --- a/src/dawn/native/vulkan/PipelineCacheVk.h +++ b/src/dawn/native/vulkan/PipelineCacheVk.h @@ -38,7 +38,7 @@ class PipelineCache final : public PipelineCacheBase { ~PipelineCache() override; void Initialize(); - ResultOrError SerializeToBlobImpl() override; + MaybeError SerializeToBlobImpl(CachedBlob* blob) override; DeviceBase* mDevice; VkPipelineCache mHandle = VK_NULL_HANDLE; diff --git a/src/dawn/tests/end2end/PipelineCachingTests.cpp b/src/dawn/tests/end2end/PipelineCachingTests.cpp index bdf6a1e9ed..94d30c52d9 100644 --- a/src/dawn/tests/end2end/PipelineCachingTests.cpp +++ b/src/dawn/tests/end2end/PipelineCachingTests.cpp @@ -26,18 +26,49 @@ using ::testing::NiceMock; // TODO(dawn:549) Add some sort of pipeline descriptor repository to test more caching. -static constexpr std::string_view kComputeShader = R"( +static constexpr std::string_view kComputeShaderDefault = R"( @stage(compute) @workgroup_size(1) fn main() {} )"; -static constexpr std::string_view kVertexShader = R"( +static constexpr std::string_view kComputeShaderMultipleEntryPoints = R"( + @stage(compute) @workgroup_size(16) fn main() {} + @stage(compute) @workgroup_size(64) fn main2() {} + )"; + +static constexpr std::string_view kVertexShaderDefault = R"( @stage(vertex) fn main() -> @builtin(position) vec4 { return vec4(0.0, 0.0, 0.0, 0.0); } )"; -static constexpr std::string_view kFragmentShader = R"( - @stage(fragment) fn main() {} +static constexpr std::string_view kVertexShaderMultipleEntryPoints = R"( + @stage(vertex) fn main() -> @builtin(position) vec4 { + return vec4(1.0, 0.0, 0.0, 1.0); + } + + @stage(vertex) fn main2() -> @builtin(position) vec4 { + return vec4(0.5, 0.5, 0.5, 1.0); + } + )"; + +static constexpr std::string_view kFragmentShaderDefault = R"( + @stage(fragment) fn main() -> @location(0) vec4 { + return vec4(0.1, 0.2, 0.3, 0.4); + } + )"; + +static constexpr std::string_view kFragmentShaderMultipleOutput = R"( + struct FragmentOut { + @location(0) fragColor0 : vec4, + @location(1) fragColor1 : vec4, + } + + @stage(fragment) fn main() -> FragmentOut { + var output : FragmentOut; + output.fragColor0 = vec4(0.1, 0.2, 0.3, 0.4); + output.fragColor1 = vec4(0.5, 0.6, 0.7, 0.8); + return output; + } )"; class PipelineCachingTests : public DawnTest { @@ -62,7 +93,7 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineNoCache) { { wgpu::Device device = CreateDevice(); wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); } @@ -72,7 +103,7 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineNoCache) { { wgpu::Device device = CreateDevice(); wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); } @@ -82,7 +113,7 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineNoCache) { // Tests that pipeline creation on the same device uses frontend cache when possible. TEST_P(SinglePipelineCachingTests, ComputePipelineFrontedCache) { wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; // First creation should create a cache entry. @@ -106,7 +137,7 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCache) { { wgpu::Device device = CreateDevice(); wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); } @@ -116,13 +147,74 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCache) { { wgpu::Device device = CreateDevice(); wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateComputePipeline(&desc)); } EXPECT_EQ(mMockCache.GetNumEntries(), 1u); } +// Tests that pipeline creation hits the cache when using the same pipeline but with explicit +// layout. +TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheExplictLayout) { + // First time should create and write out to the cache. + { + wgpu::Device device = CreateDevice(); + wgpu::ComputePipelineDescriptor desc; + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); + desc.compute.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), 1u); + + // Cache should hit: use the same pipeline but with explicit pipeline layout. + { + wgpu::Device device = CreateDevice(); + wgpu::ComputePipelineDescriptor desc; + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); + desc.compute.entryPoint = "main"; + desc.layout = utils::MakeBasicPipelineLayout(device, {}); + EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateComputePipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), 1u); +} + +// Tests that pipeline creation wouldn't hit the cache if the pipelines are not exactly the same. +TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheShaderNegativeCases) { + size_t numCacheEntries = 0u; + // First time should create and write out to the cache. + { + wgpu::Device device = CreateDevice(); + wgpu::ComputePipelineDescriptor desc; + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); + desc.compute.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); + + // Cache should not hit: different shader module. + { + wgpu::Device device = CreateDevice(); + wgpu::ComputePipelineDescriptor desc; + desc.compute.module = + utils::CreateShaderModule(device, kComputeShaderMultipleEntryPoints.data()); + desc.compute.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); + + // Cache should not hit: same shader module but different shader entry point. + { + wgpu::Device device = CreateDevice(); + wgpu::ComputePipelineDescriptor desc; + desc.compute.module = + utils::CreateShaderModule(device, kComputeShaderMultipleEntryPoints.data()); + desc.compute.entryPoint = "main2"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); +} + // Tests that pipeline creation does not hits the cache when it is enabled but we use different // isolation keys. TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheIsolationKey) { @@ -130,7 +222,7 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheIsolationKey) { { wgpu::Device device = CreateDevice("isolation key 1"); wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); } @@ -140,7 +232,7 @@ TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheIsolationKey) { { wgpu::Device device = CreateDevice("isolation key 2"); wgpu::ComputePipelineDescriptor desc; - desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data()); + desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data()); desc.compute.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc)); } @@ -158,10 +250,9 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineNoCache) { { wgpu::Device device = CreateDevice(); utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); } @@ -171,10 +262,9 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineNoCache) { { wgpu::Device device = CreateDevice(); utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); } @@ -184,10 +274,9 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineNoCache) { // Tests that pipeline creation on the same device uses frontend cache when possible. TEST_P(SinglePipelineCachingTests, RenderPipelineFrontedCache) { utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; // First creation should create a cache entry. @@ -211,10 +300,9 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCache) { { wgpu::Device device = CreateDevice(); utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); } @@ -224,16 +312,172 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCache) { { wgpu::Device device = CreateDevice(); utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateRenderPipeline(&desc)); } EXPECT_EQ(mMockCache.GetNumEntries(), 1u); } +// Tests that pipeline creation hits the cache when using the same pipeline but with explicit +// layout. +TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheExplictLayout) { + // First time should create and write out to the cache. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), 1u); + + // Cache should hit: use the same pipeline but with explicit pipeline layout. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + desc.layout = utils::MakeBasicPipelineLayout(device, {}); + EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), 1u); +} + +// Tests that pipeline creation wouldn't hit the cache if the pipelines have different state set in +// the descriptor. +TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheDescriptorNegativeCases) { + // First time should create and write out to the cache. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), 1u); + + // Cache should not hit: different pipeline descriptor state. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.primitive.topology = wgpu::PrimitiveTopology::PointList; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), 2u); +} + +// Tests that pipeline creation wouldn't hit the cache if the pipelines are not exactly the same in +// terms of shader. +TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheShaderNegativeCases) { + size_t numCacheEntries = 0u; + // First time should create and write out to the cache. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); + + // Cache should not hit: different shader module. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.vertex.module = + utils::CreateShaderModule(device, kVertexShaderMultipleEntryPoints.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); + + // Cache should not hit: same shader module but different shader entry point. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.vertex.module = + utils::CreateShaderModule(device, kVertexShaderMultipleEntryPoints.data()); + desc.vertex.entryPoint = "main2"; + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); +} + +// Tests that pipeline creation wouldn't hit the cache if the pipelines are not exactly the same +// (fragment color targets differences). +TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheNegativeCasesFragmentColorTargets) { + size_t numCacheEntries = 0u; + // First time should create and write out to the cache. + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.cFragment.targetCount = 2; + desc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm; + desc.cTargets[1].writeMask = wgpu::ColorWriteMask::None; + desc.cTargets[1].format = wgpu::TextureFormat::RGBA8Unorm; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = + utils::CreateShaderModule(device, kFragmentShaderMultipleOutput.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); + + // Cache should not hit: different fragment color target state (sparse). + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.cFragment.targetCount = 2; + desc.cTargets[0].format = wgpu::TextureFormat::Undefined; + desc.cTargets[1].writeMask = wgpu::ColorWriteMask::None; + desc.cTargets[1].format = wgpu::TextureFormat::RGBA8Unorm; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = + utils::CreateShaderModule(device, kFragmentShaderMultipleOutput.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); + + // Cache should not hit: different fragment color target state (trailing empty). + { + wgpu::Device device = CreateDevice(); + utils::ComboRenderPipelineDescriptor desc; + desc.cFragment.targetCount = 2; + desc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm; + desc.cTargets[1].writeMask = wgpu::ColorWriteMask::None; + desc.cTargets[1].format = wgpu::TextureFormat::Undefined; + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); + desc.vertex.entryPoint = "main"; + desc.cFragment.module = + utils::CreateShaderModule(device, kFragmentShaderMultipleOutput.data()); + desc.cFragment.entryPoint = "main"; + EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); + } + EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries); +} + // Tests that pipeline creation does not hits the cache when it is enabled but we use different // isolation keys. TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheIsolationKey) { @@ -241,10 +485,9 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheIsolationKey) { { wgpu::Device device = CreateDevice("isolation key 1"); utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); } @@ -254,16 +497,17 @@ TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheIsolationKey) { { wgpu::Device device = CreateDevice("isolation key 2"); utils::ComboRenderPipelineDescriptor desc; - desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None; - desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data()); + desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data()); desc.vertex.entryPoint = "main"; - desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data()); + desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data()); desc.cFragment.entryPoint = "main"; EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc)); } EXPECT_EQ(mMockCache.GetNumEntries(), 2u); } -DAWN_INSTANTIATE_TEST(SinglePipelineCachingTests, VulkanBackend({"enable_blob_cache"})); +DAWN_INSTANTIATE_TEST(SinglePipelineCachingTests, + VulkanBackend({"enable_blob_cache"}), + D3D12Backend({"enable_blob_cache"})); } // namespace