diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp index 8d06963136..55fc22778a 100644 --- a/src/dawn_native/CommandEncoder.cpp +++ b/src/dawn_native/CommandEncoder.cpp @@ -467,16 +467,7 @@ namespace dawn_native { } // Set the query at queryIndex to available for resolving in query set. - querySet->SetQueryAvailability(queryIndex, 1); - - // Gets the iterator for that querySet or create a new vector of bool set to false - // if the querySet wasn't registered. - auto it = mQueryAvailabilityMap.emplace(querySet, querySet->GetQueryCount()).first; - it->second[queryIndex] = 1; - } - - const QueryAvailabilityMap& CommandEncoder::GetQueryAvailabilityMap() const { - return mQueryAvailabilityMap; + querySet->SetQueryAvailability(queryIndex, true); } // Implementation of the API's command recording methods diff --git a/src/dawn_native/CommandEncoder.h b/src/dawn_native/CommandEncoder.h index c422e9830e..a8bf6a0d28 100644 --- a/src/dawn_native/CommandEncoder.h +++ b/src/dawn_native/CommandEncoder.h @@ -22,13 +22,10 @@ #include "dawn_native/ObjectBase.h" #include "dawn_native/PassResourceUsage.h" -#include #include namespace dawn_native { - using QueryAvailabilityMap = std::map>; - class CommandEncoder final : public ObjectBase { public: CommandEncoder(DeviceBase* device, const CommandEncoderDescriptor* descriptor); @@ -38,7 +35,6 @@ namespace dawn_native { void TrackUsedQuerySet(QuerySetBase* querySet); void TrackQueryAvailability(QuerySetBase* querySet, uint32_t queryIndex); - const QueryAvailabilityMap& GetQueryAvailabilityMap() const; // Dawn API ComputePassEncoder* APIBeginComputePass(const ComputePassDescriptor* descriptor); @@ -84,7 +80,6 @@ namespace dawn_native { std::set mTopLevelBuffers; std::set mTopLevelTextures; std::set mUsedQuerySets; - QueryAvailabilityMap mQueryAvailabilityMap; uint64_t mDebugGroupStackSize = 0; }; diff --git a/src/dawn_native/PassResourceUsage.h b/src/dawn_native/PassResourceUsage.h index 
f0aa43a059..772f8c2e34 100644 --- a/src/dawn_native/PassResourceUsage.h +++ b/src/dawn_native/PassResourceUsage.h @@ -42,6 +42,9 @@ namespace dawn_native { std::vector textures; std::vector textureUsages; + + std::vector querySets; + std::vector> queryAvailabilities; }; using PerPassUsages = std::vector; diff --git a/src/dawn_native/PassResourceUsageTracker.cpp b/src/dawn_native/PassResourceUsageTracker.cpp index 5816280a2a..81d72edf40 100644 --- a/src/dawn_native/PassResourceUsageTracker.cpp +++ b/src/dawn_native/PassResourceUsageTracker.cpp @@ -17,6 +17,7 @@ #include "dawn_native/Buffer.h" #include "dawn_native/EnumMaskIterator.h" #include "dawn_native/Format.h" +#include "dawn_native/QuerySet.h" #include "dawn_native/Texture.h" #include @@ -65,6 +66,23 @@ namespace dawn_native { const wgpu::TextureUsage& addedUsage) { *storedUsage |= addedUsage; }); } + void PassResourceUsageTracker::TrackQueryAvailability(QuerySetBase* querySet, + uint32_t queryIndex) { + // The query availability only need to be tracked again on render pass for checking query + // overwrite on render pass and resetting query set on Vulkan backend. + DAWN_ASSERT(mPassType == PassType::Render); + DAWN_ASSERT(querySet != nullptr); + + // Gets the iterator for that querySet or create a new vector of bool set to false + // if the querySet wasn't registered. + auto it = mQueryAvailabilities.emplace(querySet, querySet->GetQueryCount()).first; + it->second[queryIndex] = true; + } + + const QueryAvailabilityMap& PassResourceUsageTracker::GetQueryAvailabilityMap() const { + return mQueryAvailabilities; + } + // Returns the per-pass usage for use by backends for APIs with explicit barriers. 
PassResourceUsage PassResourceUsageTracker::AcquireResourceUsage() { PassResourceUsage result; @@ -73,6 +91,8 @@ namespace dawn_native { result.bufferUsages.reserve(mBufferUsages.size()); result.textures.reserve(mTextureUsages.size()); result.textureUsages.reserve(mTextureUsages.size()); + result.querySets.reserve(mQueryAvailabilities.size()); + result.queryAvailabilities.reserve(mQueryAvailabilities.size()); for (auto& it : mBufferUsages) { result.buffers.push_back(it.first); @@ -84,8 +104,14 @@ namespace dawn_native { result.textureUsages.push_back(std::move(it.second)); } + for (auto& it : mQueryAvailabilities) { + result.querySets.push_back(it.first); + result.queryAvailabilities.push_back(std::move(it.second)); + } + mBufferUsages.clear(); mTextureUsages.clear(); + mQueryAvailabilities.clear(); return result; } diff --git a/src/dawn_native/PassResourceUsageTracker.h b/src/dawn_native/PassResourceUsageTracker.h index cfcaa225e1..cd54f8c346 100644 --- a/src/dawn_native/PassResourceUsageTracker.h +++ b/src/dawn_native/PassResourceUsageTracker.h @@ -24,8 +24,11 @@ namespace dawn_native { class BufferBase; + class QuerySetBase; class TextureBase; + using QueryAvailabilityMap = std::map>; + // Helper class to encapsulate the logic of tracking per-resource usage during the // validation of command buffer passes. It is used both to know if there are validation // errors, and to get a list of resources used per pass for backends that need the @@ -36,6 +39,8 @@ namespace dawn_native { void BufferUsedAs(BufferBase* buffer, wgpu::BufferUsage usage); void TextureViewUsedAs(TextureViewBase* texture, wgpu::TextureUsage usage); void AddTextureUsage(TextureBase* texture, const PassTextureUsage& textureUsage); + void TrackQueryAvailability(QuerySetBase* querySet, uint32_t queryIndex); + const QueryAvailabilityMap& GetQueryAvailabilityMap() const; // Returns the per-pass usage for use by backends for APIs with explicit barriers. 
PassResourceUsage AcquireResourceUsage(); @@ -44,6 +49,10 @@ namespace dawn_native { PassType mPassType; std::map mBufferUsages; std::map mTextureUsages; + // Dedicated to track the availability of the queries used on render pass. The same query + // cannot be written twice in same render pass, so each render pass also needs to have its + // own query availability map for validation. + QueryAvailabilityMap mQueryAvailabilities; }; } // namespace dawn_native diff --git a/src/dawn_native/RenderPassEncoder.cpp b/src/dawn_native/RenderPassEncoder.cpp index 3c9e527cf9..4c48bb43e8 100644 --- a/src/dawn_native/RenderPassEncoder.cpp +++ b/src/dawn_native/RenderPassEncoder.cpp @@ -80,19 +80,14 @@ namespace dawn_native { void RenderPassEncoder::TrackQueryAvailability(QuerySetBase* querySet, uint32_t queryIndex) { DAWN_ASSERT(querySet != nullptr); - // Gets the iterator for that querySet or create a new vector of bool set to false - // if the querySet wasn't registered. - auto it = mQueryAvailabilityMap.emplace(querySet, querySet->GetQueryCount()).first; - it->second[queryIndex] = 1; + // Track the query availability with true on render pass for rewrite validation and query + // reset on render pass on Vulkan. + mUsageTracker.TrackQueryAvailability(querySet, queryIndex); // Track it again on command encoder for zero-initializing when resolving unused queries. 
mCommandEncoder->TrackQueryAvailability(querySet, queryIndex); } - const QueryAvailabilityMap& RenderPassEncoder::GetQueryAvailabilityMap() const { - return mQueryAvailabilityMap; - } - void RenderPassEncoder::APIEndPass() { if (mEncodingContext->TryEncode(this, [&](CommandAllocator* allocator) -> MaybeError { if (IsValidationEnabled()) { @@ -254,9 +249,7 @@ namespace dawn_native { } DAWN_TRY(ValidateQueryIndexOverwrite(mOcclusionQuerySet.Get(), queryIndex, - GetQueryAvailabilityMap())); - - mCommandEncoder->TrackUsedQuerySet(mOcclusionQuerySet.Get()); + mUsageTracker.GetQueryAvailabilityMap())); } // Record the current query index for endOcclusionQuery. @@ -283,6 +276,7 @@ namespace dawn_native { } TrackQueryAvailability(mOcclusionQuerySet.Get(), mCurrentOcclusionQueryIndex); + mOcclusionQueryActive = false; EndOcclusionQueryCmd* cmd = @@ -299,8 +293,8 @@ namespace dawn_native { if (IsValidationEnabled()) { DAWN_TRY(GetDevice()->ValidateObject(querySet)); DAWN_TRY(ValidateTimestampQuery(querySet, queryIndex)); - DAWN_TRY( - ValidateQueryIndexOverwrite(querySet, queryIndex, GetQueryAvailabilityMap())); + DAWN_TRY(ValidateQueryIndexOverwrite(querySet, queryIndex, + mUsageTracker.GetQueryAvailabilityMap())); } TrackQueryAvailability(querySet, queryIndex); diff --git a/src/dawn_native/RenderPassEncoder.h b/src/dawn_native/RenderPassEncoder.h index 08a24f4215..a8bf460548 100644 --- a/src/dawn_native/RenderPassEncoder.h +++ b/src/dawn_native/RenderPassEncoder.h @@ -37,9 +37,6 @@ namespace dawn_native { CommandEncoder* commandEncoder, EncodingContext* encodingContext); - void TrackQueryAvailability(QuerySetBase* querySet, uint32_t queryIndex); - const QueryAvailabilityMap& GetQueryAvailabilityMap() const; - void APIEndPass(); void APISetStencilReference(uint32_t reference); @@ -65,6 +62,8 @@ namespace dawn_native { ErrorTag errorTag); private: + void TrackQueryAvailability(QuerySetBase* querySet, uint32_t queryIndex); + // For render and compute passes, the encoding 
context is borrowed from the command encoder. // Keep a reference to the encoder to make sure the context isn't freed. Ref mCommandEncoder; @@ -72,11 +71,6 @@ namespace dawn_native { uint32_t mRenderTargetWidth; uint32_t mRenderTargetHeight; - // This map is to indicate the availability of the queries used in render pass. The same - // query cannot be written twice in same render pass, so each render pass also need to have - // its own query availability map. - QueryAvailabilityMap mQueryAvailabilityMap; - // The resources for occlusion query Ref mOcclusionQuerySet; uint32_t mCurrentOcclusionQueryIndex = 0; diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp index 0986db8fc7..47fac441a5 100644 --- a/src/dawn_native/vulkan/CommandBufferVk.cpp +++ b/src/dawn_native/vulkan/CommandBufferVk.cpp @@ -369,13 +369,36 @@ namespace dawn_native { namespace vulkan { return {}; } - void ResetUsedQuerySets(Device* device, - VkCommandBuffer commands, - const std::set& usedQuerySets) { - // TODO(hao.x.li@intel.com): Reset the queries based on the used indexes. - for (QuerySetBase* querySet : usedQuerySets) { - device->fn.CmdResetQueryPool(commands, ToBackend(querySet)->GetHandle(), 0, - querySet->GetQueryCount()); + // Reset the query sets used on render pass because the reset command must be called outside + // render pass. + void ResetUsedQuerySetsOnRenderPass(Device* device, + VkCommandBuffer commands, + QuerySetBase* querySet, + const std::vector& availability) { + ASSERT(availability.size() == querySet->GetQueryAvailability().size()); + + auto currentIt = availability.begin(); + auto lastIt = availability.end(); + // Traverse the used queries which availability are true. 
+ while (currentIt != lastIt) { + auto firstTrueIt = std::find(currentIt, lastIt, true); + // No used queries need to be reset + if (firstTrueIt == lastIt) { + break; + } + + auto nextFalseIt = std::find(firstTrueIt, lastIt, false); + + uint32_t queryIndex = std::distance(availability.begin(), firstTrueIt); + uint32_t queryCount = std::distance(firstTrueIt, nextFalseIt); + + // Reset the queries between firstTrueIt and nextFalseIt (which is at most + // lastIt) + device->fn.CmdResetQueryPool(commands, ToBackend(querySet)->GetHandle(), queryIndex, + queryCount); + + // Set current iterator to next false + currentIt = nextFalseIt; } } @@ -425,7 +448,7 @@ namespace dawn_native { namespace vulkan { destination->GetHandle(), resolveDestinationOffset, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - // Set current interator to next false + // Set current iterator to next false currentIt = nextFalseIt; } } @@ -504,7 +527,8 @@ namespace dawn_native { namespace vulkan { Device* device = ToBackend(GetDevice()); VkCommandBuffer commands = recordingContext->commandBuffer; - // Records the necessary barriers for the resource usage pre-computed by the frontend + // Records the necessary barriers for the resource usage pre-computed by the frontend. + // And resets the used query sets which are rewritten on the render pass. 
auto PrepareResourcesForRenderPass = [](Device* device, CommandRecordingContext* recordingContext, const PassResourceUsage& usages) { @@ -546,6 +570,13 @@ namespace dawn_native { namespace vulkan { bufferBarriers.data(), imageBarriers.size(), imageBarriers.data()); } + + // Reset all query set used on current render pass together before beginning render pass + // because the reset command must be called outside render pass + for (size_t i = 0; i < usages.querySets.size(); ++i) { + ResetUsedQuerySetsOnRenderPass(device, recordingContext->commandBuffer, + usages.querySets[i], usages.queryAvailabilities[i]); + } }; // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking @@ -568,9 +599,6 @@ namespace dawn_native { namespace vulkan { const std::vector& passResourceUsages = GetResourceUsages().perPass; size_t nextPassNumber = 0; - // QuerySet must be reset between uses. - ResetUsedQuerySets(device, commands, GetResourceUsages().usedQuerySets); - Command type; while (mCommands.NextCommandId(&type)) { switch (type) { @@ -772,10 +800,15 @@ namespace dawn_native { namespace vulkan { QuerySet* querySet = ToBackend(cmd->querySet.Get()); Buffer* destination = ToBackend(cmd->destination.Get()); + // TODO(hao.x.li@intel.com): Clear the resolve region of the buffer to 0 if at + // least one query is unavailable for the resolving and the resolve buffer has + // been initialized or fully used. 
+ destination->EnsureDataInitializedAsDestination( recordingContext, cmd->destinationOffset, cmd->queryCount * sizeof(uint64_t)); - destination->TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst); + destination->TransitionUsageNow(recordingContext, + wgpu::BufferUsage::QueryResolve); RecordResolveQuerySetCmd(commands, device, querySet, cmd->firstQuery, cmd->queryCount, destination, cmd->destinationOffset); @@ -786,6 +819,10 @@ namespace dawn_native { namespace vulkan { case Command::WriteTimestamp: { WriteTimestampCmd* cmd = mCommands.NextCommand(); + // The query must be reset between uses. + device->fn.CmdResetQueryPool(commands, ToBackend(cmd->querySet)->GetHandle(), + cmd->queryIndex, 1); + RecordWriteTimestampCmd(recordingContext, device, cmd); break; } @@ -960,6 +997,10 @@ namespace dawn_native { namespace vulkan { case Command::WriteTimestamp: { WriteTimestampCmd* cmd = mCommands.NextCommand(); + // The query must be reset between uses. + device->fn.CmdResetQueryPool(commands, ToBackend(cmd->querySet)->GetHandle(), + cmd->queryIndex, 1); + RecordWriteTimestampCmd(recordingContext, device, cmd); break; } diff --git a/src/tests/end2end/QueryTests.cpp b/src/tests/end2end/QueryTests.cpp index 1f6f37aa39..8537c539b6 100644 --- a/src/tests/end2end/QueryTests.cpp +++ b/src/tests/end2end/QueryTests.cpp @@ -34,7 +34,8 @@ class QueryTests : public DawnTest { }; // Clear the content of the result buffer into 0xFFFFFFFF. 
-constexpr static uint64_t kSentinelValue = ~uint64_t(0); +constexpr static uint64_t kSentinelValue = ~uint64_t(0u); +constexpr static uint64_t kZero = 0u; class OcclusionExpectation : public detail::Expectation { public: @@ -78,6 +79,7 @@ class OcclusionQueryTests : public QueryTests { void SetUp() override { DawnTest::SetUp(); + // Create basic render pipeline vsModule = utils::CreateShaderModule(device, R"( [[builtin(vertex_index)]] var VertexIndex : u32; [[builtin(position)]] var Position : vec4; @@ -94,6 +96,12 @@ class OcclusionQueryTests : public QueryTests { [[stage(fragment)]] fn main() -> void { fragColor = vec4(0.0, 1.0, 0.0, 1.0); })"); + + utils::ComboRenderPipelineDescriptor2 descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + + pipeline = device.CreateRenderPipeline2(&descriptor); } struct ScissorRect { @@ -121,6 +129,8 @@ class OcclusionQueryTests : public QueryTests { void TestOcclusionQueryWithDepthStencilTest(bool depthTestEnabled, bool stencilTestEnabled, OcclusionExpectation::Result expected) { + constexpr uint32_t kQueryCount = 1; + utils::ComboRenderPipelineDescriptor2 descriptor; descriptor.vertex.module = vsModule; descriptor.cFragment.module = fsModule; @@ -135,7 +145,7 @@ class OcclusionQueryTests : public QueryTests { depthStencil->stencilBack.compare = stencilTestEnabled ? 
wgpu::CompareFunction::Never : wgpu::CompareFunction::Always; - wgpu::RenderPipeline pipeline = device.CreateRenderPipeline2(&descriptor); + wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline2(&descriptor); wgpu::Texture renderTarget = CreateRenderTexture(wgpu::TextureFormat::RGBA8Unorm); wgpu::TextureView renderTargetView = renderTarget.CreateView(); @@ -154,7 +164,7 @@ class OcclusionQueryTests : public QueryTests { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass); - pass.SetPipeline(pipeline); + pass.SetPipeline(renderPipeline); pass.SetStencilReference(0); pass.BeginOcclusionQuery(0); pass.Draw(3); @@ -170,11 +180,7 @@ class OcclusionQueryTests : public QueryTests { void TestOcclusionQueryWithScissorTest(ScissorRect rect, OcclusionExpectation::Result expected) { - utils::ComboRenderPipelineDescriptor2 descriptor; - descriptor.vertex.module = vsModule; - descriptor.cFragment.module = fsModule; - - wgpu::RenderPipeline pipeline = device.CreateRenderPipeline2(&descriptor); + constexpr uint32_t kQueryCount = 1; wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); @@ -204,18 +210,19 @@ class OcclusionQueryTests : public QueryTests { wgpu::ShaderModule vsModule; wgpu::ShaderModule fsModule; + wgpu::RenderPipeline pipeline; + constexpr static unsigned int kRTSize = 4; - constexpr static uint32_t kQueryCount = 1; }; // Test creating query set with the type of Occlusion TEST_P(OcclusionQueryTests, QuerySetCreation) { - CreateOcclusionQuerySet(kQueryCount); + CreateOcclusionQuerySet(1); } // Test destroying query set TEST_P(OcclusionQueryTests, QuerySetDestroy) { - wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); + wgpu::QuerySet querySet = CreateOcclusionQuerySet(1); querySet.Destroy(); } @@ -257,6 +264,189 @@ TEST_P(OcclusionQueryTests, QueryWithScissorTest) { 
TestOcclusionQueryWithScissorTest({0, 0, 2, 1}, OcclusionExpectation::Result::Zero); } +// Test begin occlusion query with same query index on different render pass +TEST_P(OcclusionQueryTests, Rewrite) { + constexpr uint32_t kQueryCount = 1; + + wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Set all bits in buffer to check 0 is correctly written if there is no sample passed the + // occlusion testing + queue.WriteBuffer(destination, 0, &kSentinelValue, sizeof(kSentinelValue)); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize); + renderPass.renderPassInfo.occlusionQuerySet = querySet; + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + + // Begin occlusion without draw call + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.BeginOcclusionQuery(0); + pass.EndOcclusionQuery(); + pass.EndPass(); + + // Begin occlusion with same query index with draw call + wgpu::RenderPassEncoder rewritePass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + rewritePass.SetPipeline(pipeline); + rewritePass.BeginOcclusionQuery(0); + rewritePass.Draw(3); + rewritePass.EndOcclusionQuery(); + rewritePass.EndPass(); + + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER(destination, 0, sizeof(uint64_t), + new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); +} + +// Test resolving occlusion query correctly if the queries are written sparsely, which also tests +// the query resetting at the start of render passes on Vulkan backend. 
+TEST_P(OcclusionQueryTests, ResolveSparseQueries) { + // TODO(hao.x.li@intel.com): Clear the resolve region of the buffer to 0 if there is at least + // one query not written and the resolve buffer has been initialized or fully used. + DAWN_SKIP_TEST_IF(IsVulkan()); + + // TODO(hao.x.li@intel.com): Investigate why it fails on D3D12 on Nvidia when running with + // the previous occlusion tests. Expect resolve to 0 for these unwritten queries but the + // occlusion result of the previous tests is returned instead. + DAWN_SKIP_TEST_IF(IsD3D12() && IsNvidia()); + + constexpr uint32_t kQueryCount = 7; + + wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Set sentinel values to check the queries are resolved correctly if the queries are + // written sparsely. + std::vector<uint64_t> sentinelValues(kQueryCount, kSentinelValue); + queue.WriteBuffer(destination, 0, sentinelValues.data(), kQueryCount * sizeof(uint64_t)); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize); + renderPass.renderPassInfo.occlusionQuerySet = querySet; + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + + // Write queries sparsely for testing the query resetting on Vulkan and resolving unwritten + // queries to 0. + // 0 - not written (tests starting with not written). + // 1 - written (tests combining multiple written, although other tests already do it). + // 2 - written. + // 3 - not written (tests skipping over not written in the middle). + // 4 - not written. + // 5 - written (tests another written query in the middle). + // 6 - not written (tests the last query not being written). 
+ pass.BeginOcclusionQuery(1); + pass.Draw(3); + pass.EndOcclusionQuery(); + pass.BeginOcclusionQuery(2); + pass.Draw(3); + pass.EndOcclusionQuery(); + pass.BeginOcclusionQuery(5); + pass.Draw(3); + pass.EndOcclusionQuery(); + pass.EndPass(); + + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + // The query at index 0 should be resolved to 0. + EXPECT_BUFFER_U64_RANGE_EQ(&kZero, destination, 0, 1); + EXPECT_BUFFER(destination, sizeof(uint64_t), 2 * sizeof(uint64_t), + new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); + // The queries at index 3 and 4 should be resolved to 0. + std::vector<uint64_t> zeros(2, kZero); + EXPECT_BUFFER_U64_RANGE_EQ(zeros.data(), destination, 3 * sizeof(uint64_t), 2); + EXPECT_BUFFER(destination, 5 * sizeof(uint64_t), sizeof(uint64_t), + new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); + // The query at index 6 should be resolved to 0. + EXPECT_BUFFER_U64_RANGE_EQ(&kZero, destination, 6 * sizeof(uint64_t), 1); +} + +// Test resolving occlusion query to 0 if all queries are not written +TEST_P(OcclusionQueryTests, ResolveWithoutWritten) { + // TODO(hao.x.li@intel.com): Clear the resolve region of the buffer to 0 if there is at least + // one query not written and the resolve buffer has been initialized or fully used. + DAWN_SKIP_TEST_IF(IsVulkan()); + + // TODO(hao.x.li@intel.com): Investigate why it fails on D3D12 on Nvidia when running with + // the previous occlusion tests. Expect resolve to 0 but the occlusion result of the previous + // tests is returned instead. + DAWN_SKIP_TEST_IF(IsD3D12() && IsNvidia()); + + constexpr uint32_t kQueryCount = 1; + + wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Set sentinel values to check 0 is correctly written if resolving query set without + // any written. 
+ queue.WriteBuffer(destination, 0, &kSentinelValue, sizeof(kSentinelValue)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER_U64_RANGE_EQ(&kZero, destination, 0, 1); +} + +// Test resolving occlusion query to the destination buffer with offset +TEST_P(OcclusionQueryTests, ResolveToBufferWithOffset) { + constexpr uint32_t kQueryCount = 2; + + wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize); + renderPass.renderPassInfo.occlusionQuerySet = querySet; + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.BeginOcclusionQuery(0); + pass.Draw(3); + pass.EndOcclusionQuery(); + pass.EndPass(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + // Resolve the query result to first slot in the buffer, other slots should not be written. + { + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Set sentinel values to check the query is resolved to the correct slot of the buffer. 
+ std::vector sentinelValues(kQueryCount, kSentinelValue); + queue.WriteBuffer(destination, 0, sentinelValues.data(), kQueryCount * sizeof(uint64_t)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.ResolveQuerySet(querySet, 0, 1, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER(destination, 0, sizeof(uint64_t), + new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); + EXPECT_BUFFER_U64_RANGE_EQ(&kSentinelValue, destination, sizeof(uint64_t), 1); + } + + // Resolve the query result to second slot in the buffer, the first one should not be written. + { + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Set sentinel values to check the query is resolved to the correct slot of the buffer. + std::vector sentinelValues(kQueryCount, kSentinelValue); + queue.WriteBuffer(destination, 0, sentinelValues.data(), kQueryCount * sizeof(uint64_t)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.ResolveQuerySet(querySet, 0, 1, destination, sizeof(uint64_t)); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER_U64_RANGE_EQ(&kSentinelValue, destination, 0, 1); + EXPECT_BUFFER(destination, sizeof(uint64_t), sizeof(uint64_t), + new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); + } +} + DAWN_INSTANTIATE_TEST(OcclusionQueryTests, D3D12Backend(), MetalBackend(), VulkanBackend()); class PipelineStatisticsQueryTests : public QueryTests { @@ -358,65 +548,149 @@ TEST_P(TimestampQueryTests, TimestampOnCommandEncoder) { constexpr uint32_t kQueryCount = 2; - wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Write timestamp with different query indexes + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = 
CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - encoder.WriteTimestamp(querySet, 0); - encoder.WriteTimestamp(querySet, 1); - encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); - wgpu::CommandBuffer commands = encoder.Finish(); - queue.Submit(1, &commands); + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.WriteTimestamp(querySet, 0); + encoder.WriteTimestamp(querySet, 1); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); - EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } + + // Write timestamp with same query index outside pass on same encoder + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.WriteTimestamp(querySet, 0); + encoder.WriteTimestamp(querySet, 1); + encoder.WriteTimestamp(querySet, 0); + encoder.WriteTimestamp(querySet, 1); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } } // Test calling timestamp query from render pass encoder TEST_P(TimestampQueryTests, TimestampOnRenderPass) { constexpr uint32_t kQueryCount = 2; - wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Write timestamp with different query indexes + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = 
CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); - wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); - pass.WriteTimestamp(querySet, 0); - pass.WriteTimestamp(querySet, 1); - pass.EndPass(); - encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); - wgpu::CommandBuffer commands = encoder.Finish(); - queue.Submit(1, &commands); + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.WriteTimestamp(querySet, 0); + pass.WriteTimestamp(querySet, 1); + pass.EndPass(); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); - EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } + + // Write timestamp with same query index outside and inside the render pass. No need to test + // rewriting inside the render pass because it is not allowed. + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.WriteTimestamp(querySet, 0); + encoder.WriteTimestamp(querySet, 1); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.WriteTimestamp(querySet, 0); + pass.WriteTimestamp(querySet, 1); + pass.EndPass(); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, 
&commands); + + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } } // Test calling timestamp query from compute pass encoder TEST_P(TimestampQueryTests, TimestampOnComputePass) { constexpr uint32_t kQueryCount = 2; - wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + // Write timestamp with different query indexes + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); - pass.WriteTimestamp(querySet, 0); - pass.WriteTimestamp(querySet, 1); - pass.EndPass(); - encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); - wgpu::CommandBuffer commands = encoder.Finish(); - queue.Submit(1, &commands); + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + pass.WriteTimestamp(querySet, 0); + pass.WriteTimestamp(querySet, 1); + pass.EndPass(); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); - EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } + + // Write timestamp with same query index on both the outside and the inside of the compute pass + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.WriteTimestamp(querySet, 0); + encoder.WriteTimestamp(querySet, 1); + + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + 
pass.WriteTimestamp(querySet, 0); + pass.WriteTimestamp(querySet, 1); + pass.EndPass(); + + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } + + // Write timestamp with same query index inside compute pass + { + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + pass.WriteTimestamp(querySet, 0); + pass.WriteTimestamp(querySet, 1); + pass.WriteTimestamp(querySet, 0); + pass.WriteTimestamp(querySet, 1); + pass.EndPass(); + + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation); + } } // Test resolving timestamp query from another different encoder TEST_P(TimestampQueryTests, ResolveFromAnotherEncoder) { - // TODO(hao.x.li@intel.com): Fix queries reset on Vulkan backend, it does not allow to resolve - // unissued queries. Currently we reset the whole query set at the beginning of command buffer - // creation. 
- DAWN_SKIP_TEST_IF(IsVulkan()); - constexpr uint32_t kQueryCount = 2; wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); @@ -438,12 +712,7 @@ TEST_P(TimestampQueryTests, ResolveFromAnotherEncoder) { // Test resolving timestamp query correctly if the queries are written sparsely TEST_P(TimestampQueryTests, ResolveSparseQueries) { - // TODO(hao.x.li@intel.com): Fix queries reset and sparsely resolving on Vulkan backend, - // otherwise its validation layer reports unissued queries resolving error - DAWN_SKIP_TEST_IF(IsVulkan() && IsBackendValidationEnabled()); - constexpr uint32_t kQueryCount = 4; - constexpr uint64_t kZero = 0; wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); @@ -469,10 +738,6 @@ TEST_P(TimestampQueryTests, ResolveSparseQueries) { // Test resolving timestamp query to 0 if all queries are not written TEST_P(TimestampQueryTests, ResolveWithoutWritten) { - // TODO(hao.x.li@intel.com): Fix queries reset and sparsely resolving on Vulkan backend, - // otherwise its validation layer reports unissued queries resolving error - DAWN_SKIP_TEST_IF(IsVulkan() && IsBackendValidationEnabled()); - constexpr uint32_t kQueryCount = 2; wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);