diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp index 3816f20a57..ae64158bf1 100644 --- a/src/dawn/native/Toggles.cpp +++ b/src/dawn/native/Toggles.cpp @@ -306,6 +306,12 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{ "integer that is greater than 2^24 or smaller than -2^24). This toggle is also enabled on " "Intel GPUs on Metal backend due to a driver issue on Intel Metal driver.", "https://crbug.com/dawn/537"}}, + {Toggle::MetalUseDummyBlitEncoderForWriteTimestamp, + {"metal_use_dummy_blit_encoder_for_write_timestamp", + "Add dummy blit command to blit encoder when encoding writeTimestamp as workaround on Metal." + "This toggle is enabled by default on Metal backend where GPU counters cannot be stored to" + "sampleBufferAttachments on empty blit encoder.", + "https://crbug.com/dawn/1473"}}, // Comment to separate the }} so it is clearer what to copy-paste to add a toggle. }}; } // anonymous namespace diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h index 981aa94e0c..942f76a167 100644 --- a/src/dawn/native/Toggles.h +++ b/src/dawn/native/Toggles.h @@ -79,6 +79,7 @@ enum class Toggle { D3D12AllocateExtraMemoryFor2DArrayTexture, D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset, ApplyClearBigIntegerColorValueWithDraw, + MetalUseDummyBlitEncoderForWriteTimestamp, EnumCount, InvalidEnum = EnumCount, diff --git a/src/dawn/native/metal/BackendMTL.mm b/src/dawn/native/metal/BackendMTL.mm index 9415e52828..9d5648033c 100644 --- a/src/dawn/native/metal/BackendMTL.mm +++ b/src/dawn/native/metal/BackendMTL.mm @@ -24,6 +24,7 @@ #include "dawn/native/MetalBackend.h" #include "dawn/native/metal/BufferMTL.h" #include "dawn/native/metal/DeviceMTL.h" +#include "dawn/native/metal/UtilsMetal.h" #if DAWN_PLATFORM_IS(MACOS) #import @@ -170,18 +171,6 @@ MaybeError GetDevicePCIInfo(id device, PCIIDs* ids) { #error "Unsupported Apple platform." #endif -DAWN_NOINLINE bool IsCounterSamplingBoundarySupport(id device) - API_AVAILABLE(macos(11.0), ios(14.0)) { - bool isBlitBoundarySupported = - [device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]; - bool isDispatchBoundarySupported = - [device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]; - bool isDrawBoundarySupported = - [device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]; - - return isBlitBoundarySupported && isDispatchBoundarySupported && isDrawBoundarySupported; -} - // This method has seen hard-to-debug crashes. See crbug.com/dawn/1102. // For now, it is written defensively, with many potentially unnecessary guards until // we narrow down the cause of the problem. @@ -246,11 +235,13 @@ DAWN_NOINLINE bool IsGPUCounterSupported(id device, } if (@available(macOS 11.0, iOS 14.0, *)) { - // Check whether it can read GPU counters at the specified command boundary. Apple - // family GPUs do not support sampling between different Metal commands, because - // they defer fragment processing until after the GPU processes all the primitives - // in the render pass. - if (!IsCounterSamplingBoundarySupport(device)) { + // Check whether it can read GPU counters at the specified command boundary or stage + // boundary. Apple family GPUs do not support sampling between different Metal commands, + // because they defer fragment processing until after the GPU processes all the primitives + // in the render pass. GPU counters are only available if sampling at least one of the + // command or stage boundaries is supported. + if (!SupportCounterSamplingAtCommandBoundary(device) && + !SupportCounterSamplingAtStageBoundary(device)) { return false; } } diff --git a/src/dawn/native/metal/CommandBufferMTL.h b/src/dawn/native/metal/CommandBufferMTL.h index 0f95ef4243..2976ec81dd 100644 --- a/src/dawn/native/metal/CommandBufferMTL.h +++ b/src/dawn/native/metal/CommandBufferMTL.h @@ -16,12 +16,15 @@ #define SRC_DAWN_NATIVE_METAL_COMMANDBUFFERMTL_H_ #include "dawn/native/CommandBuffer.h" +#include "dawn/native/Commands.h" #include "dawn/native/Error.h" #import namespace dawn::native { class CommandEncoder; +struct BeginComputePassCmd; +struct BeginRenderPassCmd; } namespace dawn::native::metal { @@ -55,8 +58,10 @@ class CommandBuffer final : public CommandBufferBase { private: using CommandBufferBase::CommandBufferBase; - MaybeError EncodeComputePass(CommandRecordingContext* commandContext); - MaybeError EncodeRenderPass(id encoder); + MaybeError EncodeComputePass(CommandRecordingContext* commandContext, + BeginComputePassCmd* computePassCmd); + MaybeError EncodeRenderPass(id encoder, + BeginRenderPassCmd* renderPassCmd); }; } // namespace dawn::native::metal diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm index 66a4569277..a5e5fbe52e 100644 --- a/src/dawn/native/metal/CommandBufferMTL.mm +++ b/src/dawn/native/metal/CommandBufferMTL.mm @@ -49,7 +49,129 @@ MTLIndexType MTLIndexFormat(wgpu::IndexFormat format) { } } -NSRef CreateMTLRenderPassDescriptor(BeginRenderPassCmd* renderPass) { +template +class SampleBufferAttachment { + public: + void SetSampleBuffer(PassDescriptor* descriptor, id sampleBuffer) + API_AVAILABLE(macos(11.0), ios(14.0)); + void SetStartSampleIndex(PassDescriptor* descriptor, NSUInteger sampleIndex) + API_AVAILABLE(macos(11.0), ios(14.0)); + void SetEndSampleIndex(PassDescriptor* descriptor, NSUInteger sampleIndex) + API_AVAILABLE(macos(11.0), ios(14.0)); + + private: + // Initialized to the maximum value, in order to start from 0 after the first increment. + NSUInteger attachmentIndex = NSUIntegerMax; + // TODO(dawn:1473): The maximum of sampleBufferAttachments depends on the length of MTLDevice's + // counterSets, but Metal does not match the allowed maximum of sampleBufferAttachments with the + // length of counterSets on AGX family. Hardcode as a constant and check this whenever Metal + // could get the matched value. + static constexpr NSUInteger kMaxSampleBufferAttachments = 4; +}; + +template +void SampleBufferAttachment::SetSampleBuffer( + PassDescriptor* descriptor, + id sampleBuffer) API_AVAILABLE(macos(11.0), ios(14.0)) { + attachmentIndex++; + ASSERT(attachmentIndex < kMaxSampleBufferAttachments); + descriptor.sampleBufferAttachments[attachmentIndex].sampleBuffer = sampleBuffer; +} + +// Must be called after SetSampleBuffer +template <> +void SampleBufferAttachment::SetStartSampleIndex( + MTLRenderPassDescriptor* descriptor, + NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) { + ASSERT(attachmentIndex < kMaxSampleBufferAttachments); + descriptor.sampleBufferAttachments[attachmentIndex].startOfVertexSampleIndex = sampleIndex; +} + +// Must be called after SetSampleBuffer +template <> +void SampleBufferAttachment::SetEndSampleIndex( + MTLRenderPassDescriptor* descriptor, + NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) { + ASSERT(attachmentIndex < kMaxSampleBufferAttachments); + descriptor.sampleBufferAttachments[attachmentIndex].endOfFragmentSampleIndex = sampleIndex; +} + +// Must be called after SetSampleBuffer +template <> +void SampleBufferAttachment::SetStartSampleIndex( + MTLComputePassDescriptor* descriptor, + NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) { + ASSERT(attachmentIndex < kMaxSampleBufferAttachments); + descriptor.sampleBufferAttachments[attachmentIndex].startOfEncoderSampleIndex = sampleIndex; +} + +// Must be called after SetSampleBuffer +template <> +void SampleBufferAttachment::SetEndSampleIndex( + MTLComputePassDescriptor* descriptor, + NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) { + // TODO(dawn:1473): Use MTLComputePassSampleBuffers or query method instead of the magic number + // 4 when Metal could get the maximum of sampleBufferAttachments on compute pass + ASSERT(attachmentIndex < kMaxSampleBufferAttachments); + descriptor.sampleBufferAttachments[attachmentIndex].endOfEncoderSampleIndex = sampleIndex; +} + +template +void SetSampleBufferAttachments(PassDescriptor* descriptor, BeginPass* cmd) { + // Use @available instead of API_AVAILABLE because GetCounterSampleBuffer() also needs checking + // API availability. + if (@available(macOS 11.0, iOS 14.0, *)) { + QuerySetBase* beginQuerySet = cmd->beginTimestamp.querySet.Get(); + QuerySetBase* endQuerySet = cmd->endTimestamp.querySet.Get(); + + SampleBufferAttachment sampleBufferAttachment; + + if (beginQuerySet != nullptr) { + sampleBufferAttachment.SetSampleBuffer( + descriptor, ToBackend(beginQuerySet)->GetCounterSampleBuffer()); + sampleBufferAttachment.SetStartSampleIndex(descriptor, + NSUInteger(cmd->beginTimestamp.queryIndex)); + + if (beginQuerySet == endQuerySet) { + sampleBufferAttachment.SetEndSampleIndex(descriptor, + NSUInteger(cmd->endTimestamp.queryIndex)); + } else { + sampleBufferAttachment.SetEndSampleIndex(descriptor, MTLCounterDontSample); + } + } + + // Set to other sampleBufferAttachment if the endQuerySet is different with beginQuerySet. + if (endQuerySet != nullptr && beginQuerySet != endQuerySet) { + sampleBufferAttachment.SetSampleBuffer( + descriptor, ToBackend(endQuerySet)->GetCounterSampleBuffer()); + sampleBufferAttachment.SetStartSampleIndex(descriptor, MTLCounterDontSample); + sampleBufferAttachment.SetEndSampleIndex(descriptor, + NSUInteger(cmd->endTimestamp.queryIndex)); + } + } else { + UNREACHABLE(); + } +} + +NSRef CreateMTLComputePassDescriptor(BeginComputePassCmd* computePass) + API_AVAILABLE(macos(11.0), ios(14.0)) { + // Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef + NSRef descriptorRef = + [MTLComputePassDescriptor computePassDescriptor]; + MTLComputePassDescriptor* descriptor = descriptorRef.Get(); + // MTLDispatchTypeSerial is the same dispatch type as the deafult MTLComputeCommandEncoder. + // MTLDispatchTypeConcurrent requires memory barriers to ensure multiple commands synchronize + // access to the same resources, which we may support it later. + descriptor.dispatchType = MTLDispatchTypeSerial; + + SetSampleBufferAttachments(descriptor, computePass); + + return descriptorRef; +} + +NSRef CreateMTLRenderPassDescriptor( + BeginRenderPassCmd* renderPass, + bool useCounterSamplingAtStageBoundary) { // Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef NSRef descriptorRef = [MTLRenderPassDescriptor renderPassDescriptor]; MTLRenderPassDescriptor* descriptor = descriptorRef.Get(); @@ -197,9 +319,36 @@ NSRef CreateMTLRenderPassDescriptor(BeginRenderPassCmd* ToBackend(renderPass->occlusionQuerySet.Get())->GetVisibilityBuffer(); } + if (@available(macOS 11.0, iOS 14.0, *)) { + if (useCounterSamplingAtStageBoundary) { + SetSampleBufferAttachments(descriptor, renderPass); + } + } + return descriptorRef; } +void EncodeEmptyBlitEncoderForWriteTimestamp(Device* device, + CommandRecordingContext* commandContext, + WriteTimestampCmd* cmd) + API_AVAILABLE(macos(11.0), ios(14.0)) { + commandContext->EndBlit(); + + MTLBlitPassDescriptor* descriptor = [[MTLBlitPassDescriptor alloc] init]; + if (cmd->querySet.Get() != nullptr) { + descriptor.sampleBufferAttachments[0].sampleBuffer = + ToBackend(cmd->querySet.Get())->GetCounterSampleBuffer(); + descriptor.sampleBufferAttachments[0].startOfEncoderSampleIndex = MTLCounterDontSample; + descriptor.sampleBufferAttachments[0].endOfEncoderSampleIndex = NSUInteger(cmd->queryIndex); + + id blit = commandContext->BeginBlit(descriptor); + if (device->IsToggleEnabled(Toggle::MetalUseDummyBlitEncoderForWriteTimestamp)) { + [blit fillBuffer:device->GetDummyBlitMtlBuffer() range:NSMakeRange(0, 1) value:0]; + } + commandContext->EndBlit(); + } +} + // Metal uses a physical addressing mode which means buffers in the shading language are // just pointers to the virtual address of their start. This means there is no way to know // the length of a buffer to compute the length() of unsized arrays at the end of storage @@ -612,18 +761,13 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext) case Command::BeginComputePass: { BeginComputePassCmd* cmd = mCommands.NextCommand(); - if (cmd->beginTimestamp.querySet.Get() != nullptr || - cmd->endTimestamp.querySet.Get() != nullptr) { - return DAWN_UNIMPLEMENTED_ERROR("timestampWrites unimplemented."); - } - for (const SyncScopeResourceUsage& scope : GetResourceUsages().computePasses[nextComputePassNumber].dispatchUsages) { LazyClearSyncScope(scope, commandContext); } commandContext->EndBlit(); - DAWN_TRY(EncodeComputePass(commandContext)); + DAWN_TRY(EncodeComputePass(commandContext, cmd)); nextComputePassNumber++; break; @@ -632,22 +776,19 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext) case Command::BeginRenderPass: { BeginRenderPassCmd* cmd = mCommands.NextCommand(); - if (cmd->beginTimestamp.querySet.Get() != nullptr || - cmd->endTimestamp.querySet.Get() != nullptr) { - return DAWN_UNIMPLEMENTED_ERROR("timestampWrites unimplemented."); - } - LazyClearSyncScope(GetResourceUsages().renderPasses[nextRenderPassNumber], commandContext); commandContext->EndBlit(); LazyClearRenderPassAttachments(cmd); - NSRef descriptor = CreateMTLRenderPassDescriptor(cmd); + NSRef descriptor = CreateMTLRenderPassDescriptor( + cmd, ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()); DAWN_TRY(EncodeMetalRenderPass( ToBackend(GetDevice()), commandContext, descriptor.Get(), cmd->width, - cmd->height, [this](id encoder) -> MaybeError { - return this->EncodeRenderPass(encoder); - })); + cmd->height, + [this](id encoder, BeginRenderPassCmd* cmd) + -> MaybeError { return this->EncodeRenderPass(encoder, cmd); }, + cmd)); nextRenderPassNumber++; break; @@ -905,16 +1046,29 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext) case Command::WriteTimestamp: { WriteTimestampCmd* cmd = mCommands.NextCommand(); - QuerySet* querySet = ToBackend(cmd->querySet.Get()); - if (@available(macos 10.15, iOS 14.0, *)) { - [commandContext->EnsureBlit() - sampleCountersInBuffer:querySet->GetCounterSampleBuffer() - atSampleIndex:NSUInteger(cmd->queryIndex) - withBarrier:YES]; + if (ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()) { + if (@available(macos 11.0, iOS 14.0, *)) { + // Simulate writeTimestamp cmd between blit commands on the devices which + // supports counter sampling at stage boundary. + EncodeEmptyBlitEncoderForWriteTimestamp(ToBackend(GetDevice()), + commandContext, cmd); + } else { + UNREACHABLE(); + } } else { - UNREACHABLE(); + if (@available(macos 10.15, iOS 14.0, *)) { + ASSERT(ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary()); + [commandContext->EnsureBlit() + sampleCountersInBuffer:ToBackend(cmd->querySet.Get()) + ->GetCounterSampleBuffer() + atSampleIndex:NSUInteger(cmd->queryIndex) + withBarrier:YES]; + } else { + UNREACHABLE(); + } } + break; } @@ -985,18 +1139,64 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext) return {}; } -MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandContext) { +MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandContext, + BeginComputePassCmd* computePassCmd) { ComputePipeline* lastPipeline = nullptr; StorageBufferLengthTracker storageBufferLengths = {}; BindGroupTracker bindGroups(&storageBufferLengths); - id encoder = commandContext->BeginCompute(); + id encoder; + // When counter sampling is supported at stage boundary, begin a configurable compute pass + // encoder which is supported since macOS 11.0+ and iOS 14.0+ and set timestamp writes to + // compute pass descriptor, otherwise begin a default compute pass encoder, and simulate + // timestamp writes using sampleCountersInBuffer API at the beginning and end of compute pass. + if (ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()) { + if (@available(macOS 11.0, iOS 14.0, *)) { + NSRef descriptor = + CreateMTLComputePassDescriptor(computePassCmd); + encoder = commandContext->BeginCompute(descriptor.Get()); + } else { + UNREACHABLE(); + } + } else { + encoder = commandContext->BeginCompute(); + + if (@available(macos 10.15, iOS 14.0, *)) { + if (computePassCmd->beginTimestamp.querySet.Get() != nullptr) { + ASSERT(ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary()); + + [encoder + sampleCountersInBuffer:ToBackend(computePassCmd->beginTimestamp.querySet.Get()) + ->GetCounterSampleBuffer() + atSampleIndex:NSUInteger(computePassCmd->beginTimestamp.queryIndex) + withBarrier:YES]; + } + } + } Command type; while (mCommands.NextCommandId(&type)) { switch (type) { case Command::EndComputePass: { mCommands.NextCommand(); + + if (@available(macos 10.15, iOS 14.0, *)) { + // Simulate timestamp write at the end of render pass if it does not support + // counter sampling at stage boundary. + if (ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary() && + computePassCmd->endTimestamp.querySet.Get() != nullptr) { + ASSERT(!ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()); + + [encoder + sampleCountersInBuffer:ToBackend( + computePassCmd->endTimestamp.querySet.Get()) + ->GetCounterSampleBuffer() + atSampleIndex:NSUInteger( + computePassCmd->endTimestamp.queryIndex) + withBarrier:YES]; + } + } + commandContext->EndCompute(); return {}; } @@ -1104,7 +1304,8 @@ MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandCont UNREACHABLE(); } -MaybeError CommandBuffer::EncodeRenderPass(id encoder) { +MaybeError CommandBuffer::EncodeRenderPass(id encoder, + BeginRenderPassCmd* renderPassCmd) { bool enableVertexPulling = GetDevice()->IsToggleEnabled(Toggle::MetalEnableVertexPulling); RenderPipeline* lastPipeline = nullptr; id indexBuffer = nullptr; @@ -1116,6 +1317,20 @@ MaybeError CommandBuffer::EncodeRenderPass(id encoder) VertexBufferTracker vertexBuffers(&storageBufferLengths); BindGroupTracker bindGroups(&storageBufferLengths); + if (@available(macos 10.15, iOS 14.0, *)) { + // Simulate timestamp write at the beginning of render pass by + // sampleCountersInBuffer if it does not support counter sampling at stage boundary. + if (ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary() && + renderPassCmd->beginTimestamp.querySet.Get() != nullptr) { + ASSERT(!ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()); + + [encoder sampleCountersInBuffer:ToBackend(renderPassCmd->beginTimestamp.querySet.Get()) + ->GetCounterSampleBuffer() + atSampleIndex:NSUInteger(renderPassCmd->beginTimestamp.queryIndex) + withBarrier:YES]; + } + } + auto EncodeRenderBundleCommand = [&](CommandIterator* iter, Command type) { switch (type) { case Command::Draw: { @@ -1304,6 +1519,24 @@ MaybeError CommandBuffer::EncodeRenderPass(id encoder) switch (type) { case Command::EndRenderPass: { mCommands.NextCommand(); + + if (@available(macos 10.15, iOS 14.0, *)) { + // Simulate timestamp write at the end of render pass if it does not support + // counter sampling at stage boundary. + if (ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary() && + renderPassCmd->endTimestamp.querySet.Get() != nullptr) { + ASSERT(!ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()); + + [encoder + sampleCountersInBuffer:ToBackend( + renderPassCmd->endTimestamp.querySet.Get()) + ->GetCounterSampleBuffer() + atSampleIndex:NSUInteger( + renderPassCmd->endTimestamp.queryIndex) + withBarrier:YES]; + } + } + return {}; } diff --git a/src/dawn/native/metal/CommandRecordingContext.h b/src/dawn/native/metal/CommandRecordingContext.h index b5ec3defaf..025c88bbe0 100644 --- a/src/dawn/native/metal/CommandRecordingContext.h +++ b/src/dawn/native/metal/CommandRecordingContext.h @@ -36,10 +36,18 @@ class CommandRecordingContext : NonMovable { MaybeError PrepareNextCommandBuffer(id queue); NSPRef> AcquireCommands(); + // Create blit pass encoder from blit pass descriptor + id BeginBlit(MTLBlitPassDescriptor* descriptor) + API_AVAILABLE(macos(11.0), ios(14.0)); id EnsureBlit(); void EndBlit(); + // Create a sequential compute pass by default. id BeginCompute(); + // Create configurable compute pass from a descriptor with serial dispatch type which commands + // are executed sequentially. + id BeginCompute(MTLComputePassDescriptor* descriptor) + API_AVAILABLE(macos(11.0), ios(14.0)); void EndCompute(); id BeginRender(MTLRenderPassDescriptor* descriptor); diff --git a/src/dawn/native/metal/CommandRecordingContext.mm b/src/dawn/native/metal/CommandRecordingContext.mm index d4bbef068d..294e53f8ea 100644 --- a/src/dawn/native/metal/CommandRecordingContext.mm +++ b/src/dawn/native/metal/CommandRecordingContext.mm @@ -62,6 +62,20 @@ NSPRef> CommandRecordingContext::AcquireCommands() { return std::move(mCommands); } +id CommandRecordingContext::BeginBlit(MTLBlitPassDescriptor* descriptor) + API_AVAILABLE(macos(11.0), ios(14.0)) { + ASSERT(descriptor); + ASSERT(mCommands != nullptr); + ASSERT(mBlit == nullptr); + ASSERT(!mInEncoder); + + mInEncoder = true; + // The encoder is created autoreleased. Retain it to avoid the autoreleasepool from + // draining from under us. + mBlit.Acquire([[*mCommands blitCommandEncoderWithDescriptor:descriptor] retain]); + return mBlit.Get(); +} + id CommandRecordingContext::EnsureBlit() { ASSERT(mCommands != nullptr); @@ -98,6 +112,20 @@ id CommandRecordingContext::BeginCompute() { return mCompute.Get(); } +id CommandRecordingContext::BeginCompute( + MTLComputePassDescriptor* descriptor) API_AVAILABLE(macos(11.0), ios(14.0)) { + ASSERT(descriptor); + ASSERT(mCommands != nullptr); + ASSERT(mCompute == nullptr); + ASSERT(!mInEncoder); + + mInEncoder = true; + // The encoder is created autoreleased. Retain it to avoid the autoreleasepool from + // draining from under us. + mCompute.Acquire([[*mCommands computeCommandEncoderWithDescriptor:descriptor] retain]); + return mCompute.Get(); +} + void CommandRecordingContext::EndCompute() { ASSERT(mCommands != nullptr); ASSERT(mCompute != nullptr); diff --git a/src/dawn/native/metal/DeviceMTL.h b/src/dawn/native/metal/DeviceMTL.h index 62d4f909ba..dca0033d16 100644 --- a/src/dawn/native/metal/DeviceMTL.h +++ b/src/dawn/native/metal/DeviceMTL.h @@ -72,6 +72,13 @@ class Device final : public DeviceBase { float GetTimestampPeriodInNS() const override; + bool UseCounterSamplingAtCommandBoundary() const; + bool UseCounterSamplingAtStageBoundary() const; + + // Get a MTLBuffer that can be used as a dummy in a no-op blit encoder based on filling this + // single-byte buffer + id GetDummyBlitMtlBuffer(); + private: Device(AdapterBase* adapter, NSPRef> mtlDevice, @@ -144,6 +151,13 @@ class Device final : public DeviceBase { MTLTimestamp mGpuTimestamp API_AVAILABLE(macos(10.15), ios(14.0)) = 0; // The parameters for kalman filter std::unique_ptr mKalmanInfo; + + // Support counter sampling between blit commands, dispatches and draw calls + bool mCounterSamplingAtCommandBoundary; + // Support counter sampling at the begin and end of blit pass, compute pass and render pass's + // vertex/fragement stage + bool mCounterSamplingAtStageBoundary; + NSPRef> mDummyBlitMtlBuffer; }; } // namespace dawn::native::metal diff --git a/src/dawn/native/metal/DeviceMTL.mm b/src/dawn/native/metal/DeviceMTL.mm index a3fdac1e51..e5e592b6c2 100644 --- a/src/dawn/native/metal/DeviceMTL.mm +++ b/src/dawn/native/metal/DeviceMTL.mm @@ -121,7 +121,18 @@ Device::Device(AdapterBase* adapter, const TripleStateTogglesSet& userProvidedToggles) : DeviceBase(adapter, descriptor, userProvidedToggles), mMtlDevice(std::move(mtlDevice)), - mCompletedSerial(0) {} + mCompletedSerial(0) { + // On macOS < 11.0, we only can check whether counter sampling is supported, and the counter + // only can be sampled between command boundary using sampleCountersInBuffer API if it's + // supported. + if (@available(macOS 11.0, iOS 14.0, *)) { + mCounterSamplingAtCommandBoundary = SupportCounterSamplingAtCommandBoundary(GetMTLDevice()); + mCounterSamplingAtStageBoundary = SupportCounterSamplingAtStageBoundary(GetMTLDevice()); + } else { + mCounterSamplingAtCommandBoundary = true; + mCounterSamplingAtStageBoundary = false; + } +} Device::~Device() { Destroy(); @@ -234,6 +245,14 @@ void Device::InitTogglesFromDriver() { if (gpu_info::IsIntel(vendorId)) { SetToggle(Toggle::ApplyClearBigIntegerColorValueWithDraw, true); } + + // TODO(dawn:1473): Metal fails to store GPU counters to sampleBufferAttachments on empty + // encoders on macOS 11.0+, we need to add dummy blit command to blit encoder when encoding + // writeTimestamp as workaround by enabling the toggle + // "metal_use_dummy_blit_encoder_for_write_timestamp". + if (@available(macos 11.0, iOS 14.0, *)) { + SetToggle(Toggle::MetalUseDummyBlitEncoderForWriteTimestamp, true); + } } ResultOrError> Device::CreateBindGroupImpl( @@ -498,6 +517,7 @@ void Device::DestroyImpl() { mCommandQueue = nullptr; mMtlDevice = nullptr; + mDummyBlitMtlBuffer = nullptr; } uint32_t Device::GetOptimalBytesPerRowAlignment() const { @@ -512,4 +532,21 @@ float Device::GetTimestampPeriodInNS() const { return mTimestampPeriod; } +bool Device::UseCounterSamplingAtCommandBoundary() const { + return mCounterSamplingAtCommandBoundary; +} + +bool Device::UseCounterSamplingAtStageBoundary() const { + return mCounterSamplingAtStageBoundary; +} + +id Device::GetDummyBlitMtlBuffer() { + if (mDummyBlitMtlBuffer == nullptr) { + mDummyBlitMtlBuffer.Acquire( + [GetMTLDevice() newBufferWithLength:1 options:MTLResourceStorageModePrivate]); + } + + return mDummyBlitMtlBuffer.Get(); +} + } // namespace dawn::native::metal diff --git a/src/dawn/native/metal/UtilsMetal.h b/src/dawn/native/metal/UtilsMetal.h index 418c4a6864..d491ecbd62 100644 --- a/src/dawn/native/metal/UtilsMetal.h +++ b/src/dawn/native/metal/UtilsMetal.h @@ -23,6 +23,7 @@ #import namespace dawn::native { +struct BeginRenderPassCmd; struct ProgrammableStage; struct EntryPointMetadata; enum class SingleShaderStage; @@ -81,19 +82,26 @@ constexpr MTLStoreAction kMTLStoreActionStoreAndMultisampleResolve = // happen at the render pass start and end. Because workarounds wrap the encoding of the render // pass, the encoding must be entirely done by the `encodeInside` callback. // At the end of this function, `commandContext` will have no encoder open. -using EncodeInsideRenderPass = std::function)>; +using EncodeInsideRenderPass = + std::function, BeginRenderPassCmd* renderPassCmd)>; MaybeError EncodeMetalRenderPass(Device* device, CommandRecordingContext* commandContext, MTLRenderPassDescriptor* mtlRenderPass, uint32_t width, uint32_t height, - EncodeInsideRenderPass encodeInside); + EncodeInsideRenderPass encodeInside, + BeginRenderPassCmd* renderPassCmd = nullptr); MaybeError EncodeEmptyMetalRenderPass(Device* device, CommandRecordingContext* commandContext, MTLRenderPassDescriptor* mtlRenderPass, Extent3D size); +bool SupportCounterSamplingAtCommandBoundary(id device) + API_AVAILABLE(macos(11.0), ios(14.0)); +bool SupportCounterSamplingAtStageBoundary(id device) + API_AVAILABLE(macos(11.0), ios(14.0)); + } // namespace dawn::native::metal #endif // SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_ diff --git a/src/dawn/native/metal/UtilsMetal.mm b/src/dawn/native/metal/UtilsMetal.mm index 4eb5fb6e81..9a091b88fa 100644 --- a/src/dawn/native/metal/UtilsMetal.mm +++ b/src/dawn/native/metal/UtilsMetal.mm @@ -328,7 +328,8 @@ MaybeError EncodeMetalRenderPass(Device* device, MTLRenderPassDescriptor* mtlRenderPass, uint32_t width, uint32_t height, - EncodeInsideRenderPass encodeInside) { + EncodeInsideRenderPass encodeInside, + BeginRenderPassCmd* renderPassCmd) { // This function handles multiple workarounds. Because some cases requires multiple // workarounds to happen at the same time, it handles workarounds one by one and calls // itself recursively to handle the next workaround if needed. @@ -359,7 +360,7 @@ MaybeError EncodeMetalRenderPass(Device* device, // resolve back to the true resolve targets. if (workaroundUsed) { DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, - std::move(encodeInside))); + std::move(encodeInside), renderPassCmd)); for (uint32_t i = 0; i < kMaxColorAttachments; ++i) { if (trueResolveAttachments[i].texture == nullptr) { @@ -403,7 +404,7 @@ MaybeError EncodeMetalRenderPass(Device* device, if (workaroundUsed) { DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, - std::move(encodeInside))); + std::move(encodeInside), renderPassCmd)); for (uint32_t i = 0; i < kMaxColorAttachments; ++i) { if (originalAttachments[i].texture == nullptr) { @@ -439,7 +440,7 @@ MaybeError EncodeMetalRenderPass(Device* device, // If we found a store + MSAA resolve we need to resolve in a different render pass. if (hasStoreAndMSAAResolve) { DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, - std::move(encodeInside))); + std::move(encodeInside), renderPassCmd)); ResolveInAnotherRenderPass(commandContext, mtlRenderPass, resolveTextures); return {}; @@ -448,7 +449,7 @@ MaybeError EncodeMetalRenderPass(Device* device, // No (more) workarounds needed! We can finally encode the actual render pass. commandContext->EndBlit(); - DAWN_TRY(encodeInside(commandContext->BeginRender(mtlRenderPass))); + DAWN_TRY(encodeInside(commandContext->BeginRender(mtlRenderPass), renderPassCmd)); commandContext->EndRender(); return {}; } @@ -457,8 +458,26 @@ MaybeError EncodeEmptyMetalRenderPass(Device* device, CommandRecordingContext* commandContext, MTLRenderPassDescriptor* mtlRenderPass, Extent3D size) { - return EncodeMetalRenderPass(device, commandContext, mtlRenderPass, size.width, size.height, - [&](id) -> MaybeError { return {}; }); + return EncodeMetalRenderPass( + device, commandContext, mtlRenderPass, size.width, size.height, + [&](id, BeginRenderPassCmd*) -> MaybeError { return {}; }); +} + +DAWN_NOINLINE bool SupportCounterSamplingAtCommandBoundary(id device) + API_AVAILABLE(macos(11.0), ios(14.0)) { + bool isBlitBoundarySupported = + [device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]; + bool isDispatchBoundarySupported = + [device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]; + bool isDrawBoundarySupported = + [device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]; + + return isBlitBoundarySupported && isDispatchBoundarySupported && isDrawBoundarySupported; +} + +DAWN_NOINLINE bool SupportCounterSamplingAtStageBoundary(id device) + API_AVAILABLE(macos(11.0), ios(14.0)) { + return [device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]; } } // namespace dawn::native::metal diff --git a/src/dawn/tests/DawnTest.cpp b/src/dawn/tests/DawnTest.cpp index f1c3fb49ca..ff72d3482c 100644 --- a/src/dawn/tests/DawnTest.cpp +++ b/src/dawn/tests/DawnTest.cpp @@ -667,6 +667,10 @@ bool DawnTestBase::IsAMD() const { return gpu_info::IsAMD(mParam.adapterProperties.vendorID); } +bool DawnTestBase::IsApple() const { + return gpu_info::IsApple(mParam.adapterProperties.vendorID); +} + bool DawnTestBase::IsARM() const { return gpu_info::IsARM(mParam.adapterProperties.vendorID); } diff --git a/src/dawn/tests/DawnTest.h b/src/dawn/tests/DawnTest.h index 7622dd23d5..0a95873392 100644 --- a/src/dawn/tests/DawnTest.h +++ b/src/dawn/tests/DawnTest.h @@ -225,6 +225,7 @@ class DawnTestBase { bool IsVulkan() const; bool IsAMD() const; + bool IsApple() const; bool IsARM() const; bool IsImgTec() const; bool IsIntel() const; diff --git a/src/dawn/tests/end2end/QueryTests.cpp b/src/dawn/tests/end2end/QueryTests.cpp index ff195376c6..5c641ad8f4 100644 --- a/src/dawn/tests/end2end/QueryTests.cpp +++ b/src/dawn/tests/end2end/QueryTests.cpp @@ -18,6 +18,15 @@ #include "dawn/utils/ComboRenderPipelineDescriptor.h" #include "dawn/utils/WGPUHelpers.h" +// Clear the content of the result buffer into 0xFFFFFFFF. +constexpr static uint64_t kSentinelValue = ~uint64_t(0u); +constexpr static uint64_t kZero = 0u; +constexpr static unsigned int kRTSize = 4; +constexpr uint64_t kMinDestinationOffset = 256; +constexpr uint64_t kMinCount = kMinDestinationOffset / sizeof(uint64_t); +constexpr wgpu::TextureFormat kColorFormat = wgpu::TextureFormat::RGBA8Unorm; +constexpr wgpu::TextureFormat kDepthStencilFormat = wgpu::TextureFormat::Depth24PlusStencil8; + class QueryTests : public DawnTest { protected: wgpu::Buffer CreateResolveBuffer(uint64_t size) { @@ -27,13 +36,15 @@ class QueryTests : public DawnTest { wgpu::BufferUsage::CopyDst; return device.CreateBuffer(&descriptor); } -}; -// Clear the content of the result buffer into 0xFFFFFFFF. -constexpr static uint64_t kSentinelValue = ~uint64_t(0u); -constexpr static uint64_t kZero = 0u; -constexpr uint64_t kMinDestinationOffset = 256; -constexpr uint64_t kMinCount = kMinDestinationOffset / sizeof(uint64_t); + wgpu::Texture CreateRenderTexture(wgpu::TextureFormat format) { + wgpu::TextureDescriptor descriptor; + descriptor.size = {kRTSize, kRTSize, 1}; + descriptor.format = format; + descriptor.usage = wgpu::TextureUsage::RenderAttachment; + return device.CreateTexture(&descriptor); + } +}; class OcclusionExpectation : public detail::Expectation { public: @@ -112,14 +123,6 @@ class OcclusionQueryTests : public QueryTests { return device.CreateQuerySet(&descriptor); } - wgpu::Texture CreateRenderTexture(wgpu::TextureFormat format) { - wgpu::TextureDescriptor descriptor; - descriptor.size = {kRTSize, kRTSize, 1}; - descriptor.format = format; - descriptor.usage = wgpu::TextureUsage::RenderAttachment; - return device.CreateTexture(&descriptor); - } - void TestOcclusionQueryWithDepthStencilTest(bool depthTestEnabled, bool stencilTestEnabled, OcclusionExpectation::Result expected) { @@ -130,8 +133,7 @@ class OcclusionQueryTests : public QueryTests { descriptor.cFragment.module = fsModule; // Enable depth and stencil tests and set comparison tests never pass. - wgpu::DepthStencilState* depthStencil = - descriptor.EnableDepthStencil(wgpu::TextureFormat::Depth24PlusStencil8); + wgpu::DepthStencilState* depthStencil = descriptor.EnableDepthStencil(kDepthStencilFormat); depthStencil->depthCompare = depthTestEnabled ? wgpu::CompareFunction::Never : wgpu::CompareFunction::Always; depthStencil->stencilFront.compare = @@ -141,10 +143,10 @@ class OcclusionQueryTests : public QueryTests { wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&descriptor); - wgpu::Texture renderTarget = CreateRenderTexture(wgpu::TextureFormat::RGBA8Unorm); + wgpu::Texture renderTarget = CreateRenderTexture(kColorFormat); wgpu::TextureView renderTargetView = renderTarget.CreateView(); - wgpu::Texture depthTexture = CreateRenderTexture(wgpu::TextureFormat::Depth24PlusStencil8); + wgpu::Texture depthTexture = CreateRenderTexture(kDepthStencilFormat); wgpu::TextureView depthTextureView = depthTexture.CreateView(); wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); @@ -205,8 +207,6 @@ class OcclusionQueryTests : public QueryTests { wgpu::ShaderModule fsModule; wgpu::RenderPipeline pipeline; - - constexpr static unsigned int kRTSize = 4; }; // Test creating query set with the type of Occlusion @@ -551,23 +551,87 @@ class TimestampQueryTests : public QueryTests { return device.CreateQuerySet(&descriptor); } + wgpu::RenderPipeline CreateRenderPipeline(bool hasFragmentStage = true) { + utils::ComboRenderPipelineDescriptor descriptor; + + descriptor.vertex.module = utils::CreateShaderModule(device, R"( + @vertex + fn main(@builtin(vertex_index) VertexIndex : u32) -> @builtin(position) vec4 { + var pos = array, 3>( + vec2( 1.0, 1.0), + vec2(-1.0, -1.0), + vec2( 1.0, -1.0)); + return vec4(pos[VertexIndex], 0.0, 1.0); + })"); + + if (hasFragmentStage) { + descriptor.cFragment.module = utils::CreateShaderModule(device, R"( + @fragment fn main() -> @location(0) vec4 { + return vec4(0.0, 1.0, 0.0, 1.0); + })"); + } else { + descriptor.fragment = nullptr; + descriptor.EnableDepthStencil(kDepthStencilFormat); + } + + return device.CreateRenderPipeline(&descriptor); + } + + void EncodeComputeTimestampWrites( + const wgpu::CommandEncoder& encoder, + const std::vector& timestampWrites, + bool hasPipeline = true) { + wgpu::ComputePassDescriptor descriptor; + descriptor.timestampWriteCount = timestampWrites.size(); + descriptor.timestampWrites = timestampWrites.data(); + + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(&descriptor); + if (hasPipeline) { + pass.SetPipeline(computePipeline); + pass.DispatchWorkgroups(1, 1, 1); + } + pass.End(); + } + + void EncodeRenderTimestampWrites( + const wgpu::CommandEncoder& encoder, + const std::vector& timestampWrites, + bool hasPipeline = true, + bool hasFragmentStage = true) { + wgpu::Texture depthTexture = CreateRenderTexture(kDepthStencilFormat); + utils::ComboRenderPassDescriptor renderPassDesc = + hasFragmentStage + ? utils::ComboRenderPassDescriptor({CreateRenderTexture(kColorFormat).CreateView()}) + : utils::ComboRenderPassDescriptor( + {}, CreateRenderTexture(kDepthStencilFormat).CreateView()); + renderPassDesc.timestampWriteCount = timestampWrites.size(); + renderPassDesc.timestampWrites = timestampWrites.data(); + + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPassDesc); + if (hasPipeline) { + wgpu::RenderPipeline renderPipeline = CreateRenderPipeline(hasFragmentStage); + pass.SetPipeline(renderPipeline); + pass.Draw(3); + } + pass.End(); + } + void TestTimestampWritesOnComputePass( const std::vector& timestampWrites, - const std::vector& timestampWritesOnAnotherPass = {}) { + const std::vector& timestampWritesOnAnotherPass = {}, + bool hasPipeline = true) { size_t queryCount = timestampWrites.size() + timestampWritesOnAnotherPass.size(); // The destination buffer offset must be a multiple of 256. wgpu::Buffer destination = CreateResolveBuffer(queryCount * kMinDestinationOffset + sizeof(uint64_t)); - wgpu::ComputePassDescriptor descriptor; - descriptor.timestampWriteCount = timestampWrites.size(); - descriptor.timestampWrites = timestampWrites.data(); - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - wgpu::ComputePassEncoder pass = encoder.BeginComputePass(&descriptor); - pass.SetPipeline(computePipeline); - pass.DispatchWorkgroups(1, 1, 1); - pass.End(); + EncodeComputeTimestampWrites(encoder, timestampWrites, hasPipeline); + + // Begin another compute pass if the timestampWritesOnAnotherPass is set. + if (!timestampWritesOnAnotherPass.empty()) { + EncodeComputeTimestampWrites(encoder, timestampWritesOnAnotherPass, hasPipeline); + } // Resolve queries one by one because the query set at the beginning of pass may be // different with the one at the end of pass. @@ -575,25 +639,10 @@ class TimestampQueryTests : public QueryTests { encoder.ResolveQuerySet(timestampWrites[i].querySet, timestampWrites[i].queryIndex, 1, destination, i * kMinDestinationOffset); } - - // Begin another compute pass if the timestampWritesOnAnotherPass is set. - if (!timestampWritesOnAnotherPass.empty()) { - wgpu::ComputePassDescriptor descriptor2; - descriptor2.timestampWriteCount = timestampWritesOnAnotherPass.size(); - descriptor2.timestampWrites = timestampWritesOnAnotherPass.data(); - - wgpu::ComputePassEncoder pass2 = encoder.BeginComputePass(&descriptor2); - pass2.SetPipeline(computePipeline); - pass2.DispatchWorkgroups(1, 1, 1); - pass2.End(); - - for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) { - // Resolve queries one by one because the query set at the beginning of pass may be - // different with the one at the end of pass. - encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet, - timestampWritesOnAnotherPass[i].queryIndex, 1, destination, - (timestampWrites.size() + i) * kMinDestinationOffset); - } + for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) { + encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet, + timestampWritesOnAnotherPass[i].queryIndex, 1, destination, + (timestampWrites.size() + i) * kMinDestinationOffset); } wgpu::CommandBuffer commands = encoder.Finish(); @@ -607,19 +656,22 @@ class TimestampQueryTests : public QueryTests { void TestTimestampWritesOnRenderPass( const std::vector& timestampWrites, - const std::vector& timestampWritesOnAnotherPass = {}) { + const std::vector& timestampWritesOnAnotherPass = {}, + bool hasPipeline = true, + bool hasFragmentStage = true) { size_t queryCount = timestampWrites.size() + timestampWritesOnAnotherPass.size(); // The destination buffer offset must be a multiple of 256. wgpu::Buffer destination = CreateResolveBuffer(queryCount * kMinDestinationOffset + sizeof(uint64_t)); - utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); - renderPass.renderPassInfo.timestampWriteCount = timestampWrites.size(); - renderPass.renderPassInfo.timestampWrites = timestampWrites.data(); - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); - pass.End(); + EncodeRenderTimestampWrites(encoder, timestampWrites, hasPipeline, hasFragmentStage); + + // Begin another render pass if the timestampWritesOnAnotherPass is set. + if (!timestampWritesOnAnotherPass.empty()) { + EncodeRenderTimestampWrites(encoder, timestampWritesOnAnotherPass, hasPipeline, + hasFragmentStage); + } // Resolve queries one by one because the query set at the beginning of pass may be // different with the one at the end of pass. @@ -628,22 +680,10 @@ class TimestampQueryTests : public QueryTests { destination, i * kMinDestinationOffset); } - // Begin another render pass if the timestampWritesOnAnotherPass is set. - if (!timestampWritesOnAnotherPass.empty()) { - utils::BasicRenderPass renderPass2 = utils::CreateBasicRenderPass(device, 1, 1); - renderPass2.renderPassInfo.timestampWriteCount = timestampWritesOnAnotherPass.size(); - renderPass2.renderPassInfo.timestampWrites = timestampWritesOnAnotherPass.data(); - - wgpu::RenderPassEncoder pass2 = encoder.BeginRenderPass(&renderPass2.renderPassInfo); - pass2.End(); - - for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) { - // Resolve queries one by one because the query set at the beginning of pass may be - // different with the one at the end of pass. - encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet, - timestampWritesOnAnotherPass[i].queryIndex, 1, destination, - (timestampWrites.size() + i) * kMinDestinationOffset); - } + for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) { + encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet, + timestampWritesOnAnotherPass[i].queryIndex, 1, destination, + (timestampWrites.size() + i) * kMinDestinationOffset); } wgpu::CommandBuffer commands = encoder.Finish(); @@ -669,9 +709,6 @@ TEST_P(TimestampQueryTests, QuerySetCreation) { // Test calling timestamp query from command encoder TEST_P(TimestampQueryTests, TimestampOnCommandEncoder) { - // TODO (dawn:1250): Still not implemented on Metal backend. - DAWN_TEST_UNSUPPORTED_IF(IsMetal()); - constexpr uint32_t kQueryCount = 2; // Write timestamp with different query indexes @@ -709,6 +746,10 @@ TEST_P(TimestampQueryTests, TimestampOnCommandEncoder) { // Test calling timestamp query from render pass encoder TEST_P(TimestampQueryTests, TimestampOnRenderPass) { + // TODO (dawn:1250): Split writeTimestamp() to another extension which is not supported on Apple + // devices + DAWN_TEST_UNSUPPORTED_IF(IsMacOS() && IsMetal() && IsApple()); + constexpr uint32_t kQueryCount = 2; // Write timestamp with different query indexes @@ -754,6 +795,10 @@ TEST_P(TimestampQueryTests, TimestampOnRenderPass) { // Test calling timestamp query from compute pass encoder TEST_P(TimestampQueryTests, TimestampOnComputePass) { + // TODO (dawn:1250): Split writeTimestamp() to another extension which is not supported on Apple + // devices + DAWN_TEST_UNSUPPORTED_IF(IsMacOS() && IsMetal() && IsApple()); + constexpr uint32_t kQueryCount = 2; // Write timestamp with different query indexes @@ -815,19 +860,34 @@ TEST_P(TimestampQueryTests, TimestampOnComputePass) { } } -// Test timestampWrites setting in compute pass descriptor -TEST_P(TimestampQueryTests, TimestampWritesOnComputePass) { +// Test timestampWrites with query set in compute pass descriptor +TEST_P(TimestampQueryTests, TimestampWritesQuerySetOnComputePass) { // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until // the issue is fixed. DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); - // TODO (dawn:1250): Still not implemented on Metal backend. - DAWN_TEST_UNSUPPORTED_IF(IsMetal()); + // TODO (dawn:1473): Metal bug which fails to store GPU counters to different sample buffer. + DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple()); + + // Set timestampWrites with different query set on same compute pass + wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); + wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); + + TestTimestampWritesOnComputePass({{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}, + {querySet1, 0, wgpu::ComputePassTimestampLocation::End}}); +} + +// Test timestampWrites with query index in compute pass descriptor +TEST_P(TimestampQueryTests, TimestampWritesQueryIndexOnComputePass) { + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); constexpr uint32_t kQueryCount = 2; - // Set timestampWrites with different query indexes and locations on same compute pass + // Set timestampWrites with different query indexes on same compute pass { wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); @@ -836,16 +896,35 @@ TEST_P(TimestampQueryTests, TimestampWritesOnComputePass) { {querySet, 1, wgpu::ComputePassTimestampLocation::End}}); } - // Set timestampWrites with different query set on same compute pass + // Set timestampWrites with same query index on same compute pass { - wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); - wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); TestTimestampWritesOnComputePass( - {{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}, - {querySet1, 0, wgpu::ComputePassTimestampLocation::End}}); + {{querySet, 0, wgpu::ComputePassTimestampLocation::Beginning}, + {querySet, 0, wgpu::ComputePassTimestampLocation::End}}); } + // Set timestampWrites with same query indexes on different compute pass + { + wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(kQueryCount); + wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(kQueryCount); + + TestTimestampWritesOnComputePass( + {{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}}, + {{querySet1, 0, wgpu::ComputePassTimestampLocation::End}}); + } +} + +// Test timestampWrites with timestamp location in compute pass descriptor +TEST_P(TimestampQueryTests, TimestampWritesLocationOnComputePass) { + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); + + constexpr uint32_t kQueryCount = 2; + // Set timestampWrites with only one value of ComputePassTimestampLocation { wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); @@ -856,63 +935,78 @@ TEST_P(TimestampQueryTests, TimestampWritesOnComputePass) { TestTimestampWritesOnComputePass({{querySet, 1, wgpu::ComputePassTimestampLocation::End}}); } - // Set timestampWrites with same query set and query index on same compute pass + // Set timestampWrites with same location on different compute pass { - wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); + wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); TestTimestampWritesOnComputePass( - {{querySet, 0, wgpu::ComputePassTimestampLocation::Beginning}, - {querySet, 0, wgpu::ComputePassTimestampLocation::End}}); - } - - // Set timestampWrites with same query indexes and locations on different compute pass - { - wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(kQueryCount); - wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(kQueryCount); - - TestTimestampWritesOnComputePass( - {{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}, - {querySet0, 1, wgpu::ComputePassTimestampLocation::End}}, - {{querySet1, 0, wgpu::ComputePassTimestampLocation::Beginning}, - {querySet1, 1, wgpu::ComputePassTimestampLocation::End}}); + {{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}}, + {{querySet1, 0, wgpu::ComputePassTimestampLocation::Beginning}}); } } -// Test timestampWrites setting in render pass descriptor -TEST_P(TimestampQueryTests, TimestampWritesOnRenderPass) { +// Test timestampWrites on compute pass without pipeline +TEST_P(TimestampQueryTests, TimestampWritesOnComputePassWithNoPipline) { // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until // the issue is fixed. DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); - // TODO (dawn:1250): Still not implemented on Metal backend. - DAWN_TEST_UNSUPPORTED_IF(IsMetal()); + // TODO (dawn:1473): Metal fails to store GPU counters to sampleBufferAttachments on empty + // encoders. + DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple()); - constexpr uint32_t kQueryCount = 2; + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2); + + TestTimestampWritesOnComputePass({{querySet, 0, wgpu::ComputePassTimestampLocation::Beginning}, + {querySet, 1, wgpu::ComputePassTimestampLocation::End}}, + {}, false); +} + +// Test timestampWrites with query set in render pass descriptor +TEST_P(TimestampQueryTests, TimestampWritesQuerySetOnRenderPass) { + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); + + // TODO (dawn:1473): Metal bug which fails to store GPU counters to different sample buffer. + DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple()); + + // Set timestampWrites with different query set on same render pass + wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); + wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); + + TestTimestampWritesOnRenderPass({{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning}, + {querySet1, 0, wgpu::RenderPassTimestampLocation::End}}); +} + +// Test timestampWrites with query index in compute pass descriptor +TEST_P(TimestampQueryTests, TimestampWritesQueryIndexOnRenderPass) { + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); // Set timestampWrites with different query indexes and locations, not need test write same // query index due to it's not allowed on render pass. - { - wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2); - TestTimestampWritesOnRenderPass( - {{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}, - {querySet, 1, wgpu::RenderPassTimestampLocation::End}}); - } + TestTimestampWritesOnRenderPass({{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}, + {querySet, 1, wgpu::RenderPassTimestampLocation::End}}); +} - // Set timestampWrites with different query set on same render pass - { - wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); - wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); - - TestTimestampWritesOnRenderPass( - {{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning}, - {querySet1, 0, wgpu::RenderPassTimestampLocation::End}}); - } +// Test timestampWrites with timestamp location in render pass descriptor +TEST_P(TimestampQueryTests, TimestampWritesLocationOnRenderPass) { + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); // Set timestampWrites with only one value of RenderPassTimestampLocation { - wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2); TestTimestampWritesOnRenderPass( {{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}}); @@ -920,19 +1014,45 @@ TEST_P(TimestampQueryTests, TimestampWritesOnRenderPass) { TestTimestampWritesOnRenderPass({{querySet, 1, wgpu::RenderPassTimestampLocation::End}}); } - // Set timestampWrites with same query indexes and locations on different render pass + // Set timestampWrites with same location on different render pass { - wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(kQueryCount); - wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(kQueryCount); + wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); + wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); TestTimestampWritesOnRenderPass( - {{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning}, - {querySet0, 1, wgpu::RenderPassTimestampLocation::End}}, - {{querySet1, 0, wgpu::RenderPassTimestampLocation::Beginning}, - {querySet1, 1, wgpu::RenderPassTimestampLocation::End}}); + {{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning}}, + {{querySet1, 0, wgpu::RenderPassTimestampLocation::Beginning}}); } } +// Test timestampWrites on render pass without pipeline +TEST_P(TimestampQueryTests, TimestampWritesOnRenderPassWithNoPipline) { + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); + + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2); + TestTimestampWritesOnRenderPass({{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}, + {querySet, 1, wgpu::RenderPassTimestampLocation::End}}, + {}, false); +} + +// Test timestampWrites on render pass with pipeline but no fragment stage +TEST_P(TimestampQueryTests, TimestampWritesOnRenderPassWithOnlyVertexStage) { + DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("use_placeholder_fragment_in_vertex_only_pipeline")); + + // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that + // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until + // the issue is fixed. + DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); + + wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2); + TestTimestampWritesOnRenderPass({{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}, + {querySet, 1, wgpu::RenderPassTimestampLocation::End}}, + {}, true, false); +} + // Test resolving timestamp query from another different encoder TEST_P(TimestampQueryTests, ResolveFromAnotherEncoder) { constexpr uint32_t kQueryCount = 2;