Implement timestampWrites on Metal

Metal configures the query set and query index for the beginning and end
of passes in pass descriptor when beginning a pass encoder, so we need
to record all timestamp writes information in BeginXxxPassCmd. For the
platfroms that support timestamp query, it must support timestamp write
at command boundary or stage boundary, if the stage boundary is
supported, use sampleBufferAttachments API for Metal implementation,
otherwise simulate timestamp write using sampleCountersInBuffer API
after begining a pass and before ending a pass.

Bug: dawn:1250
Change-Id: I462cb05a0102521cd2df4db3ac6f71863419b933
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/93940
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
This commit is contained in:
Li Hao 2022-09-16 15:00:19 +00:00 committed by Dawn LUCI CQ
parent ba68620f6f
commit 92182412b8
14 changed files with 658 additions and 183 deletions

View File

@ -306,6 +306,12 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"integer that is greater than 2^24 or smaller than -2^24). This toggle is also enabled on " "integer that is greater than 2^24 or smaller than -2^24). This toggle is also enabled on "
"Intel GPUs on Metal backend due to a driver issue on Intel Metal driver.", "Intel GPUs on Metal backend due to a driver issue on Intel Metal driver.",
"https://crbug.com/dawn/537"}}, "https://crbug.com/dawn/537"}},
{Toggle::MetalUseDummyBlitEncoderForWriteTimestamp,
{"metal_use_dummy_blit_encoder_for_write_timestamp",
"Add dummy blit command to blit encoder when encoding writeTimestamp as workaround on Metal."
"This toggle is enabled by default on Metal backend where GPU counters cannot be stored to"
"sampleBufferAttachments on empty blit encoder.",
"https://crbug.com/dawn/1473"}},
// Comment to separate the }} so it is clearer what to copy-paste to add a toggle. // Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
}}; }};
} // anonymous namespace } // anonymous namespace

View File

@ -79,6 +79,7 @@ enum class Toggle {
D3D12AllocateExtraMemoryFor2DArrayTexture, D3D12AllocateExtraMemoryFor2DArrayTexture,
D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset, D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
ApplyClearBigIntegerColorValueWithDraw, ApplyClearBigIntegerColorValueWithDraw,
MetalUseDummyBlitEncoderForWriteTimestamp,
EnumCount, EnumCount,
InvalidEnum = EnumCount, InvalidEnum = EnumCount,

View File

@ -24,6 +24,7 @@
#include "dawn/native/MetalBackend.h" #include "dawn/native/MetalBackend.h"
#include "dawn/native/metal/BufferMTL.h" #include "dawn/native/metal/BufferMTL.h"
#include "dawn/native/metal/DeviceMTL.h" #include "dawn/native/metal/DeviceMTL.h"
#include "dawn/native/metal/UtilsMetal.h"
#if DAWN_PLATFORM_IS(MACOS) #if DAWN_PLATFORM_IS(MACOS)
#import <IOKit/IOKitLib.h> #import <IOKit/IOKitLib.h>
@ -170,18 +171,6 @@ MaybeError GetDevicePCIInfo(id<MTLDevice> device, PCIIDs* ids) {
#error "Unsupported Apple platform." #error "Unsupported Apple platform."
#endif #endif
DAWN_NOINLINE bool IsCounterSamplingBoundarySupport(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0)) {
bool isBlitBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary];
bool isDispatchBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary];
bool isDrawBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary];
return isBlitBoundarySupported && isDispatchBoundarySupported && isDrawBoundarySupported;
}
// This method has seen hard-to-debug crashes. See crbug.com/dawn/1102. // This method has seen hard-to-debug crashes. See crbug.com/dawn/1102.
// For now, it is written defensively, with many potentially unnecessary guards until // For now, it is written defensively, with many potentially unnecessary guards until
// we narrow down the cause of the problem. // we narrow down the cause of the problem.
@ -246,11 +235,13 @@ DAWN_NOINLINE bool IsGPUCounterSupported(id<MTLDevice> device,
} }
if (@available(macOS 11.0, iOS 14.0, *)) { if (@available(macOS 11.0, iOS 14.0, *)) {
// Check whether it can read GPU counters at the specified command boundary. Apple // Check whether it can read GPU counters at the specified command boundary or stage
// family GPUs do not support sampling between different Metal commands, because // boundary. Apple family GPUs do not support sampling between different Metal commands,
// they defer fragment processing until after the GPU processes all the primitives // because they defer fragment processing until after the GPU processes all the primitives
// in the render pass. // in the render pass. GPU counters are only available if sampling at least one of the
if (!IsCounterSamplingBoundarySupport(device)) { // command or stage boundaries is supported.
if (!SupportCounterSamplingAtCommandBoundary(device) &&
!SupportCounterSamplingAtStageBoundary(device)) {
return false; return false;
} }
} }

View File

@ -16,12 +16,15 @@
#define SRC_DAWN_NATIVE_METAL_COMMANDBUFFERMTL_H_ #define SRC_DAWN_NATIVE_METAL_COMMANDBUFFERMTL_H_
#include "dawn/native/CommandBuffer.h" #include "dawn/native/CommandBuffer.h"
#include "dawn/native/Commands.h"
#include "dawn/native/Error.h" #include "dawn/native/Error.h"
#import <Metal/Metal.h> #import <Metal/Metal.h>
namespace dawn::native { namespace dawn::native {
class CommandEncoder; class CommandEncoder;
struct BeginComputePassCmd;
struct BeginRenderPassCmd;
} }
namespace dawn::native::metal { namespace dawn::native::metal {
@ -55,8 +58,10 @@ class CommandBuffer final : public CommandBufferBase {
private: private:
using CommandBufferBase::CommandBufferBase; using CommandBufferBase::CommandBufferBase;
MaybeError EncodeComputePass(CommandRecordingContext* commandContext); MaybeError EncodeComputePass(CommandRecordingContext* commandContext,
MaybeError EncodeRenderPass(id<MTLRenderCommandEncoder> encoder); BeginComputePassCmd* computePassCmd);
MaybeError EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
BeginRenderPassCmd* renderPassCmd);
}; };
} // namespace dawn::native::metal } // namespace dawn::native::metal

View File

@ -49,7 +49,129 @@ MTLIndexType MTLIndexFormat(wgpu::IndexFormat format) {
} }
} }
NSRef<MTLRenderPassDescriptor> CreateMTLRenderPassDescriptor(BeginRenderPassCmd* renderPass) { template <typename PassDescriptor>
class SampleBufferAttachment {
public:
void SetSampleBuffer(PassDescriptor* descriptor, id<MTLCounterSampleBuffer> sampleBuffer)
API_AVAILABLE(macos(11.0), ios(14.0));
void SetStartSampleIndex(PassDescriptor* descriptor, NSUInteger sampleIndex)
API_AVAILABLE(macos(11.0), ios(14.0));
void SetEndSampleIndex(PassDescriptor* descriptor, NSUInteger sampleIndex)
API_AVAILABLE(macos(11.0), ios(14.0));
private:
// Initialized to the maximum value, in order to start from 0 after the first increment.
NSUInteger attachmentIndex = NSUIntegerMax;
// TODO(dawn:1473): The maximum of sampleBufferAttachments depends on the length of MTLDevice's
// counterSets, but Metal does not match the allowed maximum of sampleBufferAttachments with the
// length of counterSets on AGX family. Hardcode as a constant and check this whenever Metal
// could get the matched value.
static constexpr NSUInteger kMaxSampleBufferAttachments = 4;
};
template <typename PassDescriptor>
void SampleBufferAttachment<PassDescriptor>::SetSampleBuffer(
PassDescriptor* descriptor,
id<MTLCounterSampleBuffer> sampleBuffer) API_AVAILABLE(macos(11.0), ios(14.0)) {
attachmentIndex++;
ASSERT(attachmentIndex < kMaxSampleBufferAttachments);
descriptor.sampleBufferAttachments[attachmentIndex].sampleBuffer = sampleBuffer;
}
// Must be called after SetSampleBuffer
template <>
void SampleBufferAttachment<MTLRenderPassDescriptor>::SetStartSampleIndex(
MTLRenderPassDescriptor* descriptor,
NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) {
ASSERT(attachmentIndex < kMaxSampleBufferAttachments);
descriptor.sampleBufferAttachments[attachmentIndex].startOfVertexSampleIndex = sampleIndex;
}
// Must be called after SetSampleBuffer
template <>
void SampleBufferAttachment<MTLRenderPassDescriptor>::SetEndSampleIndex(
MTLRenderPassDescriptor* descriptor,
NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) {
ASSERT(attachmentIndex < kMaxSampleBufferAttachments);
descriptor.sampleBufferAttachments[attachmentIndex].endOfFragmentSampleIndex = sampleIndex;
}
// Must be called after SetSampleBuffer
template <>
void SampleBufferAttachment<MTLComputePassDescriptor>::SetStartSampleIndex(
MTLComputePassDescriptor* descriptor,
NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) {
ASSERT(attachmentIndex < kMaxSampleBufferAttachments);
descriptor.sampleBufferAttachments[attachmentIndex].startOfEncoderSampleIndex = sampleIndex;
}
// Must be called after SetSampleBuffer
template <>
void SampleBufferAttachment<MTLComputePassDescriptor>::SetEndSampleIndex(
MTLComputePassDescriptor* descriptor,
NSUInteger sampleIndex) API_AVAILABLE(macos(11.0), ios(14.0)) {
// TODO(dawn:1473): Use MTLComputePassSampleBuffers or query method instead of the magic number
// 4 when Metal could get the maximum of sampleBufferAttachments on compute pass
ASSERT(attachmentIndex < kMaxSampleBufferAttachments);
descriptor.sampleBufferAttachments[attachmentIndex].endOfEncoderSampleIndex = sampleIndex;
}
template <typename PassDescriptor, typename BeginPass>
void SetSampleBufferAttachments(PassDescriptor* descriptor, BeginPass* cmd) {
// Use @available instead of API_AVAILABLE because GetCounterSampleBuffer() also needs checking
// API availability.
if (@available(macOS 11.0, iOS 14.0, *)) {
QuerySetBase* beginQuerySet = cmd->beginTimestamp.querySet.Get();
QuerySetBase* endQuerySet = cmd->endTimestamp.querySet.Get();
SampleBufferAttachment<PassDescriptor> sampleBufferAttachment;
if (beginQuerySet != nullptr) {
sampleBufferAttachment.SetSampleBuffer(
descriptor, ToBackend(beginQuerySet)->GetCounterSampleBuffer());
sampleBufferAttachment.SetStartSampleIndex(descriptor,
NSUInteger(cmd->beginTimestamp.queryIndex));
if (beginQuerySet == endQuerySet) {
sampleBufferAttachment.SetEndSampleIndex(descriptor,
NSUInteger(cmd->endTimestamp.queryIndex));
} else {
sampleBufferAttachment.SetEndSampleIndex(descriptor, MTLCounterDontSample);
}
}
// Set to other sampleBufferAttachment if the endQuerySet is different with beginQuerySet.
if (endQuerySet != nullptr && beginQuerySet != endQuerySet) {
sampleBufferAttachment.SetSampleBuffer(
descriptor, ToBackend(endQuerySet)->GetCounterSampleBuffer());
sampleBufferAttachment.SetStartSampleIndex(descriptor, MTLCounterDontSample);
sampleBufferAttachment.SetEndSampleIndex(descriptor,
NSUInteger(cmd->endTimestamp.queryIndex));
}
} else {
UNREACHABLE();
}
}
NSRef<MTLComputePassDescriptor> CreateMTLComputePassDescriptor(BeginComputePassCmd* computePass)
API_AVAILABLE(macos(11.0), ios(14.0)) {
// Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef
NSRef<MTLComputePassDescriptor> descriptorRef =
[MTLComputePassDescriptor computePassDescriptor];
MTLComputePassDescriptor* descriptor = descriptorRef.Get();
// MTLDispatchTypeSerial is the same dispatch type as the deafult MTLComputeCommandEncoder.
// MTLDispatchTypeConcurrent requires memory barriers to ensure multiple commands synchronize
// access to the same resources, which we may support it later.
descriptor.dispatchType = MTLDispatchTypeSerial;
SetSampleBufferAttachments(descriptor, computePass);
return descriptorRef;
}
NSRef<MTLRenderPassDescriptor> CreateMTLRenderPassDescriptor(
BeginRenderPassCmd* renderPass,
bool useCounterSamplingAtStageBoundary) {
// Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef // Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef
NSRef<MTLRenderPassDescriptor> descriptorRef = [MTLRenderPassDescriptor renderPassDescriptor]; NSRef<MTLRenderPassDescriptor> descriptorRef = [MTLRenderPassDescriptor renderPassDescriptor];
MTLRenderPassDescriptor* descriptor = descriptorRef.Get(); MTLRenderPassDescriptor* descriptor = descriptorRef.Get();
@ -197,9 +319,36 @@ NSRef<MTLRenderPassDescriptor> CreateMTLRenderPassDescriptor(BeginRenderPassCmd*
ToBackend(renderPass->occlusionQuerySet.Get())->GetVisibilityBuffer(); ToBackend(renderPass->occlusionQuerySet.Get())->GetVisibilityBuffer();
} }
if (@available(macOS 11.0, iOS 14.0, *)) {
if (useCounterSamplingAtStageBoundary) {
SetSampleBufferAttachments(descriptor, renderPass);
}
}
return descriptorRef; return descriptorRef;
} }
void EncodeEmptyBlitEncoderForWriteTimestamp(Device* device,
CommandRecordingContext* commandContext,
WriteTimestampCmd* cmd)
API_AVAILABLE(macos(11.0), ios(14.0)) {
commandContext->EndBlit();
MTLBlitPassDescriptor* descriptor = [[MTLBlitPassDescriptor alloc] init];
if (cmd->querySet.Get() != nullptr) {
descriptor.sampleBufferAttachments[0].sampleBuffer =
ToBackend(cmd->querySet.Get())->GetCounterSampleBuffer();
descriptor.sampleBufferAttachments[0].startOfEncoderSampleIndex = MTLCounterDontSample;
descriptor.sampleBufferAttachments[0].endOfEncoderSampleIndex = NSUInteger(cmd->queryIndex);
id<MTLBlitCommandEncoder> blit = commandContext->BeginBlit(descriptor);
if (device->IsToggleEnabled(Toggle::MetalUseDummyBlitEncoderForWriteTimestamp)) {
[blit fillBuffer:device->GetDummyBlitMtlBuffer() range:NSMakeRange(0, 1) value:0];
}
commandContext->EndBlit();
}
}
// Metal uses a physical addressing mode which means buffers in the shading language are // Metal uses a physical addressing mode which means buffers in the shading language are
// just pointers to the virtual address of their start. This means there is no way to know // just pointers to the virtual address of their start. This means there is no way to know
// the length of a buffer to compute the length() of unsized arrays at the end of storage // the length of a buffer to compute the length() of unsized arrays at the end of storage
@ -612,18 +761,13 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
case Command::BeginComputePass: { case Command::BeginComputePass: {
BeginComputePassCmd* cmd = mCommands.NextCommand<BeginComputePassCmd>(); BeginComputePassCmd* cmd = mCommands.NextCommand<BeginComputePassCmd>();
if (cmd->beginTimestamp.querySet.Get() != nullptr ||
cmd->endTimestamp.querySet.Get() != nullptr) {
return DAWN_UNIMPLEMENTED_ERROR("timestampWrites unimplemented.");
}
for (const SyncScopeResourceUsage& scope : for (const SyncScopeResourceUsage& scope :
GetResourceUsages().computePasses[nextComputePassNumber].dispatchUsages) { GetResourceUsages().computePasses[nextComputePassNumber].dispatchUsages) {
LazyClearSyncScope(scope, commandContext); LazyClearSyncScope(scope, commandContext);
} }
commandContext->EndBlit(); commandContext->EndBlit();
DAWN_TRY(EncodeComputePass(commandContext)); DAWN_TRY(EncodeComputePass(commandContext, cmd));
nextComputePassNumber++; nextComputePassNumber++;
break; break;
@ -632,22 +776,19 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
case Command::BeginRenderPass: { case Command::BeginRenderPass: {
BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>(); BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
if (cmd->beginTimestamp.querySet.Get() != nullptr ||
cmd->endTimestamp.querySet.Get() != nullptr) {
return DAWN_UNIMPLEMENTED_ERROR("timestampWrites unimplemented.");
}
LazyClearSyncScope(GetResourceUsages().renderPasses[nextRenderPassNumber], LazyClearSyncScope(GetResourceUsages().renderPasses[nextRenderPassNumber],
commandContext); commandContext);
commandContext->EndBlit(); commandContext->EndBlit();
LazyClearRenderPassAttachments(cmd); LazyClearRenderPassAttachments(cmd);
NSRef<MTLRenderPassDescriptor> descriptor = CreateMTLRenderPassDescriptor(cmd); NSRef<MTLRenderPassDescriptor> descriptor = CreateMTLRenderPassDescriptor(
cmd, ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary());
DAWN_TRY(EncodeMetalRenderPass( DAWN_TRY(EncodeMetalRenderPass(
ToBackend(GetDevice()), commandContext, descriptor.Get(), cmd->width, ToBackend(GetDevice()), commandContext, descriptor.Get(), cmd->width,
cmd->height, [this](id<MTLRenderCommandEncoder> encoder) -> MaybeError { cmd->height,
return this->EncodeRenderPass(encoder); [this](id<MTLRenderCommandEncoder> encoder, BeginRenderPassCmd* cmd)
})); -> MaybeError { return this->EncodeRenderPass(encoder, cmd); },
cmd));
nextRenderPassNumber++; nextRenderPassNumber++;
break; break;
@ -905,16 +1046,29 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
case Command::WriteTimestamp: { case Command::WriteTimestamp: {
WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>(); WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
QuerySet* querySet = ToBackend(cmd->querySet.Get());
if (ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()) {
if (@available(macos 11.0, iOS 14.0, *)) {
// Simulate writeTimestamp cmd between blit commands on the devices which
// supports counter sampling at stage boundary.
EncodeEmptyBlitEncoderForWriteTimestamp(ToBackend(GetDevice()),
commandContext, cmd);
} else {
UNREACHABLE();
}
} else {
if (@available(macos 10.15, iOS 14.0, *)) { if (@available(macos 10.15, iOS 14.0, *)) {
ASSERT(ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary());
[commandContext->EnsureBlit() [commandContext->EnsureBlit()
sampleCountersInBuffer:querySet->GetCounterSampleBuffer() sampleCountersInBuffer:ToBackend(cmd->querySet.Get())
->GetCounterSampleBuffer()
atSampleIndex:NSUInteger(cmd->queryIndex) atSampleIndex:NSUInteger(cmd->queryIndex)
withBarrier:YES]; withBarrier:YES];
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }
}
break; break;
} }
@ -985,18 +1139,64 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
return {}; return {};
} }
MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandContext) { MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandContext,
BeginComputePassCmd* computePassCmd) {
ComputePipeline* lastPipeline = nullptr; ComputePipeline* lastPipeline = nullptr;
StorageBufferLengthTracker storageBufferLengths = {}; StorageBufferLengthTracker storageBufferLengths = {};
BindGroupTracker bindGroups(&storageBufferLengths); BindGroupTracker bindGroups(&storageBufferLengths);
id<MTLComputeCommandEncoder> encoder = commandContext->BeginCompute(); id<MTLComputeCommandEncoder> encoder;
// When counter sampling is supported at stage boundary, begin a configurable compute pass
// encoder which is supported since macOS 11.0+ and iOS 14.0+ and set timestamp writes to
// compute pass descriptor, otherwise begin a default compute pass encoder, and simulate
// timestamp writes using sampleCountersInBuffer API at the beginning and end of compute pass.
if (ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary()) {
if (@available(macOS 11.0, iOS 14.0, *)) {
NSRef<MTLComputePassDescriptor> descriptor =
CreateMTLComputePassDescriptor(computePassCmd);
encoder = commandContext->BeginCompute(descriptor.Get());
} else {
UNREACHABLE();
}
} else {
encoder = commandContext->BeginCompute();
if (@available(macos 10.15, iOS 14.0, *)) {
if (computePassCmd->beginTimestamp.querySet.Get() != nullptr) {
ASSERT(ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary());
[encoder
sampleCountersInBuffer:ToBackend(computePassCmd->beginTimestamp.querySet.Get())
->GetCounterSampleBuffer()
atSampleIndex:NSUInteger(computePassCmd->beginTimestamp.queryIndex)
withBarrier:YES];
}
}
}
Command type; Command type;
while (mCommands.NextCommandId(&type)) { while (mCommands.NextCommandId(&type)) {
switch (type) { switch (type) {
case Command::EndComputePass: { case Command::EndComputePass: {
mCommands.NextCommand<EndComputePassCmd>(); mCommands.NextCommand<EndComputePassCmd>();
if (@available(macos 10.15, iOS 14.0, *)) {
// Simulate timestamp write at the end of render pass if it does not support
// counter sampling at stage boundary.
if (ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary() &&
computePassCmd->endTimestamp.querySet.Get() != nullptr) {
ASSERT(!ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary());
[encoder
sampleCountersInBuffer:ToBackend(
computePassCmd->endTimestamp.querySet.Get())
->GetCounterSampleBuffer()
atSampleIndex:NSUInteger(
computePassCmd->endTimestamp.queryIndex)
withBarrier:YES];
}
}
commandContext->EndCompute(); commandContext->EndCompute();
return {}; return {};
} }
@ -1104,7 +1304,8 @@ MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandCont
UNREACHABLE(); UNREACHABLE();
} }
MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder) { MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
BeginRenderPassCmd* renderPassCmd) {
bool enableVertexPulling = GetDevice()->IsToggleEnabled(Toggle::MetalEnableVertexPulling); bool enableVertexPulling = GetDevice()->IsToggleEnabled(Toggle::MetalEnableVertexPulling);
RenderPipeline* lastPipeline = nullptr; RenderPipeline* lastPipeline = nullptr;
id<MTLBuffer> indexBuffer = nullptr; id<MTLBuffer> indexBuffer = nullptr;
@ -1116,6 +1317,20 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder)
VertexBufferTracker vertexBuffers(&storageBufferLengths); VertexBufferTracker vertexBuffers(&storageBufferLengths);
BindGroupTracker bindGroups(&storageBufferLengths); BindGroupTracker bindGroups(&storageBufferLengths);
if (@available(macos 10.15, iOS 14.0, *)) {
// Simulate timestamp write at the beginning of render pass by
// sampleCountersInBuffer if it does not support counter sampling at stage boundary.
if (ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary() &&
renderPassCmd->beginTimestamp.querySet.Get() != nullptr) {
ASSERT(!ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary());
[encoder sampleCountersInBuffer:ToBackend(renderPassCmd->beginTimestamp.querySet.Get())
->GetCounterSampleBuffer()
atSampleIndex:NSUInteger(renderPassCmd->beginTimestamp.queryIndex)
withBarrier:YES];
}
}
auto EncodeRenderBundleCommand = [&](CommandIterator* iter, Command type) { auto EncodeRenderBundleCommand = [&](CommandIterator* iter, Command type) {
switch (type) { switch (type) {
case Command::Draw: { case Command::Draw: {
@ -1304,6 +1519,24 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder)
switch (type) { switch (type) {
case Command::EndRenderPass: { case Command::EndRenderPass: {
mCommands.NextCommand<EndRenderPassCmd>(); mCommands.NextCommand<EndRenderPassCmd>();
if (@available(macos 10.15, iOS 14.0, *)) {
// Simulate timestamp write at the end of render pass if it does not support
// counter sampling at stage boundary.
if (ToBackend(GetDevice())->UseCounterSamplingAtCommandBoundary() &&
renderPassCmd->endTimestamp.querySet.Get() != nullptr) {
ASSERT(!ToBackend(GetDevice())->UseCounterSamplingAtStageBoundary());
[encoder
sampleCountersInBuffer:ToBackend(
renderPassCmd->endTimestamp.querySet.Get())
->GetCounterSampleBuffer()
atSampleIndex:NSUInteger(
renderPassCmd->endTimestamp.queryIndex)
withBarrier:YES];
}
}
return {}; return {};
} }

View File

@ -36,10 +36,18 @@ class CommandRecordingContext : NonMovable {
MaybeError PrepareNextCommandBuffer(id<MTLCommandQueue> queue); MaybeError PrepareNextCommandBuffer(id<MTLCommandQueue> queue);
NSPRef<id<MTLCommandBuffer>> AcquireCommands(); NSPRef<id<MTLCommandBuffer>> AcquireCommands();
// Create blit pass encoder from blit pass descriptor
id<MTLBlitCommandEncoder> BeginBlit(MTLBlitPassDescriptor* descriptor)
API_AVAILABLE(macos(11.0), ios(14.0));
id<MTLBlitCommandEncoder> EnsureBlit(); id<MTLBlitCommandEncoder> EnsureBlit();
void EndBlit(); void EndBlit();
// Create a sequential compute pass by default.
id<MTLComputeCommandEncoder> BeginCompute(); id<MTLComputeCommandEncoder> BeginCompute();
// Create configurable compute pass from a descriptor with serial dispatch type which commands
// are executed sequentially.
id<MTLComputeCommandEncoder> BeginCompute(MTLComputePassDescriptor* descriptor)
API_AVAILABLE(macos(11.0), ios(14.0));
void EndCompute(); void EndCompute();
id<MTLRenderCommandEncoder> BeginRender(MTLRenderPassDescriptor* descriptor); id<MTLRenderCommandEncoder> BeginRender(MTLRenderPassDescriptor* descriptor);

View File

@ -62,6 +62,20 @@ NSPRef<id<MTLCommandBuffer>> CommandRecordingContext::AcquireCommands() {
return std::move(mCommands); return std::move(mCommands);
} }
id<MTLBlitCommandEncoder> CommandRecordingContext::BeginBlit(MTLBlitPassDescriptor* descriptor)
API_AVAILABLE(macos(11.0), ios(14.0)) {
ASSERT(descriptor);
ASSERT(mCommands != nullptr);
ASSERT(mBlit == nullptr);
ASSERT(!mInEncoder);
mInEncoder = true;
// The encoder is created autoreleased. Retain it to avoid the autoreleasepool from
// draining from under us.
mBlit.Acquire([[*mCommands blitCommandEncoderWithDescriptor:descriptor] retain]);
return mBlit.Get();
}
id<MTLBlitCommandEncoder> CommandRecordingContext::EnsureBlit() { id<MTLBlitCommandEncoder> CommandRecordingContext::EnsureBlit() {
ASSERT(mCommands != nullptr); ASSERT(mCommands != nullptr);
@ -98,6 +112,20 @@ id<MTLComputeCommandEncoder> CommandRecordingContext::BeginCompute() {
return mCompute.Get(); return mCompute.Get();
} }
id<MTLComputeCommandEncoder> CommandRecordingContext::BeginCompute(
MTLComputePassDescriptor* descriptor) API_AVAILABLE(macos(11.0), ios(14.0)) {
ASSERT(descriptor);
ASSERT(mCommands != nullptr);
ASSERT(mCompute == nullptr);
ASSERT(!mInEncoder);
mInEncoder = true;
// The encoder is created autoreleased. Retain it to avoid the autoreleasepool from
// draining from under us.
mCompute.Acquire([[*mCommands computeCommandEncoderWithDescriptor:descriptor] retain]);
return mCompute.Get();
}
void CommandRecordingContext::EndCompute() { void CommandRecordingContext::EndCompute() {
ASSERT(mCommands != nullptr); ASSERT(mCommands != nullptr);
ASSERT(mCompute != nullptr); ASSERT(mCompute != nullptr);

View File

@ -72,6 +72,13 @@ class Device final : public DeviceBase {
float GetTimestampPeriodInNS() const override; float GetTimestampPeriodInNS() const override;
bool UseCounterSamplingAtCommandBoundary() const;
bool UseCounterSamplingAtStageBoundary() const;
// Get a MTLBuffer that can be used as a dummy in a no-op blit encoder based on filling this
// single-byte buffer
id<MTLBuffer> GetDummyBlitMtlBuffer();
private: private:
Device(AdapterBase* adapter, Device(AdapterBase* adapter,
NSPRef<id<MTLDevice>> mtlDevice, NSPRef<id<MTLDevice>> mtlDevice,
@ -144,6 +151,13 @@ class Device final : public DeviceBase {
MTLTimestamp mGpuTimestamp API_AVAILABLE(macos(10.15), ios(14.0)) = 0; MTLTimestamp mGpuTimestamp API_AVAILABLE(macos(10.15), ios(14.0)) = 0;
// The parameters for kalman filter // The parameters for kalman filter
std::unique_ptr<KalmanInfo> mKalmanInfo; std::unique_ptr<KalmanInfo> mKalmanInfo;
// Support counter sampling between blit commands, dispatches and draw calls
bool mCounterSamplingAtCommandBoundary;
// Support counter sampling at the begin and end of blit pass, compute pass and render pass's
// vertex/fragement stage
bool mCounterSamplingAtStageBoundary;
NSPRef<id<MTLBuffer>> mDummyBlitMtlBuffer;
}; };
} // namespace dawn::native::metal } // namespace dawn::native::metal

View File

@ -121,7 +121,18 @@ Device::Device(AdapterBase* adapter,
const TripleStateTogglesSet& userProvidedToggles) const TripleStateTogglesSet& userProvidedToggles)
: DeviceBase(adapter, descriptor, userProvidedToggles), : DeviceBase(adapter, descriptor, userProvidedToggles),
mMtlDevice(std::move(mtlDevice)), mMtlDevice(std::move(mtlDevice)),
mCompletedSerial(0) {} mCompletedSerial(0) {
// On macOS < 11.0, we only can check whether counter sampling is supported, and the counter
// only can be sampled between command boundary using sampleCountersInBuffer API if it's
// supported.
if (@available(macOS 11.0, iOS 14.0, *)) {
mCounterSamplingAtCommandBoundary = SupportCounterSamplingAtCommandBoundary(GetMTLDevice());
mCounterSamplingAtStageBoundary = SupportCounterSamplingAtStageBoundary(GetMTLDevice());
} else {
mCounterSamplingAtCommandBoundary = true;
mCounterSamplingAtStageBoundary = false;
}
}
Device::~Device() { Device::~Device() {
Destroy(); Destroy();
@ -234,6 +245,14 @@ void Device::InitTogglesFromDriver() {
if (gpu_info::IsIntel(vendorId)) { if (gpu_info::IsIntel(vendorId)) {
SetToggle(Toggle::ApplyClearBigIntegerColorValueWithDraw, true); SetToggle(Toggle::ApplyClearBigIntegerColorValueWithDraw, true);
} }
// TODO(dawn:1473): Metal fails to store GPU counters to sampleBufferAttachments on empty
// encoders on macOS 11.0+, we need to add dummy blit command to blit encoder when encoding
// writeTimestamp as workaround by enabling the toggle
// "metal_use_dummy_blit_encoder_for_write_timestamp".
if (@available(macos 11.0, iOS 14.0, *)) {
SetToggle(Toggle::MetalUseDummyBlitEncoderForWriteTimestamp, true);
}
} }
ResultOrError<Ref<BindGroupBase>> Device::CreateBindGroupImpl( ResultOrError<Ref<BindGroupBase>> Device::CreateBindGroupImpl(
@ -498,6 +517,7 @@ void Device::DestroyImpl() {
mCommandQueue = nullptr; mCommandQueue = nullptr;
mMtlDevice = nullptr; mMtlDevice = nullptr;
mDummyBlitMtlBuffer = nullptr;
} }
uint32_t Device::GetOptimalBytesPerRowAlignment() const { uint32_t Device::GetOptimalBytesPerRowAlignment() const {
@ -512,4 +532,21 @@ float Device::GetTimestampPeriodInNS() const {
return mTimestampPeriod; return mTimestampPeriod;
} }
bool Device::UseCounterSamplingAtCommandBoundary() const {
return mCounterSamplingAtCommandBoundary;
}
bool Device::UseCounterSamplingAtStageBoundary() const {
return mCounterSamplingAtStageBoundary;
}
id<MTLBuffer> Device::GetDummyBlitMtlBuffer() {
if (mDummyBlitMtlBuffer == nullptr) {
mDummyBlitMtlBuffer.Acquire(
[GetMTLDevice() newBufferWithLength:1 options:MTLResourceStorageModePrivate]);
}
return mDummyBlitMtlBuffer.Get();
}
} // namespace dawn::native::metal } // namespace dawn::native::metal

View File

@ -23,6 +23,7 @@
#import <Metal/Metal.h> #import <Metal/Metal.h>
namespace dawn::native { namespace dawn::native {
struct BeginRenderPassCmd;
struct ProgrammableStage; struct ProgrammableStage;
struct EntryPointMetadata; struct EntryPointMetadata;
enum class SingleShaderStage; enum class SingleShaderStage;
@ -81,19 +82,26 @@ constexpr MTLStoreAction kMTLStoreActionStoreAndMultisampleResolve =
// happen at the render pass start and end. Because workarounds wrap the encoding of the render // happen at the render pass start and end. Because workarounds wrap the encoding of the render
// pass, the encoding must be entirely done by the `encodeInside` callback. // pass, the encoding must be entirely done by the `encodeInside` callback.
// At the end of this function, `commandContext` will have no encoder open. // At the end of this function, `commandContext` will have no encoder open.
using EncodeInsideRenderPass = std::function<MaybeError(id<MTLRenderCommandEncoder>)>; using EncodeInsideRenderPass =
std::function<MaybeError(id<MTLRenderCommandEncoder>, BeginRenderPassCmd* renderPassCmd)>;
MaybeError EncodeMetalRenderPass(Device* device, MaybeError EncodeMetalRenderPass(Device* device,
CommandRecordingContext* commandContext, CommandRecordingContext* commandContext,
MTLRenderPassDescriptor* mtlRenderPass, MTLRenderPassDescriptor* mtlRenderPass,
uint32_t width, uint32_t width,
uint32_t height, uint32_t height,
EncodeInsideRenderPass encodeInside); EncodeInsideRenderPass encodeInside,
BeginRenderPassCmd* renderPassCmd = nullptr);
MaybeError EncodeEmptyMetalRenderPass(Device* device, MaybeError EncodeEmptyMetalRenderPass(Device* device,
CommandRecordingContext* commandContext, CommandRecordingContext* commandContext,
MTLRenderPassDescriptor* mtlRenderPass, MTLRenderPassDescriptor* mtlRenderPass,
Extent3D size); Extent3D size);
bool SupportCounterSamplingAtCommandBoundary(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0));
bool SupportCounterSamplingAtStageBoundary(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0));
} // namespace dawn::native::metal } // namespace dawn::native::metal
#endif // SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_ #endif // SRC_DAWN_NATIVE_METAL_UTILSMETAL_H_

View File

@ -328,7 +328,8 @@ MaybeError EncodeMetalRenderPass(Device* device,
MTLRenderPassDescriptor* mtlRenderPass, MTLRenderPassDescriptor* mtlRenderPass,
uint32_t width, uint32_t width,
uint32_t height, uint32_t height,
EncodeInsideRenderPass encodeInside) { EncodeInsideRenderPass encodeInside,
BeginRenderPassCmd* renderPassCmd) {
// This function handles multiple workarounds. Because some cases requires multiple // This function handles multiple workarounds. Because some cases requires multiple
// workarounds to happen at the same time, it handles workarounds one by one and calls // workarounds to happen at the same time, it handles workarounds one by one and calls
// itself recursively to handle the next workaround if needed. // itself recursively to handle the next workaround if needed.
@ -359,7 +360,7 @@ MaybeError EncodeMetalRenderPass(Device* device,
// resolve back to the true resolve targets. // resolve back to the true resolve targets.
if (workaroundUsed) { if (workaroundUsed) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside))); std::move(encodeInside), renderPassCmd));
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) { for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (trueResolveAttachments[i].texture == nullptr) { if (trueResolveAttachments[i].texture == nullptr) {
@ -403,7 +404,7 @@ MaybeError EncodeMetalRenderPass(Device* device,
if (workaroundUsed) { if (workaroundUsed) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside))); std::move(encodeInside), renderPassCmd));
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) { for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (originalAttachments[i].texture == nullptr) { if (originalAttachments[i].texture == nullptr) {
@ -439,7 +440,7 @@ MaybeError EncodeMetalRenderPass(Device* device,
// If we found a store + MSAA resolve we need to resolve in a different render pass. // If we found a store + MSAA resolve we need to resolve in a different render pass.
if (hasStoreAndMSAAResolve) { if (hasStoreAndMSAAResolve) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height, DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside))); std::move(encodeInside), renderPassCmd));
ResolveInAnotherRenderPass(commandContext, mtlRenderPass, resolveTextures); ResolveInAnotherRenderPass(commandContext, mtlRenderPass, resolveTextures);
return {}; return {};
@ -448,7 +449,7 @@ MaybeError EncodeMetalRenderPass(Device* device,
// No (more) workarounds needed! We can finally encode the actual render pass. // No (more) workarounds needed! We can finally encode the actual render pass.
commandContext->EndBlit(); commandContext->EndBlit();
DAWN_TRY(encodeInside(commandContext->BeginRender(mtlRenderPass))); DAWN_TRY(encodeInside(commandContext->BeginRender(mtlRenderPass), renderPassCmd));
commandContext->EndRender(); commandContext->EndRender();
return {}; return {};
} }
@ -457,8 +458,26 @@ MaybeError EncodeEmptyMetalRenderPass(Device* device,
CommandRecordingContext* commandContext, CommandRecordingContext* commandContext,
MTLRenderPassDescriptor* mtlRenderPass, MTLRenderPassDescriptor* mtlRenderPass,
Extent3D size) { Extent3D size) {
return EncodeMetalRenderPass(device, commandContext, mtlRenderPass, size.width, size.height, return EncodeMetalRenderPass(
[&](id<MTLRenderCommandEncoder>) -> MaybeError { return {}; }); device, commandContext, mtlRenderPass, size.width, size.height,
[&](id<MTLRenderCommandEncoder>, BeginRenderPassCmd*) -> MaybeError { return {}; });
}
DAWN_NOINLINE bool SupportCounterSamplingAtCommandBoundary(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0)) {
bool isBlitBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary];
bool isDispatchBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary];
bool isDrawBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary];
return isBlitBoundarySupported && isDispatchBoundarySupported && isDrawBoundarySupported;
}
DAWN_NOINLINE bool SupportCounterSamplingAtStageBoundary(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0)) {
return [device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary];
} }
} // namespace dawn::native::metal } // namespace dawn::native::metal

View File

@ -667,6 +667,10 @@ bool DawnTestBase::IsAMD() const {
return gpu_info::IsAMD(mParam.adapterProperties.vendorID); return gpu_info::IsAMD(mParam.adapterProperties.vendorID);
} }
bool DawnTestBase::IsApple() const {
return gpu_info::IsApple(mParam.adapterProperties.vendorID);
}
bool DawnTestBase::IsARM() const { bool DawnTestBase::IsARM() const {
return gpu_info::IsARM(mParam.adapterProperties.vendorID); return gpu_info::IsARM(mParam.adapterProperties.vendorID);
} }

View File

@ -225,6 +225,7 @@ class DawnTestBase {
bool IsVulkan() const; bool IsVulkan() const;
bool IsAMD() const; bool IsAMD() const;
bool IsApple() const;
bool IsARM() const; bool IsARM() const;
bool IsImgTec() const; bool IsImgTec() const;
bool IsIntel() const; bool IsIntel() const;

View File

@ -18,6 +18,15 @@
#include "dawn/utils/ComboRenderPipelineDescriptor.h" #include "dawn/utils/ComboRenderPipelineDescriptor.h"
#include "dawn/utils/WGPUHelpers.h" #include "dawn/utils/WGPUHelpers.h"
// Clear the content of the result buffer into 0xFFFFFFFF.
constexpr static uint64_t kSentinelValue = ~uint64_t(0u);
constexpr static uint64_t kZero = 0u;
constexpr static unsigned int kRTSize = 4;
constexpr uint64_t kMinDestinationOffset = 256;
constexpr uint64_t kMinCount = kMinDestinationOffset / sizeof(uint64_t);
constexpr wgpu::TextureFormat kColorFormat = wgpu::TextureFormat::RGBA8Unorm;
constexpr wgpu::TextureFormat kDepthStencilFormat = wgpu::TextureFormat::Depth24PlusStencil8;
class QueryTests : public DawnTest { class QueryTests : public DawnTest {
protected: protected:
wgpu::Buffer CreateResolveBuffer(uint64_t size) { wgpu::Buffer CreateResolveBuffer(uint64_t size) {
@ -27,13 +36,15 @@ class QueryTests : public DawnTest {
wgpu::BufferUsage::CopyDst; wgpu::BufferUsage::CopyDst;
return device.CreateBuffer(&descriptor); return device.CreateBuffer(&descriptor);
} }
};
// Clear the content of the result buffer into 0xFFFFFFFF. wgpu::Texture CreateRenderTexture(wgpu::TextureFormat format) {
constexpr static uint64_t kSentinelValue = ~uint64_t(0u); wgpu::TextureDescriptor descriptor;
constexpr static uint64_t kZero = 0u; descriptor.size = {kRTSize, kRTSize, 1};
constexpr uint64_t kMinDestinationOffset = 256; descriptor.format = format;
constexpr uint64_t kMinCount = kMinDestinationOffset / sizeof(uint64_t); descriptor.usage = wgpu::TextureUsage::RenderAttachment;
return device.CreateTexture(&descriptor);
}
};
class OcclusionExpectation : public detail::Expectation { class OcclusionExpectation : public detail::Expectation {
public: public:
@ -112,14 +123,6 @@ class OcclusionQueryTests : public QueryTests {
return device.CreateQuerySet(&descriptor); return device.CreateQuerySet(&descriptor);
} }
wgpu::Texture CreateRenderTexture(wgpu::TextureFormat format) {
wgpu::TextureDescriptor descriptor;
descriptor.size = {kRTSize, kRTSize, 1};
descriptor.format = format;
descriptor.usage = wgpu::TextureUsage::RenderAttachment;
return device.CreateTexture(&descriptor);
}
void TestOcclusionQueryWithDepthStencilTest(bool depthTestEnabled, void TestOcclusionQueryWithDepthStencilTest(bool depthTestEnabled,
bool stencilTestEnabled, bool stencilTestEnabled,
OcclusionExpectation::Result expected) { OcclusionExpectation::Result expected) {
@ -130,8 +133,7 @@ class OcclusionQueryTests : public QueryTests {
descriptor.cFragment.module = fsModule; descriptor.cFragment.module = fsModule;
// Enable depth and stencil tests and set comparison tests never pass. // Enable depth and stencil tests and set comparison tests never pass.
wgpu::DepthStencilState* depthStencil = wgpu::DepthStencilState* depthStencil = descriptor.EnableDepthStencil(kDepthStencilFormat);
descriptor.EnableDepthStencil(wgpu::TextureFormat::Depth24PlusStencil8);
depthStencil->depthCompare = depthStencil->depthCompare =
depthTestEnabled ? wgpu::CompareFunction::Never : wgpu::CompareFunction::Always; depthTestEnabled ? wgpu::CompareFunction::Never : wgpu::CompareFunction::Always;
depthStencil->stencilFront.compare = depthStencil->stencilFront.compare =
@ -141,10 +143,10 @@ class OcclusionQueryTests : public QueryTests {
wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&descriptor); wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&descriptor);
wgpu::Texture renderTarget = CreateRenderTexture(wgpu::TextureFormat::RGBA8Unorm); wgpu::Texture renderTarget = CreateRenderTexture(kColorFormat);
wgpu::TextureView renderTargetView = renderTarget.CreateView(); wgpu::TextureView renderTargetView = renderTarget.CreateView();
wgpu::Texture depthTexture = CreateRenderTexture(wgpu::TextureFormat::Depth24PlusStencil8); wgpu::Texture depthTexture = CreateRenderTexture(kDepthStencilFormat);
wgpu::TextureView depthTextureView = depthTexture.CreateView(); wgpu::TextureView depthTextureView = depthTexture.CreateView();
wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount); wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount);
@ -205,8 +207,6 @@ class OcclusionQueryTests : public QueryTests {
wgpu::ShaderModule fsModule; wgpu::ShaderModule fsModule;
wgpu::RenderPipeline pipeline; wgpu::RenderPipeline pipeline;
constexpr static unsigned int kRTSize = 4;
}; };
// Test creating query set with the type of Occlusion // Test creating query set with the type of Occlusion
@ -551,23 +551,87 @@ class TimestampQueryTests : public QueryTests {
return device.CreateQuerySet(&descriptor); return device.CreateQuerySet(&descriptor);
} }
wgpu::RenderPipeline CreateRenderPipeline(bool hasFragmentStage = true) {
utils::ComboRenderPipelineDescriptor descriptor;
descriptor.vertex.module = utils::CreateShaderModule(device, R"(
@vertex
fn main(@builtin(vertex_index) VertexIndex : u32) -> @builtin(position) vec4<f32> {
var pos = array<vec2<f32>, 3>(
vec2<f32>( 1.0, 1.0),
vec2<f32>(-1.0, -1.0),
vec2<f32>( 1.0, -1.0));
return vec4<f32>(pos[VertexIndex], 0.0, 1.0);
})");
if (hasFragmentStage) {
descriptor.cFragment.module = utils::CreateShaderModule(device, R"(
@fragment fn main() -> @location(0) vec4<f32> {
return vec4<f32>(0.0, 1.0, 0.0, 1.0);
})");
} else {
descriptor.fragment = nullptr;
descriptor.EnableDepthStencil(kDepthStencilFormat);
}
return device.CreateRenderPipeline(&descriptor);
}
void EncodeComputeTimestampWrites(
const wgpu::CommandEncoder& encoder,
const std::vector<wgpu::ComputePassTimestampWrite>& timestampWrites,
bool hasPipeline = true) {
wgpu::ComputePassDescriptor descriptor;
descriptor.timestampWriteCount = timestampWrites.size();
descriptor.timestampWrites = timestampWrites.data();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass(&descriptor);
if (hasPipeline) {
pass.SetPipeline(computePipeline);
pass.DispatchWorkgroups(1, 1, 1);
}
pass.End();
}
void EncodeRenderTimestampWrites(
const wgpu::CommandEncoder& encoder,
const std::vector<wgpu::RenderPassTimestampWrite>& timestampWrites,
bool hasPipeline = true,
bool hasFragmentStage = true) {
wgpu::Texture depthTexture = CreateRenderTexture(kDepthStencilFormat);
utils::ComboRenderPassDescriptor renderPassDesc =
hasFragmentStage
? utils::ComboRenderPassDescriptor({CreateRenderTexture(kColorFormat).CreateView()})
: utils::ComboRenderPassDescriptor(
{}, CreateRenderTexture(kDepthStencilFormat).CreateView());
renderPassDesc.timestampWriteCount = timestampWrites.size();
renderPassDesc.timestampWrites = timestampWrites.data();
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPassDesc);
if (hasPipeline) {
wgpu::RenderPipeline renderPipeline = CreateRenderPipeline(hasFragmentStage);
pass.SetPipeline(renderPipeline);
pass.Draw(3);
}
pass.End();
}
void TestTimestampWritesOnComputePass( void TestTimestampWritesOnComputePass(
const std::vector<wgpu::ComputePassTimestampWrite>& timestampWrites, const std::vector<wgpu::ComputePassTimestampWrite>& timestampWrites,
const std::vector<wgpu::ComputePassTimestampWrite>& timestampWritesOnAnotherPass = {}) { const std::vector<wgpu::ComputePassTimestampWrite>& timestampWritesOnAnotherPass = {},
bool hasPipeline = true) {
size_t queryCount = timestampWrites.size() + timestampWritesOnAnotherPass.size(); size_t queryCount = timestampWrites.size() + timestampWritesOnAnotherPass.size();
// The destination buffer offset must be a multiple of 256. // The destination buffer offset must be a multiple of 256.
wgpu::Buffer destination = wgpu::Buffer destination =
CreateResolveBuffer(queryCount * kMinDestinationOffset + sizeof(uint64_t)); CreateResolveBuffer(queryCount * kMinDestinationOffset + sizeof(uint64_t));
wgpu::ComputePassDescriptor descriptor;
descriptor.timestampWriteCount = timestampWrites.size();
descriptor.timestampWrites = timestampWrites.data();
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass(&descriptor); EncodeComputeTimestampWrites(encoder, timestampWrites, hasPipeline);
pass.SetPipeline(computePipeline);
pass.DispatchWorkgroups(1, 1, 1); // Begin another compute pass if the timestampWritesOnAnotherPass is set.
pass.End(); if (!timestampWritesOnAnotherPass.empty()) {
EncodeComputeTimestampWrites(encoder, timestampWritesOnAnotherPass, hasPipeline);
}
// Resolve queries one by one because the query set at the beginning of pass may be // Resolve queries one by one because the query set at the beginning of pass may be
// different with the one at the end of pass. // different with the one at the end of pass.
@ -575,26 +639,11 @@ class TimestampQueryTests : public QueryTests {
encoder.ResolveQuerySet(timestampWrites[i].querySet, timestampWrites[i].queryIndex, 1, encoder.ResolveQuerySet(timestampWrites[i].querySet, timestampWrites[i].queryIndex, 1,
destination, i * kMinDestinationOffset); destination, i * kMinDestinationOffset);
} }
// Begin another compute pass if the timestampWritesOnAnotherPass is set.
if (!timestampWritesOnAnotherPass.empty()) {
wgpu::ComputePassDescriptor descriptor2;
descriptor2.timestampWriteCount = timestampWritesOnAnotherPass.size();
descriptor2.timestampWrites = timestampWritesOnAnotherPass.data();
wgpu::ComputePassEncoder pass2 = encoder.BeginComputePass(&descriptor2);
pass2.SetPipeline(computePipeline);
pass2.DispatchWorkgroups(1, 1, 1);
pass2.End();
for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) { for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) {
// Resolve queries one by one because the query set at the beginning of pass may be
// different with the one at the end of pass.
encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet, encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet,
timestampWritesOnAnotherPass[i].queryIndex, 1, destination, timestampWritesOnAnotherPass[i].queryIndex, 1, destination,
(timestampWrites.size() + i) * kMinDestinationOffset); (timestampWrites.size() + i) * kMinDestinationOffset);
} }
}
wgpu::CommandBuffer commands = encoder.Finish(); wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands); queue.Submit(1, &commands);
@ -607,19 +656,22 @@ class TimestampQueryTests : public QueryTests {
void TestTimestampWritesOnRenderPass( void TestTimestampWritesOnRenderPass(
const std::vector<wgpu::RenderPassTimestampWrite>& timestampWrites, const std::vector<wgpu::RenderPassTimestampWrite>& timestampWrites,
const std::vector<wgpu::RenderPassTimestampWrite>& timestampWritesOnAnotherPass = {}) { const std::vector<wgpu::RenderPassTimestampWrite>& timestampWritesOnAnotherPass = {},
bool hasPipeline = true,
bool hasFragmentStage = true) {
size_t queryCount = timestampWrites.size() + timestampWritesOnAnotherPass.size(); size_t queryCount = timestampWrites.size() + timestampWritesOnAnotherPass.size();
// The destination buffer offset must be a multiple of 256. // The destination buffer offset must be a multiple of 256.
wgpu::Buffer destination = wgpu::Buffer destination =
CreateResolveBuffer(queryCount * kMinDestinationOffset + sizeof(uint64_t)); CreateResolveBuffer(queryCount * kMinDestinationOffset + sizeof(uint64_t));
utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1);
renderPass.renderPassInfo.timestampWriteCount = timestampWrites.size();
renderPass.renderPassInfo.timestampWrites = timestampWrites.data();
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); EncodeRenderTimestampWrites(encoder, timestampWrites, hasPipeline, hasFragmentStage);
pass.End();
// Begin another render pass if the timestampWritesOnAnotherPass is set.
if (!timestampWritesOnAnotherPass.empty()) {
EncodeRenderTimestampWrites(encoder, timestampWritesOnAnotherPass, hasPipeline,
hasFragmentStage);
}
// Resolve queries one by one because the query set at the beginning of pass may be // Resolve queries one by one because the query set at the beginning of pass may be
// different with the one at the end of pass. // different with the one at the end of pass.
@ -628,23 +680,11 @@ class TimestampQueryTests : public QueryTests {
destination, i * kMinDestinationOffset); destination, i * kMinDestinationOffset);
} }
// Begin another render pass if the timestampWritesOnAnotherPass is set.
if (!timestampWritesOnAnotherPass.empty()) {
utils::BasicRenderPass renderPass2 = utils::CreateBasicRenderPass(device, 1, 1);
renderPass2.renderPassInfo.timestampWriteCount = timestampWritesOnAnotherPass.size();
renderPass2.renderPassInfo.timestampWrites = timestampWritesOnAnotherPass.data();
wgpu::RenderPassEncoder pass2 = encoder.BeginRenderPass(&renderPass2.renderPassInfo);
pass2.End();
for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) { for (size_t i = 0; i < timestampWritesOnAnotherPass.size(); i++) {
// Resolve queries one by one because the query set at the beginning of pass may be
// different with the one at the end of pass.
encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet, encoder.ResolveQuerySet(timestampWritesOnAnotherPass[i].querySet,
timestampWritesOnAnotherPass[i].queryIndex, 1, destination, timestampWritesOnAnotherPass[i].queryIndex, 1, destination,
(timestampWrites.size() + i) * kMinDestinationOffset); (timestampWrites.size() + i) * kMinDestinationOffset);
} }
}
wgpu::CommandBuffer commands = encoder.Finish(); wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands); queue.Submit(1, &commands);
@ -669,9 +709,6 @@ TEST_P(TimestampQueryTests, QuerySetCreation) {
// Test calling timestamp query from command encoder // Test calling timestamp query from command encoder
TEST_P(TimestampQueryTests, TimestampOnCommandEncoder) { TEST_P(TimestampQueryTests, TimestampOnCommandEncoder) {
// TODO (dawn:1250): Still not implemented on Metal backend.
DAWN_TEST_UNSUPPORTED_IF(IsMetal());
constexpr uint32_t kQueryCount = 2; constexpr uint32_t kQueryCount = 2;
// Write timestamp with different query indexes // Write timestamp with different query indexes
@ -709,6 +746,10 @@ TEST_P(TimestampQueryTests, TimestampOnCommandEncoder) {
// Test calling timestamp query from render pass encoder // Test calling timestamp query from render pass encoder
TEST_P(TimestampQueryTests, TimestampOnRenderPass) { TEST_P(TimestampQueryTests, TimestampOnRenderPass) {
// TODO (dawn:1250): Split writeTimestamp() to another extension which is not supported on Apple
// devices
DAWN_TEST_UNSUPPORTED_IF(IsMacOS() && IsMetal() && IsApple());
constexpr uint32_t kQueryCount = 2; constexpr uint32_t kQueryCount = 2;
// Write timestamp with different query indexes // Write timestamp with different query indexes
@ -754,6 +795,10 @@ TEST_P(TimestampQueryTests, TimestampOnRenderPass) {
// Test calling timestamp query from compute pass encoder // Test calling timestamp query from compute pass encoder
TEST_P(TimestampQueryTests, TimestampOnComputePass) { TEST_P(TimestampQueryTests, TimestampOnComputePass) {
// TODO (dawn:1250): Split writeTimestamp() to another extension which is not supported on Apple
// devices
DAWN_TEST_UNSUPPORTED_IF(IsMacOS() && IsMetal() && IsApple());
constexpr uint32_t kQueryCount = 2; constexpr uint32_t kQueryCount = 2;
// Write timestamp with different query indexes // Write timestamp with different query indexes
@ -815,19 +860,34 @@ TEST_P(TimestampQueryTests, TimestampOnComputePass) {
} }
} }
// Test timestampWrites setting in compute pass descriptor // Test timestampWrites with query set in compute pass descriptor
TEST_P(TimestampQueryTests, TimestampWritesOnComputePass) { TEST_P(TimestampQueryTests, TimestampWritesQuerySetOnComputePass) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed. // the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
// TODO (dawn:1250): Still not implemented on Metal backend. // TODO (dawn:1473): Metal bug which fails to store GPU counters to different sample buffer.
DAWN_TEST_UNSUPPORTED_IF(IsMetal()); DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple());
// Set timestampWrites with different query set on same compute pass
wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1);
TestTimestampWritesOnComputePass({{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning},
{querySet1, 0, wgpu::ComputePassTimestampLocation::End}});
}
// Test timestampWrites with query index in compute pass descriptor
TEST_P(TimestampQueryTests, TimestampWritesQueryIndexOnComputePass) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
constexpr uint32_t kQueryCount = 2; constexpr uint32_t kQueryCount = 2;
// Set timestampWrites with different query indexes and locations on same compute pass // Set timestampWrites with different query indexes on same compute pass
{ {
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);
@ -836,16 +896,35 @@ TEST_P(TimestampQueryTests, TimestampWritesOnComputePass) {
{querySet, 1, wgpu::ComputePassTimestampLocation::End}}); {querySet, 1, wgpu::ComputePassTimestampLocation::End}});
} }
// Set timestampWrites with different query set on same compute pass // Set timestampWrites with same query index on same compute pass
{ {
wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1);
TestTimestampWritesOnComputePass( TestTimestampWritesOnComputePass(
{{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}, {{querySet, 0, wgpu::ComputePassTimestampLocation::Beginning},
{querySet1, 0, wgpu::ComputePassTimestampLocation::End}}); {querySet, 0, wgpu::ComputePassTimestampLocation::End}});
} }
// Set timestampWrites with same query indexes on different compute pass
{
wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(kQueryCount);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(kQueryCount);
TestTimestampWritesOnComputePass(
{{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}},
{{querySet1, 0, wgpu::ComputePassTimestampLocation::End}});
}
}
// Test timestampWrites with timestamp location in compute pass descriptor
TEST_P(TimestampQueryTests, TimestampWritesLocationOnComputePass) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
constexpr uint32_t kQueryCount = 2;
// Set timestampWrites with only one value of ComputePassTimestampLocation // Set timestampWrites with only one value of ComputePassTimestampLocation
{ {
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);
@ -856,63 +935,78 @@ TEST_P(TimestampQueryTests, TimestampWritesOnComputePass) {
TestTimestampWritesOnComputePass({{querySet, 1, wgpu::ComputePassTimestampLocation::End}}); TestTimestampWritesOnComputePass({{querySet, 1, wgpu::ComputePassTimestampLocation::End}});
} }
// Set timestampWrites with same query set and query index on same compute pass // Set timestampWrites with same location on different compute pass
{ {
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1);
TestTimestampWritesOnComputePass( TestTimestampWritesOnComputePass(
{{querySet, 0, wgpu::ComputePassTimestampLocation::Beginning}, {{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning}},
{querySet, 0, wgpu::ComputePassTimestampLocation::End}}); {{querySet1, 0, wgpu::ComputePassTimestampLocation::Beginning}});
}
// Set timestampWrites with same query indexes and locations on different compute pass
{
wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(kQueryCount);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(kQueryCount);
TestTimestampWritesOnComputePass(
{{querySet0, 0, wgpu::ComputePassTimestampLocation::Beginning},
{querySet0, 1, wgpu::ComputePassTimestampLocation::End}},
{{querySet1, 0, wgpu::ComputePassTimestampLocation::Beginning},
{querySet1, 1, wgpu::ComputePassTimestampLocation::End}});
} }
} }
// Test timestampWrites setting in render pass descriptor // Test timestampWrites on compute pass without pipeline
TEST_P(TimestampQueryTests, TimestampWritesOnRenderPass) { TEST_P(TimestampQueryTests, TimestampWritesOnComputePassWithNoPipline) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed. // the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
// TODO (dawn:1250): Still not implemented on Metal backend. // TODO (dawn:1473): Metal fails to store GPU counters to sampleBufferAttachments on empty
DAWN_TEST_UNSUPPORTED_IF(IsMetal()); // encoders.
DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple());
constexpr uint32_t kQueryCount = 2; wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2);
// Set timestampWrites with different query indexes and locations, not need test write same TestTimestampWritesOnComputePass({{querySet, 0, wgpu::ComputePassTimestampLocation::Beginning},
// query index due to it's not allowed on render pass. {querySet, 1, wgpu::ComputePassTimestampLocation::End}},
{ {}, false);
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); }
TestTimestampWritesOnRenderPass( // Test timestampWrites with query set in render pass descriptor
{{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}, TEST_P(TimestampQueryTests, TimestampWritesQuerySetOnRenderPass) {
{querySet, 1, wgpu::RenderPassTimestampLocation::End}}); // TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
} // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
// TODO (dawn:1473): Metal bug which fails to store GPU counters to different sample buffer.
DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple());
// Set timestampWrites with different query set on same render pass // Set timestampWrites with different query set on same render pass
{
wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1); wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1); wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1);
TestTimestampWritesOnRenderPass( TestTimestampWritesOnRenderPass({{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning},
{{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning},
{querySet1, 0, wgpu::RenderPassTimestampLocation::End}}); {querySet1, 0, wgpu::RenderPassTimestampLocation::End}});
} }
// Test timestampWrites with query index in compute pass descriptor
TEST_P(TimestampQueryTests, TimestampWritesQueryIndexOnRenderPass) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
// Set timestampWrites with different query indexes and locations, not need test write same
// query index due to it's not allowed on render pass.
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2);
TestTimestampWritesOnRenderPass({{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning},
{querySet, 1, wgpu::RenderPassTimestampLocation::End}});
}
// Test timestampWrites with timestamp location in render pass descriptor
TEST_P(TimestampQueryTests, TimestampWritesLocationOnRenderPass) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
// Set timestampWrites with only one value of RenderPassTimestampLocation // Set timestampWrites with only one value of RenderPassTimestampLocation
{ {
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2);
TestTimestampWritesOnRenderPass( TestTimestampWritesOnRenderPass(
{{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}}); {{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning}});
@ -920,19 +1014,45 @@ TEST_P(TimestampQueryTests, TimestampWritesOnRenderPass) {
TestTimestampWritesOnRenderPass({{querySet, 1, wgpu::RenderPassTimestampLocation::End}}); TestTimestampWritesOnRenderPass({{querySet, 1, wgpu::RenderPassTimestampLocation::End}});
} }
// Set timestampWrites with same query indexes and locations on different render pass // Set timestampWrites with same location on different render pass
{ {
wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet0 = CreateQuerySetForTimestamp(1);
wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet1 = CreateQuerySetForTimestamp(1);
TestTimestampWritesOnRenderPass( TestTimestampWritesOnRenderPass(
{{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning}, {{querySet0, 0, wgpu::RenderPassTimestampLocation::Beginning}},
{querySet0, 1, wgpu::RenderPassTimestampLocation::End}}, {{querySet1, 0, wgpu::RenderPassTimestampLocation::Beginning}});
{{querySet1, 0, wgpu::RenderPassTimestampLocation::Beginning},
{querySet1, 1, wgpu::RenderPassTimestampLocation::End}});
} }
} }
// Test timestampWrites on render pass without pipeline
TEST_P(TimestampQueryTests, TimestampWritesOnRenderPassWithNoPipline) {
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2);
TestTimestampWritesOnRenderPass({{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning},
{querySet, 1, wgpu::RenderPassTimestampLocation::End}},
{}, false);
}
// Test timestampWrites on render pass with pipeline but no fragment stage
TEST_P(TimestampQueryTests, TimestampWritesOnRenderPassWithOnlyVertexStage) {
DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("use_placeholder_fragment_in_vertex_only_pipeline"));
// TODO(dawn:1489): Fails on Intel Windows Vulkan due to a driver issue that
// vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
// the issue is fixed.
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(2);
TestTimestampWritesOnRenderPass({{querySet, 0, wgpu::RenderPassTimestampLocation::Beginning},
{querySet, 1, wgpu::RenderPassTimestampLocation::End}},
{}, true, false);
}
// Test resolving timestamp query from another different encoder // Test resolving timestamp query from another different encoder
TEST_P(TimestampQueryTests, ResolveFromAnotherEncoder) { TEST_P(TimestampQueryTests, ResolveFromAnotherEncoder) {
constexpr uint32_t kQueryCount = 2; constexpr uint32_t kQueryCount = 2;