mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-06-03 05:01:27 +00:00
Workaround empty occlusion queries on Apple GPUs
Fixed: dawn:1707 Change-Id: I73f2b595c57830266e7d34ca9e483d803a00d331 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/125180 Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: Loko Kung <lokokung@google.com> Reviewed-by: Kai Ninomiya <kainino@chromium.org> Commit-Queue: Kai Ninomiya <kainino@chromium.org>
This commit is contained in:
parent
fa2d3851a8
commit
0dc09c01a4
@ -358,6 +358,12 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
|
||||
"subresource are completely initialized, and StoreOp::Discard is always translated as a "
|
||||
"Store.",
|
||||
"https://crbug.com/dawn/838", ToggleStage::Device}},
|
||||
{Toggle::MetalFillEmptyOcclusionQueriesWithZero,
|
||||
{"metal_fill_empty_occlusion_queries_with_zero",
|
||||
"Apple GPUs leave stale results in the visibility result buffer instead of writing zero if "
|
||||
"an occlusion query is empty. Workaround this by explicitly filling it with zero if there "
|
||||
"are no draw calls.",
|
||||
"https://crbug.com/dawn/1707", ToggleStage::Device}},
|
||||
{Toggle::UseBlitForBufferToDepthTextureCopy,
|
||||
{"use_blit_for_buffer_to_depth_texture_copy",
|
||||
"Use a blit instead of a copy command to copy buffer data to the depth aspect of a "
|
||||
|
@ -87,6 +87,7 @@ enum class Toggle {
|
||||
MetalUseCombinedDepthStencilFormatForStencil8,
|
||||
MetalUseBothDepthAndStencilAttachmentsForCombinedDepthStencilFormats,
|
||||
MetalKeepMultisubresourceDepthStencilTexturesInitialized,
|
||||
MetalFillEmptyOcclusionQueriesWithZero,
|
||||
UseBlitForBufferToDepthTextureCopy,
|
||||
UseBlitForBufferToStencilTextureCopy,
|
||||
UseBlitForDepthTextureToTextureCopyToNonzeroSubresource,
|
||||
|
@ -404,6 +404,10 @@ class Adapter : public AdapterBase {
|
||||
true);
|
||||
}
|
||||
|
||||
if (gpu_info::IsApple(vendorId)) {
|
||||
deviceToggles->Default(Toggle::MetalFillEmptyOcclusionQueriesWithZero, true);
|
||||
}
|
||||
|
||||
// Local testing shows the workaround is needed on AMD Radeon HD 8870M (gcn-1) MacOS 12.1;
|
||||
// not on AMD Radeon Pro 555 (gcn-4) MacOS 13.1.
|
||||
// Conservatively enable the workaround on AMD unless the system is MacOS 13.1+
|
||||
|
@ -15,6 +15,9 @@
|
||||
#ifndef SRC_DAWN_NATIVE_METAL_COMMANDBUFFERMTL_H_
|
||||
#define SRC_DAWN_NATIVE_METAL_COMMANDBUFFERMTL_H_
|
||||
|
||||
#include <set>
|
||||
#include <utility>
|
||||
|
||||
#include "dawn/native/CommandBuffer.h"
|
||||
#include "dawn/native/Commands.h"
|
||||
#include "dawn/native/Error.h"
|
||||
@ -32,6 +35,7 @@ namespace dawn::native::metal {
|
||||
class CommandRecordingContext;
|
||||
class Device;
|
||||
class Texture;
|
||||
class QuerySet;
|
||||
|
||||
void RecordCopyBufferToTexture(CommandRecordingContext* commandContext,
|
||||
id<MTLBuffer> mtlBuffer,
|
||||
@ -60,8 +64,15 @@ class CommandBuffer final : public CommandBufferBase {
|
||||
|
||||
MaybeError EncodeComputePass(CommandRecordingContext* commandContext,
|
||||
BeginComputePassCmd* computePassCmd);
|
||||
|
||||
// Empty occlusion queries aren't filled to zero on Apple GPUs. This set is used to
|
||||
// track which results should be explicitly zero'ed as a workaround. Use of empty queries
|
||||
// *should* mostly be a degenerate scenario, so this std::set shouldn't be performance-critical.
|
||||
// The set is passed as nullptr to `EncodeRenderPass` if the workaround is not in use.
|
||||
using EmptyOcclusionQueries = std::set<std::pair<QuerySet*, uint32_t>>;
|
||||
MaybeError EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
BeginRenderPassCmd* renderPassCmd);
|
||||
BeginRenderPassCmd* renderPassCmd,
|
||||
EmptyOcclusionQueries* emptyOcclusionQueries);
|
||||
};
|
||||
|
||||
} // namespace dawn::native::metal
|
||||
|
@ -812,12 +812,25 @@ MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext)
|
||||
Device* device = ToBackend(GetDevice());
|
||||
NSRef<MTLRenderPassDescriptor> descriptor = CreateMTLRenderPassDescriptor(
|
||||
device, cmd, device->UseCounterSamplingAtStageBoundary());
|
||||
|
||||
EmptyOcclusionQueries emptyOcclusionQueries;
|
||||
DAWN_TRY(EncodeMetalRenderPass(
|
||||
device, commandContext, descriptor.Get(), cmd->width, cmd->height,
|
||||
[this](id<MTLRenderCommandEncoder> encoder, BeginRenderPassCmd* cmd)
|
||||
-> MaybeError { return this->EncodeRenderPass(encoder, cmd); },
|
||||
[&](id<MTLRenderCommandEncoder> encoder,
|
||||
BeginRenderPassCmd* cmd) -> MaybeError {
|
||||
return this->EncodeRenderPass(
|
||||
encoder, cmd,
|
||||
device->IsToggleEnabled(Toggle::MetalFillEmptyOcclusionQueriesWithZero)
|
||||
? &emptyOcclusionQueries
|
||||
: nullptr);
|
||||
},
|
||||
cmd));
|
||||
|
||||
for (const auto& [querySet, queryIndex] : emptyOcclusionQueries) {
|
||||
[commandContext->EnsureBlit()
|
||||
fillBuffer:querySet->GetVisibilityBuffer()
|
||||
range:NSMakeRange(queryIndex * sizeof(uint64_t), sizeof(uint64_t))
|
||||
value:0u];
|
||||
}
|
||||
nextRenderPassNumber++;
|
||||
break;
|
||||
}
|
||||
@ -1347,7 +1360,8 @@ MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandCont
|
||||
}
|
||||
|
||||
MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
BeginRenderPassCmd* renderPassCmd) {
|
||||
BeginRenderPassCmd* renderPassCmd,
|
||||
EmptyOcclusionQueries* emptyOcclusionQueries) {
|
||||
bool enableVertexPulling = GetDevice()->IsToggleEnabled(Toggle::MetalEnableVertexPulling);
|
||||
RenderPipeline* lastPipeline = nullptr;
|
||||
id<MTLBuffer> indexBuffer = nullptr;
|
||||
@ -1355,6 +1369,8 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
MTLIndexType indexBufferType;
|
||||
uint64_t indexFormatSize = 0;
|
||||
|
||||
bool didDrawInCurrentOcclusionQuery = false;
|
||||
|
||||
StorageBufferLengthTracker storageBufferLengths = {};
|
||||
VertexBufferTracker vertexBuffers(&storageBufferLengths);
|
||||
BindGroupTracker bindGroups(&storageBufferLengths);
|
||||
@ -1390,12 +1406,14 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
vertexStart:draw->firstVertex
|
||||
vertexCount:draw->vertexCount
|
||||
instanceCount:draw->instanceCount];
|
||||
didDrawInCurrentOcclusionQuery = true;
|
||||
} else {
|
||||
[encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology()
|
||||
vertexStart:draw->firstVertex
|
||||
vertexCount:draw->vertexCount
|
||||
instanceCount:draw->instanceCount
|
||||
baseInstance:draw->firstInstance];
|
||||
didDrawInCurrentOcclusionQuery = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -1420,6 +1438,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
indexBufferOffset:indexBufferBaseOffset +
|
||||
draw->firstIndex * indexFormatSize
|
||||
instanceCount:draw->instanceCount];
|
||||
didDrawInCurrentOcclusionQuery = true;
|
||||
} else {
|
||||
[encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology()
|
||||
indexCount:draw->indexCount
|
||||
@ -1430,6 +1449,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
instanceCount:draw->instanceCount
|
||||
baseVertex:draw->baseVertex
|
||||
baseInstance:draw->firstInstance];
|
||||
didDrawInCurrentOcclusionQuery = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -1448,6 +1468,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
[encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology()
|
||||
indirectBuffer:indirectBuffer
|
||||
indirectBufferOffset:draw->indirectOffset];
|
||||
didDrawInCurrentOcclusionQuery = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1469,6 +1490,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
indexBufferOffset:indexBufferBaseOffset
|
||||
indirectBuffer:indirectBuffer
|
||||
indirectBufferOffset:draw->indirectOffset];
|
||||
didDrawInCurrentOcclusionQuery = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1649,6 +1671,7 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
|
||||
[encoder setVisibilityResultMode:MTLVisibilityResultModeBoolean
|
||||
offset:cmd->queryIndex * sizeof(uint64_t)];
|
||||
didDrawInCurrentOcclusionQuery = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1657,6 +1680,20 @@ MaybeError CommandBuffer::EncodeRenderPass(id<MTLRenderCommandEncoder> encoder,
|
||||
|
||||
[encoder setVisibilityResultMode:MTLVisibilityResultModeDisabled
|
||||
offset:cmd->queryIndex * sizeof(uint64_t)];
|
||||
if (emptyOcclusionQueries) {
|
||||
// Empty occlusion queries aren't filled to zero on Apple GPUs.
|
||||
// Keep track of them so we can clear them if necessary.
|
||||
auto key = std::make_pair(ToBackend(renderPassCmd->occlusionQuerySet.Get()),
|
||||
cmd->queryIndex);
|
||||
if (!didDrawInCurrentOcclusionQuery) {
|
||||
emptyOcclusionQueries->insert(std::move(key));
|
||||
} else {
|
||||
auto it = emptyOcclusionQueries->find(std::move(key));
|
||||
if (it != emptyOcclusionQueries->end()) {
|
||||
emptyOcclusionQueries->erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -366,8 +366,6 @@ TEST_P(OcclusionQueryTests, ResolveWithoutWritten) {
|
||||
|
||||
// Test setting an occlusion query to non-zero, then rewriting it without drawing, resolves to 0.
|
||||
TEST_P(OcclusionQueryTests, RewriteNoDrawToZero) {
|
||||
// TODO(crbug.com/dawn/1707): The second query does not reset it to 0.
|
||||
DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple());
|
||||
constexpr uint32_t kQueryCount = 1;
|
||||
|
||||
wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount);
|
||||
@ -406,8 +404,6 @@ TEST_P(OcclusionQueryTests, RewriteNoDrawToZero) {
|
||||
// Test setting an occlusion query to non-zero, then rewriting it without drawing, resolves to 0.
|
||||
// Do the two queries+resolves in separate submits.
|
||||
TEST_P(OcclusionQueryTests, RewriteNoDrawToZeroSeparateSubmit) {
|
||||
// TODO(crbug.com/dawn/1707): The second query does not reset it to 0.
|
||||
DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsMetal() && IsApple());
|
||||
constexpr uint32_t kQueryCount = 1;
|
||||
|
||||
wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount);
|
||||
@ -445,6 +441,66 @@ TEST_P(OcclusionQueryTests, RewriteNoDrawToZeroSeparateSubmit) {
|
||||
new OcclusionExpectation(OcclusionExpectation::Result::Zero));
|
||||
}
|
||||
|
||||
// Test that resetting an occlusion query to zero works when a draw is done where all primitives
|
||||
// fail the depth test.
|
||||
TEST_P(OcclusionQueryTests, RewriteToZeroWithDraw) {
|
||||
constexpr uint32_t kQueryCount = 1;
|
||||
|
||||
utils::ComboRenderPipelineDescriptor descriptor;
|
||||
descriptor.vertex.module = vsModule;
|
||||
descriptor.cFragment.module = fsModule;
|
||||
|
||||
// Enable depth and stencil tests and set comparison tests to never pass.
|
||||
wgpu::DepthStencilState* depthStencil = descriptor.EnableDepthStencil(kDepthStencilFormat);
|
||||
depthStencil->depthCompare = wgpu::CompareFunction::Never;
|
||||
depthStencil->stencilFront.compare = wgpu::CompareFunction::Never;
|
||||
depthStencil->stencilBack.compare = wgpu::CompareFunction::Never;
|
||||
|
||||
wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&descriptor);
|
||||
|
||||
wgpu::Texture renderTarget = CreateRenderTexture(kColorFormat);
|
||||
wgpu::TextureView renderTargetView = renderTarget.CreateView();
|
||||
|
||||
wgpu::Texture depthTexture = CreateRenderTexture(kDepthStencilFormat);
|
||||
wgpu::TextureView depthTextureView = depthTexture.CreateView();
|
||||
|
||||
wgpu::QuerySet querySet = CreateOcclusionQuerySet(kQueryCount);
|
||||
wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t));
|
||||
// Set all bits in buffer to check 0 is correctly written if there is no sample passed the
|
||||
// occlusion testing
|
||||
queue.WriteBuffer(destination, 0, &kSentinelValue, sizeof(kSentinelValue));
|
||||
|
||||
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
|
||||
{
|
||||
utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize);
|
||||
renderPass.renderPassInfo.occlusionQuerySet = querySet;
|
||||
|
||||
wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
|
||||
pass.SetPipeline(pipeline);
|
||||
pass.BeginOcclusionQuery(0);
|
||||
pass.Draw(3);
|
||||
pass.EndOcclusionQuery();
|
||||
pass.End();
|
||||
}
|
||||
{
|
||||
utils::ComboRenderPassDescriptor renderPass({renderTargetView}, depthTextureView);
|
||||
renderPass.occlusionQuerySet = querySet;
|
||||
|
||||
wgpu::RenderPassEncoder rewritePass = encoder.BeginRenderPass(&renderPass);
|
||||
rewritePass.SetPipeline(renderPipeline);
|
||||
rewritePass.BeginOcclusionQuery(0);
|
||||
rewritePass.Draw(3);
|
||||
rewritePass.EndOcclusionQuery();
|
||||
rewritePass.End();
|
||||
}
|
||||
encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0);
|
||||
wgpu::CommandBuffer commands = encoder.Finish();
|
||||
queue.Submit(1, &commands);
|
||||
|
||||
EXPECT_BUFFER(destination, 0, sizeof(uint64_t),
|
||||
new OcclusionExpectation(OcclusionExpectation::Result::Zero));
|
||||
}
|
||||
|
||||
// Test resolving occlusion query to the destination buffer with offset
|
||||
TEST_P(OcclusionQueryTests, ResolveToBufferWithOffset) {
|
||||
constexpr uint32_t kQueryCount = 2;
|
||||
@ -1227,7 +1283,11 @@ TEST_P(TimestampQueryInsidePassesTests, FromComputePass) {
|
||||
}
|
||||
}
|
||||
|
||||
DAWN_INSTANTIATE_TEST(OcclusionQueryTests, D3D12Backend(), MetalBackend(), VulkanBackend());
|
||||
DAWN_INSTANTIATE_TEST(OcclusionQueryTests,
|
||||
D3D12Backend(),
|
||||
MetalBackend(),
|
||||
MetalBackend({"metal_fill_empty_occlusion_queries_with_zero"}),
|
||||
VulkanBackend());
|
||||
DAWN_INSTANTIATE_TEST(PipelineStatisticsQueryTests,
|
||||
D3D12Backend(),
|
||||
MetalBackend(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user