Add workaround for resolving overlapping queries on Intel Gen12

TimestampQueryTests.ResolveTwiceToSameBuffer fails on Intel Gen12 GPUs
with Mesa driver >= 21.2.0 and with D3D driver >= 31.0.101.3413. The two
driver bugs have different root causes, but the same workaround of
clearing the destination buffer before resolving queries works for both.

Bug: dawn:1546, dawn:1823

Change-Id: I3f20a9100f4b6d3386e9685b351ad4fed69195bd
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/133284
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
Li Hao 2023-05-19 08:07:40 +00:00 committed by Dawn LUCI CQ
parent 34fd751bd7
commit d3875fc9b6
7 changed files with 41 additions and 6 deletions

View File

@ -251,6 +251,11 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
{"disable_timestamp_query_conversion", {"disable_timestamp_query_conversion",
"Resolve timestamp queries into ticks instead of nanoseconds.", "https://crbug.com/dawn/1305", "Resolve timestamp queries into ticks instead of nanoseconds.", "https://crbug.com/dawn/1305",
ToggleStage::Device}}, ToggleStage::Device}},
{Toggle::ClearBufferBeforeResolveQueries,
{"clear_buffer_before_resolve_queries",
"clear destination buffer to zero before resolving queries. This toggle is enabled on Intel "
"Gen12 GPUs due to driver issue.",
"https://crbug.com/dawn/1823", ToggleStage::Device}},
{Toggle::VulkanUseZeroInitializeWorkgroupMemoryExtension, {Toggle::VulkanUseZeroInitializeWorkgroupMemoryExtension,
{"use_vulkan_zero_initialize_workgroup_memory_extension", {"use_vulkan_zero_initialize_workgroup_memory_extension",
"Initialize workgroup memory with OpConstantNull on Vulkan when the Vulkan extension " "Initialize workgroup memory with OpConstantNull on Vulkan when the Vulkan extension "

View File

@ -71,6 +71,7 @@ enum class Toggle {
FxcOptimizations, FxcOptimizations,
RecordDetailedTimingInTraceEvents, RecordDetailedTimingInTraceEvents,
DisableTimestampQueryConversion, DisableTimestampQueryConversion,
ClearBufferBeforeResolveQueries,
VulkanUseZeroInitializeWorkgroupMemoryExtension, VulkanUseZeroInitializeWorkgroupMemoryExtension,
D3D12SplitBufferTextureCopyForRowsPerImagePaddings, D3D12SplitBufferTextureCopyForRowsPerImagePaddings,
MetalRenderR8RG8UnormSmallMipToTempTexture, MetalRenderR8RG8UnormSmallMipToTempTexture,

View File

@ -1056,7 +1056,11 @@ MaybeError CommandBuffer::RecordCommands(CommandRecordingContext* commandContext
auto startIt = querySet->GetQueryAvailability().begin() + firstQuery; auto startIt = querySet->GetQueryAvailability().begin() + firstQuery;
auto endIt = querySet->GetQueryAvailability().begin() + firstQuery + queryCount; auto endIt = querySet->GetQueryAvailability().begin() + firstQuery + queryCount;
bool hasUnavailableQueries = std::find(startIt, endIt, false) != endIt; bool hasUnavailableQueries = std::find(startIt, endIt, false) != endIt;
if (hasUnavailableQueries) { // Workaround for resolving overlapping queries to a same buffer on Intel Gen12 GPUs
// due to D3D12 driver issue.
// See http://crbug.com/dawn/1546 for more information.
bool clearNeeded = device->IsToggleEnabled(Toggle::ClearBufferBeforeResolveQueries);
if (hasUnavailableQueries || clearNeeded) {
DAWN_TRY(device->ClearBufferToZero(commandContext, destination, DAWN_TRY(device->ClearBufferToZero(commandContext, destination,
destinationOffset, destinationOffset,
queryCount * sizeof(uint64_t))); queryCount * sizeof(uint64_t)));

View File

@ -535,6 +535,18 @@ void PhysicalDevice::SetupBackendDeviceToggles(TogglesState* deviceToggles) cons
} }
} }
// The D3D driver has a bug when resolving overlapping queries to the same buffer on Intel Gen12
// GPUs. This workaround is needed on driver versions >= 30.0.101.3413.
// TODO(crbug.com/dawn/1546): Remove the workaround when the bug is fixed in D3D driver.
if (gpu_info::IsIntelGen12LP(vendorId, deviceId) ||
gpu_info::IsIntelGen12HP(vendorId, deviceId)) {
const gpu_info::DriverVersion kDriverVersion = {30, 0, 101, 3413};
if (gpu_info::CompareWindowsDriverVersion(vendorId, GetDriverVersion(), kDriverVersion) !=
-1) {
deviceToggles->Default(Toggle::ClearBufferBeforeResolveQueries, true);
}
}
// Currently these workarounds are only needed on Intel Gen9.5 and Gen11 GPUs. // Currently these workarounds are only needed on Intel Gen9.5 and Gen11 GPUs.
// See http://crbug.com/1237175 and http://crbug.com/dawn/1628 for more information. // See http://crbug.com/1237175 and http://crbug.com/dawn/1628 for more information.
if ((gpu_info::IsIntelGen9(vendorId, deviceId) && !gpu_info::IsSkylake(deviceId)) || if ((gpu_info::IsIntelGen9(vendorId, deviceId) && !gpu_info::IsSkylake(deviceId)) ||

View File

@ -767,7 +767,11 @@ MaybeError CommandBuffer::RecordCommands(CommandRecordingContext* recordingConte
auto endIt = auto endIt =
querySet->GetQueryAvailability().begin() + cmd->firstQuery + cmd->queryCount; querySet->GetQueryAvailability().begin() + cmd->firstQuery + cmd->queryCount;
bool hasUnavailableQueries = std::find(startIt, endIt, false) != endIt; bool hasUnavailableQueries = std::find(startIt, endIt, false) != endIt;
if (hasUnavailableQueries) { // Workaround for resolving overlapping queries to a same buffer on Intel Gen12 GPUs
// due to Mesa driver issue.
// See http://crbug.com/dawn/1823 for more information.
bool clearNeeded = device->IsToggleEnabled(Toggle::ClearBufferBeforeResolveQueries);
if (hasUnavailableQueries || clearNeeded) {
destination->TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst); destination->TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst);
device->fn.CmdFillBuffer(commands, destination->GetHandle(), device->fn.CmdFillBuffer(commands, destination->GetHandle(),
cmd->destinationOffset, cmd->destinationOffset,

View File

@ -485,6 +485,19 @@ void PhysicalDevice::SetupBackendDeviceToggles(TogglesState* deviceToggles) cons
} }
} }
if (IsIntelMesa() && (gpu_info::IsIntelGen12LP(GetVendorId(), GetDeviceId()) ||
gpu_info::IsIntelGen12HP(GetVendorId(), GetDeviceId()))) {
// The Intel Mesa driver has a bug where vkCmdCopyQueryPoolResults fails to write overlapping
// queries to the same buffer after the buffer has been accessed by a compute shader, even with
// correct resource barriers. This may be caused by a flush and memory coherency issue on Intel
// Gen12 GPUs. Work around it by clearing the buffer before vkCmdCopyQueryPoolResults.
// TODO(crbug.com/dawn/1823): Remove the workaround when the bug is fixed in Mesa driver.
const gpu_info::DriverVersion kBuggyDriverVersion = {21, 2, 0, 0};
if (gpu_info::CompareIntelMesaDriverVersion(GetDriverVersion(), kBuggyDriverVersion) >= 0) {
deviceToggles->Default(Toggle::ClearBufferBeforeResolveQueries, true);
}
}
// The environment can request to various options for depth-stencil formats that could be // The environment can request to various options for depth-stencil formats that could be
// unavailable. Override the decision if it is not applicable. // unavailable. Override the decision if it is not applicable.
bool supportsD32s8 = IsDepthStencilFormatSupported(VK_FORMAT_D32_SFLOAT_S8_UINT); bool supportsD32s8 = IsDepthStencilFormatSupported(VK_FORMAT_D32_SFLOAT_S8_UINT);

View File

@ -1091,10 +1091,6 @@ TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
// Test resolving a query set twice into the same destination buffer with potentially overlapping // Test resolving a query set twice into the same destination buffer with potentially overlapping
// ranges // ranges
TEST_P(TimestampQueryTests, ResolveTwiceToSameBuffer) { TEST_P(TimestampQueryTests, ResolveTwiceToSameBuffer) {
// TODO(dawn:1546): Intel D3D driver regression on Gen12 GPUs. The compute shader in two
// ResolveQuerySet execute wrong.
DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsIntelGen12());
constexpr uint32_t kQueryCount = kMinCount + 2; constexpr uint32_t kQueryCount = kMinCount + 2;
wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);