Dawn/Tint: Polyfill reflect vec2<f32> for D3D12 FXC on Intel

This CL adds a toggle-controlled Tint polyfill for reflect on vec2<f32>,
and enables this toggle by default on D3D12 Intel devices when using FXC.
This CL works around issue tint:1798.

Bug: tint:1798
Change-Id: If2f4de836eaf5e7374bc2c1ae3fbe06b91a5bbd5
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121160
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
This commit is contained in:
Zhaoming Jiang 2023-02-27 02:59:50 +00:00 committed by Dawn LUCI CQ
parent 67a3918bd8
commit 04529be9b7
10 changed files with 232 additions and 3 deletions

View File

@ -387,6 +387,11 @@ static constexpr ToggleEnumAndInfoList kToggleNameAndInfoList = {{
"This toggle is off by default. It is expected to turn on or get removed when WebGPU V1 " "This toggle is off by default. It is expected to turn on or get removed when WebGPU V1 "
"ships and stays stable.", "ships and stays stable.",
"https://crbug.com/dawn/1563", ToggleStage::Device}}, "https://crbug.com/dawn/1563", ToggleStage::Device}},
{Toggle::D3D12PolyfillReflectVec2F32,
{"d3d12_polyfill_reflect_vec2_f32",
"Polyfill the reflect builtin for vec2<f32> for D3D12. This toggle is enabled by default on "
"D3D12 backends using FXC on Intel GPUs due to a driver issue on Intel D3D12 driver.",
"https://crbug.com/tint/1798", ToggleStage::Device}},
{Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget, {Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
{"no_workaround_sample_mask_becomes_zero_for_all_but_last_color_target", {"no_workaround_sample_mask_becomes_zero_for_all_but_last_color_target",
"MacOS 12.0+ Intel has a bug where the sample mask is only applied for the last color " "MacOS 12.0+ Intel has a bug where the sample mask is only applied for the last color "

View File

@ -92,6 +92,7 @@ enum class Toggle {
UseBlitForDepthTextureToTextureCopyToNonzeroSubresource, UseBlitForDepthTextureToTextureCopyToNonzeroSubresource,
D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha, D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,
DisallowDeprecatedAPIs, DisallowDeprecatedAPIs,
D3D12PolyfillReflectVec2F32,
// Unresolved issues. // Unresolved issues.
NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget, NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,

View File

@ -595,6 +595,12 @@ void Adapter::SetupBackendDeviceToggles(TogglesState* deviceToggles) const {
deviceToggles->ForceSet( deviceToggles->ForceSet(
Toggle::D3D12UseTempBufferInTextureToTextureCopyBetweenDifferentDimensions, true); Toggle::D3D12UseTempBufferInTextureToTextureCopyBetweenDifferentDimensions, true);
} }
// Polyfill the reflect builtin for vec2<f32> on Intel devices when using FXC.
// See https://crbug.com/tint/1798 for more information.
if (gpu_info::IsIntel(vendorId) && !deviceToggles->IsEnabled(Toggle::UseDXC)) {
deviceToggles->Default(Toggle::D3D12PolyfillReflectVec2F32, true);
}
} }
ResultOrError<Ref<DeviceBase>> Adapter::CreateDeviceImpl(const DeviceDescriptor* descriptor, ResultOrError<Ref<DeviceBase>> Adapter::CreateDeviceImpl(const DeviceDescriptor* descriptor,

View File

@ -95,6 +95,7 @@ enum class Compiler { FXC, DXC };
X(bool, disableSymbolRenaming) \ X(bool, disableSymbolRenaming) \
X(bool, isRobustnessEnabled) \ X(bool, isRobustnessEnabled) \
X(bool, disableWorkgroupInit) \ X(bool, disableWorkgroupInit) \
X(bool, polyfillReflectVec2F32) \
X(bool, dumpShaders) X(bool, dumpShaders)
#define D3D_BYTECODE_COMPILATION_REQUEST_MEMBERS(X) \ #define D3D_BYTECODE_COMPILATION_REQUEST_MEMBERS(X) \
@ -401,6 +402,8 @@ ResultOrError<std::string> TranslateToHLSL(
options.interstage_locations = r.interstageLocations; options.interstage_locations = r.interstageLocations;
} }
options.polyfill_reflect_vec2_f32 = r.polyfillReflectVec2F32;
TRACE_EVENT0(tracePlatform.UnsafeGetValue(), General, "tint::writer::hlsl::Generate"); TRACE_EVENT0(tracePlatform.UnsafeGetValue(), General, "tint::writer::hlsl::Generate");
auto result = tint::writer::hlsl::Generate(&transformedProgram, options); auto result = tint::writer::hlsl::Generate(&transformedProgram, options);
DAWN_INVALID_IF(!result.success, "An error occured while generating HLSL: %s", result.error); DAWN_INVALID_IF(!result.success, "An error occured while generating HLSL: %s", result.error);
@ -606,6 +609,8 @@ ResultOrError<CompiledShader> ShaderModule::Compile(
req.hlsl.arrayLengthFromUniform = std::move(arrayLengthFromUniform); req.hlsl.arrayLengthFromUniform = std::move(arrayLengthFromUniform);
req.hlsl.substituteOverrideConfig = std::move(substituteOverrideConfig); req.hlsl.substituteOverrideConfig = std::move(substituteOverrideConfig);
req.hlsl.polyfillReflectVec2F32 = device->IsToggleEnabled(Toggle::D3D12PolyfillReflectVec2F32);
const CombinedLimits& limits = device->GetLimits(); const CombinedLimits& limits = device->GetLimits();
req.hlsl.limits = LimitsForCompilationRequest::Create(limits.v1); req.hlsl.limits = LimitsForCompilationRequest::Create(limits.v1);

View File

@ -582,6 +582,33 @@ struct BuiltinPolyfill::State {
return name; return name;
} }
/// Emits the helper function that stands in for the `reflect` builtin
/// @param ty the type used for both parameters and for the return value
/// @return the symbol naming the generated polyfill function
Symbol reflect(const type::Type* ty) {
    auto fn_name = b.Symbols().New("tint_reflect");

    // The generated function is equivalent to this WGSL:
    //   fn tint_reflect(e1 : T, e2 : T) -> T {
    //     let factor = (-2.0 * dot(e1, e2));
    //     return (e1 + (factor * e2));
    //   }
    // The factor is built with -2.0 rather than 2.0 to prevent the optimization that
    // causes the wrong result. See https://crbug.com/tint/1798 for more details.

    // let factor = (-2.0 * dot(e1, e2));
    auto* e1_dot_e2 = b.Call("dot", "e1", "e2");
    auto* factor_let = b.Let("factor", b.Mul(-2.0_a, e1_dot_e2));
    auto* factor_decl = b.Decl(factor_let);

    // return (e1 + (factor * e2));
    auto* scaled_e2 = b.Mul("factor", "e2");
    auto* result = b.Return(b.Add("e1", scaled_e2));

    auto body = utils::Vector{factor_decl, result};

    b.Func(fn_name,
           utils::Vector{
               b.Param("e1", T(ty)),
               b.Param("e2", T(ty)),
           },
           T(ty), body);

    return fn_name;
}
/// Builds the polyfill function for the `saturate` builtin /// Builds the polyfill function for the `saturate` builtin
/// @param ty the parameter and return type for the function /// @param ty the parameter and return type for the function
/// @return the polyfill function name /// @return the polyfill function name
@ -1007,6 +1034,18 @@ Transform::ApplyResult BuiltinPolyfill::Apply(const Program* src,
builtin, [&] { return s.insertBits(builtin->ReturnType()); }); builtin, [&] { return s.insertBits(builtin->ReturnType()); });
} }
break; break;
case sem::BuiltinType::kReflect:
// Only polyfill for vec2<f32>. See https://crbug.com/tint/1798 for more
// details.
if (polyfill.reflect_vec2_f32) {
auto& sig = builtin->Signature();
auto* vec = sig.return_type->As<type::Vector>();
if (vec && vec->Width() == 2 && vec->type()->Is<type::F32>()) {
fn = builtin_polyfills.GetOrCreate(
builtin, [&] { return s.reflect(builtin->ReturnType()); });
}
}
break;
case sem::BuiltinType::kSaturate: case sem::BuiltinType::kSaturate:
if (polyfill.saturate) { if (polyfill.saturate) {
fn = builtin_polyfills.GetOrCreate( fn = builtin_polyfills.GetOrCreate(

View File

@ -70,6 +70,8 @@ class BuiltinPolyfill final : public Castable<BuiltinPolyfill, Transform> {
bool int_div_mod = false; bool int_div_mod = false;
/// Should float modulos be polyfilled to emit a precise modulo operation as per the spec? /// Should float modulos be polyfilled to emit a precise modulo operation as per the spec?
bool precise_float_mod = false; bool precise_float_mod = false;
/// Should `reflect()` be polyfilled for vec2<f32>?
bool reflect_vec2_f32 = false;
/// Should `saturate()` be polyfilled? /// Should `saturate()` be polyfilled?
bool saturate = false; bool saturate = false;
/// Should `sign()` be polyfilled for integer types? /// Should `sign()` be polyfilled for integer types?

View File

@ -3044,6 +3044,177 @@ fn f() {
EXPECT_EQ(expect, str(got)); EXPECT_EQ(expect, str(got));
} }
////////////////////////////////////////////////////////////////////////////////
// reflect for vec2<f32>
////////////////////////////////////////////////////////////////////////////////
/// @returns transform data holding a BuiltinPolyfill config in which only the
/// `reflect_vec2_f32` polyfill has been switched on
DataMap polyfillReflectVec2F32() {
    BuiltinPolyfill::Builtins enabled;
    enabled.reflect_vec2_f32 = true;

    DataMap config;
    config.Add<BuiltinPolyfill::Config>(enabled);
    return config;
}
// reflect() on vec2<f32> is the one overload the polyfill targets: the transform
// should run only when the reflect_vec2_f32 polyfill is enabled.
TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec2_f32) {
    const char* wgsl = R"(
fn f() {
let e1 = vec2<f32>(1.0f);
let e2 = vec2<f32>(1.0f);
let x = reflect(e1, e2);
}
)";

    // Without any config the transform has nothing to do.
    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl));
    // With the polyfill enabled the vec2<f32> call must be picked up.
    EXPECT_TRUE(ShouldRun<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32()));
}
// reflect() on vec2<f16> is not polyfilled: the transform should not run whether or
// not the reflect_vec2_f32 polyfill is enabled.
TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec2_f16) {
    const char* wgsl = R"(
enable f16;
fn f() {
let e1 = vec2<f16>(1.0h);
let e2 = vec2<f16>(1.0h);
let x = reflect(e1, e2);
}
)";

    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl));
    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32()));
}
// reflect() on vec3<f32> is not polyfilled: the transform should not run whether or
// not the reflect_vec2_f32 polyfill is enabled.
TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec3_f32) {
    const char* wgsl = R"(
fn f() {
let e1 = vec3<f32>(1.0f);
let e2 = vec3<f32>(1.0f);
let x = reflect(e1, e2);
}
)";

    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl));
    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32()));
}
// reflect() on vec3<f16> is not polyfilled: the transform should not run whether or
// not the reflect_vec2_f32 polyfill is enabled.
TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec3_f16) {
    const char* wgsl = R"(
enable f16;
fn f() {
let e1 = vec3<f16>(1.0h);
let e2 = vec3<f16>(1.0h);
let x = reflect(e1, e2);
}
)";

    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl));
    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32()));
}
// reflect() on vec4<f32> is not polyfilled: the transform should not run whether or
// not the reflect_vec2_f32 polyfill is enabled.
// The shader uses vec4<f32> operands so that this test covers the 4-component
// overload named in the test, rather than repeating the vec3<f32> case.
TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec4_f32) {
    auto* src = R"(
fn f() {
let e1 = vec4<f32>(1.0f);
let e2 = vec4<f32>(1.0f);
let x = reflect(e1, e2);
}
)";

    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src));
    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32()));
}
// reflect() on vec4<f16> is not polyfilled: the transform should not run whether or
// not the reflect_vec2_f32 polyfill is enabled.
// The shader uses vec4<f16> operands so that this test covers the 4-component
// overload named in the test, rather than repeating the vec3<f16> case.
TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec4_f16) {
    auto* src = R"(
enable f16;
fn f() {
let e1 = vec4<f16>(1.0h);
let e2 = vec4<f16>(1.0h);
let x = reflect(e1, e2);
}
)";

    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src));
    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32()));
}
// A reflect() call whose arguments are all constant expressions is expected not to
// trigger the transform, even with the reflect_vec2_f32 polyfill enabled.
// NOTE(review): presumably because such a call is evaluated before the polyfill
// would apply - confirm against the transform.
TEST_F(BuiltinPolyfillTest, Reflect_ConstantExpression) {
    const char* wgsl = R"(
fn f() {
let r : vec2<f32> = reflect(vec2<f32>(1.0), vec2<f32>(1.0));
}
)";

    EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32()));
}
// With the polyfill enabled, a vec2<f32> reflect() call on non-constant arguments is
// redirected to a generated tint_reflect() helper.
TEST_F(BuiltinPolyfillTest, Reflect_vec2_f32) {
    const char* wgsl = R"(
fn f() {
let v = 0.5f;
let r : vec2<f32> = reflect(vec2<f32>(v), vec2<f32>(v));
}
)";

    const char* expected = R"(
fn tint_reflect(e1 : vec2<f32>, e2 : vec2<f32>) -> vec2<f32> {
let factor = (-2.0 * dot(e1, e2));
return (e1 + (factor * e2));
}
fn f() {
let v = 0.5f;
let r : vec2<f32> = tint_reflect(vec2<f32>(v), vec2<f32>(v));
}
)";

    auto output = Run<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32());

    EXPECT_EQ(expected, str(output));
}
// When reflect() is called with several vector types in one shader, only the
// vec2<f32> call is rewritten to tint_reflect(); the vec3/vec4 f32 calls and all
// f16 calls keep using the builtin.
TEST_F(BuiltinPolyfillTest, Reflect_multiple_types) {
    const char* wgsl = R"(
enable f16;
fn f() {
let in_f32 = 0.5f;
let out_f32_vec2 : vec2<f32> = reflect(vec2<f32>(in_f32), vec2<f32>(in_f32));
let out_f32_vec3 : vec3<f32> = reflect(vec3<f32>(in_f32), vec3<f32>(in_f32));
let out_f32_vec4 : vec4<f32> = reflect(vec4<f32>(in_f32), vec4<f32>(in_f32));
let in_f16 = 0.5h;
let out_f16_vec2 : vec2<f16> = reflect(vec2<f16>(in_f16), vec2<f16>(in_f16));
let out_f16_vec3 : vec3<f16> = reflect(vec3<f16>(in_f16), vec3<f16>(in_f16));
let out_f16_vec4 : vec4<f16> = reflect(vec4<f16>(in_f16), vec4<f16>(in_f16));
}
)";

    const char* expected = R"(
enable f16;
fn tint_reflect(e1 : vec2<f32>, e2 : vec2<f32>) -> vec2<f32> {
let factor = (-2.0 * dot(e1, e2));
return (e1 + (factor * e2));
}
fn f() {
let in_f32 = 0.5f;
let out_f32_vec2 : vec2<f32> = tint_reflect(vec2<f32>(in_f32), vec2<f32>(in_f32));
let out_f32_vec3 : vec3<f32> = reflect(vec3<f32>(in_f32), vec3<f32>(in_f32));
let out_f32_vec4 : vec4<f32> = reflect(vec4<f32>(in_f32), vec4<f32>(in_f32));
let in_f16 = 0.5h;
let out_f16_vec2 : vec2<f16> = reflect(vec2<f16>(in_f16), vec2<f16>(in_f16));
let out_f16_vec3 : vec3<f16> = reflect(vec3<f16>(in_f16), vec3<f16>(in_f16));
let out_f16_vec4 : vec4<f16> = reflect(vec4<f16>(in_f16), vec4<f16>(in_f16));
}
)";

    auto output = Run<BuiltinPolyfill>(wgsl, polyfillReflectVec2F32());

    EXPECT_EQ(expected, str(output));
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// saturate // saturate
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////

View File

@ -61,6 +61,8 @@ struct Options {
/// Interstage locations actually used as inputs in the next stage of the pipeline. /// Interstage locations actually used as inputs in the next stage of the pipeline.
/// This is potentially used for truncating unused interstage outputs at current shader stage. /// This is potentially used for truncating unused interstage outputs at current shader stage.
std::bitset<16> interstage_locations; std::bitset<16> interstage_locations;
/// Set to `true` to generate polyfill for `reflect` builtin for vec2<f32>
bool polyfill_reflect_vec2_f32 = false;
/// Reflect the fields of this class so that it can be used by tint::ForeachField() /// Reflect the fields of this class so that it can be used by tint::ForeachField()
TINT_REFLECT(root_constant_binding_point, TINT_REFLECT(root_constant_binding_point,

View File

@ -181,6 +181,7 @@ SanitizedResult Sanitize(const Program* in, const Options& options) {
polyfills.insert_bits = transform::BuiltinPolyfill::Level::kFull; polyfills.insert_bits = transform::BuiltinPolyfill::Level::kFull;
polyfills.int_div_mod = true; polyfills.int_div_mod = true;
polyfills.precise_float_mod = true; polyfills.precise_float_mod = true;
polyfills.reflect_vec2_f32 = options.polyfill_reflect_vec2_f32;
polyfills.texture_sample_base_clamp_to_edge_2d_f32 = true; polyfills.texture_sample_base_clamp_to_edge_2d_f32 = true;
polyfills.workgroup_uniform_load = true; polyfills.workgroup_uniform_load = true;
data.Add<transform::BuiltinPolyfill::Config>(polyfills); data.Add<transform::BuiltinPolyfill::Config>(polyfills);

View File

@ -233,9 +233,6 @@ crbug.com/tint/1796 [ nvidia-0x2184 win10 ] webgpu:shader,execution,expression,c
crbug.com/tint/1796 [ intel-gen-9 ubuntu ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ] crbug.com/tint/1796 [ intel-gen-9 ubuntu ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ]
crbug.com/tint/1796 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ] crbug.com/tint/1796 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ]
crbug.com/tint/1796 [ nvidia-0x2184 win10 ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ] crbug.com/tint/1796 [ nvidia-0x2184 win10 ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ]
crbug.com/tint/1798 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,reflect:f32_vec2:inputSource="storage_r" [ Failure ]
crbug.com/tint/1798 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,reflect:f32_vec2:inputSource="storage_rw" [ Failure ]
crbug.com/tint/1798 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,reflect:f32_vec2:inputSource="uniform" [ Failure ]
crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":1,"y":1,"z":1};numGroups={"x":1,"y":1,"z":1} [ Failure ] crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":1,"y":1,"z":1};numGroups={"x":1,"y":1,"z":1} [ Failure ]
crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":1,"y":1,"z":1};numGroups={"x":8,"y":4,"z":2} [ Failure ] crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":1,"y":1,"z":1};numGroups={"x":8,"y":4,"z":2} [ Failure ]
crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":3,"y":7,"z":5};numGroups={"x":13,"y":9,"z":11} [ Failure ] crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":3,"y":7,"z":5};numGroups={"x":13,"y":9,"z":11} [ Failure ]