diff --git a/src/dawn/native/d3d/ShaderUtils.cpp b/src/dawn/native/d3d/ShaderUtils.cpp index 222dc34374..a0e13d47cb 100644 --- a/src/dawn/native/d3d/ShaderUtils.cpp +++ b/src/dawn/native/d3d/ShaderUtils.cpp @@ -232,6 +232,7 @@ ResultOrError TranslateToHLSL( // Pass in the actually used interstage locations for tint to potentially truncate unused // outputs. options.interstage_locations = r.interstageLocations; + options.truncate_interstage_variables = true; } options.polyfill_reflect_vec2_f32 = r.polyfillReflectVec2F32; diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp index 2e91325bfc..b81f4eed32 100644 --- a/src/dawn/tests/end2end/ShaderTests.cpp +++ b/src/dawn/tests/end2end/ShaderTests.cpp @@ -549,6 +549,42 @@ fn fragmentMain() -> @location(0) vec4f { wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); } +// Regression test for crbug.com/dawn/1733. Even when user defined attribute input is empty, +// Builtin input for the next stage could still cause register mismatch issue on D3D12 HLSL +// compiler. So the TruncateInterstageVariables transform should still be run. This test is not in +// dawn_unittests/RenderPipelineValidationTests because we want to test the compilation of the +// pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesEmptyUserAttributeSubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4f, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4f, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4f(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4f(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(@builtin(position) position : vec4) -> @location(0) vec4f { + return vec4f(0.0, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + // This is a regression test for an issue caused by the FirstIndexOffset transfrom being done before // the BindingRemapper, causing an intermediate AST to be invalid (and fail the overall // compilation). diff --git a/src/tint/writer/hlsl/generator.h b/src/tint/writer/hlsl/generator.h index 9b79ad1887..804b449cb4 100644 --- a/src/tint/writer/hlsl/generator.h +++ b/src/tint/writer/hlsl/generator.h @@ -74,6 +74,9 @@ struct Options { /// This is potentially used for truncating unused interstage outputs at current shader stage. std::bitset<16> interstage_locations; + /// Set to `true` to run the TruncateInterstageVariables transform. + bool truncate_interstage_variables = false; + /// Set to `true` to generate polyfill for `reflect` builtin for vec2 bool polyfill_reflect_vec2_f32 = false; diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc index 4d07b4ff31..5e4ae33f94 100644 --- a/src/tint/writer/hlsl/generator_impl.cc +++ b/src/tint/writer/hlsl/generator_impl.cc @@ -241,13 +241,11 @@ SanitizedResult Sanitize(const Program* in, const Options& options) { // CanonicalizeEntryPointIO must come after Robustness manager.Add(); - if (options.interstage_locations.any()) { + if (options.truncate_interstage_variables) { // When interstage_locations is empty, it means there's no user-defined interstage variables - // being used in the next stage. This is treated as a special case. - // TruncateInterstageVariables transform is trying to solve the HLSL compiler register - // mismatch issue. So it is not needed if no register is assigned to any interstage - // variables. As a result we only add this transform when there is at least one interstage - // locations being used. + // being used in the next stage. Still, HLSL compiler register mismatch could happen, if + // there's builtin inputs used in the next stage. So we still run + // TruncateInterstageVariables transform. // TruncateInterstageVariables itself will skip when interstage_locations matches exactly // with the current stage output.