From f9c6633006e84f697996fb72e570114576cc32c3 Mon Sep 17 00:00:00 2001 From: shrekshao Date: Tue, 22 Nov 2022 21:36:27 +0000 Subject: [PATCH] Update inter stage variable subsetting validation and add tests Sync up with current WebGPU spec to allow FS input being a subset of VS output instead of requiring a strict match. This patch involves changing the validation and adding tests, together with using the TruncateInterstageVariables for hlsl generator to workaround the extra limit for D3D12 backend. Bug: dawn:1493 Change-Id: I2d4ba7f43dbe57f17ecd5c5d659f4ca93bb682a3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/109460 Commit-Queue: Shrek Shao Reviewed-by: Corentin Wallez Auto-Submit: Shrek Shao Kokoro: Kokoro --- src/dawn/native/RenderPipeline.cpp | 20 +- src/dawn/native/d3d12/RenderPipelineD3D12.cpp | 15 +- src/dawn/native/d3d12/ShaderModuleD3D12.cpp | 24 +- src/dawn/native/d3d12/ShaderModuleD3D12.h | 10 +- src/dawn/tests/end2end/ShaderTests.cpp | 418 ++++++++++++++++++ .../RenderPipelineValidationTests.cpp | 8 +- .../ShaderModuleValidationTests.cpp | 22 +- src/tint/writer/hlsl/generator.h | 5 + src/tint/writer/hlsl/generator_impl.cc | 22 + 9 files changed, 517 insertions(+), 27 deletions(-) diff --git a/src/dawn/native/RenderPipeline.cpp b/src/dawn/native/RenderPipeline.cpp index 033231b823..baa6c85b24 100644 --- a/src/dawn/native/RenderPipeline.cpp +++ b/src/dawn/native/RenderPipeline.cpp @@ -382,12 +382,24 @@ MaybeError ValidateInterStageMatching(DeviceBase* device, const EntryPointMetadata& fragmentMetadata = fragmentState.module->GetEntryPoint(fragmentState.entryPoint); - // TODO(dawn:563): Can this message give more details? - DAWN_INVALID_IF( - vertexMetadata.usedInterStageVariables != fragmentMetadata.usedInterStageVariables, - "One or more fragment inputs and vertex outputs are not one-to-one matching"); + if (DAWN_UNLIKELY( + (vertexMetadata.usedInterStageVariables | fragmentMetadata.usedInterStageVariables) != + vertexMetadata.usedInterStageVariables)) { + for (size_t i : IterateBitSet(fragmentMetadata.usedInterStageVariables)) { + if (!vertexMetadata.usedInterStageVariables.test(i)) { + return DAWN_VALIDATION_ERROR( + "The fragment input at location %u doesn't have a corresponding vertex output.", + i); + } + } + UNREACHABLE(); + } for (size_t i : IterateBitSet(vertexMetadata.usedInterStageVariables)) { + if (!fragmentMetadata.usedInterStageVariables.test(i)) { + // It is valid that fragment output is a subset of vertex input + continue; + } const auto& vertexOutputInfo = vertexMetadata.interStageVariables[i]; const auto& fragmentInputInfo = fragmentMetadata.interStageVariables[i]; DAWN_INVALID_IF( diff --git a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp index 80bc6fce9c..6c8a6fbeac 100644 --- a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp +++ b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp @@ -357,11 +357,20 @@ MaybeError RenderPipeline::Initialize() { PerStage compiledShader; + std::bitset* usedInterstageVariables = nullptr; + if (GetStageMask() & wgpu::ShaderStage::Fragment) { + // Now that only fragment shader can have interstage inputs. + const ProgrammableStage& programmableStage = GetStage(SingleShaderStage::Fragment); + auto entryPoint = programmableStage.module->GetEntryPoint(programmableStage.entryPoint); + usedInterstageVariables = &entryPoint.usedInterStageVariables; + } + for (auto stage : IterateStages(GetStageMask())) { const ProgrammableStage& programmableStage = GetStage(stage); - DAWN_TRY_ASSIGN(compiledShader[stage], ToBackend(programmableStage.module) - ->Compile(programmableStage, stage, - ToBackend(GetLayout()), compileFlags)); + DAWN_TRY_ASSIGN(compiledShader[stage], + ToBackend(programmableStage.module) + ->Compile(programmableStage, stage, ToBackend(GetLayout()), + compileFlags, usedInterstageVariables)); *shaders[stage] = compiledShader[stage].GetD3D12ShaderBytecode(); } diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp index 18e50eb030..4aa4d3c8d1 100644 --- a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp +++ b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp @@ -90,6 +90,7 @@ enum class Compiler { FXC, DXC }; X(tint::transform::BindingRemapper::BindingPoints, remappedBindingPoints) \ X(tint::transform::BindingRemapper::AccessControls, remappedAccessControls) \ X(std::optional, substituteOverrideConfig) \ + X(std::bitset, interstageLocations) \ X(LimitsForCompilationRequest, limits) \ X(bool, disableSymbolRenaming) \ X(bool, isRobustnessEnabled) \ @@ -392,6 +393,14 @@ ResultOrError TranslateToHLSL( // them as well. This would allow us to only upload root constants that are actually // read by the shader. options.array_length_from_uniform = r.arrayLengthFromUniform; + + if (r.stage == SingleShaderStage::Vertex) { + // Now that only vertex shader can have interstage outputs. + // Pass in the actually used interstage locations for tint to potentially truncate unused + // outputs. + options.interstage_locations = r.interstageLocations; + } + TRACE_EVENT0(tracePlatform.UnsafeGetValue(), General, "tint::writer::hlsl::Generate"); auto result = tint::writer::hlsl::Generate(&transformedProgram, options); DAWN_INVALID_IF(!result.success, "An error occured while generating HLSL: %s", result.error); @@ -456,10 +465,12 @@ MaybeError ShaderModule::Initialize(ShaderModuleParseResult* parseResult, return InitializeBase(parseResult, compilationMessages); } -ResultOrError ShaderModule::Compile(const ProgrammableStage& programmableStage, - SingleShaderStage stage, - const PipelineLayout* layout, - uint32_t compileFlags) { +ResultOrError ShaderModule::Compile( + const ProgrammableStage& programmableStage, + SingleShaderStage stage, + const PipelineLayout* layout, + uint32_t compileFlags, + const std::bitset* usedInterstageVariables) { Device* device = ToBackend(GetDevice()); TRACE_EVENT0(device->GetPlatform(), General, "ShaderModuleD3D12::Compile"); ASSERT(!IsError()); @@ -475,6 +486,10 @@ ResultOrError ShaderModule::Compile(const ProgrammableStage& pro req.hlsl.disableWorkgroupInit = device->IsToggleEnabled(Toggle::DisableWorkgroupInit); req.hlsl.dumpShaders = device->IsToggleEnabled(Toggle::DumpShaders); + if (usedInterstageVariables) { + req.hlsl.interstageLocations = *usedInterstageVariables; + } + req.bytecode.hasShaderF16Feature = device->HasFeature(Feature::ShaderF16); req.bytecode.compileFlags = compileFlags; @@ -596,7 +611,6 @@ ResultOrError ShaderModule::Compile(const ProgrammableStage& pro std::ostringstream dumpedMsg; dumpedMsg << "/* Dumped generated HLSL */" << std::endl << compiledShader->hlslSource << std::endl; - device->EmitLog(WGPULoggingType_Info, dumpedMsg.str().c_str()); if (device->IsToggleEnabled(Toggle::UseDXC)) { dumpedMsg << "/* Dumped disassembled DXIL */" << std::endl; diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.h b/src/dawn/native/d3d12/ShaderModuleD3D12.h index 8c70bb45a4..f64611703d 100644 --- a/src/dawn/native/d3d12/ShaderModuleD3D12.h +++ b/src/dawn/native/d3d12/ShaderModuleD3D12.h @@ -52,10 +52,12 @@ class ShaderModule final : public ShaderModuleBase { ShaderModuleParseResult* parseResult, OwnedCompilationMessages* compilationMessages); - ResultOrError Compile(const ProgrammableStage& programmableStage, - SingleShaderStage stage, - const PipelineLayout* layout, - uint32_t compileFlags); + ResultOrError Compile( + const ProgrammableStage& programmableStage, + SingleShaderStage stage, + const PipelineLayout* layout, + uint32_t compileFlags, + const std::bitset* usedInterstageVariables = nullptr); private: ShaderModule(Device* device, const ShaderModuleDescriptor* descriptor); diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp index 8909e6e359..57f10b360d 100644 --- a/src/dawn/tests/end2end/ShaderTests.cpp +++ b/src/dawn/tests/end2end/ShaderTests.cpp @@ -318,6 +318,237 @@ fn fragmentMain(input : VertexOut) -> @location(0) vec4 { wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); } +// Tests that sparse input output locations should work properly. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesSparse) { + std::string shader = R"( +struct ShaderIO { + @builtin(position) position : vec4, + @location(1) attribute1 : vec4, + @location(3) attribute3 : vec4, +} + +@vertex +fn vertexMain() -> ShaderIO { + var output : ShaderIO; + output.position = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute1 = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute3 = vec4(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : ShaderIO) -> @location(0) vec4 { + return input.attribute1; +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage built-in inputs and outputs usage mismatch don't mess up with input-output +// locations. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesBuiltinsMismatched) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4, +} + +struct FragmentIn { + @location(3) attribute3 : vec4, + @builtin(front_facing) front_facing : bool, + @location(1) attribute1 : f32, + @builtin(position) position : vec4, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4 { + _ = input.front_facing; + _ = input.position.x; + return input.attribute3; +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be a prefix subset of the outputs. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesPrefixSubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4, +} + +struct FragmentIn { + @location(1) attribute1 : f32, + @builtin(position) position : vec4, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4 { + _ = input.position.x; + return vec4(input.attribute1, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be a sparse non-prefix subset of the outputs. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesSparseSubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4, +} + +struct FragmentIn { + @location(3) attribute3 : vec4, + @builtin(position) position : vec4, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4 { + _ = input.position.x; + return input.attribute3; +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be a sparse non-prefix subset of the outputs, and that +// fragment inputs are unused. This test is not in dawn_unittests/RenderPipelineValidationTests +// because we want to test the compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesSparseSubsetUnused) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4, +} + +struct FragmentIn { + @location(3) attribute3 : vec4, + @builtin(position) position : vec4, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4 { + return vec4(0.0, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be empty when outputs are not. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesEmptySubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain() -> @location(0) vec4 { + return vec4(0.0, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + // This is a regression test for an issue caused by the FirstIndexOffset transfrom being done before // the BindingRemapper, causing an intermediate AST to be invalid (and fail the overall // compilation). @@ -949,8 +1180,195 @@ TEST_P(ShaderTests, ShaderOverridingRobustnessBuiltins) { EXPECT_BUFFER_U32_EQ(2, buf, 0); } +// Test that when fragment input is a subset of the vertex output, the render pipeline should be +// valid. +TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutput) { + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(1) var1: f32, + @location(3) @interpolate(flat) var3: u32, + @location(5) @interpolate(flat) var5: i32, + @location(7) var7: f32, + @location(9) @interpolate(flat) var9: u32, + @builtin(position) pos: vec4, +} + +@vertex fn main(@builtin(vertex_index) VertexIndex : u32) + -> ShaderIO { + var pos = array, 3>( + vec2(-1.0, 3.0), + vec2(-1.0, -3.0), + vec2(3.0, 0.0)); + + var shaderIO: ShaderIO; + shaderIO.var1 = 0.0; + shaderIO.var3 = 1u; + shaderIO.var5 = -9; + shaderIO.var7 = 1.0; + shaderIO.var9 = 0u; + shaderIO.pos = vec4(pos[VertexIndex], 0.0, 1.0); + + return shaderIO; +})"); + + wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(3) @interpolate(flat) var3: u32, + @location(7) var7: f32, +} + +@fragment fn main(io: ShaderIO) + -> @location(0) vec4 { + return vec4(f32(io.var3), io.var7, 1.0, 1.0); +})"); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + + utils::ComboRenderPipelineDescriptor descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList; + descriptor.cTargets[0].format = renderPass.colorFormat; + + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.Draw(3); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(255, 255, 255, 255), renderPass.color, 0, 0); +} + +// Test that when fragment input is a subset of the vertex output and the order of them is +// different, the render pipeline should be valid. +TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutputWithDifferentOrder) { + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(5) @align(16) var5: f32, + @location(1) var1: f32, + @location(2) var2: f32, + @location(3) @align(8) var3: f32, + @location(4) var4: vec4, + @builtin(position) pos: vec4, +} + +@vertex fn main(@builtin(vertex_index) VertexIndex : u32) + -> ShaderIO { + var pos = array, 3>( + vec2(-1.0, 3.0), + vec2(-1.0, -3.0), + vec2(3.0, 0.0)); + + var shaderIO: ShaderIO; + shaderIO.var1 = 0.0; + shaderIO.var2 = 0.0; + shaderIO.var3 = 1.0; + shaderIO.var4 = vec4(0.4, 0.4, 0.4, 0.4); + shaderIO.var5 = 1.0; + shaderIO.pos = vec4(pos[VertexIndex], 0.0, 1.0); + + return shaderIO; +})"); + + wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(4) var4: vec4, + @location(1) var1: f32, + @location(5) @align(16) var5: f32, +} + +@fragment fn main(io: ShaderIO) + -> @location(0) vec4 { + return vec4(io.var1, io.var5, io.var4.x, 1.0); +})"); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + + utils::ComboRenderPipelineDescriptor descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList; + descriptor.cTargets[0].format = renderPass.colorFormat; + + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.Draw(3); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 255, 102, 255), renderPass.color, 0, 0); +} + +// Test that when fragment input is a subset of the vertex output and that when the builtin +// interstage variables may mess up with the order, the render pipeline should be valid. +TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutputBuiltinOrder) { + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(1) var1: f32, + @builtin(position) pos: vec4, + @location(8) var8: vec3, + @location(7) var7: f32, +} + +@vertex fn main(@builtin(vertex_index) VertexIndex : u32) + -> ShaderIO { + var pos = array, 3>( + vec2(-1.0, 3.0), + vec2(-1.0, -3.0), + vec2(3.0, 0.0)); + + var shaderIO: ShaderIO; + shaderIO.var1 = 0.0; + shaderIO.var7 = 1.0; + shaderIO.var8 = vec3(1.0, 0.4, 0.0); + shaderIO.pos = vec4(pos[VertexIndex], 0.0, 1.0); + + return shaderIO; +})"); + + wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @builtin(position) pos: vec4, + @location(7) var7: f32, +} + +@fragment fn main(io: ShaderIO) + -> @location(0) vec4 { + return vec4(0.0, io.var7, 0.4, 1.0); +})"); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + + utils::ComboRenderPipelineDescriptor descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList; + descriptor.cTargets[0].format = renderPass.colorFormat; + + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.Draw(3); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 255, 102, 255), renderPass.color, 0, 0); +} + DAWN_INSTANTIATE_TEST(ShaderTests, D3D12Backend(), + D3D12Backend({"use_dxc"}), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), diff --git a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp index 5271228122..675b1d950a 100644 --- a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp +++ b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp @@ -1390,8 +1390,7 @@ class InterStageVariableMatchingValidationTest : public RenderPipelineValidation } }; -// Tests that creating render pipeline should fail when there is a vertex output that doesn't have -// its corresponding fragment input at the same location, and there is a fragment input that +// Tests that creating render pipeline should fail when there is a fragment input that // doesn't have its corresponding vertex output at the same location. TEST_F(InterStageVariableMatchingValidationTest, MissingDeclarationAtSameLocation) { wgpu::ShaderModule vertexModuleOutputAtLocation0 = utils::CreateShaderModule(device, R"( @@ -1430,7 +1429,10 @@ TEST_F(InterStageVariableMatchingValidationTest, MissingDeclarationAtSameLocatio })"); { - CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fsModule, false); + // It is okay if the fragment output is a subset of the vertex input. + CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fsModule, true); + } + { CheckCreatingRenderPipeline(vsModule, fragmentModuleAtLocation0, false); CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fragmentModuleInputAtLocation1, false); diff --git a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp index 63aafac350..4d788e35f1 100644 --- a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp +++ b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp @@ -278,10 +278,13 @@ TEST_F(ShaderModuleValidationTest, MaximumShaderIOLocations) { } if (success) { - ASSERT_DEVICE_ERROR( - device.CreateRenderPipeline(&pDesc), - testing::HasSubstr( - "One or more fragment inputs and vertex outputs are not one-to-one matching")); + if (failingShaderStage == wgpu::ShaderStage::Vertex) { + // It is allowed that fragment inputs are a subset of the vertex output variables. + device.CreateRenderPipeline(&pDesc); + } else { + ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), + testing::HasSubstr("The fragment input at location")); + } } else { ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), testing::HasSubstr(errorMatcher)); @@ -401,10 +404,13 @@ TEST_F(ShaderModuleValidationTest, MaximumInterStageShaderComponents) { } if (success) { - ASSERT_DEVICE_ERROR( - device.CreateRenderPipeline(&pDesc), - testing::HasSubstr( - "One or more fragment inputs and vertex outputs are not one-to-one matching")); + if (failingShaderStage == wgpu::ShaderStage::Vertex) { + // It is allowed that fragment inputs are a subset of the vertex output variables. + device.CreateRenderPipeline(&pDesc); + } else { + ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), + testing::HasSubstr("The fragment input at location")); + } } else { ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), testing::HasSubstr(errorMatcher)); diff --git a/src/tint/writer/hlsl/generator.h b/src/tint/writer/hlsl/generator.h index 9974cdef47..c6249435ea 100644 --- a/src/tint/writer/hlsl/generator.h +++ b/src/tint/writer/hlsl/generator.h @@ -15,6 +15,7 @@ #ifndef SRC_TINT_WRITER_HLSL_GENERATOR_H_ #define SRC_TINT_WRITER_HLSL_GENERATOR_H_ +#include #include #include #include @@ -25,6 +26,7 @@ #include "src/tint/ast/pipeline_stage.h" #include "src/tint/reflection.h" #include "src/tint/sem/binding_point.h" +#include "src/tint/utils/bitset.h" #include "src/tint/writer/array_length_from_uniform_options.h" #include "src/tint/writer/text.h" @@ -56,6 +58,9 @@ struct Options { /// Options used to specify a mapping of binding points to indices into a UBO /// from which to load buffer sizes. ArrayLengthFromUniformOptions array_length_from_uniform = {}; + /// Interstage locations actually used as inputs in the next stage of the pipeline. + /// This is potentially used for truncating unused interstage outputs at current shader stage. + std::bitset<16> interstage_locations; /// Reflect the fields of this class so that it can be used by tint::ForeachField() TINT_REFLECT(root_constant_binding_point, diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc index 9cfe5d7dae..e3abc5175e 100644 --- a/src/tint/writer/hlsl/generator_impl.cc +++ b/src/tint/writer/hlsl/generator_impl.cc @@ -64,6 +64,7 @@ #include "src/tint/transform/remove_continue_in_switch.h" #include "src/tint/transform/remove_phonies.h" #include "src/tint/transform/simplify_pointers.h" +#include "src/tint/transform/truncate_interstage_variables.h" #include "src/tint/transform/unshadow.h" #include "src/tint/transform/vectorize_scalar_matrix_initializers.h" #include "src/tint/transform/zero_init_workgroup_memory.h" @@ -210,6 +211,27 @@ SanitizedResult Sanitize(const Program* in, const Options& options) { manager.Add(); } manager.Add(); + + if (options.interstage_locations.any()) { + // When interstage_locations is empty, it means there's no user-defined interstage variables + // being used in the next stage. This is treated as a special case. + // TruncateInterstageVariables transform is trying to solve the HLSL compiler register + // mismatch issue. So it is not needed if no register is assigned to any interstage + // variables. As a result we only add this transform when there is at least one interstage + // locations being used. + + // TruncateInterstageVariables itself will skip when interstage_locations matches exactly + // with the current stage output. + + // Build the config for internal TruncateInterstageVariables transform. + transform::TruncateInterstageVariables::Config truncate_interstage_variables_cfg; + truncate_interstage_variables_cfg.interstage_locations = + std::move(options.interstage_locations); + manager.Add(); + data.Add( + std::move(truncate_interstage_variables_cfg)); + } + // NumWorkgroupsFromUniform must come after CanonicalizeEntryPointIO, as it // assumes that num_workgroups builtins only appear as struct members and are // only accessed directly via member accessors.