From 10fe83305a0450e2cb2747fbdc1eab9617a509be Mon Sep 17 00:00:00 2001 From: Corentin Wallez Date: Mon, 29 Jul 2019 14:11:06 +0000 Subject: [PATCH] Metal: Pack vertex buffers just after the pipeline layout WebGPU have a 2D pipeline layout plus a vertex buffer table while Metal has a single vertex buffer table that contains everything (including uniform and storage buffers). Previously the space for vertex buffers was statically allocated in that table which made the last vertex buffer go out of bound of the Metal vertex buffer table. This fixes the issue by packing all the vertex buffers that are used right after the vertex buffers used by the bind groups. This is a drive-by fix found while looking at reserving Metal vertex buffer 30 to contain the shader storage buffer lengths. BUG=dawn:195 Change-Id: If5c67bbc0d15c976793ef43889e50e4a360217d7 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/9387 Commit-Queue: Corentin Wallez Reviewed-by: Austin Eng --- src/dawn_native/metal/BufferMTL.h | 2 +- src/dawn_native/metal/BufferMTL.mm | 2 +- src/dawn_native/metal/CommandBufferMTL.mm | 93 +++++++++++++++++----- src/dawn_native/metal/PipelineLayoutMTL.h | 4 + src/dawn_native/metal/PipelineLayoutMTL.mm | 6 ++ src/dawn_native/metal/RenderPipelineMTL.h | 5 ++ src/dawn_native/metal/RenderPipelineMTL.mm | 45 +++++++---- src/tests/end2end/VertexInputTests.cpp | 22 +++++ 8 files changed, 139 insertions(+), 40 deletions(-) diff --git a/src/dawn_native/metal/BufferMTL.h b/src/dawn_native/metal/BufferMTL.h index c4395b2d40..f05f38e751 100644 --- a/src/dawn_native/metal/BufferMTL.h +++ b/src/dawn_native/metal/BufferMTL.h @@ -29,7 +29,7 @@ namespace dawn_native { namespace metal { Buffer(Device* device, const BufferDescriptor* descriptor); ~Buffer(); - id GetMTLBuffer(); + id GetMTLBuffer() const; void OnMapCommandSerialFinished(uint32_t mapSerial, bool isWrite); diff --git a/src/dawn_native/metal/BufferMTL.mm b/src/dawn_native/metal/BufferMTL.mm index d4cb82f1f1..0bdc09daaf 100644 --- a/src/dawn_native/metal/BufferMTL.mm +++ b/src/dawn_native/metal/BufferMTL.mm @@ -34,7 +34,7 @@ namespace dawn_native { namespace metal { DestroyInternal(); } - id Buffer::GetMTLBuffer() { + id Buffer::GetMTLBuffer() const { return mMtlBuffer; } diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm index 5f6331f7c1..dd65f3e7f0 100644 --- a/src/dawn_native/metal/CommandBufferMTL.mm +++ b/src/dawn_native/metal/CommandBufferMTL.mm @@ -437,6 +437,55 @@ namespace dawn_native { namespace metal { return copy; } + + // Keeps track of the dirty vertex buffer values so they can be lazily applied when we know + // all the relevant state. + class VertexInputBufferTracker { + public: + void OnSetVertexBuffers(uint32_t startSlot, + uint32_t count, + const Ref* buffers, + const uint64_t* offsets) { + for (uint32_t i = 0; i < count; ++i) { + uint32_t slot = startSlot + i; + mVertexBuffers[slot] = ToBackend(buffers[i].Get())->GetMTLBuffer(); + mVertexBufferOffsets[slot] = offsets[i]; + } + + // Use 64 bit masks and make sure there are no shift UB + static_assert(kMaxVertexBuffers <= 8 * sizeof(unsigned long long) - 1, ""); + mDirtyVertexBuffers |= ((1ull << count) - 1ull) << startSlot; + } + + void OnSetPipeline(RenderPipeline* lastPipeline, RenderPipeline* pipeline) { + // When a new pipeline is bound we must set all the vertex buffers again because + // they might have been offset by the pipeline layout, and they might be packed + // differently from the previous pipeline. + mDirtyVertexBuffers |= pipeline->GetInputsSetMask(); + } + + void Apply(id encoder, RenderPipeline* pipeline) { + std::bitset vertexBuffersToApply = + mDirtyVertexBuffers & pipeline->GetInputsSetMask(); + + for (uint32_t dawnIndex : IterateBitSet(vertexBuffersToApply)) { + uint32_t metalIndex = pipeline->GetMtlVertexBufferIndex(dawnIndex); + + [encoder setVertexBuffers:&mVertexBuffers[dawnIndex] + offsets:&mVertexBufferOffsets[dawnIndex] + withRange:NSMakeRange(metalIndex, 1)]; + } + + mDirtyVertexBuffers.reset(); + } + + private: + // All the indices in these arrays are Dawn vertex buffer indices + std::bitset mDirtyVertexBuffers; + std::array, kMaxVertexBuffers> mVertexBuffers; + std::array mVertexBufferOffsets; + }; + } // anonymous namespace CommandBuffer::CommandBuffer(CommandEncoderBase* encoder, @@ -718,6 +767,7 @@ namespace dawn_native { namespace metal { RenderPipeline* lastPipeline = nullptr; id indexBuffer = nil; uint32_t indexBufferBaseOffset = 0; + VertexInputBufferTracker vertexInputBuffers; // This will be autoreleased id encoder = @@ -735,6 +785,8 @@ namespace dawn_native { namespace metal { case Command::Draw: { DrawCmd* draw = mCommands.NextCommand(); + vertexInputBuffers.Apply(encoder, lastPipeline); + // The instance count must be non-zero, otherwise no-op if (draw->instanceCount != 0) { [encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology() @@ -750,6 +802,8 @@ namespace dawn_native { namespace metal { size_t formatSize = IndexFormatSize(lastPipeline->GetVertexInputDescriptor()->indexFormat); + vertexInputBuffers.Apply(encoder, lastPipeline); + // The index and instance count must be non-zero, otherwise no-op if (draw->indexCount != 0 && draw->instanceCount != 0) { [encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology() @@ -767,6 +821,8 @@ namespace dawn_native { namespace metal { case Command::DrawIndirect: { DrawIndirectCmd* draw = mCommands.NextCommand(); + vertexInputBuffers.Apply(encoder, lastPipeline); + Buffer* buffer = ToBackend(draw->indirectBuffer.Get()); id indirectBuffer = buffer->GetMTLBuffer(); [encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology() @@ -777,6 +833,8 @@ namespace dawn_native { namespace metal { case Command::DrawIndexedIndirect: { DrawIndirectCmd* draw = mCommands.NextCommand(); + vertexInputBuffers.Apply(encoder, lastPipeline); + Buffer* buffer = ToBackend(draw->indirectBuffer.Get()); id indirectBuffer = buffer->GetMTLBuffer(); [encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology() @@ -813,12 +871,15 @@ namespace dawn_native { namespace metal { case Command::SetRenderPipeline: { SetRenderPipelineCmd* cmd = mCommands.NextCommand(); - lastPipeline = ToBackend(cmd->pipeline).Get(); + RenderPipeline* newPipeline = ToBackend(cmd->pipeline).Get(); - [encoder setDepthStencilState:lastPipeline->GetMTLDepthStencilState()]; - [encoder setFrontFacingWinding:lastPipeline->GetMTLFrontFace()]; - [encoder setCullMode:lastPipeline->GetMTLCullMode()]; - lastPipeline->Encode(encoder); + vertexInputBuffers.OnSetPipeline(lastPipeline, newPipeline); + [encoder setDepthStencilState:newPipeline->GetMTLDepthStencilState()]; + [encoder setFrontFacingWinding:newPipeline->GetMTLFrontFace()]; + [encoder setCullMode:newPipeline->GetMTLCullMode()]; + newPipeline->Encode(encoder); + + lastPipeline = newPipeline; } break; case Command::SetStencilReference: { @@ -888,24 +949,12 @@ namespace dawn_native { namespace metal { case Command::SetVertexBuffers: { SetVertexBuffersCmd* cmd = mCommands.NextCommand(); - auto buffers = mCommands.NextData>(cmd->count); - auto offsets = mCommands.NextData(cmd->count); + const Ref* buffers = + mCommands.NextData>(cmd->count); + const uint64_t* offsets = mCommands.NextData(cmd->count); - std::array, kMaxVertexBuffers> mtlBuffers; - std::array mtlOffsets; - - // Perhaps an "array of vertex buffers(+offsets?)" should be - // a Dawn API primitive to avoid reconstructing this array? - for (uint32_t i = 0; i < cmd->count; ++i) { - Buffer* buffer = ToBackend(buffers[i].Get()); - mtlBuffers[i] = buffer->GetMTLBuffer(); - mtlOffsets[i] = offsets[i]; - } - - [encoder setVertexBuffers:mtlBuffers.data() - offsets:mtlOffsets.data() - withRange:NSMakeRange(kMaxBindingsPerGroup + cmd->startSlot, - cmd->count)]; + vertexInputBuffers.OnSetVertexBuffers(cmd->startSlot, cmd->count, buffers, + offsets); } break; default: { UNREACHABLE(); } break; diff --git a/src/dawn_native/metal/PipelineLayoutMTL.h b/src/dawn_native/metal/PipelineLayoutMTL.h index 77610a5f4a..59ba3b79bc 100644 --- a/src/dawn_native/metal/PipelineLayoutMTL.h +++ b/src/dawn_native/metal/PipelineLayoutMTL.h @@ -37,8 +37,12 @@ namespace dawn_native { namespace metal { std::array, kMaxBindGroups>; const BindingIndexInfo& GetBindingIndexInfo(ShaderStage stage) const; + // The number of Metal vertex stage buffers used for the whole pipeline layout. + uint32_t GetBufferBindingCount(ShaderStage stage); + private: PerStage mIndexInfo; + PerStage mBufferBindingCount; }; }} // namespace dawn_native::metal diff --git a/src/dawn_native/metal/PipelineLayoutMTL.mm b/src/dawn_native/metal/PipelineLayoutMTL.mm index 282559fdbf..af08c5bc59 100644 --- a/src/dawn_native/metal/PipelineLayoutMTL.mm +++ b/src/dawn_native/metal/PipelineLayoutMTL.mm @@ -60,6 +60,8 @@ namespace dawn_native { namespace metal { } } } + + mBufferBindingCount[stage] = bufferIndex; } } @@ -68,4 +70,8 @@ namespace dawn_native { namespace metal { return mIndexInfo[stage]; } + uint32_t PipelineLayout::GetBufferBindingCount(ShaderStage stage) { + return mBufferBindingCount[stage]; + } + }} // namespace dawn_native::metal diff --git a/src/dawn_native/metal/RenderPipelineMTL.h b/src/dawn_native/metal/RenderPipelineMTL.h index a027ffc806..10d75251be 100644 --- a/src/dawn_native/metal/RenderPipelineMTL.h +++ b/src/dawn_native/metal/RenderPipelineMTL.h @@ -37,6 +37,10 @@ namespace dawn_native { namespace metal { id GetMTLDepthStencilState(); + // For each Dawn vertex buffer, give the index in which it will be positioned in the Metal + // vertex buffer table. + uint32_t GetMtlVertexBufferIndex(uint32_t dawnIndex) const; + private: MTLVertexDescriptor* MakeVertexDesc(); @@ -46,6 +50,7 @@ namespace dawn_native { namespace metal { MTLCullMode mMtlCullMode; id mMtlRenderPipelineState = nil; id mMtlDepthStencilState = nil; + std::array mMtlVertexBufferIndices; }; }} // namespace dawn_native::metal diff --git a/src/dawn_native/metal/RenderPipelineMTL.mm b/src/dawn_native/metal/RenderPipelineMTL.mm index 0bb3a55d73..ac6ab356ec 100644 --- a/src/dawn_native/metal/RenderPipelineMTL.mm +++ b/src/dawn_native/metal/RenderPipelineMTL.mm @@ -400,24 +400,22 @@ namespace dawn_native { namespace metal { return mMtlDepthStencilState; } + uint32_t RenderPipeline::GetMtlVertexBufferIndex(uint32_t dawnIndex) const { + ASSERT(dawnIndex < kMaxVertexBuffers); + return mMtlVertexBufferIndices[dawnIndex]; + } + MTLVertexDescriptor* RenderPipeline::MakeVertexDesc() { MTLVertexDescriptor* mtlVertexDescriptor = [MTLVertexDescriptor new]; - for (uint32_t i : IterateBitSet(GetAttributesSetMask())) { - const VertexAttributeInfo& info = GetAttribute(i); + // Vertex buffers are packed after all the buffers for the bind groups. + uint32_t mtlVertexBufferIndex = + ToBackend(GetLayout())->GetBufferBindingCount(ShaderStage::Vertex); - auto attribDesc = [MTLVertexAttributeDescriptor new]; - attribDesc.format = VertexFormatType(info.format); - attribDesc.offset = info.offset; - attribDesc.bufferIndex = kMaxBindingsPerGroup + info.inputSlot; - mtlVertexDescriptor.attributes[i] = attribDesc; - [attribDesc release]; - } + for (uint32_t dawnVertexBufferIndex : IterateBitSet(GetInputsSetMask())) { + const VertexBufferInfo& info = GetInput(dawnVertexBufferIndex); - for (uint32_t vbInputSlot : IterateBitSet(GetInputsSetMask())) { - const VertexBufferInfo& info = GetInput(vbInputSlot); - - auto layoutDesc = [MTLVertexBufferLayoutDescriptor new]; + MTLVertexBufferLayoutDescriptor* layoutDesc = [MTLVertexBufferLayoutDescriptor new]; if (info.stride == 0) { // For MTLVertexStepFunctionConstant, the stepRate must be 0, // but the stride must NOT be 0, so we made up it with @@ -426,7 +424,7 @@ namespace dawn_native { namespace metal { for (uint32_t attribIndex : IterateBitSet(GetAttributesSetMask())) { const VertexAttributeInfo& attrib = GetAttribute(attribIndex); // Only use the attributes that use the current input - if (attrib.inputSlot != vbInputSlot) { + if (attrib.inputSlot != dawnVertexBufferIndex) { continue; } max_stride = std::max(max_stride, @@ -442,10 +440,25 @@ namespace dawn_native { namespace metal { layoutDesc.stepRate = 1; layoutDesc.stride = info.stride; } - // TODO(cwallez@chromium.org): make the offset depend on the pipeline layout - mtlVertexDescriptor.layouts[kMaxBindingsPerGroup + vbInputSlot] = layoutDesc; + + mtlVertexDescriptor.layouts[mtlVertexBufferIndex] = layoutDesc; [layoutDesc release]; + + mMtlVertexBufferIndices[dawnVertexBufferIndex] = mtlVertexBufferIndex; + mtlVertexBufferIndex++; } + + for (uint32_t i : IterateBitSet(GetAttributesSetMask())) { + const VertexAttributeInfo& info = GetAttribute(i); + + auto attribDesc = [MTLVertexAttributeDescriptor new]; + attribDesc.format = VertexFormatType(info.format); + attribDesc.offset = info.offset; + attribDesc.bufferIndex = mMtlVertexBufferIndices[info.inputSlot]; + mtlVertexDescriptor.attributes[i] = attribDesc; + [attribDesc release]; + } + return mtlVertexDescriptor; } diff --git a/src/tests/end2end/VertexInputTests.cpp b/src/tests/end2end/VertexInputTests.cpp index 9024b1eed5..6efce06f64 100644 --- a/src/tests/end2end/VertexInputTests.cpp +++ b/src/tests/end2end/VertexInputTests.cpp @@ -487,6 +487,28 @@ TEST_P(VertexInputTest, MultiplePipelinesMixedVertexInput) { CheckResult(1, 4); } +// Checks that using the last vertex buffer doesn't overflow the vertex buffer table in Metal. +TEST_P(VertexInputTest, LastAllowedVertexBuffer) { + constexpr uint32_t kBufferIndex = kMaxVertexBuffers - 1; + + utils::ComboVertexInputDescriptor vertexInput; + // All the other vertex buffers default to no attributes + vertexInput.bufferCount = kMaxVertexBuffers; + vertexInput.cBuffers[kBufferIndex].stride = 4 * sizeof(float); + vertexInput.cBuffers[kBufferIndex].stepMode = InputStepMode::Vertex; + vertexInput.cBuffers[kBufferIndex].attributeCount = 1; + vertexInput.cBuffers[kBufferIndex].attributes = &vertexInput.cAttributes[0]; + vertexInput.cAttributes[0].shaderLocation = 0; + vertexInput.cAttributes[0].offset = 0; + vertexInput.cAttributes[0].format = VertexFormat::Float4; + + dawn::RenderPipeline pipeline = + MakeTestPipeline(vertexInput, 1, {{0, VertexFormat::Float4, InputStepMode::Vertex}}); + + dawn::Buffer buffer0 = MakeVertexBuffer({0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5}); + DoTestDraw(pipeline, 1, 1, {DrawVertexBuffer{kMaxVertexBuffers - 1, &buffer0}}); +} + DAWN_INSTANTIATE_TEST(VertexInputTest, D3D12Backend, MetalBackend, OpenGLBackend, VulkanBackend); // TODO for the input state: