Remove deferred BufferLocation updates for drawIndexedIndirect

Instead of using BufferLocation as another layer of indirection,
the indirectBuffer can be set directly on the indirect command.
This makes the indirect validation a bit simpler, but it introduces
an additional lifetime dependency: the indirect draw validation
MUST be encoded while the DrawIndexedIndirectCmds it references
are still valid.
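
For illustration, a minimal self-contained sketch of the two shapes (the types
below are simplified stand-ins, not Dawn's real declarations): the old path
patched each draw through a BufferLocation via a deferred update list, while
the new path writes the validated buffer and offset directly onto the
DrawIndexedIndirectCmd, which therefore has to outlive validation encoding.

#include <cstdint>
#include <memory>
#include <vector>

// Simplified stand-ins for Dawn's types; illustrative only.
struct Buffer {};

struct DrawIndexedIndirectCmd {
    Buffer* indirectBuffer = nullptr;
    uint64_t indirectOffset = 0;
};

// Old shape: each command refers to a shared BufferLocation, and a list of
// deferred updates patches every location once the validated buffer exists.
struct BufferLocation {
    Buffer* buffer = nullptr;
    uint64_t offset = 0;
};

struct DeferredBufferLocationUpdate {
    std::shared_ptr<BufferLocation> location;
    Buffer* buffer = nullptr;
    uint64_t offset = 0;
};

void ApplyDeferredUpdates(const std::vector<DeferredBufferLocationUpdate>& updates) {
    for (const DeferredBufferLocationUpdate& update : updates) {
        update.location->buffer = update.buffer;
        update.location->offset = update.offset;
    }
}

// New shape: the validated buffer and offset are written straight onto the
// command, so no BufferLocation or deferred-update list is needed. The
// trade-off is that the DrawIndexedIndirectCmd must still be alive when the
// validation passes are encoded.
void PatchCommandDirectly(DrawIndexedIndirectCmd* cmd,
                          Buffer* validatedBuffer,
                          uint64_t validatedOffset) {
    cmd->indirectBuffer = validatedBuffer;
    cmd->indirectOffset = validatedOffset;
}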

Bug: dawn:809
Change-Id: I1ef084622d8737ad5ec1b0247bf9062712e35008
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/67241
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Author: Austin Eng
Date: 2021-11-02 18:23:49 +00:00
Committed-by: Dawn LUCI CQ
Commit: 6abf1a1adb (parent: d4b9cda056)
18 changed files with 45 additions and 218 deletions


@@ -226,7 +226,6 @@ namespace dawn_native {
// single pass as possible. Batches can be grouped together as long as they're validating
// data from the same indirect buffer, but they may still be split into multiple passes if
// the number of draw calls in a pass would exceed some (very high) upper bound.
- uint64_t numTotalDrawCalls = 0;
size_t validatedParamsSize = 0;
std::vector<Pass> passes;
IndirectDrawMetadata::IndexedIndirectBufferValidationInfoMap& bufferInfoMap =
@@ -257,7 +256,6 @@ namespace dawn_native {
newBatch.clientIndirectOffset = minOffsetAlignedDown;
newBatch.clientIndirectSize =
batch.maxOffset + kDrawIndexedIndirectSize - minOffsetAlignedDown;
- numTotalDrawCalls += batch.draws.size();
newBatch.validatedParamsSize = batch.draws.size() * kDrawIndexedIndirectSize;
newBatch.validatedParamsOffset =
@@ -306,10 +304,7 @@ namespace dawn_native {
DAWN_TRY(validatedParamsBuffer.EnsureCapacity(validatedParamsSize));
usageTracker->BufferUsedAs(validatedParamsBuffer.GetBuffer(), wgpu::BufferUsage::Indirect);
- // Now we allocate and populate host-side batch data to be copied to the GPU, and prepare to
- // update all DrawIndexedIndirectCmd buffer references.
- std::vector<DeferredBufferLocationUpdate> deferredBufferLocationUpdates;
- deferredBufferLocationUpdates.reserve(numTotalDrawCalls);
+ // Now we allocate and populate host-side batch data to be copied to the GPU.
for (Pass& pass : passes) {
// We use std::malloc here because it guarantees maximal scalar alignment.
pass.batchData = {std::malloc(pass.batchDataSize), std::free};
@@ -322,16 +317,13 @@ namespace dawn_native {
uint32_t* indirectOffsets = reinterpret_cast<uint32_t*>(batch.batchInfo + 1);
uint64_t validatedParamsOffset = batch.validatedParamsOffset;
- for (const auto& draw : batch.metadata->draws) {
+ for (auto& draw : batch.metadata->draws) {
// The shader uses this to index an array of u32, hence the division by 4 bytes.
*indirectOffsets++ = static_cast<uint32_t>(
(draw.clientBufferOffset - batch.clientIndirectOffset) / 4);
- DeferredBufferLocationUpdate deferredUpdate;
- deferredUpdate.location = draw.bufferLocation;
- deferredUpdate.buffer = validatedParamsBuffer.GetBuffer();
- deferredUpdate.offset = validatedParamsOffset;
- deferredBufferLocationUpdates.push_back(std::move(deferredUpdate));
+ draw.cmd->indirectBuffer = validatedParamsBuffer.GetBuffer();
+ draw.cmd->indirectOffset = validatedParamsOffset;
validatedParamsOffset += kDrawIndexedIndirectSize;
}
@@ -364,8 +356,6 @@ namespace dawn_native {
// Finally, we can now encode our validation passes. Each pass first does a single
// WriteBuffer to get batch data over to the GPU, followed by a single compute pass. The
// compute pass encodes a separate SetBindGroup and Dispatch command for each batch.
- commandEncoder->EncodeSetValidatedBufferLocationsInternal(
- std::move(deferredBufferLocationUpdates));
for (const Pass& pass : passes) {
commandEncoder->APIWriteBuffer(batchDataBuffer.GetBuffer(), 0,
static_cast<const uint8_t*>(pass.batchData.get()),