Remove deferred BufferLocation updates for drawIndexedIndirect

Instead of using BufferLocation as another layer of indirection,
the indirectBuffer can be set directly on the indirect command.
This makes the indirect validation a bit simpler, but it introduces
an additional lifetime dependency: the indirect draw validation
MUST be encoded while the DrawIndexedIndirectCmds it references
are still valid.
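
For illustration, a minimal self-contained sketch of the two shapes (the types
below are simplified stand-ins, not Dawn's real declarations): the old path
patched each draw through a BufferLocation via a deferred update list, while
the new path writes the validated buffer and offset directly onto the
DrawIndexedIndirectCmd, which therefore has to outlive validation encoding.

#include <cstdint>
#include <memory>
#include <vector>

// Simplified stand-ins for Dawn's types; illustrative only.
struct Buffer {};

struct DrawIndexedIndirectCmd {
    Buffer* indirectBuffer = nullptr;
    uint64_t indirectOffset = 0;
};

// Old shape: each command refers to a shared BufferLocation, and a list of
// deferred updates patches every location once the validated buffer exists.
struct BufferLocation {
    Buffer* buffer = nullptr;
    uint64_t offset = 0;
};

struct DeferredBufferLocationUpdate {
    std::shared_ptr<BufferLocation> location;
    Buffer* buffer = nullptr;
    uint64_t offset = 0;
};

void ApplyDeferredUpdates(const std::vector<DeferredBufferLocationUpdate>& updates) {
    for (const DeferredBufferLocationUpdate& update : updates) {
        update.location->buffer = update.buffer;
        update.location->offset = update.offset;
    }
}

// New shape: the validated buffer and offset are written straight onto the
// command, so no BufferLocation or deferred-update list is needed. The
// trade-off is that the DrawIndexedIndirectCmd must still be alive when the
// validation passes are encoded.
void PatchCommandDirectly(DrawIndexedIndirectCmd* cmd,
                          Buffer* validatedBuffer,
                          uint64_t validatedOffset) {
    cmd->indirectBuffer = validatedBuffer;
    cmd->indirectOffset = validatedOffset;
}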

Bug: dawn:809
Change-Id: I1ef084622d8737ad5ec1b0247bf9062712e35008
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/67241
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Author: Austin Eng
Date: 2021-11-02 18:23:49 +00:00
Committed-by: Dawn LUCI CQ
Commit: 6abf1a1adb (parent: d4b9cda056)
18 changed files with 45 additions and 218 deletions


@@ -226,7 +226,6 @@ namespace dawn_native {
// single pass as possible. Batches can be grouped together as long as they're validating
// data from the same indirect buffer, but they may still be split into multiple passes if
// the number of draw calls in a pass would exceed some (very high) upper bound.
- uint64_t numTotalDrawCalls = 0;
size_t validatedParamsSize = 0;
std::vector<Pass> passes;
IndirectDrawMetadata::IndexedIndirectBufferValidationInfoMap& bufferInfoMap =
@@ -257,7 +256,6 @@ namespace dawn_native {
newBatch.clientIndirectOffset = minOffsetAlignedDown;
newBatch.clientIndirectSize =
batch.maxOffset + kDrawIndexedIndirectSize - minOffsetAlignedDown;
- numTotalDrawCalls += batch.draws.size();
newBatch.validatedParamsSize = batch.draws.size() * kDrawIndexedIndirectSize;
newBatch.validatedParamsOffset =
@@ -306,10 +304,7 @@ namespace dawn_native {
DAWN_TRY(validatedParamsBuffer.EnsureCapacity(validatedParamsSize));
usageTracker->BufferUsedAs(validatedParamsBuffer.GetBuffer(), wgpu::BufferUsage::Indirect);
- // Now we allocate and populate host-side batch data to be copied to the GPU, and prepare to
- // update all DrawIndexedIndirectCmd buffer references.
- std::vector<DeferredBufferLocationUpdate> deferredBufferLocationUpdates;
- deferredBufferLocationUpdates.reserve(numTotalDrawCalls);
+ // Now we allocate and populate host-side batch data to be copied to the GPU.
for (Pass& pass : passes) {
// We use std::malloc here because it guarantees maximal scalar alignment.
pass.batchData = {std::malloc(pass.batchDataSize), std::free};
@@ -322,16 +317,13 @@ namespace dawn_native {
uint32_t* indirectOffsets = reinterpret_cast<uint32_t*>(batch.batchInfo + 1);
uint64_t validatedParamsOffset = batch.validatedParamsOffset;
- for (const auto& draw : batch.metadata->draws) {
+ for (auto& draw : batch.metadata->draws) {
// The shader uses this to index an array of u32, hence the division by 4 bytes.
*indirectOffsets++ = static_cast<uint32_t>(
(draw.clientBufferOffset - batch.clientIndirectOffset) / 4);
- DeferredBufferLocationUpdate deferredUpdate;
- deferredUpdate.location = draw.bufferLocation;
- deferredUpdate.buffer = validatedParamsBuffer.GetBuffer();
- deferredUpdate.offset = validatedParamsOffset;
- deferredBufferLocationUpdates.push_back(std::move(deferredUpdate));
+ draw.cmd->indirectBuffer = validatedParamsBuffer.GetBuffer();
+ draw.cmd->indirectOffset = validatedParamsOffset;
validatedParamsOffset += kDrawIndexedIndirectSize;
}
@@ -364,8 +356,6 @@ namespace dawn_native {
// Finally, we can now encode our validation passes. Each pass first does a single
// WriteBuffer to get batch data over to the GPU, followed by a single compute pass. The
// compute pass encodes a separate SetBindGroup and Dispatch command for each batch.
- commandEncoder->EncodeSetValidatedBufferLocationsInternal(
- std::move(deferredBufferLocationUpdates));
for (const Pass& pass : passes) {
commandEncoder->APIWriteBuffer(batchDataBuffer.GetBuffer(), 0,
static_cast<const uint8_t*>(pass.batchData.get()),