dawn-cmake/src/dawn_native/ComputePassEncoder.cpp

453 lines
20 KiB
C++

// Copyright 2018 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn_native/ComputePassEncoder.h"
#include "dawn_native/BindGroup.h"
#include "dawn_native/BindGroupLayout.h"
#include "dawn_native/Buffer.h"
#include "dawn_native/CommandEncoder.h"
#include "dawn_native/CommandValidation.h"
#include "dawn_native/Commands.h"
#include "dawn_native/ComputePipeline.h"
#include "dawn_native/Device.h"
#include "dawn_native/InternalPipelineStore.h"
#include "dawn_native/ObjectType_autogen.h"
#include "dawn_native/PassResourceUsageTracker.h"
#include "dawn_native/QuerySet.h"
namespace dawn_native {
namespace {
ResultOrError<ComputePipelineBase*> GetOrCreateIndirectDispatchValidationPipeline(
DeviceBase* device) {
InternalPipelineStore* store = device->GetInternalPipelineStore();
if (store->dispatchIndirectValidationPipeline != nullptr) {
return store->dispatchIndirectValidationPipeline.Get();
}
ShaderModuleDescriptor descriptor;
ShaderModuleWGSLDescriptor wgslDesc;
descriptor.nextInChain = reinterpret_cast<ChainedStruct*>(&wgslDesc);
// TODO(https://crbug.com/dawn/1108): Propagate validation feedback from this
// shader in various failure modes.
wgslDesc.source = R"(
[[block]] struct UniformParams {
maxComputeWorkgroupsPerDimension: u32;
clientOffsetInU32: u32;
};
[[block]] struct IndirectParams {
data: array<u32>;
};
[[block]] struct ValidatedParams {
data: array<u32, 3>;
};
[[group(0), binding(0)]] var<uniform> uniformParams: UniformParams;
[[group(0), binding(1)]] var<storage, read_write> clientParams: IndirectParams;
[[group(0), binding(2)]] var<storage, write> validatedParams: ValidatedParams;
[[stage(compute), workgroup_size(1, 1, 1)]]
fn main() {
for (var i = 0u; i < 3u; i = i + 1u) {
var numWorkgroups = clientParams.data[uniformParams.clientOffsetInU32 + i];
if (numWorkgroups > uniformParams.maxComputeWorkgroupsPerDimension) {
numWorkgroups = 0u;
}
validatedParams.data[i] = numWorkgroups;
}
}
)";
Ref<ShaderModuleBase> shaderModule;
DAWN_TRY_ASSIGN(shaderModule, device->CreateShaderModule(&descriptor));
std::array<BindGroupLayoutEntry, 3> entries;
entries[0].binding = 0;
entries[0].visibility = wgpu::ShaderStage::Compute;
entries[0].buffer.type = wgpu::BufferBindingType::Uniform;
entries[1].binding = 1;
entries[1].visibility = wgpu::ShaderStage::Compute;
entries[1].buffer.type = kInternalStorageBufferBinding;
entries[2].binding = 2;
entries[2].visibility = wgpu::ShaderStage::Compute;
entries[2].buffer.type = wgpu::BufferBindingType::Storage;
BindGroupLayoutDescriptor bindGroupLayoutDescriptor;
bindGroupLayoutDescriptor.entryCount = entries.size();
bindGroupLayoutDescriptor.entries = entries.data();
Ref<BindGroupLayoutBase> bindGroupLayout;
DAWN_TRY_ASSIGN(bindGroupLayout,
device->CreateBindGroupLayout(&bindGroupLayoutDescriptor, true));
PipelineLayoutDescriptor pipelineDescriptor;
pipelineDescriptor.bindGroupLayoutCount = 1;
pipelineDescriptor.bindGroupLayouts = &bindGroupLayout.Get();
Ref<PipelineLayoutBase> pipelineLayout;
DAWN_TRY_ASSIGN(pipelineLayout, device->CreatePipelineLayout(&pipelineDescriptor));
ComputePipelineDescriptor computePipelineDescriptor = {};
computePipelineDescriptor.layout = pipelineLayout.Get();
computePipelineDescriptor.compute.module = shaderModule.Get();
computePipelineDescriptor.compute.entryPoint = "main";
DAWN_TRY_ASSIGN(store->dispatchIndirectValidationPipeline,
device->CreateComputePipeline(&computePipelineDescriptor));
return store->dispatchIndirectValidationPipeline.Get();
}
} // namespace
ComputePassEncoder::ComputePassEncoder(DeviceBase* device,
CommandEncoder* commandEncoder,
EncodingContext* encodingContext)
: ProgrammablePassEncoder(device, encodingContext), mCommandEncoder(commandEncoder) {
}
ComputePassEncoder::ComputePassEncoder(DeviceBase* device,
CommandEncoder* commandEncoder,
EncodingContext* encodingContext,
ErrorTag errorTag)
: ProgrammablePassEncoder(device, encodingContext, errorTag),
mCommandEncoder(commandEncoder) {
}
ComputePassEncoder* ComputePassEncoder::MakeError(DeviceBase* device,
CommandEncoder* commandEncoder,
EncodingContext* encodingContext) {
return new ComputePassEncoder(device, commandEncoder, encodingContext, ObjectBase::kError);
}
ObjectType ComputePassEncoder::GetType() const {
return ObjectType::ComputePassEncoder;
}
void ComputePassEncoder::APIEndPass() {
if (mEncodingContext->TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
if (IsValidationEnabled()) {
DAWN_TRY(ValidateProgrammableEncoderEnd());
}
allocator->Allocate<EndComputePassCmd>(Command::EndComputePass);
return {};
},
"encoding EndPass()")) {
mEncodingContext->ExitComputePass(this, mUsageTracker.AcquireResourceUsage());
}
}
void ComputePassEncoder::APIDispatch(uint32_t x, uint32_t y, uint32_t z) {
mEncodingContext->TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
if (IsValidationEnabled()) {
DAWN_TRY(mCommandBufferState.ValidateCanDispatch());
uint32_t workgroupsPerDimension =
GetDevice()->GetLimits().v1.maxComputeWorkgroupsPerDimension;
DAWN_INVALID_IF(
x > workgroupsPerDimension,
"Dispatch size X (%u) exceeds max compute workgroups per dimension (%u).",
x, workgroupsPerDimension);
DAWN_INVALID_IF(
y > workgroupsPerDimension,
"Dispatch size Y (%u) exceeds max compute workgroups per dimension (%u).",
y, workgroupsPerDimension);
DAWN_INVALID_IF(
z > workgroupsPerDimension,
"Dispatch size Z (%u) exceeds max compute workgroups per dimension (%u).",
z, workgroupsPerDimension);
}
// Record the synchronization scope for Dispatch, which is just the current
// bindgroups.
AddDispatchSyncScope();
DispatchCmd* dispatch = allocator->Allocate<DispatchCmd>(Command::Dispatch);
dispatch->x = x;
dispatch->y = y;
dispatch->z = z;
return {};
},
"encoding Dispatch (x: %u, y: %u, z: %u)", x, y, z);
}
ResultOrError<std::pair<Ref<BufferBase>, uint64_t>>
ComputePassEncoder::ValidateIndirectDispatch(BufferBase* indirectBuffer,
uint64_t indirectOffset) {
DeviceBase* device = GetDevice();
auto* const store = device->GetInternalPipelineStore();
Ref<ComputePipelineBase> validationPipeline;
DAWN_TRY_ASSIGN(validationPipeline, GetOrCreateIndirectDispatchValidationPipeline(device));
Ref<BindGroupLayoutBase> layout;
DAWN_TRY_ASSIGN(layout, validationPipeline->GetBindGroupLayout(0));
uint32_t storageBufferOffsetAlignment =
device->GetLimits().v1.minStorageBufferOffsetAlignment;
std::array<BindGroupEntry, 3> bindings;
// Storage binding holding the client's indirect buffer.
BindGroupEntry& clientIndirectBinding = bindings[0];
clientIndirectBinding.binding = 1;
clientIndirectBinding.buffer = indirectBuffer;
// Let the offset be the indirectOffset, aligned down to |storageBufferOffsetAlignment|.
const uint32_t clientOffsetFromAlignedBoundary =
indirectOffset % storageBufferOffsetAlignment;
const uint64_t clientOffsetAlignedDown = indirectOffset - clientOffsetFromAlignedBoundary;
clientIndirectBinding.offset = clientOffsetAlignedDown;
// Let the size of the binding be the additional offset, plus the size.
clientIndirectBinding.size = kDispatchIndirectSize + clientOffsetFromAlignedBoundary;
struct UniformParams {
uint32_t maxComputeWorkgroupsPerDimension;
uint32_t clientOffsetInU32;
};
// Create a uniform buffer to hold parameters for the shader.
Ref<BufferBase> uniformBuffer;
{
BufferDescriptor uniformDesc = {};
uniformDesc.size = sizeof(UniformParams);
uniformDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
uniformDesc.mappedAtCreation = true;
DAWN_TRY_ASSIGN(uniformBuffer, device->CreateBuffer(&uniformDesc));
UniformParams* params = static_cast<UniformParams*>(
uniformBuffer->GetMappedRange(0, sizeof(UniformParams)));
params->maxComputeWorkgroupsPerDimension =
device->GetLimits().v1.maxComputeWorkgroupsPerDimension;
params->clientOffsetInU32 = clientOffsetFromAlignedBoundary / sizeof(uint32_t);
uniformBuffer->Unmap();
}
// Uniform buffer binding pointing to the uniform parameters.
BindGroupEntry& uniformBinding = bindings[1];
uniformBinding.binding = 0;
uniformBinding.buffer = uniformBuffer.Get();
uniformBinding.offset = 0;
uniformBinding.size = sizeof(UniformParams);
// Reserve space in the scratch buffer to hold the validated indirect params.
ScratchBuffer& scratchBuffer = store->scratchIndirectStorage;
DAWN_TRY(scratchBuffer.EnsureCapacity(kDispatchIndirectSize));
Ref<BufferBase> validatedIndirectBuffer = scratchBuffer.GetBuffer();
// Binding for the validated indirect params.
BindGroupEntry& validatedParamsBinding = bindings[2];
validatedParamsBinding.binding = 2;
validatedParamsBinding.buffer = validatedIndirectBuffer.Get();
validatedParamsBinding.offset = 0;
validatedParamsBinding.size = kDispatchIndirectSize;
BindGroupDescriptor bindGroupDescriptor = {};
bindGroupDescriptor.layout = layout.Get();
bindGroupDescriptor.entryCount = bindings.size();
bindGroupDescriptor.entries = bindings.data();
Ref<BindGroupBase> validationBindGroup;
DAWN_TRY_ASSIGN(validationBindGroup, device->CreateBindGroup(&bindGroupDescriptor));
// Issue commands to validate the indirect buffer.
APISetPipeline(validationPipeline.Get());
APISetBindGroup(0, validationBindGroup.Get());
APIDispatch(1);
// Return the new indirect buffer and indirect buffer offset.
return std::make_pair(std::move(validatedIndirectBuffer), uint64_t(0));
}
void ComputePassEncoder::APIDispatchIndirect(BufferBase* indirectBuffer,
uint64_t indirectOffset) {
mEncodingContext->TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
if (IsValidationEnabled()) {
DAWN_TRY(GetDevice()->ValidateObject(indirectBuffer));
DAWN_TRY(ValidateCanUseAs(indirectBuffer, wgpu::BufferUsage::Indirect));
DAWN_TRY(mCommandBufferState.ValidateCanDispatch());
DAWN_INVALID_IF(indirectOffset % 4 != 0,
"Indirect offset (%u) is not a multiple of 4.", indirectOffset);
DAWN_INVALID_IF(
indirectOffset >= indirectBuffer->GetSize() ||
indirectOffset + kDispatchIndirectSize > indirectBuffer->GetSize(),
"Indirect offset (%u) and dispatch size (%u) exceeds the indirect buffer "
"size (%u).",
indirectOffset, kDispatchIndirectSize, indirectBuffer->GetSize());
}
SyncScopeUsageTracker scope;
scope.BufferUsedAs(indirectBuffer, wgpu::BufferUsage::Indirect);
mUsageTracker.AddReferencedBuffer(indirectBuffer);
// TODO(crbug.com/dawn/1166): If validation is enabled, adding |indirectBuffer|
// is needed for correct usage validation even though it will only be bound for
// storage. This will unecessarily transition the |indirectBuffer| in
// the backend.
Ref<BufferBase> indirectBufferRef = indirectBuffer;
if (IsValidationEnabled()) {
// Save the previous command buffer state so it can be restored after the
// validation inserts additional commands.
CommandBufferStateTracker previousState = mCommandBufferState;
// Validate each indirect dispatch with a single dispatch to copy the indirect
// buffer params into a scratch buffer if they're valid, and otherwise zero them
// out. We could consider moving the validation earlier in the pass after the
// last point the indirect buffer was used with writable usage, as well as batch
// validation for multiple dispatches into one, but inserting commands at
// arbitrary points in the past is not possible right now.
DAWN_TRY_ASSIGN(
std::tie(indirectBufferRef, indirectOffset),
ValidateIndirectDispatch(indirectBufferRef.Get(), indirectOffset));
// Restore the state.
RestoreCommandBufferState(std::move(previousState));
// |indirectBufferRef| was replaced with a scratch buffer. Add it to the
// synchronization scope.
ASSERT(indirectBufferRef.Get() != indirectBuffer);
scope.BufferUsedAs(indirectBufferRef.Get(), wgpu::BufferUsage::Indirect);
mUsageTracker.AddReferencedBuffer(indirectBufferRef.Get());
}
AddDispatchSyncScope(std::move(scope));
DispatchIndirectCmd* dispatch =
allocator->Allocate<DispatchIndirectCmd>(Command::DispatchIndirect);
dispatch->indirectBuffer = std::move(indirectBufferRef);
dispatch->indirectOffset = indirectOffset;
return {};
},
"encoding DispatchIndirect with %s", indirectBuffer);
}
void ComputePassEncoder::APISetPipeline(ComputePipelineBase* pipeline) {
mEncodingContext->TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
if (IsValidationEnabled()) {
DAWN_TRY(GetDevice()->ValidateObject(pipeline));
}
mCommandBufferState.SetComputePipeline(pipeline);
SetComputePipelineCmd* cmd =
allocator->Allocate<SetComputePipelineCmd>(Command::SetComputePipeline);
cmd->pipeline = pipeline;
return {};
},
"encoding SetPipeline with %s", pipeline);
}
void ComputePassEncoder::APISetBindGroup(uint32_t groupIndexIn,
BindGroupBase* group,
uint32_t dynamicOffsetCount,
const uint32_t* dynamicOffsets) {
mEncodingContext->TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
BindGroupIndex groupIndex(groupIndexIn);
if (IsValidationEnabled()) {
DAWN_TRY(ValidateSetBindGroup(groupIndex, group, dynamicOffsetCount,
dynamicOffsets));
}
mUsageTracker.AddResourcesReferencedByBindGroup(group);
RecordSetBindGroup(allocator, groupIndex, group, dynamicOffsetCount,
dynamicOffsets);
mCommandBufferState.SetBindGroup(groupIndex, group, dynamicOffsetCount,
dynamicOffsets);
return {};
},
"encoding SetBindGroup with %s at index %u", group, groupIndexIn);
}
void ComputePassEncoder::APIWriteTimestamp(QuerySetBase* querySet, uint32_t queryIndex) {
mEncodingContext->TryEncode(
this,
[&](CommandAllocator* allocator) -> MaybeError {
if (IsValidationEnabled()) {
DAWN_TRY(GetDevice()->ValidateObject(querySet));
DAWN_TRY(ValidateTimestampQuery(querySet, queryIndex));
}
mCommandEncoder->TrackQueryAvailability(querySet, queryIndex);
WriteTimestampCmd* cmd =
allocator->Allocate<WriteTimestampCmd>(Command::WriteTimestamp);
cmd->querySet = querySet;
cmd->queryIndex = queryIndex;
return {};
},
"encoding WriteTimestamp to %s.", querySet);
}
void ComputePassEncoder::AddDispatchSyncScope(SyncScopeUsageTracker scope) {
PipelineLayoutBase* layout = mCommandBufferState.GetPipelineLayout();
for (BindGroupIndex i : IterateBitSet(layout->GetBindGroupLayoutsMask())) {
scope.AddBindGroup(mCommandBufferState.GetBindGroup(i));
}
mUsageTracker.AddDispatch(scope.AcquireSyncScopeUsage());
}
void ComputePassEncoder::RestoreCommandBufferState(CommandBufferStateTracker state) {
// Encode commands for the backend to restore the pipeline and bind groups.
if (state.HasPipeline()) {
APISetPipeline(state.GetComputePipeline());
}
for (BindGroupIndex i(0); i < kMaxBindGroupsTyped; ++i) {
BindGroupBase* bg = state.GetBindGroup(i);
if (bg != nullptr) {
const std::vector<uint32_t>& offsets = state.GetDynamicOffsets(i);
if (offsets.empty()) {
APISetBindGroup(static_cast<uint32_t>(i), bg);
} else {
APISetBindGroup(static_cast<uint32_t>(i), bg, offsets.size(), offsets.data());
}
}
}
// Restore the frontend state tracking information.
mCommandBufferState = std::move(state);
}
CommandBufferStateTracker* ComputePassEncoder::GetCommandBufferStateTrackerForTesting() {
return &mCommandBufferState;
}
} // namespace dawn_native