dawn-cmake/src/dawn_native/Queue.cpp

// Copyright 2017 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn_native/Queue.h"
#include "common/Constants.h"
#include "dawn_native/Buffer.h"
#include "dawn_native/CommandBuffer.h"
#include "dawn_native/CommandEncoder.h"
#include "dawn_native/CommandValidation.h"
#include "dawn_native/Commands.h"
#include "dawn_native/CopyTextureForBrowserHelper.h"
#include "dawn_native/Device.h"
#include "dawn_native/DynamicUploader.h"
#include "dawn_native/ExternalTexture.h"
#include "dawn_native/QuerySet.h"
#include "dawn_native/RenderPassEncoder.h"
#include "dawn_native/RenderPipeline.h"
#include "dawn_native/Texture.h"
#include "dawn_platform/DawnPlatform.h"
#include "dawn_platform/tracing/TraceEvent.h"
#include <cstring>
namespace dawn_native {

    namespace {
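        // Copies the user-provided texel data into the mapped staging area. Depending
        // on how the source and destination strides line up, this is done row by row,
        // layer by layer, or with a single memcpy of the whole data.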
        void CopyTextureData(uint8_t* dstPointer,
                             const uint8_t* srcPointer,
                             uint32_t depth,
                             uint32_t rowsPerImage,
                             uint64_t imageAdditionalStride,
                             uint32_t actualBytesPerRow,
                             uint32_t dstBytesPerRow,
                             uint32_t srcBytesPerRow) {
            bool copyWholeLayer =
                actualBytesPerRow == dstBytesPerRow && dstBytesPerRow == srcBytesPerRow;
            bool copyWholeData = copyWholeLayer && imageAdditionalStride == 0;

            if (!copyWholeLayer) {  // copy row by row
                for (uint32_t d = 0; d < depth; ++d) {
                    for (uint32_t h = 0; h < rowsPerImage; ++h) {
                        memcpy(dstPointer, srcPointer, actualBytesPerRow);
                        dstPointer += dstBytesPerRow;
                        srcPointer += srcBytesPerRow;
                    }
                    srcPointer += imageAdditionalStride;
                }
            } else {
                uint64_t layerSize = uint64_t(rowsPerImage) * actualBytesPerRow;
                if (!copyWholeData) {  // copy layer by layer
                    for (uint32_t d = 0; d < depth; ++d) {
                        memcpy(dstPointer, srcPointer, layerSize);
                        dstPointer += layerSize;
                        srcPointer += layerSize + imageAdditionalStride;
                    }
                } else {  // do a single copy
                    memcpy(dstPointer, srcPointer, layerSize * depth);
                }
            }
        }

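        // Stages texture data in the dynamic uploader, repacking it so that the
        // bytes-per-row and the buffer offset both satisfy the alignments the backend
        // requires for buffer-to-texture copies.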
        ResultOrError<UploadHandle> UploadTextureDataAligningBytesPerRowAndOffset(
            DeviceBase* device,
            const void* data,
            uint32_t alignedBytesPerRow,
            uint32_t optimallyAlignedBytesPerRow,
            uint32_t alignedRowsPerImage,
            const TextureDataLayout& dataLayout,
            const TexelBlockInfo& blockInfo,
            const Extent3D& writeSizePixel) {
            uint64_t newDataSizeBytes;
            DAWN_TRY_ASSIGN(
                newDataSizeBytes,
                ComputeRequiredBytesInCopy(blockInfo, writeSizePixel, optimallyAlignedBytesPerRow,
                                           alignedRowsPerImage));

            uint64_t optimalOffsetAlignment =
                device->GetOptimalBufferToTextureCopyOffsetAlignment();
            ASSERT(IsPowerOfTwo(optimalOffsetAlignment));
            ASSERT(IsPowerOfTwo(blockInfo.byteSize));
            // The offset must be aligned to both optimalOffsetAlignment and
            // blockInfo.byteSize. Since both are powers of two, aligning to the larger
            // of the two satisfies both requirements.
            uint64_t offsetAlignment =
                std::max(optimalOffsetAlignment, uint64_t(blockInfo.byteSize));
            UploadHandle uploadHandle;
            DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
                                              newDataSizeBytes, device->GetPendingCommandSerial(),
                                              offsetAlignment));
            ASSERT(uploadHandle.mappedBuffer != nullptr);

            uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
            srcPointer += dataLayout.offset;

            uint32_t dataRowsPerImage = dataLayout.rowsPerImage;
            if (dataRowsPerImage == 0) {
                dataRowsPerImage = writeSizePixel.height / blockInfo.height;
            }

            ASSERT(dataRowsPerImage >= alignedRowsPerImage);
            uint64_t imageAdditionalStride =
                dataLayout.bytesPerRow * (dataRowsPerImage - alignedRowsPerImage);

            CopyTextureData(dstPointer, srcPointer, writeSizePixel.depthOrArrayLayers,
                            alignedRowsPerImage, imageAdditionalStride, alignedBytesPerRow,
                            optimallyAlignedBytesPerRow, dataLayout.bytesPerRow);

            return uploadHandle;
        }

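        // Task tracked on the queue that fires the OnSubmittedWorkDone callback once
        // the serial it was enqueued with has completed, or reports DeviceLost if the
        // device is lost first.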
        struct SubmittedWorkDone : QueueBase::TaskInFlight {
            SubmittedWorkDone(WGPUQueueWorkDoneCallback callback, void* userdata)
                : mCallback(callback), mUserdata(userdata) {
            }
            void Finish() override {
                ASSERT(mCallback != nullptr);
                mCallback(WGPUQueueWorkDoneStatus_Success, mUserdata);
                mCallback = nullptr;
            }
            void HandleDeviceLoss() override {
                ASSERT(mCallback != nullptr);
                mCallback(WGPUQueueWorkDoneStatus_DeviceLost, mUserdata);
                mCallback = nullptr;
            }
            ~SubmittedWorkDone() override = default;

          private:
            WGPUQueueWorkDoneCallback mCallback = nullptr;
            void* mUserdata;
        };

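        // Queue used as the backing object for error queues. Validation rejects error
        // objects before submission, so SubmitImpl must never be reached.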
        class ErrorQueue : public QueueBase {
          public:
            ErrorQueue(DeviceBase* device) : QueueBase(device, ObjectBase::kError) {
            }

          private:
            MaybeError SubmitImpl(uint32_t commandCount,
                                  CommandBufferBase* const* commands) override {
                UNREACHABLE();
            }
        };
    }  // namespace
    // QueueBase

    QueueBase::TaskInFlight::~TaskInFlight() {
    }

    QueueBase::QueueBase(DeviceBase* device) : ObjectBase(device, kLabelNotImplemented) {
    }

    QueueBase::QueueBase(DeviceBase* device, ObjectBase::ErrorTag tag) : ObjectBase(device, tag) {
    }

    QueueBase::~QueueBase() {
        ASSERT(mTasksInFlight.Empty());
    }

    // static
    QueueBase* QueueBase::MakeError(DeviceBase* device) {
        return new ErrorQueue(device);
    }

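    // Submits the command buffers and then destroys them: command buffers are
    // single-use, so their contents can be freed as soon as they are consumed.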
    void QueueBase::APISubmit(uint32_t commandCount, CommandBufferBase* const* commands) {
        SubmitInternal(commandCount, commands);

        for (uint32_t i = 0; i < commandCount; ++i) {
            commands[i]->Destroy();
        }
    }

    void QueueBase::APIOnSubmittedWorkDone(uint64_t signalValue,
                                           WGPUQueueWorkDoneCallback callback,
                                           void* userdata) {
        // The error status depends on the type of error, so we let the validation
        // function choose it.
        WGPUQueueWorkDoneStatus status;
        if (GetDevice()->ConsumedError(ValidateOnSubmittedWorkDone(signalValue, &status))) {
            callback(status, userdata);
            return;
        }

        std::unique_ptr<SubmittedWorkDone> task =
            std::make_unique<SubmittedWorkDone>(callback, userdata);

        // Technically we only need to wait for previously submitted work, but
        // OnSubmittedWorkDone is also used to make sure ALL queue work is finished in
        // tests, so we also wait for pending commands. (This is non-observable outside
        // of tests, so it's ok to deviate a bit from the spec.)
        TrackTask(std::move(task), GetDevice()->GetPendingCommandSerial());
    }

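    // Enqueues a task to run when the given serial completes and informs the device
    // that work is expected at that serial, so it keeps ticking until then.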
    void QueueBase::TrackTask(std::unique_ptr<TaskInFlight> task, ExecutionSerial serial) {
        mTasksInFlight.Enqueue(std::move(task), serial);
        GetDevice()->AddFutureSerial(serial);
    }

    void QueueBase::Tick(ExecutionSerial finishedSerial) {
        // If a user calls Queue::Submit inside a task, for example in a
        // Buffer::MapAsync callback, then the device will be ticked, which in turn
        // ticks the queue, causing reentrance here. To prevent the reentrant call from
        // invalidating mTasksInFlight while in use by the first call, we remove the
        // tasks to finish from the queue, update mTasksInFlight, then run the
        // callbacks.
        std::vector<std::unique_ptr<TaskInFlight>> tasks;
        for (auto& task : mTasksInFlight.IterateUpTo(finishedSerial)) {
            tasks.push_back(std::move(task));
        }
        mTasksInFlight.ClearUpTo(finishedSerial);
        for (auto& task : tasks) {
            task->Finish();
        }
    }

    void QueueBase::HandleDeviceLoss() {
        for (auto& task : mTasksInFlight.IterateAll()) {
            task->HandleDeviceLoss();
        }
        mTasksInFlight.Clear();
    }

    void QueueBase::APIWriteBuffer(BufferBase* buffer,
                                   uint64_t bufferOffset,
                                   const void* data,
                                   size_t size) {
        GetDevice()->ConsumedError(WriteBuffer(buffer, bufferOffset, data, size));
    }

    MaybeError QueueBase::WriteBuffer(BufferBase* buffer,
                                      uint64_t bufferOffset,
                                      const void* data,
                                      size_t size) {
        DAWN_TRY(GetDevice()->ValidateIsAlive());
        DAWN_TRY(GetDevice()->ValidateObject(this));
        DAWN_TRY(ValidateWriteBuffer(GetDevice(), buffer, bufferOffset, size));
        DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
        return WriteBufferImpl(buffer, bufferOffset, data, size);
    }

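    // Base implementation of the buffer write: copy the data into a staging
    // allocation from the dynamic uploader, then record a staging-to-buffer copy into
    // the pending commands.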
    MaybeError QueueBase::WriteBufferImpl(BufferBase* buffer,
                                          uint64_t bufferOffset,
                                          const void* data,
                                          size_t size) {
        if (size == 0) {
            return {};
        }

        DeviceBase* device = GetDevice();

        UploadHandle uploadHandle;
        DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
                                          size, device->GetPendingCommandSerial(),
                                          kCopyBufferToBufferOffsetAlignment));
        ASSERT(uploadHandle.mappedBuffer != nullptr);

        memcpy(uploadHandle.mappedBuffer, data, size);

        device->AddFutureSerial(device->GetPendingCommandSerial());

        return device->CopyFromStagingToBuffer(uploadHandle.stagingBuffer, uploadHandle.startOffset,
                                               buffer, bufferOffset, size);
    }

    void QueueBase::APIWriteTexture(const ImageCopyTexture* destination,
                                    const void* data,
                                    size_t dataSize,
                                    const TextureDataLayout* dataLayout,
                                    const Extent3D* writeSize) {
        GetDevice()->ConsumedError(
            WriteTextureInternal(destination, data, dataSize, *dataLayout, writeSize));
    }

    MaybeError QueueBase::WriteTextureInternal(const ImageCopyTexture* destination,
                                               const void* data,
                                               size_t dataSize,
                                               const TextureDataLayout& dataLayout,
                                               const Extent3D* writeSize) {
        DAWN_TRY(ValidateWriteTexture(destination, dataSize, dataLayout, writeSize));

        if (writeSize->width == 0 || writeSize->height == 0 ||
            writeSize->depthOrArrayLayers == 0) {
            return {};
        }

        const TexelBlockInfo& blockInfo =
            destination->texture->GetFormat().GetAspectInfo(destination->aspect).block;
        TextureDataLayout layout = dataLayout;
        ApplyDefaultTextureDataLayoutOptions(&layout, blockInfo, *writeSize);
        return WriteTextureImpl(*destination, data, layout, *writeSize);
    }

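    // Base implementation of the texture write: repack the data into an aligned
    // staging allocation, then record a staging-to-texture copy into the pending
    // commands.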
    MaybeError QueueBase::WriteTextureImpl(const ImageCopyTexture& destination,
                                           const void* data,
                                           const TextureDataLayout& dataLayout,
                                           const Extent3D& writeSizePixel) {
        const TexelBlockInfo& blockInfo =
            destination.texture->GetFormat().GetAspectInfo(destination.aspect).block;

        // We are only copying the part of the data that will appear in the texture.
        // Note that validating the texture copy range ensures that writeSizePixel.width
        // and writeSizePixel.height are multiples of blockInfo.width and
        // blockInfo.height respectively.
        ASSERT(writeSizePixel.width % blockInfo.width == 0);
        ASSERT(writeSizePixel.height % blockInfo.height == 0);
        uint32_t alignedBytesPerRow = writeSizePixel.width / blockInfo.width * blockInfo.byteSize;
        uint32_t alignedRowsPerImage = writeSizePixel.height / blockInfo.height;

        uint32_t optimalBytesPerRowAlignment = GetDevice()->GetOptimalBytesPerRowAlignment();
        uint32_t optimallyAlignedBytesPerRow =
            Align(alignedBytesPerRow, optimalBytesPerRowAlignment);

        UploadHandle uploadHandle;
        DAWN_TRY_ASSIGN(uploadHandle,
                        UploadTextureDataAligningBytesPerRowAndOffset(
                            GetDevice(), data, alignedBytesPerRow, optimallyAlignedBytesPerRow,
                            alignedRowsPerImage, dataLayout, blockInfo, writeSizePixel));

        // The staging copy laid the data out with the aligned strides, so the copy
        // command must use those instead of the user-provided layout.
        TextureDataLayout passDataLayout = dataLayout;
        passDataLayout.offset = uploadHandle.startOffset;
        passDataLayout.bytesPerRow = optimallyAlignedBytesPerRow;
        passDataLayout.rowsPerImage = alignedRowsPerImage;

        TextureCopy textureCopy;
        textureCopy.texture = destination.texture;
        textureCopy.mipLevel = destination.mipLevel;
        textureCopy.origin = destination.origin;
        textureCopy.aspect = ConvertAspect(destination.texture->GetFormat(), destination.aspect);

        DeviceBase* device = GetDevice();
        device->AddFutureSerial(device->GetPendingCommandSerial());

        return device->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout,
                                                &textureCopy, writeSizePixel);
    }

    void QueueBase::APICopyTextureForBrowser(const ImageCopyTexture* source,
                                             const ImageCopyTexture* destination,
                                             const Extent3D* copySize,
                                             const CopyTextureForBrowserOptions* options) {
        GetDevice()->ConsumedError(
            CopyTextureForBrowserInternal(source, destination, copySize, options));
    }

    MaybeError QueueBase::CopyTextureForBrowserInternal(
        const ImageCopyTexture* source,
        const ImageCopyTexture* destination,
        const Extent3D* copySize,
        const CopyTextureForBrowserOptions* options) {
        if (GetDevice()->IsValidationEnabled()) {
            DAWN_TRY(
                ValidateCopyTextureForBrowser(GetDevice(), source, destination, copySize, options));
        }
        return DoCopyTextureForBrowser(GetDevice(), source, destination, copySize, options);
    }

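    // Checks that every resource referenced by the command buffers can still be used:
    // resources in render-pass synchronization scopes, all resources referenced by
    // compute passes, resources used by top-level commands such as copies, and query
    // sets.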
    MaybeError QueueBase::ValidateSubmit(uint32_t commandCount,
                                         CommandBufferBase* const* commands) const {
        TRACE_EVENT0(GetDevice()->GetPlatform(), Validation, "Queue::ValidateSubmit");
        DAWN_TRY(GetDevice()->ValidateObject(this));

        for (uint32_t i = 0; i < commandCount; ++i) {
            DAWN_TRY(GetDevice()->ValidateObject(commands[i]));
            DAWN_TRY(commands[i]->ValidateCanUseInSubmitNow());

            const CommandBufferResourceUsage& usages = commands[i]->GetResourceUsages();

            for (const SyncScopeResourceUsage& scope : usages.renderPasses) {
                for (const BufferBase* buffer : scope.buffers) {
                    DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
                }
                for (const TextureBase* texture : scope.textures) {
                    DAWN_TRY(texture->ValidateCanUseInSubmitNow());
                }
                for (const ExternalTextureBase* externalTexture : scope.externalTextures) {
                    DAWN_TRY(externalTexture->ValidateCanUseInSubmitNow());
                }
            }

            for (const ComputePassResourceUsage& pass : usages.computePasses) {
                for (const BufferBase* buffer : pass.referencedBuffers) {
                    DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
                }
                for (const TextureBase* texture : pass.referencedTextures) {
                    DAWN_TRY(texture->ValidateCanUseInSubmitNow());
                }
                for (const ExternalTextureBase* externalTexture : pass.referencedExternalTextures) {
                    DAWN_TRY(externalTexture->ValidateCanUseInSubmitNow());
                }
            }

            for (const BufferBase* buffer : usages.topLevelBuffers) {
                DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
            }
            for (const TextureBase* texture : usages.topLevelTextures) {
                DAWN_TRY(texture->ValidateCanUseInSubmitNow());
            }
            for (const QuerySetBase* querySet : usages.usedQuerySets) {
                DAWN_TRY(querySet->ValidateCanUseInSubmitNow());
            }
        }

        return {};
    }

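    // *status is updated before each check so that, on failure, the callback receives
    // the status corresponding to the check that failed.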
    MaybeError QueueBase::ValidateOnSubmittedWorkDone(uint64_t signalValue,
                                                      WGPUQueueWorkDoneStatus* status) const {
        *status = WGPUQueueWorkDoneStatus_DeviceLost;
        DAWN_TRY(GetDevice()->ValidateIsAlive());

        *status = WGPUQueueWorkDoneStatus_Error;
        DAWN_TRY(GetDevice()->ValidateObject(this));

        if (signalValue != 0) {
            return DAWN_VALIDATION_ERROR("SignalValue must currently be 0.");
        }

        return {};
    }

    MaybeError QueueBase::ValidateWriteTexture(const ImageCopyTexture* destination,
                                               size_t dataSize,
                                               const TextureDataLayout& dataLayout,
                                               const Extent3D* writeSize) const {
        DAWN_TRY(GetDevice()->ValidateIsAlive());
        DAWN_TRY(GetDevice()->ValidateObject(this));
        DAWN_TRY(GetDevice()->ValidateObject(destination->texture));

        DAWN_TRY(ValidateImageCopyTexture(GetDevice(), *destination, *writeSize));

        if (dataLayout.offset > dataSize) {
            return DAWN_VALIDATION_ERROR("Queue::WriteTexture out of range");
        }

        if (!(destination->texture->GetUsage() & wgpu::TextureUsage::CopyDst)) {
            return DAWN_VALIDATION_ERROR("Texture needs the CopyDst usage bit");
        }

        if (destination->texture->GetSampleCount() > 1) {
            return DAWN_VALIDATION_ERROR("The sample count of textures must be 1");
        }

        DAWN_TRY(ValidateLinearToDepthStencilCopyRestrictions(*destination));
        // We validate the texture copy range before validating the linear texture
        // data, because the latter divides copyExtent.width by blockWidth and
        // copyExtent.height by blockHeight, and those divisibility conditions are
        // checked when validating the texture copy range.
        DAWN_TRY(ValidateTextureCopyRange(GetDevice(), *destination, *writeSize));

        const TexelBlockInfo& blockInfo =
            destination->texture->GetFormat().GetAspectInfo(destination->aspect).block;

        DAWN_TRY(ValidateLinearTextureData(dataLayout, dataSize, blockInfo, *writeSize));

        DAWN_TRY(destination->texture->ValidateCanUseInSubmitNow());

        return {};
    }

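    // Common submission path: bail out if the device is lost, run submit validation
    // when enabled, then hand the command buffers to the backend's SubmitImpl.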
    void QueueBase::SubmitInternal(uint32_t commandCount, CommandBufferBase* const* commands) {
        DeviceBase* device = GetDevice();
        if (device->ConsumedError(device->ValidateIsAlive())) {
            // If the device is lost, don't let any commands be submitted.
            return;
        }

        TRACE_EVENT0(device->GetPlatform(), General, "Queue::Submit");

        if (device->IsValidationEnabled() &&
            device->ConsumedError(ValidateSubmit(commandCount, commands))) {
            return;
        }
        ASSERT(!IsError());

        if (device->ConsumedError(SubmitImpl(commandCount, commands))) {
            return;
        }
    }

}  // namespace dawn_native