Metal: Introduce a proper BufferUploader and PendingCommands

Previously the Metal backend used a manual mutex system to make sure that
BufferSetSubData didn't have data races with reads from the GPU. Replace
this with a non-hacky version:
 - Make the Buffer objects allocated on the GPU
 - Make SetSubData use a ResourceUploader that allocates a CPU buffer
   and schedules a CPU->GPU copy.
 - Have a list of pending commands and a finished command serial to
   order operations and track when resources become unused.
This commit is contained in:
Corentin Wallez 2017-06-14 16:40:47 -04:00 committed by Corentin Wallez
parent 0ba5550874
commit 6cb33ef24e
9 changed files with 179 additions and 44 deletions

View File

@ -157,6 +157,8 @@ if (APPLE)
${METAL_DIR}/PipelineMTL.h
${METAL_DIR}/PipelineLayoutMTL.mm
${METAL_DIR}/PipelineLayoutMTL.h
${METAL_DIR}/ResourceUploader.mm
${METAL_DIR}/ResourceUploader.h
${METAL_DIR}/SamplerMTL.mm
${METAL_DIR}/SamplerMTL.h
${METAL_DIR}/ShaderModuleMTL.mm

View File

@ -19,8 +19,6 @@
#import <Metal/Metal.h>
#include <mutex>
namespace backend {
namespace metal {
@ -30,7 +28,6 @@ namespace metal {
~Buffer();
id<MTLBuffer> GetMTLBuffer();
std::mutex& GetMutex();
private:
void SetSubDataImpl(uint32_t start, uint32_t count, const uint32_t* data) override;
@ -38,7 +35,6 @@ namespace metal {
void UnmapImpl() override;
void TransitionUsageImpl(nxt::BufferUsageBit currentUsage, nxt::BufferUsageBit targetUsage) override;
std::mutex mutex;
id<MTLBuffer> mtlBuffer = nil;
};

View File

@ -15,6 +15,7 @@
#include "BufferMTL.h"
#include "MetalBackend.h"
#include "ResourceUploader.h"
namespace backend {
namespace metal {
@ -22,11 +23,10 @@ namespace metal {
Buffer::Buffer(BufferBuilder* builder)
: BufferBase(builder) {
mtlBuffer = [ToBackend(GetDevice())->GetMTLDevice() newBufferWithLength:GetSize()
options:MTLResourceStorageModeManaged];
options:MTLResourceStorageModePrivate];
}
Buffer::~Buffer() {
std::lock_guard<std::mutex> lock(mutex);
[mtlBuffer release];
mtlBuffer = nil;
}
@ -35,17 +35,9 @@ namespace metal {
return mtlBuffer;
}
std::mutex& Buffer::GetMutex() {
return mutex;
}
void Buffer::SetSubDataImpl(uint32_t start, uint32_t count, const uint32_t* data) {
uint32_t* dest = reinterpret_cast<uint32_t*>([mtlBuffer contents]);
{
std::lock_guard<std::mutex> lock(mutex);
memcpy(&dest[start], data, count * sizeof(uint32_t));
}
[mtlBuffer didModifyRange:NSMakeRange(start * sizeof(uint32_t), count * sizeof(uint32_t))];
auto* uploader = ToBackend(GetDevice())->GetResourceUploader();
uploader->BufferSubData(mtlBuffer, start * sizeof(uint32_t), count * sizeof(uint32_t), data);
}
void Buffer::MapReadAsyncImpl(uint32_t serial, uint32_t start, uint32_t count) {

View File

@ -19,9 +19,6 @@
#import <Metal/Metal.h>
#include <mutex>
#include <unordered_set>
namespace backend {
namespace metal {
@ -32,7 +29,7 @@ namespace metal {
CommandBuffer(Device* device, CommandBufferBuilder* builder);
~CommandBuffer();
void FillCommands(id<MTLCommandBuffer> commandBuffer, std::unordered_set<std::mutex*>* mutexes);
void FillCommands(id<MTLCommandBuffer> commandBuffer);
private:
Device* device;

View File

@ -126,7 +126,7 @@ namespace metal {
FreeCommands(&commands);
}
void CommandBuffer::FillCommands(id<MTLCommandBuffer> commandBuffer, std::unordered_set<std::mutex*>* mutexes) {
void CommandBuffer::FillCommands(id<MTLCommandBuffer> commandBuffer) {
Command type;
Pipeline* lastPipeline = nullptr;
id<MTLBuffer> indexBuffer = nil;
@ -339,7 +339,6 @@ namespace metal {
{
BufferView* view = ToBackend(group->GetBindingAsBufferView(binding));
auto b = ToBackend(view->GetBuffer());
mutexes->insert(&b->GetMutex());
const id<MTLBuffer> buffer = b->GetMTLBuffer();
const NSUInteger offset = view->GetOffset();
if (vertStage) {
@ -414,7 +413,6 @@ namespace metal {
{
SetIndexBufferCmd* cmd = commands.NextCommand<SetIndexBufferCmd>();
auto b = ToBackend(cmd->buffer.Get());
mutexes->insert(&b->GetMutex());
indexBuffer = b->GetMTLBuffer();
indexBufferOffset = cmd->offset;
indexType = IndexFormatType(cmd->format);
@ -436,7 +434,6 @@ namespace metal {
// a NXT API primitive to avoid reconstructing this array?
for (uint32_t i = 0; i < cmd->count; ++i) {
Buffer* buffer = ToBackend(buffers[i].Get());
mutexes->insert(&buffer->GetMutex());
mtlBuffers[i] = buffer->GetMTLBuffer();
mtlOffsets[i] = offsets[i];
}

View File

@ -75,6 +75,8 @@ namespace metal {
return ToBackendBase<MetalBackendTraits>(common);
}
class ResourceUploader;
class Device : public DeviceBase {
public:
Device(id<MTLDevice> mtlDevice);
@ -106,17 +108,30 @@ namespace metal {
id<MTLTexture> GetCurrentTexture();
id<MTLTexture> GetCurrentDepthTexture();
id<MTLCommandBuffer> GetPendingCommandBuffer();
void SubmitPendingCommandBuffer();
Serial GetPendingCommandSerial();
ResourceUploader* GetResourceUploader() const;
// NXT API
void Reference();
void Release();
private:
void OnCompletedHandler();
id<MTLDevice> mtlDevice = nil;
id<MTLCommandQueue> commandQueue = nil;
ResourceUploader* resourceUploader;
id<CAMetalDrawable> currentDrawable = nil;
id<MTLTexture> currentTexture = nil;
id<MTLTexture> currentDepthTexture = nil;
Serial finishedCommandSerial = 0;
Serial pendingCommandSerial = 1;
id<MTLCommandBuffer> pendingCommands = nil;
};
class BindGroup : public BindGroupBase {

View File

@ -22,6 +22,7 @@
#include "InputStateMTL.h"
#include "PipelineMTL.h"
#include "PipelineLayoutMTL.h"
#include "ResourceUploader.h"
#include "SamplerMTL.h"
#include "ShaderModuleMTL.h"
#include "TextureMTL.h"
@ -50,12 +51,19 @@ namespace metal {
// Device
Device::Device(id<MTLDevice> mtlDevice) : mtlDevice(mtlDevice) {
Device::Device(id<MTLDevice> mtlDevice)
: mtlDevice(mtlDevice), resourceUploader(new ResourceUploader(this)) {
[mtlDevice retain];
commandQueue = [mtlDevice newCommandQueue];
}
Device::~Device() {
[pendingCommands release];
pendingCommands = nil;
delete resourceUploader;
resourceUploader = nullptr;
[mtlDevice release];
mtlDevice = nil;
@ -119,6 +127,11 @@ namespace metal {
}
void Device::TickImpl() {
    // Hand the uploader the last serial the GPU reported as finished so it can let go of the
    // upload buffers that were enqueued up to that serial.
    const Serial completedSerial = finishedCommandSerial;
    resourceUploader->Tick(completedSerial);

    // The step above might have recorded GPU work, so submit it. Submitting is also what moves
    // the pending serial forward when a pending command buffer exists but nothing else would
    // flush it.
    SubmitPendingCommandBuffer();
}
void Device::SetNextDrawable(id<CAMetalDrawable> drawable) {
@ -183,6 +196,44 @@ namespace metal {
return currentDepthTexture;
}
// Returns the command buffer that accumulates GPU work until the next
// SubmitPendingCommandBuffer(), creating it on first use.
id<MTLCommandBuffer> Device::GetPendingCommandBuffer() {
    if (pendingCommands == nil) {
        // -commandBuffer hands back an object this code does not own, but the pointer is kept
        // in a member that can outlive the current autorelease pool and that ~Device releases.
        // Take our own reference here; it is dropped again in SubmitPendingCommandBuffer().
        pendingCommands = [[commandQueue commandBuffer] retain];
    }
    return pendingCommands;
}

// Commits the pending command buffer (if any), arranges for finishedCommandSerial to be updated
// once the GPU completes it, and advances pendingCommandSerial. Does nothing when no pending
// command buffer exists.
void Device::SubmitPendingCommandBuffer() {
    if (pendingCommands == nil) {
        return;
    }

    // The block captures `this` (the pointer), so a member named inside the block would be read
    // when the handler runs, i.e. after pendingCommandSerial has already been incremented below.
    // Snapshot the serial in a local, which the block captures by value.
    Serial pendingSerial = pendingCommandSerial;
    // NOTE(review): the handler writes finishedCommandSerial, which TickImpl() reads; if Metal
    // invokes the handler on another thread this member may need to be atomic -- confirm.
    [pendingCommands addCompletedHandler:^(id<MTLCommandBuffer> commandBuffer) {
        this->finishedCommandSerial = pendingSerial;
    }];

    [pendingCommands commit];
    // Balance the retain taken in GetPendingCommandBuffer().
    [pendingCommands release];
    pendingCommands = nil;
    pendingCommandSerial ++;
}
// Returns the serial that will be attached to the next submitted command buffer.
// The return type is spelled `Serial` to match the declaration in the class.
Serial Device::GetPendingCommandSerial() {
    // If this is called, then it means some piece of code somewhere will wait for this serial to
    // complete. Make sure the pending command buffer is created so that, in the worst case, it is
    // submitted on the next Tick() and eventually increments the serial. Otherwise, if no GPU
    // work happens, we could be waiting for this serial forever.
    GetPendingCommandBuffer();
    return pendingCommandSerial;
}
// Accessor for the uploader created in the Device constructor; the Device keeps ownership.
ResourceUploader* Device::GetResourceUploader() const {
    return this->resourceUploader;
}
// NXT API entry point. The body is empty: this implementation does no work when the device is
// referenced.
void Device::Reference() {
}
@ -227,29 +278,14 @@ namespace metal {
}
void Queue::Submit(uint32_t numCommands, CommandBuffer* const * commands) {
id<MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
// Mutexes are necessary to prevent buffers from being written from the
// CPU before their previous value has been read from the GPU.
// https://developer.apple.com/library/content/documentation/3DDrawing/Conceptual/MTLBestPracticesGuide/TripleBuffering.html
// TODO(kainino@chromium.org): When we have resource transitions, all of these mutexes will be replaced.
std::unordered_set<std::mutex*> mutexes;
Device* device = ToBackend(GetDevice());
id<MTLCommandBuffer> commandBuffer = device->GetPendingCommandBuffer();
for (uint32_t i = 0; i < numCommands; ++i) {
commands[i]->FillCommands(commandBuffer, &mutexes);
commands[i]->FillCommands(commandBuffer);
}
for (auto mutex : mutexes) {
mutex->lock();
}
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> commandBuffer) {
// 'mutexes' is copied into this Block
for (auto mutex : mutexes) {
mutex->unlock();
}
}];
[commandBuffer commit];
device->SubmitPendingCommandBuffer();
}
// RenderPass

View File

@ -0,0 +1,44 @@
// Copyright 2017 The NXT Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BACKEND_METAL_RESOURCEUPLOADER_H_
#define BACKEND_METAL_RESOURCEUPLOADER_H_
#include "common/SerialQueue.h"
#import <Metal/Metal.h>
namespace backend {
namespace metal {
class Device;
// Copies CPU data into Metal buffers by staging it in a temporary upload buffer and recording a
// blit on the device's pending command buffer. Upload buffers are kept in a serial-ordered queue
// and released from Tick() once the serial they were enqueued with has finished.
class ResourceUploader {
    public:
        // `explicit` prevents a Device* from silently converting into a ResourceUploader.
        explicit ResourceUploader(Device* device);
        ~ResourceUploader();

        // Schedules a copy of `size` bytes from `data` into `buffer`, starting at byte offset
        // `start` of the destination.
        void BufferSubData(id<MTLBuffer> buffer, uint32_t start, uint32_t size, const void* data);

        // Releases every upload buffer enqueued with a serial up to `finishedSerial`.
        void Tick(Serial finishedSerial);

    private:
        // Device used to allocate upload buffers and to obtain the pending command buffer/serial.
        Device* device;
        // Upload buffers still referenced by submitted or pending GPU work, keyed by serial.
        SerialQueue<id<MTLBuffer>> inflightUploadBuffers;
};
}
}
#endif // BACKEND_METAL_RESOURCEUPLOADER_H_

View File

@ -0,0 +1,56 @@
// Copyright 2017 The NXT Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ResourceUploader.h"
#include "MetalBackend.h"
namespace backend {
namespace metal {
// Stores the device pointer; no Metal resources are created until BufferSubData() is called.
ResourceUploader::ResourceUploader(Device* device) : device(device) {}
// Only checks that no upload buffer is still queued. The destructor releases nothing itself, so
// any buffer still in the queue at this point would not be released by this class.
// NOTE(review): presumably callers are expected to Tick() past the last serial before
// destroying the uploader -- confirm against Device teardown.
ResourceUploader::~ResourceUploader() {
ASSERT(inflightUploadBuffers.Empty());
}
// Schedules a copy of `size` bytes from `data` into `buffer`, starting at byte offset `start` of
// the destination. The data is staged in a shared-storage upload buffer, a blit is recorded on
// the device's pending command buffer, and the upload buffer is queued under the pending serial
// so that Tick() can release it later.
void ResourceUploader::BufferSubData(id<MTLBuffer> buffer, uint32_t start, uint32_t size, const void* data) {
    // Nothing to copy. This also avoids asking Metal for a zero-length buffer and a zero-size
    // blit.
    if (size == 0) {
        return;
    }

    // TODO(cwallez@chromium.org) use a ringbuffer instead of creating a small buffer for each update
    // newBufferWithBytes: allocates the buffer and copies `data` into it in one step.
    id<MTLBuffer> uploadBuffer = [device->GetMTLDevice() newBufferWithBytes:data
                                                                      length:size
                                                                     options:MTLResourceStorageModeShared];
    ASSERT(uploadBuffer != nil);

    id<MTLCommandBuffer> commandBuffer = device->GetPendingCommandBuffer();
    id<MTLBlitCommandEncoder> encoder = [commandBuffer blitCommandEncoder];
    [encoder copyFromBuffer:uploadBuffer
               sourceOffset:0
                   toBuffer:buffer
          destinationOffset:start
                       size:size];
    [encoder endEncoding];

    // Keep the upload buffer alive until the command buffer carrying the blit has finished;
    // Tick() releases it.
    inflightUploadBuffers.Enqueue(uploadBuffer, device->GetPendingCommandSerial());
}
// Drops the reference held on every upload buffer queued with a serial up to `finishedSerial`,
// then removes those entries from the queue.
void ResourceUploader::Tick(Serial finishedSerial) {
    for (id<MTLBuffer> completedUpload : inflightUploadBuffers.IterateUpTo(finishedSerial)) {
        // Balances the ownership taken when the buffer was created in BufferSubData().
        [completedUpload release];
    }

    inflightUploadBuffers.ClearUpTo(finishedSerial);
}
}
}