Use first-fit policy to reduce upload memory.

Reverts CL9160 by replacing the existing policy with one
that re-uses smaller ring-buffers.

Before
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 397865.698113 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 398025.660377 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 438816.754717 ns

After
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 118189.847059 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 116808.235294 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 117133.964706 ns

No change for Vulkan. About 3x faster with D3D.

BUG=dawn:211

Change-Id: Iaa6b0ef50305bf7df482f7e10e92353320039965
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/10441
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Bryan Bernhart 2019-08-28 16:06:50 +00:00 committed by Commit Bot service account
parent 7ffd2346f8
commit 650859b420
3 changed files with 24 additions and 18 deletions

View File

@ -255,12 +255,8 @@ namespace dawn_native {
DynamicUploader* uploader = nullptr;
DAWN_TRY_ASSIGN(uploader, GetDevice()->GetDynamicUploader());
// TODO(bryan.bernhart@intel.com): Remove once alignment constraint is added to validation
// (dawn:73). D3D12 does not specify so we assume 4-byte alignment to be safe.
static constexpr size_t kDefaultAlignment = 4;
UploadHandle uploadHandle;
DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(count, kDefaultAlignment));
DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(count));
ASSERT(uploadHandle.mappedBuffer != nullptr);
memcpy(uploadHandle.mappedBuffer, data, count);

View File

@ -41,31 +41,41 @@ namespace dawn_native {
return {};
}
ResultOrError<UploadHandle> DynamicUploader::Allocate(uint32_t size, uint32_t alignment) {
ASSERT(IsPowerOfTwo(alignment));
ResultOrError<UploadHandle> DynamicUploader::Allocate(uint32_t size) {
// Note: Validation ensures size is already aligned.
// First-fit: find next smallest buffer large enough to satisfy the allocation request.
RingBuffer* targetRingBuffer = GetLargestBuffer();
for (auto& ringBuffer : mRingBuffers) {
// Prevent overflow.
ASSERT(ringBuffer->GetSize() >= ringBuffer->GetUsedSize());
const size_t remainingSize = ringBuffer->GetSize() - ringBuffer->GetUsedSize();
if (size <= remainingSize) {
targetRingBuffer = ringBuffer.get();
break;
}
}
// Align the requested allocation size
const size_t alignedSize = Align(size, alignment);
RingBuffer* largestRingBuffer = GetLargestBuffer();
UploadHandle uploadHandle = largestRingBuffer->SubAllocate(alignedSize);
UploadHandle uploadHandle = UploadHandle{};
if (targetRingBuffer != nullptr) {
uploadHandle = targetRingBuffer->SubAllocate(size);
}
// Upon failure, append a newly created (and much larger) ring buffer to fulfill the
// request.
if (uploadHandle.mappedBuffer == nullptr) {
// Compute the new max size (in powers of two to preserve alignment).
size_t newMaxSize = largestRingBuffer->GetSize();
size_t newMaxSize = targetRingBuffer->GetSize() * 2;
while (newMaxSize < size) {
newMaxSize *= 2;
}
// TODO(bryan.bernhart@intel.com): Fall-back to no sub-allocations should this fail.
DAWN_TRY(CreateAndAppendBuffer(newMaxSize));
largestRingBuffer = GetLargestBuffer();
uploadHandle = largestRingBuffer->SubAllocate(alignedSize);
targetRingBuffer = GetLargestBuffer();
uploadHandle = targetRingBuffer->SubAllocate(size);
}
uploadHandle.stagingBuffer = largestRingBuffer->GetStagingBuffer();
uploadHandle.stagingBuffer = targetRingBuffer->GetStagingBuffer();
return uploadHandle;
}

View File

@ -29,12 +29,12 @@ namespace dawn_native {
// We add functions to Create/Release StagingBuffers to the DynamicUploader as there's
// currently no place to track the allocated staging buffers such that they're freed after
// pending coommands are finished. This should be changed when better resource allocation is
// pending commands are finished. This should be changed when better resource allocation is
// implemented.
ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size);
void ReleaseStagingBuffer(std::unique_ptr<StagingBufferBase> stagingBuffer);
ResultOrError<UploadHandle> Allocate(uint32_t requiredSize, uint32_t alignment);
ResultOrError<UploadHandle> Allocate(uint32_t size);
void Tick(Serial lastCompletedSerial);
RingBuffer* GetLargestBuffer();