Use first-fit policy to reduce upload memory.

Reverts CL 9160 by replacing the existing policy (always sub-allocating from
the largest ring buffer) with a first-fit policy that reuses smaller ring buffers.

Before
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 397865.698113 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 398025.660377 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 438816.754717 ns

After
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 118189.847059 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 116808.235294 ns
*RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 117133.964706 ns

No change for Vulkan. About 3x faster with D3D12.

BUG=dawn:211

Change-Id: Iaa6b0ef50305bf7df482f7e10e92353320039965
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/10441
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Bryan Bernhart 2019-08-28 16:06:50 +00:00 committed by Commit Bot service account
parent 7ffd2346f8
commit 650859b420
3 changed files with 24 additions and 18 deletions

View File

@ -255,12 +255,8 @@ namespace dawn_native {
DynamicUploader* uploader = nullptr; DynamicUploader* uploader = nullptr;
DAWN_TRY_ASSIGN(uploader, GetDevice()->GetDynamicUploader()); DAWN_TRY_ASSIGN(uploader, GetDevice()->GetDynamicUploader());
// TODO(bryan.bernhart@intel.com): Remove once alignment constraint is added to validation
// (dawn:73). D3D12 does not specify so we assume 4-byte alignment to be safe.
static constexpr size_t kDefaultAlignment = 4;
UploadHandle uploadHandle; UploadHandle uploadHandle;
DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(count, kDefaultAlignment)); DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(count));
ASSERT(uploadHandle.mappedBuffer != nullptr); ASSERT(uploadHandle.mappedBuffer != nullptr);
memcpy(uploadHandle.mappedBuffer, data, count); memcpy(uploadHandle.mappedBuffer, data, count);

View File

@ -41,31 +41,41 @@ namespace dawn_native {
return {}; return {};
} }
ResultOrError<UploadHandle> DynamicUploader::Allocate(uint32_t size, uint32_t alignment) { ResultOrError<UploadHandle> DynamicUploader::Allocate(uint32_t size) {
ASSERT(IsPowerOfTwo(alignment)); // Note: Validation ensures size is already aligned.
// First-fit: find next smallest buffer large enough to satisfy the allocation request.
RingBuffer* targetRingBuffer = GetLargestBuffer();
for (auto& ringBuffer : mRingBuffers) {
// Prevent overflow.
ASSERT(ringBuffer->GetSize() >= ringBuffer->GetUsedSize());
const size_t remainingSize = ringBuffer->GetSize() - ringBuffer->GetUsedSize();
if (size <= remainingSize) {
targetRingBuffer = ringBuffer.get();
break;
}
}
// Align the requested allocation size UploadHandle uploadHandle = UploadHandle{};
const size_t alignedSize = Align(size, alignment); if (targetRingBuffer != nullptr) {
uploadHandle = targetRingBuffer->SubAllocate(size);
RingBuffer* largestRingBuffer = GetLargestBuffer(); }
UploadHandle uploadHandle = largestRingBuffer->SubAllocate(alignedSize);
// Upon failure, append a newly created (and much larger) ring buffer to fulfill the // Upon failure, append a newly created (and much larger) ring buffer to fulfill the
// request. // request.
if (uploadHandle.mappedBuffer == nullptr) { if (uploadHandle.mappedBuffer == nullptr) {
// Compute the new max size (in powers of two to preserve alignment). // Compute the new max size (in powers of two to preserve alignment).
size_t newMaxSize = largestRingBuffer->GetSize(); size_t newMaxSize = targetRingBuffer->GetSize() * 2;
while (newMaxSize < size) { while (newMaxSize < size) {
newMaxSize *= 2; newMaxSize *= 2;
} }
// TODO(bryan.bernhart@intel.com): Fall-back to no sub-allocations should this fail. // TODO(bryan.bernhart@intel.com): Fall-back to no sub-allocations should this fail.
DAWN_TRY(CreateAndAppendBuffer(newMaxSize)); DAWN_TRY(CreateAndAppendBuffer(newMaxSize));
largestRingBuffer = GetLargestBuffer(); targetRingBuffer = GetLargestBuffer();
uploadHandle = largestRingBuffer->SubAllocate(alignedSize); uploadHandle = targetRingBuffer->SubAllocate(size);
} }
uploadHandle.stagingBuffer = largestRingBuffer->GetStagingBuffer(); uploadHandle.stagingBuffer = targetRingBuffer->GetStagingBuffer();
return uploadHandle; return uploadHandle;
} }

View File

@ -29,12 +29,12 @@ namespace dawn_native {
// We add functions to Create/Release StagingBuffers to the DynamicUploader as there's // We add functions to Create/Release StagingBuffers to the DynamicUploader as there's
// currently no place to track the allocated staging buffers such that they're freed after // currently no place to track the allocated staging buffers such that they're freed after
// pending coommands are finished. This should be changed when better resource allocation is // pending commands are finished. This should be changed when better resource allocation is
// implemented. // implemented.
ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size); ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size);
void ReleaseStagingBuffer(std::unique_ptr<StagingBufferBase> stagingBuffer); void ReleaseStagingBuffer(std::unique_ptr<StagingBufferBase> stagingBuffer);
ResultOrError<UploadHandle> Allocate(uint32_t requiredSize, uint32_t alignment); ResultOrError<UploadHandle> Allocate(uint32_t size);
void Tick(Serial lastCompletedSerial); void Tick(Serial lastCompletedSerial);
RingBuffer* GetLargestBuffer(); RingBuffer* GetLargestBuffer();