Use first-fit policy to reduce upload memory.
Reverts CL9160 by replacing the existing policy with one that re-uses smaller ring-buffers. Before *RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 397865.698113 ns *RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 398025.660377 ns *RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 438816.754717 ns After *RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 118189.847059 ns *RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 116808.235294 ns *RESULT BufferUploadPerfRun/D3D12_SetSubData: wall_time= 117133.964706 ns No change for Vulkan. About 3x faster with D3D. BUG=dawn:211 Change-Id: Iaa6b0ef50305bf7df482f7e10e92353320039965 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/10441 Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
parent
7ffd2346f8
commit
650859b420
|
@ -255,12 +255,8 @@ namespace dawn_native {
|
|||
DynamicUploader* uploader = nullptr;
|
||||
DAWN_TRY_ASSIGN(uploader, GetDevice()->GetDynamicUploader());
|
||||
|
||||
// TODO(bryan.bernhart@intel.com): Remove once alignment constraint is added to validation
|
||||
// (dawn:73). D3D12 does not specify so we assume 4-byte alignment to be safe.
|
||||
static constexpr size_t kDefaultAlignment = 4;
|
||||
|
||||
UploadHandle uploadHandle;
|
||||
DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(count, kDefaultAlignment));
|
||||
DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(count));
|
||||
ASSERT(uploadHandle.mappedBuffer != nullptr);
|
||||
|
||||
memcpy(uploadHandle.mappedBuffer, data, count);
|
||||
|
|
|
@ -41,31 +41,41 @@ namespace dawn_native {
|
|||
return {};
|
||||
}
|
||||
|
||||
ResultOrError<UploadHandle> DynamicUploader::Allocate(uint32_t size, uint32_t alignment) {
|
||||
ASSERT(IsPowerOfTwo(alignment));
|
||||
ResultOrError<UploadHandle> DynamicUploader::Allocate(uint32_t size) {
|
||||
// Note: Validation ensures size is already aligned.
|
||||
// First-fit: find next smallest buffer large enough to satisfy the allocation request.
|
||||
RingBuffer* targetRingBuffer = GetLargestBuffer();
|
||||
for (auto& ringBuffer : mRingBuffers) {
|
||||
// Prevent overflow.
|
||||
ASSERT(ringBuffer->GetSize() >= ringBuffer->GetUsedSize());
|
||||
const size_t remainingSize = ringBuffer->GetSize() - ringBuffer->GetUsedSize();
|
||||
if (size <= remainingSize) {
|
||||
targetRingBuffer = ringBuffer.get();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Align the requested allocation size
|
||||
const size_t alignedSize = Align(size, alignment);
|
||||
|
||||
RingBuffer* largestRingBuffer = GetLargestBuffer();
|
||||
UploadHandle uploadHandle = largestRingBuffer->SubAllocate(alignedSize);
|
||||
UploadHandle uploadHandle = UploadHandle{};
|
||||
if (targetRingBuffer != nullptr) {
|
||||
uploadHandle = targetRingBuffer->SubAllocate(size);
|
||||
}
|
||||
|
||||
// Upon failure, append a newly created (and much larger) ring buffer to fulfill the
|
||||
// request.
|
||||
if (uploadHandle.mappedBuffer == nullptr) {
|
||||
// Compute the new max size (in powers of two to preserve alignment).
|
||||
size_t newMaxSize = largestRingBuffer->GetSize();
|
||||
size_t newMaxSize = targetRingBuffer->GetSize() * 2;
|
||||
while (newMaxSize < size) {
|
||||
newMaxSize *= 2;
|
||||
}
|
||||
|
||||
// TODO(bryan.bernhart@intel.com): Fall-back to no sub-allocations should this fail.
|
||||
DAWN_TRY(CreateAndAppendBuffer(newMaxSize));
|
||||
largestRingBuffer = GetLargestBuffer();
|
||||
uploadHandle = largestRingBuffer->SubAllocate(alignedSize);
|
||||
targetRingBuffer = GetLargestBuffer();
|
||||
uploadHandle = targetRingBuffer->SubAllocate(size);
|
||||
}
|
||||
|
||||
uploadHandle.stagingBuffer = largestRingBuffer->GetStagingBuffer();
|
||||
uploadHandle.stagingBuffer = targetRingBuffer->GetStagingBuffer();
|
||||
|
||||
return uploadHandle;
|
||||
}
|
||||
|
|
|
@ -29,12 +29,12 @@ namespace dawn_native {
|
|||
|
||||
// We add functions to Create/Release StagingBuffers to the DynamicUploader as there's
|
||||
// currently no place to track the allocated staging buffers such that they're freed after
|
||||
// pending coommands are finished. This should be changed when better resource allocation is
|
||||
// pending commands are finished. This should be changed when better resource allocation is
|
||||
// implemented.
|
||||
ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size);
|
||||
void ReleaseStagingBuffer(std::unique_ptr<StagingBufferBase> stagingBuffer);
|
||||
|
||||
ResultOrError<UploadHandle> Allocate(uint32_t requiredSize, uint32_t alignment);
|
||||
ResultOrError<UploadHandle> Allocate(uint32_t size);
|
||||
void Tick(Serial lastCompletedSerial);
|
||||
|
||||
RingBuffer* GetLargestBuffer();
|
||||
|
|
Loading…
Reference in New Issue