Inline CommandAllocator/Iterator
Inlining these hot functions decreases CPU time in perf tests for DrawCallPerf.Run/Vulkan by roughly 12% (55 to 47ns) and increases binary size by about 0.16% (~4kB). Bug: dawn:304 Change-Id: I84e5d011defe88d6f1492dcb54e421c3d1bf099f Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/14000 Commit-Queue: Austin Eng <enga@chromium.org> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
parent
56b12422da
commit
ff8b3f4397
|
@ -61,6 +61,9 @@
|
|||
# endif
|
||||
|
||||
# define DAWN_DECLARE_UNUSED __attribute__((unused))
|
||||
# if defined(NDEBUG)
|
||||
# define DAWN_FORCE_INLINE inline __attribute__((always_inline))
|
||||
# endif
|
||||
|
||||
// MSVC
|
||||
#elif defined(_MSC_VER)
|
||||
|
@ -77,6 +80,9 @@ extern void __cdecl __debugbreak(void);
|
|||
# endif
|
||||
|
||||
# define DAWN_DECLARE_UNUSED
|
||||
# if defined(NDEBUG)
|
||||
# define DAWN_FORCE_INLINE __forceinline
|
||||
# endif
|
||||
|
||||
#else
|
||||
# error "Unsupported compiler"
|
||||
|
@ -97,5 +103,8 @@ extern void __cdecl __debugbreak(void);
|
|||
#if !defined(DAWN_NO_DISCARD)
|
||||
# define DAWN_NO_DISCARD
|
||||
#endif
|
||||
#if !defined(DAWN_FORCE_INLINE)
|
||||
# define DAWN_FORCE_INLINE inline
|
||||
#endif
|
||||
|
||||
#endif // COMMON_COMPILER_H_
|
||||
|
|
|
@ -85,13 +85,6 @@ bool IsPtrAligned(const void* ptr, size_t alignment) {
|
|||
return (reinterpret_cast<size_t>(ptr) & (alignment - 1)) == 0;
|
||||
}
|
||||
|
||||
void* AlignVoidPtr(void* ptr, size_t alignment) {
|
||||
ASSERT(IsPowerOfTwo(alignment));
|
||||
ASSERT(alignment != 0);
|
||||
return reinterpret_cast<void*>((reinterpret_cast<size_t>(ptr) + (alignment - 1)) &
|
||||
~(alignment - 1));
|
||||
}
|
||||
|
||||
bool IsAligned(uint32_t value, size_t alignment) {
|
||||
ASSERT(alignment <= UINT32_MAX);
|
||||
ASSERT(IsPowerOfTwo(alignment));
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#ifndef COMMON_MATH_H_
|
||||
#define COMMON_MATH_H_
|
||||
|
||||
#include "common/Assert.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
@ -35,13 +37,19 @@ bool IsAligned(uint32_t value, size_t alignment);
|
|||
uint32_t Align(uint32_t value, size_t alignment);
|
||||
|
||||
template <typename T>
|
||||
T* AlignPtr(T* ptr, size_t alignment) {
|
||||
return static_cast<T*>(AlignVoidPtr(ptr, alignment));
|
||||
DAWN_FORCE_INLINE T* AlignPtr(T* ptr, size_t alignment) {
|
||||
ASSERT(IsPowerOfTwo(alignment));
|
||||
ASSERT(alignment != 0);
|
||||
return reinterpret_cast<T*>((reinterpret_cast<size_t>(ptr) + (alignment - 1)) &
|
||||
~(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T* AlignPtr(const T* ptr, size_t alignment) {
|
||||
return static_cast<const T*>(AlignVoidPtr(const_cast<T*>(ptr), alignment));
|
||||
DAWN_FORCE_INLINE const T* AlignPtr(const T* ptr, size_t alignment) {
|
||||
ASSERT(IsPowerOfTwo(alignment));
|
||||
ASSERT(alignment != 0);
|
||||
return reinterpret_cast<const T*>((reinterpret_cast<size_t>(ptr) + (alignment - 1)) &
|
||||
~(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename destType, typename sourceType>
|
||||
|
|
|
@ -23,12 +23,9 @@
|
|||
|
||||
namespace dawn_native {
|
||||
|
||||
constexpr uint32_t EndOfBlock = UINT_MAX; // std::numeric_limits<uint32_t>::max();
|
||||
constexpr uint32_t AdditionalData = UINT_MAX - 1; // std::numeric_limits<uint32_t>::max() - 1;
|
||||
|
||||
// TODO(cwallez@chromium.org): figure out a way to have more type safety for the iterator
|
||||
|
||||
CommandIterator::CommandIterator() : mEndOfBlock(EndOfBlock) {
|
||||
CommandIterator::CommandIterator() {
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
@ -42,7 +39,7 @@ namespace dawn_native {
|
|||
}
|
||||
}
|
||||
|
||||
CommandIterator::CommandIterator(CommandIterator&& other) : mEndOfBlock(EndOfBlock) {
|
||||
CommandIterator::CommandIterator(CommandIterator&& other) {
|
||||
if (!other.IsEmpty()) {
|
||||
mBlocks = std::move(other.mBlocks);
|
||||
other.Reset();
|
||||
|
@ -64,7 +61,7 @@ namespace dawn_native {
|
|||
}
|
||||
|
||||
CommandIterator::CommandIterator(CommandAllocator&& allocator)
|
||||
: mBlocks(allocator.AcquireBlocks()), mEndOfBlock(EndOfBlock) {
|
||||
: mBlocks(allocator.AcquireBlocks()) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
@ -74,6 +71,17 @@ namespace dawn_native {
|
|||
return *this;
|
||||
}
|
||||
|
||||
bool CommandIterator::NextCommandIdInNewBlock(uint32_t* commandId) {
|
||||
mCurrentBlock++;
|
||||
if (mCurrentBlock >= mBlocks.size()) {
|
||||
Reset();
|
||||
*commandId = detail::kEndOfBlock;
|
||||
return false;
|
||||
}
|
||||
mCurrentPtr = AlignPtr(mBlocks[mCurrentBlock].block, alignof(uint32_t));
|
||||
return NextCommandId(commandId);
|
||||
}
|
||||
|
||||
void CommandIterator::Reset() {
|
||||
mCurrentBlock = 0;
|
||||
|
||||
|
@ -97,47 +105,6 @@ namespace dawn_native {
|
|||
return mBlocks[0].block == reinterpret_cast<const uint8_t*>(&mEndOfBlock);
|
||||
}
|
||||
|
||||
bool CommandIterator::NextCommandId(uint32_t* commandId) {
|
||||
uint8_t* idPtr = AlignPtr(mCurrentPtr, alignof(uint32_t));
|
||||
ASSERT(idPtr + sizeof(uint32_t) <=
|
||||
mBlocks[mCurrentBlock].block + mBlocks[mCurrentBlock].size);
|
||||
|
||||
uint32_t id = *reinterpret_cast<uint32_t*>(idPtr);
|
||||
|
||||
if (id == EndOfBlock) {
|
||||
mCurrentBlock++;
|
||||
if (mCurrentBlock >= mBlocks.size()) {
|
||||
Reset();
|
||||
*commandId = EndOfBlock;
|
||||
return false;
|
||||
}
|
||||
mCurrentPtr = AlignPtr(mBlocks[mCurrentBlock].block, alignof(uint32_t));
|
||||
return NextCommandId(commandId);
|
||||
}
|
||||
|
||||
mCurrentPtr = idPtr + sizeof(uint32_t);
|
||||
*commandId = id;
|
||||
return true;
|
||||
}
|
||||
|
||||
void* CommandIterator::NextCommand(size_t commandSize, size_t commandAlignment) {
|
||||
uint8_t* commandPtr = AlignPtr(mCurrentPtr, commandAlignment);
|
||||
ASSERT(commandPtr + sizeof(commandSize) <=
|
||||
mBlocks[mCurrentBlock].block + mBlocks[mCurrentBlock].size);
|
||||
|
||||
mCurrentPtr = commandPtr + commandSize;
|
||||
return commandPtr;
|
||||
}
|
||||
|
||||
void* CommandIterator::NextData(size_t dataSize, size_t dataAlignment) {
|
||||
uint32_t id;
|
||||
bool hasId = NextCommandId(&id);
|
||||
ASSERT(hasId);
|
||||
ASSERT(id == AdditionalData);
|
||||
|
||||
return NextCommand(dataSize, dataAlignment);
|
||||
}
|
||||
|
||||
// Potential TODO(cwallez@chromium.org):
|
||||
// - Host the size and pointer to next block in the block itself to avoid having an allocation
|
||||
// in the vector
|
||||
|
@ -161,60 +128,23 @@ namespace dawn_native {
|
|||
ASSERT(mCurrentPtr != nullptr && mEndPtr != nullptr);
|
||||
ASSERT(IsPtrAligned(mCurrentPtr, alignof(uint32_t)));
|
||||
ASSERT(mCurrentPtr + sizeof(uint32_t) <= mEndPtr);
|
||||
*reinterpret_cast<uint32_t*>(mCurrentPtr) = EndOfBlock;
|
||||
*reinterpret_cast<uint32_t*>(mCurrentPtr) = detail::kEndOfBlock;
|
||||
|
||||
mCurrentPtr = nullptr;
|
||||
mEndPtr = nullptr;
|
||||
return std::move(mBlocks);
|
||||
}
|
||||
|
||||
uint8_t* CommandAllocator::Allocate(uint32_t commandId,
|
||||
uint8_t* CommandAllocator::AllocateInNewBlock(uint32_t commandId,
|
||||
size_t commandSize,
|
||||
size_t commandAlignment) {
|
||||
ASSERT(mCurrentPtr != nullptr);
|
||||
ASSERT(mEndPtr != nullptr);
|
||||
ASSERT(commandId != EndOfBlock);
|
||||
|
||||
// It should always be possible to allocate one id, for EndOfBlock tagging,
|
||||
ASSERT(IsPtrAligned(mCurrentPtr, alignof(uint32_t)));
|
||||
ASSERT(mEndPtr >= mCurrentPtr);
|
||||
ASSERT(static_cast<size_t>(mEndPtr - mCurrentPtr) >= sizeof(uint32_t));
|
||||
|
||||
// The memory after the ID will contain the following:
|
||||
// - the current ID
|
||||
// - padding to align the command, maximum kMaxSupportedAlignment
|
||||
// - the command of size commandSize
|
||||
// - padding to align the next ID, maximum alignof(uint32_t)
|
||||
// - the next ID of size sizeof(uint32_t)
|
||||
//
|
||||
// To avoid checking for overflows at every step of the computations we compute an upper
|
||||
// bound of the space that will be needed in addition to the command data.
|
||||
static constexpr size_t kWorstCaseAdditionalSize =
|
||||
sizeof(uint32_t) + kMaxSupportedAlignment + alignof(uint32_t) + sizeof(uint32_t);
|
||||
|
||||
// This can't overflow because by construction mCurrentPtr always has space for the next ID.
|
||||
size_t remainingSize = static_cast<size_t>(mEndPtr - mCurrentPtr);
|
||||
|
||||
// The good case were we have enough space for the command data and upper bound of the
|
||||
// extra required space.
|
||||
if ((remainingSize >= kWorstCaseAdditionalSize) &&
|
||||
(remainingSize - kWorstCaseAdditionalSize >= commandSize)) {
|
||||
// When there is not enough space, we signal the kEndOfBlock, so that the iterator knows
|
||||
// to move to the next one. kEndOfBlock on the last block means the end of the commands.
|
||||
uint32_t* idAlloc = reinterpret_cast<uint32_t*>(mCurrentPtr);
|
||||
*idAlloc = commandId;
|
||||
|
||||
uint8_t* commandAlloc = AlignPtr(mCurrentPtr + sizeof(uint32_t), commandAlignment);
|
||||
mCurrentPtr = AlignPtr(commandAlloc + commandSize, alignof(uint32_t));
|
||||
|
||||
return commandAlloc;
|
||||
}
|
||||
|
||||
// When there is not enough space, we signal the EndOfBlock, so that the iterator knows to
|
||||
// move to the next one. EndOfBlock on the last block means the end of the commands.
|
||||
uint32_t* idAlloc = reinterpret_cast<uint32_t*>(mCurrentPtr);
|
||||
*idAlloc = EndOfBlock;
|
||||
*idAlloc = detail::kEndOfBlock;
|
||||
|
||||
// We'll request a block that can contain at least the command ID, the command and an
|
||||
// additional ID to contain the EndOfBlock tag.
|
||||
// additional ID to contain the kEndOfBlock tag.
|
||||
size_t requestedBlockSize = commandSize + kWorstCaseAdditionalSize;
|
||||
|
||||
// The computation of the request could overflow.
|
||||
|
@ -228,10 +158,6 @@ namespace dawn_native {
|
|||
return Allocate(commandId, commandSize, commandAlignment);
|
||||
}
|
||||
|
||||
uint8_t* CommandAllocator::AllocateData(size_t commandSize, size_t commandAlignment) {
|
||||
return Allocate(AdditionalData, commandSize, commandAlignment);
|
||||
}
|
||||
|
||||
bool CommandAllocator::GetNewBlock(size_t minimumSize) {
|
||||
// Allocate blocks doubling sizes each time, to a maximum of 16k (or at least minimumSize).
|
||||
mLastAllocationSize =
|
||||
|
|
|
@ -15,6 +15,9 @@
|
|||
#ifndef DAWNNATIVE_COMMAND_ALLOCATOR_H_
|
||||
#define DAWNNATIVE_COMMAND_ALLOCATOR_H_
|
||||
|
||||
#include "common/Assert.h"
|
||||
#include "common/Math.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
@ -56,6 +59,11 @@ namespace dawn_native {
|
|||
};
|
||||
using CommandBlocks = std::vector<BlockDef>;
|
||||
|
||||
namespace detail {
|
||||
constexpr uint32_t kEndOfBlock = std::numeric_limits<uint32_t>::max();
|
||||
constexpr uint32_t kAdditionalData = std::numeric_limits<uint32_t>::max() - 1;
|
||||
} // namespace detail
|
||||
|
||||
class CommandAllocator;
|
||||
|
||||
// TODO(cwallez@chromium.org): prevent copy for both iterator and allocator
|
||||
|
@ -91,15 +99,46 @@ namespace dawn_native {
|
|||
private:
|
||||
bool IsEmpty() const;
|
||||
|
||||
bool NextCommandId(uint32_t* commandId);
|
||||
void* NextCommand(size_t commandSize, size_t commandAlignment);
|
||||
void* NextData(size_t dataSize, size_t dataAlignment);
|
||||
DAWN_FORCE_INLINE bool NextCommandId(uint32_t* commandId) {
|
||||
uint8_t* idPtr = AlignPtr(mCurrentPtr, alignof(uint32_t));
|
||||
ASSERT(idPtr + sizeof(uint32_t) <=
|
||||
mBlocks[mCurrentBlock].block + mBlocks[mCurrentBlock].size);
|
||||
|
||||
uint32_t id = *reinterpret_cast<uint32_t*>(idPtr);
|
||||
|
||||
if (id != detail::kEndOfBlock) {
|
||||
mCurrentPtr = idPtr + sizeof(uint32_t);
|
||||
*commandId = id;
|
||||
return true;
|
||||
}
|
||||
return NextCommandIdInNewBlock(commandId);
|
||||
}
|
||||
|
||||
bool NextCommandIdInNewBlock(uint32_t* commandId);
|
||||
|
||||
DAWN_FORCE_INLINE void* NextCommand(size_t commandSize, size_t commandAlignment) {
|
||||
uint8_t* commandPtr = AlignPtr(mCurrentPtr, commandAlignment);
|
||||
ASSERT(commandPtr + sizeof(commandSize) <=
|
||||
mBlocks[mCurrentBlock].block + mBlocks[mCurrentBlock].size);
|
||||
|
||||
mCurrentPtr = commandPtr + commandSize;
|
||||
return commandPtr;
|
||||
}
|
||||
|
||||
DAWN_FORCE_INLINE void* NextData(size_t dataSize, size_t dataAlignment) {
|
||||
uint32_t id;
|
||||
bool hasId = NextCommandId(&id);
|
||||
ASSERT(hasId);
|
||||
ASSERT(id == detail::kAdditionalData);
|
||||
|
||||
return NextCommand(dataSize, dataAlignment);
|
||||
}
|
||||
|
||||
CommandBlocks mBlocks;
|
||||
uint8_t* mCurrentPtr = nullptr;
|
||||
size_t mCurrentBlock = 0;
|
||||
// Used to avoid a special case for empty iterators.
|
||||
uint32_t mEndOfBlock;
|
||||
uint32_t mEndOfBlock = detail::kEndOfBlock;
|
||||
bool mDataWasDestroyed = false;
|
||||
};
|
||||
|
||||
|
@ -140,18 +179,67 @@ namespace dawn_native {
|
|||
// using the CommandAllocator passes the static_asserts.
|
||||
static constexpr size_t kMaxSupportedAlignment = 8;
|
||||
|
||||
// To avoid checking for overflows at every step of the computations we compute an upper
|
||||
// bound of the space that will be needed in addition to the command data.
|
||||
static constexpr size_t kWorstCaseAdditionalSize =
|
||||
sizeof(uint32_t) + kMaxSupportedAlignment + alignof(uint32_t) + sizeof(uint32_t);
|
||||
|
||||
friend CommandIterator;
|
||||
CommandBlocks&& AcquireBlocks();
|
||||
|
||||
uint8_t* Allocate(uint32_t commandId, size_t commandSize, size_t commandAlignment);
|
||||
uint8_t* AllocateData(size_t dataSize, size_t dataAlignment);
|
||||
DAWN_FORCE_INLINE uint8_t* Allocate(uint32_t commandId,
|
||||
size_t commandSize,
|
||||
size_t commandAlignment) {
|
||||
ASSERT(mCurrentPtr != nullptr);
|
||||
ASSERT(mEndPtr != nullptr);
|
||||
ASSERT(commandId != detail::kEndOfBlock);
|
||||
|
||||
// It should always be possible to allocate one id, for kEndOfBlock tagging,
|
||||
ASSERT(IsPtrAligned(mCurrentPtr, alignof(uint32_t)));
|
||||
ASSERT(mEndPtr >= mCurrentPtr);
|
||||
ASSERT(static_cast<size_t>(mEndPtr - mCurrentPtr) >= sizeof(uint32_t));
|
||||
|
||||
// The memory after the ID will contain the following:
|
||||
// - the current ID
|
||||
// - padding to align the command, maximum kMaxSupportedAlignment
|
||||
// - the command of size commandSize
|
||||
// - padding to align the next ID, maximum alignof(uint32_t)
|
||||
// - the next ID of size sizeof(uint32_t)
|
||||
|
||||
// This can't overflow because by construction mCurrentPtr always has space for the next
|
||||
// ID.
|
||||
size_t remainingSize = static_cast<size_t>(mEndPtr - mCurrentPtr);
|
||||
|
||||
// The good case were we have enough space for the command data and upper bound of the
|
||||
// extra required space.
|
||||
if ((remainingSize >= kWorstCaseAdditionalSize) &&
|
||||
(remainingSize - kWorstCaseAdditionalSize >= commandSize)) {
|
||||
uint32_t* idAlloc = reinterpret_cast<uint32_t*>(mCurrentPtr);
|
||||
*idAlloc = commandId;
|
||||
|
||||
uint8_t* commandAlloc = AlignPtr(mCurrentPtr + sizeof(uint32_t), commandAlignment);
|
||||
mCurrentPtr = AlignPtr(commandAlloc + commandSize, alignof(uint32_t));
|
||||
|
||||
return commandAlloc;
|
||||
}
|
||||
return AllocateInNewBlock(commandId, commandSize, commandAlignment);
|
||||
}
|
||||
|
||||
uint8_t* AllocateInNewBlock(uint32_t commandId,
|
||||
size_t commandSize,
|
||||
size_t commandAlignment);
|
||||
|
||||
DAWN_FORCE_INLINE uint8_t* AllocateData(size_t commandSize, size_t commandAlignment) {
|
||||
return Allocate(detail::kAdditionalData, commandSize, commandAlignment);
|
||||
}
|
||||
|
||||
bool GetNewBlock(size_t minimumSize);
|
||||
|
||||
CommandBlocks mBlocks;
|
||||
size_t mLastAllocationSize = 2048;
|
||||
|
||||
// Pointers to the current range of allocation in the block. Guaranteed to allow for at
|
||||
// least one uint32_t if not nullptr, so that the special EndOfBlock command id can always
|
||||
// least one uint32_t if not nullptr, so that the special kEndOfBlock command id can always
|
||||
// be written. Nullptr iff the blocks were moved out.
|
||||
uint8_t* mCurrentPtr = nullptr;
|
||||
uint8_t* mEndPtr = nullptr;
|
||||
|
|
Loading…
Reference in New Issue