metaforce/hecl/include/hecl/UniformBufferPool.hpp

#ifndef HECL_UNIFORMBUFFERPOOL_HPP
#define HECL_UNIFORMBUFFERPOOL_HPP
#include <boo/boo.hpp>
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
#include "BitVector.hpp"

namespace hecl
{
/* Guarded fallback; hecl's common headers normally supply this macro */
#ifndef ROUND_UP_256
#define ROUND_UP_256(val) (((val) + 255) & ~255)
#endif
#define HECL_UBUFPOOL_ALLOCATION_BLOCK 262144

/** This class provides a uniform structure for packing instanced uniform-buffer
 *  data with consistent stride into a vector of 256K 'Buckets'.
 *
 *  This results in a space-efficient way of managing GPU data for things like UI
 *  widgets. These can potentially have numerous binding instances, so this avoids
 *  allocating a full GPU buffer object for each one. */
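/* Illustrative usage sketch; `WidgetUniforms` and `factory` are hypothetical
 * placeholders, not part of this header:
 *
 *     hecl::UniformBufferPool<WidgetUniforms> pool;
 *     auto tok = pool.allocateBlock(factory); // reserves one 256-aligned block
 *     tok.access().color = ...;               // CPU-side write; marks the bucket dirty
 *     pool.updateBuffers();                   // unmaps dirty buckets so the GPU sees the data
 *     auto info = tok.getBufferInfo();        // {buffer, byte offset} for shader data bindings
 */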
template <typename UniformStruct>
class UniformBufferPool
{
public:
    /* Resolve div_t type using ssize_t as basis */
#ifdef _WIN32
    using IndexTp = SSIZE_T;
#else
    using IndexTp = ssize_t;
#endif

private:
    struct InvalidTp {};
    using DivTp = std::conditional_t<std::is_same<IndexTp, long long>::value, std::lldiv_t,
                  std::conditional_t<std::is_same<IndexTp, long>::value, std::ldiv_t,
                  std::conditional_t<std::is_same<IndexTp, int>::value, std::div_t, InvalidTp>>>;
    static_assert(!std::is_same<DivTp, InvalidTp>::value, "unsupported IndexTp for DivTp resolution");

    /** Size of single element, rounded up to a 256-byte multiple */
    static constexpr IndexTp m_stride = ROUND_UP_256(sizeof(UniformStruct));
    static_assert(m_stride <= HECL_UBUFPOOL_ALLOCATION_BLOCK, "Stride too large for uniform pool");

    /** Number of rounded elements per 256K bucket */
    static constexpr IndexTp m_countPerBucket = HECL_UBUFPOOL_ALLOCATION_BLOCK / m_stride;

    /** Buffer size per bucket (ideally 256K) */
    static constexpr IndexTp m_sizePerBucket = m_stride * m_countPerBucket;
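    /* Worked example: sizeof(UniformStruct) == 80 gives m_stride == 256,
     * m_countPerBucket == 1024 and m_sizePerBucket == 262144 (a full 256K);
     * a 768-byte stride fits only 341 blocks (261888 bytes), hence "ideally" */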
    /** BitVector indicating free allocation blocks */
    hecl::llvm::BitVector m_freeBlocks;

    /** Efficient way to get bucket and block simultaneously */
    DivTp getBucketDiv(IndexTp idx) const { return std::div(idx, m_countPerBucket); }
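    /* e.g. with m_countPerBucket == 1024, getBucketDiv(1500) yields
     * { quot: 1, rem: 476 }, i.e. block 476 within bucket 1 */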
    /** Factory pointer for building additional buffers */
    boo::IGraphicsDataFactory* m_factory = nullptr;

    /** Private bucket info */
    struct Bucket
    {
        boo::ObjToken<boo::IGraphicsBufferD> buffer;
        uint8_t* cpuBuffer = nullptr;
        std::atomic_size_t useCount = {};
        bool dirty = false;

        Bucket() = default;
        Bucket(const Bucket& other) = delete;
        Bucket& operator=(const Bucket& other) = delete;
        Bucket(Bucket&& other) = default;
        Bucket& operator=(Bucket&& other) = default;

        /** Unmap the CPU-side mapping, flushing pending writes to the GPU */
        void updateBuffer()
        {
            if (cpuBuffer)
            {
                buffer->unmap();
                cpuBuffer = nullptr;
            }
            dirty = false;
        }

        /** Lazily construct the GPU buffer when the first Token references this bucket */
        void increment(UniformBufferPool& pool)
        {
            if (useCount.fetch_add(1) == 0)
                buffer = pool.m_factory->newPoolBuffer(boo::BufferUse::Uniform,
                                                       pool.m_stride, pool.m_countPerBucket BooTrace);
        }

        /** Release the GPU buffer when the last Token referencing this bucket goes away */
        void decrement(UniformBufferPool& pool)
        {
            if (useCount.fetch_sub(1) == 1)
            {
                if (cpuBuffer)
                {
                    buffer->unmap();
                    cpuBuffer = nullptr;
                }
                buffer.reset();
            }
        }
    };
    std::vector<std::unique_ptr<Bucket>> m_buckets;
public:
    /** User block-owning token */
    class Token
    {
        friend class UniformBufferPool;
        UniformBufferPool* m_pool = nullptr;
        IndexTp m_index = -1;
        DivTp m_div;

        Token(UniformBufferPool* pool)
        : m_pool(pool)
        {
            auto& freeSpaces = pool->m_freeBlocks;
            int idx = freeSpaces.find_first();
            if (idx == -1)
            {
                /* No free block left; grow the pool by one bucket's worth of blocks */
                pool->m_buckets.push_back(std::make_unique<Bucket>());
                m_index = freeSpaces.size();
                freeSpaces.resize(freeSpaces.size() + pool->m_countPerBucket, true);
            }
            else
            {
                m_index = idx;
            }
            freeSpaces.reset(m_index);
            m_div = pool->getBucketDiv(m_index);
            Bucket& bucket = *m_pool->m_buckets[m_div.quot];
            bucket.increment(*m_pool);
        }
    public:
        Token() = default;
        Token(const Token& other) = delete;
        Token& operator=(const Token& other) = delete;
        Token& operator=(Token&& other)
        {
            /* Release any block this token already owns before taking other's */
            if (m_index != -1)
            {
                m_pool->m_freeBlocks.set(m_index);
                m_pool->m_buckets[m_div.quot]->decrement(*m_pool);
            }
            m_pool = other.m_pool;
            m_index = other.m_index;
            m_div = other.m_div;
            other.m_index = -1;
            return *this;
        }
        Token(Token&& other)
        : m_pool(other.m_pool), m_index(other.m_index), m_div(other.m_div)
        {
            other.m_index = -1;
        }
        ~Token()
        {
            if (m_index != -1)
            {
                m_pool->m_freeBlocks.set(m_index);
                Bucket& bucket = *m_pool->m_buckets[m_div.quot];
                bucket.decrement(*m_pool);
            }
        }
        /** Obtain a CPU-side reference to this block's uniform struct,
         *  mapping the bucket and marking it dirty as needed */
        UniformStruct& access()
        {
            Bucket& bucket = *m_pool->m_buckets[m_div.quot];
            if (!bucket.cpuBuffer)
                bucket.cpuBuffer = reinterpret_cast<uint8_t*>(bucket.buffer->map(m_sizePerBucket));
            bucket.dirty = true;
            return reinterpret_cast<UniformStruct&>(bucket.cpuBuffer[m_div.rem * m_pool->m_stride]);
        }

        /** Obtain the GPU buffer and byte offset backing this block,
         *  suitable for constructing shader data bindings */
        std::pair<boo::ObjToken<boo::IGraphicsBufferD>, IndexTp> getBufferInfo() const
        {
            Bucket& bucket = *m_pool->m_buckets[m_div.quot];
            return {bucket.buffer, m_div.rem * m_pool->m_stride};
        }

        operator bool() const { return m_pool != nullptr && m_index != -1; }
    };
    UniformBufferPool() = default;
    UniformBufferPool(const UniformBufferPool& other) = delete;
    UniformBufferPool& operator=(const UniformBufferPool& other) = delete;

    /** Load dirty buffer data into GPU */
    void updateBuffers()
    {
        for (auto& bucket : m_buckets)
            if (bucket->dirty)
                bucket->updateBuffer();
    }
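    /* Note: updateBuffers() is the flush point for all CPU-side writes made
     * through Token::access(); the assumed usage (inferred from the map/unmap
     * pattern above) is one call per frame, after all writes and before
     * issuing draws that bind these buffers. */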
    /** Allocate free block into client-owned Token */
    Token allocateBlock(boo::IGraphicsDataFactory* factory)
    {
        m_factory = factory;
        return Token(this);
    }
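    /** Explicitly release every bucket's GPU buffer reference (e.g. during
     *  renderer teardown); outstanding Tokens must not be accessed afterwards */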
    void doDestroy()
    {
        for (auto& bucket : m_buckets)
            bucket->buffer.reset();
    }
};

} // namespace hecl

#endif // HECL_UNIFORMBUFFERPOOL_HPP