optimization: BlockAllocator: Actually allocate in blocks

Instead of hitting the heap for each and every call to Create()

Significantly improves performance for heavy loads. Slight performance
loss for lighter loads.

A: base.bench
B: new.bench

Test name                             | Δ (A → B)    | % (A → B)
--------------------------------------+--------------+-----------
GenerateSPIRV/"simple_fragment.wgsl"  | 27.021µs     | +6.4%
GenerateMSL/"simple_compute.wgsl"     | 35.592µs     | +6.1%
GenerateMSL/"simple_vertex.wgsl"      | 37.64µs      | +5.5%
GenerateHLSL/"simple_fragment.wgsl"   | 42.145µs     | +5.2%
GenerateGLSL/"simple_fragment.wgsl"   | 31.506µs     | +4.9%
GenerateHLSL/"simple_vertex.wgsl"     | 38.843µs     | +4.7%
GenerateMSL/"simple_fragment.wgsl"    | 29.977µs     | +4.5%
GenerateSPIRV/"simple_vertex.wgsl"    | 19.882µs     | +4.2%
GenerateGLSL/"simple_vertex.wgsl"     | 24.702µs     | +3.7%
GenerateSPIRV/"simple_compute.wgsl"   | 17.652µs     | +3.2%
GenerateHLSL/"simple_compute.wgsl"    | 26.826µs     | +2.7%
GenerateGLSL/"simple_compute.wgsl"    | 11.952µs     | +1.8%
ParseWGSL/"particles.wgsl"            | -104.83µs    | -4.2%
GenerateMSL/"particles.wgsl"          | -1.079243ms  | -9.4%
GenerateSPIRV/"particles.wgsl"        | -1.012483ms  | -9.4%
GenerateGLSL/"particles.wgsl"         | -3.522106ms  | -9.5%
GenerateHLSL/"particles.wgsl"         | -1.849666ms  | -10.6%

Issue: tint:1383
Change-Id: Ib691328538c597c06a75dfba392c99d2afbd5442
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/76961
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
This commit is contained in:
Ben Clayton 2022-01-22 20:32:38 +00:00 committed by Tint LUCI CQ
parent 73ced33dfb
commit ba1a8f8d05
6 changed files with 260 additions and 128 deletions

View File

@ -73,11 +73,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
ASSERT_EQ(tint::Program::printer(&src), tint::Program::printer(&dst));
// Check that none of the AST nodes or type pointers in dst are found in src
std::unordered_set<tint::ast::Node*> src_nodes;
std::unordered_set<const tint::ast::Node*> src_nodes;
for (auto* src_node : src.ASTNodes().Objects()) {
src_nodes.emplace(src_node);
}
std::unordered_set<tint::sem::Type*> src_types;
std::unordered_set<const tint::sem::Type*> src_types;
for (auto* src_type : src.Types()) {
src_types.emplace(src_type);
}

View File

@ -132,11 +132,11 @@ let declaration_order_check_4 : i32 = 1;
EXPECT_EQ(Program::printer(&src), Program::printer(&dst));
// Check that none of the AST nodes or type pointers in dst are found in src
std::unordered_set<ast::Node*> src_nodes;
std::unordered_set<const ast::Node*> src_nodes;
for (auto* src_node : src.ASTNodes().Objects()) {
src_nodes.emplace(src_node);
}
std::unordered_set<sem::Type*> src_types;
std::unordered_set<const sem::Type*> src_types;
for (auto* src_type : src.Types()) {
src_types.emplace(src_type);
}

View File

@ -15,130 +15,155 @@
#ifndef SRC_BLOCK_ALLOCATOR_H_
#define SRC_BLOCK_ALLOCATOR_H_
#include <memory>
#include <array>
#include <utility>
#include <vector>
#include "src/utils/math.h"
namespace tint {
/// A container and allocator of objects of (or deriving from) the template type
/// `T`.
/// Objects are allocated by calling Create(), and are owned by the
/// A container and allocator of objects of (or deriving from) the template
/// type `T`. Objects are allocated by calling Create(), and are owned by the
/// BlockAllocator. When the BlockAllocator is destructed, all constructed
/// objects are automatically destructed and freed.
///
/// Objects held by the BlockAllocator can be iterated over using a
/// View or ConstView.
template <typename T>
/// Objects held by the BlockAllocator can be iterated over using a View.
template <typename T,
size_t BLOCK_SIZE = 64 * 1024,
size_t BLOCK_ALIGNMENT = 16>
class BlockAllocator {
using InternalVector = std::vector<std::unique_ptr<T>>;
using InternalIterator = typename InternalVector::const_iterator;
/// Pointers is a chunk of T* pointers, forming a linked list.
/// The list of Pointers are used to maintain the list of allocated objects.
/// Pointers are allocated out of the block memory.
struct Pointers {
static constexpr size_t kMax = 32;
std::array<T*, kMax> ptrs;
Pointers* next;
};
public:
class View;
class ConstView;
/// Block is linked list of memory blocks.
/// Blocks are allocated out of heap memory.
///
/// Note: We're not using std::aligned_storage here as this warns / errors
/// on MSVC.
struct alignas(BLOCK_ALIGNMENT) Block {
uint8_t data[BLOCK_SIZE];
Block* next;
};
/// Constructor
BlockAllocator() = default;
/// Move constructor
BlockAllocator(BlockAllocator&&) = default;
/// Move assignment operator
/// @return this BlockAllocator
BlockAllocator& operator=(BlockAllocator&&) = default;
// Forward declaration
template <bool IS_CONST>
class TView;
/// An iterator for the objects owned by the BlockAllocator.
class Iterator {
template <bool IS_CONST>
class TIterator {
using PointerTy = std::conditional_t<IS_CONST, const T*, T*>;
public:
/// Equality operator
/// @param other the iterator to compare this iterator to
/// @returns true if this iterator is equal to other
bool operator==(const Iterator& other) const { return it_ == other.it_; }
bool operator==(const TIterator& other) const {
return ptrs == other.ptrs && idx == other.idx;
}
/// Inequality operator
/// @param other the iterator to compare this iterator to
/// @returns true if this iterator is not equal to other
bool operator!=(const Iterator& other) const { return it_ != other.it_; }
bool operator!=(const TIterator& other) const { return !(*this == other); }
/// Advances the iterator
/// @returns this iterator
Iterator& operator++() {
++it_;
TIterator& operator++() {
if (ptrs != nullptr) {
++idx;
if (idx == Pointers::kMax) {
idx = 0;
ptrs = ptrs->next;
}
}
return *this;
}
/// @returns the pointer to the object at the current iterator position
T* operator*() const { return it_->get(); }
PointerTy operator*() const { return ptrs ? ptrs->ptrs[idx] : nullptr; }
private:
friend View; // Keep internal iterator impl private.
explicit Iterator(InternalIterator it) : it_(it) {}
InternalIterator it_;
friend TView<IS_CONST>; // Keep internal iterator impl private.
explicit TIterator(const Pointers* p, size_t i) : ptrs(p), idx(i) {}
const Pointers* ptrs;
size_t idx;
};
/// A const iterator for the objects owned by the BlockAllocator.
class ConstIterator {
public:
/// Equality operator
/// @param other the iterator to compare this iterator to
/// @returns true if this iterator is equal to other
bool operator==(const ConstIterator& other) const {
return it_ == other.it_;
}
/// Inequality operator
/// @param other the iterator to compare this iterator to
/// @returns true if this iterator is not equal to other
bool operator!=(const ConstIterator& other) const {
return it_ != other.it_;
}
/// Advances the iterator
/// @returns this iterator
ConstIterator& operator++() {
++it_;
return *this;
}
/// @returns the pointer to the object at the current iterator position
T* operator*() const { return it_->get(); }
private:
friend ConstView; // Keep internal iterator impl private.
explicit ConstIterator(InternalIterator it) : it_(it) {}
InternalIterator it_;
};
/// View provides begin() and end() methods for looping over the objects owned
/// by the BlockAllocator.
class View {
/// View provides begin() and end() methods for looping over the objects
/// owned by the BlockAllocator.
template <bool IS_CONST>
class TView {
public:
/// @returns an iterator to the beginning of the view
Iterator begin() const { return Iterator(allocator_->objects_.begin()); }
TIterator<IS_CONST> begin() const {
return TIterator<IS_CONST>{allocator_->pointers_.root, 0};
}
/// @returns an iterator to the end of the view
Iterator end() const { return Iterator(allocator_->objects_.end()); }
TIterator<IS_CONST> end() const {
return allocator_->pointers_.current_index >= Pointers::kMax
? TIterator<IS_CONST>(nullptr, 0)
: TIterator<IS_CONST>(allocator_->pointers_.current,
allocator_->pointers_.current_index);
}
private:
friend BlockAllocator; // For BlockAllocator::operator View()
explicit View(BlockAllocator const* allocator) : allocator_(allocator) {}
explicit TView(BlockAllocator const* allocator) : allocator_(allocator) {}
BlockAllocator const* const allocator_;
};
public:
/// An iterator type over the objects of the BlockAllocator
using Iterator = TIterator<false>;
/// An immutable iterator type over the objects of the BlockAllocator
using ConstIterator = TIterator<true>;
/// View provides begin() and end() methods for looping over the objects
/// owned by the BlockAllocator.
using View = TView<false>;
/// ConstView provides begin() and end() methods for looping over the objects
/// owned by the BlockAllocator.
class ConstView {
public:
/// @returns an iterator to the beginning of the view
ConstIterator begin() const {
return ConstIterator(allocator_->objects_.begin());
}
/// @returns an iterator to the end of the view
ConstIterator end() const {
return ConstIterator(allocator_->objects_.end());
using ConstView = TView<true>;
/// Constructor
BlockAllocator() = default;
/// Move constructor
/// @param rhs the BlockAllocator to move
BlockAllocator(BlockAllocator&& rhs) {
std::swap(block_, rhs.block_);
std::swap(pointers_, rhs.pointers_);
}
private:
friend BlockAllocator; // For BlockAllocator::operator ConstView()
explicit ConstView(BlockAllocator const* allocator)
: allocator_(allocator) {}
BlockAllocator const* const allocator_;
};
/// Move assignment operator
/// @param rhs the BlockAllocator to move
/// @return this BlockAllocator
BlockAllocator& operator=(BlockAllocator&& rhs) {
if (this != &rhs) {
Reset();
std::swap(block_, rhs.block_);
std::swap(pointers_, rhs.pointers_);
}
return *this;
}
/// Destructor
~BlockAllocator() { Reset(); }
/// @return a View of all objects owned by this BlockAllocator
View Objects() { return View(this); }
/// @return a ConstView of all objects owned by this BlockAllocator
ConstView Objects() const { return ConstView(this); }
@ -152,17 +177,116 @@ class BlockAllocator {
static_assert(
std::is_same<T, TYPE>::value || std::is_base_of<T, TYPE>::value,
"TYPE does not derive from T");
auto uptr = std::make_unique<TYPE>(std::forward<ARGS>(args)...);
auto* ptr = uptr.get();
objects_.emplace_back(std::move(uptr));
static_assert(
std::is_same<T, TYPE>::value || std::has_virtual_destructor<T>::value,
"TYPE requires a virtual destructor when calling Create() for a type "
"that is not T");
auto* ptr = Allocate<TYPE>();
new (ptr) TYPE(std::forward<ARGS>(args)...);
AddObjectPointer(ptr);
return ptr;
}
/// Frees all allocations from the allocator.
void Reset() {
for (auto ptr : Objects()) {
ptr->~T();
}
auto* block = block_.root;
while (block != nullptr) {
auto* next = block->next;
delete block;
block = next;
}
block_ = {};
pointers_ = {};
}
private:
BlockAllocator(const BlockAllocator&) = delete;
BlockAllocator& operator=(const BlockAllocator&) = delete;
std::vector<std::unique_ptr<T>> objects_;
/// Allocates an instance of TYPE from the current block, or from a newly
/// allocated block if the current block is full.
template <typename TYPE>
TYPE* Allocate() {
static_assert(sizeof(TYPE) <= BLOCK_SIZE,
"Cannot construct TYPE with size greater than BLOCK_SIZE");
static_assert(alignof(TYPE) <= BLOCK_ALIGNMENT,
"alignof(TYPE) is greater than ALIGNMENT");
block_.current_offset =
utils::RoundUp(alignof(TYPE), block_.current_offset);
if (block_.current_offset + sizeof(TYPE) > BLOCK_SIZE) {
// Allocate a new block from the heap
auto* prev_block = block_.current;
block_.current = new Block;
if (!block_.current) {
return nullptr; // out of memory
}
block_.current->next = nullptr;
block_.current_offset = 0;
if (prev_block) {
prev_block->next = block_.current;
} else {
block_.root = block_.current;
}
}
auto* base = &block_.current->data[0];
auto* ptr = reinterpret_cast<TYPE*>(base + block_.current_offset);
block_.current_offset += sizeof(TYPE);
return ptr;
}
/// Adds `ptr` to the linked list of objects owned by this BlockAllocator.
/// Once added, `ptr` will be tracked for destruction when the BlockAllocator
/// is destructed.
void AddObjectPointer(T* ptr) {
if (pointers_.current_index >= Pointers::kMax) {
auto* prev_pointers = pointers_.current;
pointers_.current = Allocate<Pointers>();
if (!pointers_.current) {
return; // out of memory
}
pointers_.current->next = nullptr;
pointers_.current_index = 0;
if (prev_pointers) {
prev_pointers->next = pointers_.current;
} else {
pointers_.root = pointers_.current;
}
}
pointers_.current->ptrs[pointers_.current_index++] = ptr;
}
struct {
/// The root block of the block linked list
Block* root = nullptr;
/// The current (end) block of the blocked linked list.
/// New allocations come from this block
Block* current = nullptr;
/// The byte offset in #current for the next allocation.
/// Initialized with BLOCK_SIZE so that the first allocation triggers a
/// block allocation.
size_t current_offset = BLOCK_SIZE;
} block_;
struct {
/// The root Pointers structure of the pointers linked list
Pointers* root = nullptr;
/// The current (end) Pointers structure of the pointers linked list.
/// AddObjectPointer() adds to this structure.
Pointers* current = nullptr;
/// The array index in #current for the next append.
/// Initialized with Pointers::kMax so that the first append triggers a
/// allocation of the Pointers structure.
size_t current_index = Pointers::kMax;
} pointers_;
};
} // namespace tint

View File

@ -39,7 +39,7 @@ TEST_F(BlockAllocatorTest, Empty) {
FAIL() << "BlockAllocator should be empty";
}
}
for (int* i : static_cast<const Allocator&>(allocator).Objects()) {
for (const int* i : static_cast<const Allocator&>(allocator).Objects()) {
(void)i;
if ((true)) { // Workaround for "error: loop will run at most once"
FAIL() << "BlockAllocator should be empty";
@ -67,48 +67,53 @@ TEST_F(BlockAllocatorTest, ObjectLifetime) {
TEST_F(BlockAllocatorTest, MoveConstruct) {
using Allocator = BlockAllocator<LifetimeCounter>;
for (size_t n :
{0, 1, 10, 16, 20, 32, 50, 64, 100, 256, 300, 512, 500, 512}) {
size_t count = 0;
{
Allocator allocator_a;
for (int i = 0; i < 10; i++) {
for (size_t i = 0; i < n; i++) {
allocator_a.Create(&count);
}
EXPECT_EQ(count, 10u);
EXPECT_EQ(count, n);
Allocator allocator_b{std::move(allocator_a)};
EXPECT_EQ(count, 10u);
EXPECT_EQ(count, n);
}
EXPECT_EQ(count, 0u);
}
}
TEST_F(BlockAllocatorTest, MoveAssign) {
using Allocator = BlockAllocator<LifetimeCounter>;
for (size_t n :
{0, 1, 10, 16, 20, 32, 50, 64, 100, 256, 300, 512, 500, 512}) {
size_t count_a = 0;
size_t count_b = 0;
{
Allocator allocator_a;
for (int i = 0; i < 10; i++) {
for (size_t i = 0; i < n; i++) {
allocator_a.Create(&count_a);
}
EXPECT_EQ(count_a, 10u);
EXPECT_EQ(count_a, n);
Allocator allocator_b;
for (int i = 0; i < 10; i++) {
for (size_t i = 0; i < n; i++) {
allocator_b.Create(&count_b);
}
EXPECT_EQ(count_b, 10u);
EXPECT_EQ(count_b, n);
allocator_b = std::move(allocator_a);
EXPECT_EQ(count_a, 10u);
EXPECT_EQ(count_a, n);
EXPECT_EQ(count_b, 0u);
}
EXPECT_EQ(count_a, 0u);
EXPECT_EQ(count_b, 0u);
}
}
TEST_F(BlockAllocatorTest, ObjectOrder) {
@ -130,7 +135,7 @@ TEST_F(BlockAllocatorTest, ObjectOrder) {
}
{
int i = 0;
for (int* p : static_cast<const Allocator&>(allocator).Objects()) {
for (const int* p : static_cast<const Allocator&>(allocator).Objects()) {
EXPECT_EQ(*p, i);
i++;
}

View File

@ -286,7 +286,7 @@ struct BufferAccess {
/// Store describes a single storage or uniform buffer write
struct Store {
ast::AssignmentStatement* assignment; // The AST assignment statement
const ast::AssignmentStatement* assignment; // The AST assignment statement
BufferAccess target; // The target for the write
};

View File

@ -246,14 +246,14 @@ struct MultiplanarExternalTexture::State {
/// Constructs a StatementList containing all the statements making up the
/// bodies of the textureSampleExternal and textureLoadExternal functions.
/// @param callType determines which function body to generate
/// @param call_type determines which function body to generate
/// @returns a statement list that makes of the body of the chosen function
ast::StatementList createTexFnExtStatementList(sem::IntrinsicType callType) {
ast::StatementList createTexFnExtStatementList(sem::IntrinsicType call_type) {
using f32 = ProgramBuilder::f32;
const ast::CallExpression* single_plane_call;
const ast::CallExpression* plane_0_call;
const ast::CallExpression* plane_1_call;
if (callType == sem::IntrinsicType::kTextureSampleLevel) {
const ast::CallExpression* single_plane_call = nullptr;
const ast::CallExpression* plane_0_call = nullptr;
const ast::CallExpression* plane_1_call = nullptr;
if (call_type == sem::IntrinsicType::kTextureSampleLevel) {
// textureSampleLevel(plane0, smp, coord.xy, 0.0);
single_plane_call =
b.Call("textureSampleLevel", "plane0", "smp", "coord", 0.0f);
@ -263,13 +263,16 @@ struct MultiplanarExternalTexture::State {
// textureSampleLevel(plane1, smp, coord.xy, 0.0);
plane_1_call =
b.Call("textureSampleLevel", "plane1", "smp", "coord", 0.0f);
} else if (callType == sem::IntrinsicType::kTextureLoad) {
} else if (call_type == sem::IntrinsicType::kTextureLoad) {
// textureLoad(plane0, coords.xy, 0);
single_plane_call = b.Call("textureLoad", "plane0", "coord", 0);
// textureLoad(plane0, coords.xy, 0);
plane_0_call = b.Call("textureLoad", "plane0", "coord", 0);
// textureLoad(plane1, coords.xy, 0);
plane_1_call = b.Call("textureLoad", "plane1", "coord", 0);
} else {
TINT_ICE(Transform, b.Diagnostics())
<< "unhandled intrinsic: " << call_type;
}
return {