mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-12-16 00:17:03 +00:00
Add HLSL/MSL generator options for ArrayLengthFromUniform
ArrayLengthFromUniform is needed for correct bounds checks on dynamic storage buffers on D3D12. The intrinsic GetDimensions does not return the actual size of the buffer binding. ArrayLengthFromUniform is updated to output the indices of the uniform buffer that are statically used. This allows Dawn to minimize the amount of data needed to upload into the uniform buffer. These output indices are returned on the HLSL/MSL generator result. ArrayLengthFromUniform is also updated to allow only some of the arrayLength calls to be replaced with uniform buffer loads. For HLSL output, the remaining arrayLength computations will continue to use GetDimensions(). For MSL, it is invalid to not specify an index into the uniform buffer for all storage buffers. After Dawn is updated to use the array_length_from_uniform option in the Metal backend, the buffer_size_ubo_index member for MSL output may be removed. Bug: dawn:429 Change-Id: I9da4ec4a20882e9f1bfa5bb026725d72529eff26 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69301 Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: James Price <jrprice@google.com> Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
@@ -35,60 +35,18 @@ namespace transform {
|
||||
ArrayLengthFromUniform::ArrayLengthFromUniform() = default;
|
||||
ArrayLengthFromUniform::~ArrayLengthFromUniform() = default;
|
||||
|
||||
void ArrayLengthFromUniform::Run(CloneContext& ctx,
|
||||
const DataMap& inputs,
|
||||
DataMap& outputs) {
|
||||
if (!Requires<InlinePointerLets, Simplify>(ctx)) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto* cfg = inputs.Get<Config>();
|
||||
if (cfg == nullptr) {
|
||||
ctx.dst->Diagnostics().add_error(
|
||||
diag::System::Transform,
|
||||
"missing transform data for " + std::string(TypeInfo().name));
|
||||
return;
|
||||
}
|
||||
|
||||
/// Iterate over all arrayLength() intrinsics that operate on
|
||||
/// storage buffer variables.
|
||||
/// @param ctx the CloneContext.
|
||||
/// @param functor of type void(const ast::CallExpression*, const
|
||||
/// sem::VariableUser, const sem::GlobalVariable*). It takes in an
|
||||
/// ast::CallExpression of the arrayLength call expression node, a
|
||||
/// sem::VariableUser of the used storage buffer variable, and the
|
||||
/// sem::GlobalVariable for the storage buffer.
|
||||
template <typename F>
|
||||
static void IterateArrayLengthOnStorageVar(CloneContext& ctx, F&& functor) {
|
||||
auto& sem = ctx.src->Sem();
|
||||
|
||||
const char* kBufferSizeMemberName = "buffer_size";
|
||||
|
||||
// Determine the size of the buffer size array.
|
||||
uint32_t max_buffer_size_index = 0;
|
||||
for (auto& idx : cfg->bindpoint_to_size_index) {
|
||||
if (idx.second > max_buffer_size_index) {
|
||||
max_buffer_size_index = idx.second;
|
||||
}
|
||||
}
|
||||
|
||||
// Get (or create, on first call) the uniform buffer that will receive the
|
||||
// size of each storage buffer in the module.
|
||||
const ast::Variable* buffer_size_ubo = nullptr;
|
||||
auto get_ubo = [&]() {
|
||||
if (!buffer_size_ubo) {
|
||||
// Emit an array<vec4<u32>, N>, where N is 1/4 number of elements.
|
||||
// We do this because UBOs require an element stride that is 16-byte
|
||||
// aligned.
|
||||
auto* buffer_size_struct = ctx.dst->Structure(
|
||||
ctx.dst->Sym(),
|
||||
{ctx.dst->Member(
|
||||
kBufferSizeMemberName,
|
||||
ctx.dst->ty.array(ctx.dst->ty.vec4(ctx.dst->ty.u32()),
|
||||
(max_buffer_size_index / 4) + 1))},
|
||||
|
||||
ast::DecorationList{ctx.dst->create<ast::StructBlockDecoration>()});
|
||||
buffer_size_ubo = ctx.dst->Global(
|
||||
ctx.dst->Sym(), ctx.dst->ty.Of(buffer_size_struct),
|
||||
ast::StorageClass::kUniform,
|
||||
ast::DecorationList{
|
||||
ctx.dst->create<ast::GroupDecoration>(cfg->ubo_binding.group),
|
||||
ctx.dst->create<ast::BindingDecoration>(
|
||||
cfg->ubo_binding.binding)});
|
||||
}
|
||||
return buffer_size_ubo;
|
||||
};
|
||||
|
||||
// Find all calls to the arrayLength() intrinsic.
|
||||
for (auto* node : ctx.src->ASTNodes().Objects()) {
|
||||
auto* call_expr = node->As<ast::CallExpression>();
|
||||
@@ -137,23 +95,91 @@ void ArrayLengthFromUniform::Run(CloneContext& ctx,
|
||||
<< "storage buffer is not a global variable";
|
||||
break;
|
||||
}
|
||||
functor(call_expr, storage_buffer_sem, var);
|
||||
}
|
||||
}
|
||||
|
||||
void ArrayLengthFromUniform::Run(CloneContext& ctx,
|
||||
const DataMap& inputs,
|
||||
DataMap& outputs) {
|
||||
if (!Requires<InlinePointerLets, Simplify>(ctx)) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto* cfg = inputs.Get<Config>();
|
||||
if (cfg == nullptr) {
|
||||
ctx.dst->Diagnostics().add_error(
|
||||
diag::System::Transform,
|
||||
"missing transform data for " + std::string(TypeInfo().name));
|
||||
return;
|
||||
}
|
||||
|
||||
const char* kBufferSizeMemberName = "buffer_size";
|
||||
|
||||
// Determine the size of the buffer size array.
|
||||
uint32_t max_buffer_size_index = 0;
|
||||
|
||||
IterateArrayLengthOnStorageVar(
|
||||
ctx, [&](const ast::CallExpression*, const sem::VariableUser*,
|
||||
const sem::GlobalVariable* var) {
|
||||
auto binding = var->BindingPoint();
|
||||
auto idx_itr = cfg->bindpoint_to_size_index.find(binding);
|
||||
if (idx_itr == cfg->bindpoint_to_size_index.end()) {
|
||||
return;
|
||||
}
|
||||
if (idx_itr->second > max_buffer_size_index) {
|
||||
max_buffer_size_index = idx_itr->second;
|
||||
}
|
||||
});
|
||||
|
||||
// Get (or create, on first call) the uniform buffer that will receive the
|
||||
// size of each storage buffer in the module.
|
||||
const ast::Variable* buffer_size_ubo = nullptr;
|
||||
auto get_ubo = [&]() {
|
||||
if (!buffer_size_ubo) {
|
||||
// Emit an array<vec4<u32>, N>, where N is 1/4 number of elements.
|
||||
// We do this because UBOs require an element stride that is 16-byte
|
||||
// aligned.
|
||||
auto* buffer_size_struct = ctx.dst->Structure(
|
||||
ctx.dst->Sym(),
|
||||
{ctx.dst->Member(
|
||||
kBufferSizeMemberName,
|
||||
ctx.dst->ty.array(ctx.dst->ty.vec4(ctx.dst->ty.u32()),
|
||||
(max_buffer_size_index / 4) + 1))},
|
||||
|
||||
ast::DecorationList{ctx.dst->create<ast::StructBlockDecoration>()});
|
||||
buffer_size_ubo = ctx.dst->Global(
|
||||
ctx.dst->Sym(), ctx.dst->ty.Of(buffer_size_struct),
|
||||
ast::StorageClass::kUniform,
|
||||
ast::DecorationList{
|
||||
ctx.dst->create<ast::GroupDecoration>(cfg->ubo_binding.group),
|
||||
ctx.dst->create<ast::BindingDecoration>(
|
||||
cfg->ubo_binding.binding)});
|
||||
}
|
||||
return buffer_size_ubo;
|
||||
};
|
||||
|
||||
std::unordered_set<uint32_t> used_size_indices;
|
||||
|
||||
IterateArrayLengthOnStorageVar(ctx, [&](const ast::CallExpression* call_expr,
|
||||
const sem::VariableUser*
|
||||
storage_buffer_sem,
|
||||
const sem::GlobalVariable* var) {
|
||||
auto binding = var->BindingPoint();
|
||||
auto idx_itr = cfg->bindpoint_to_size_index.find(binding);
|
||||
if (idx_itr == cfg->bindpoint_to_size_index.end()) {
|
||||
ctx.dst->Diagnostics().add_error(
|
||||
diag::System::Transform,
|
||||
"missing size index mapping for binding point (" +
|
||||
std::to_string(binding.group) + "," +
|
||||
std::to_string(binding.binding) + ")");
|
||||
continue;
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t size_index = idx_itr->second;
|
||||
used_size_indices.insert(size_index);
|
||||
|
||||
// Load the total storage buffer size from the UBO.
|
||||
uint32_t array_index = idx_itr->second / 4;
|
||||
uint32_t array_index = size_index / 4;
|
||||
auto* vec_expr = ctx.dst->IndexAccessor(
|
||||
ctx.dst->MemberAccessor(get_ubo()->symbol, kBufferSizeMemberName),
|
||||
array_index);
|
||||
uint32_t vec_index = idx_itr->second % 4;
|
||||
uint32_t vec_index = size_index % 4;
|
||||
auto* total_storage_buffer_size =
|
||||
ctx.dst->IndexAccessor(vec_expr, vec_index);
|
||||
|
||||
@@ -170,20 +196,23 @@ void ArrayLengthFromUniform::Run(CloneContext& ctx,
|
||||
ctx.dst->Sub(total_storage_buffer_size, array_offset), array_stride);
|
||||
|
||||
ctx.Replace(call_expr, array_length);
|
||||
}
|
||||
});
|
||||
|
||||
ctx.Clone();
|
||||
|
||||
outputs.Add<Result>(buffer_size_ubo ? true : false);
|
||||
outputs.Add<Result>(used_size_indices);
|
||||
}
|
||||
|
||||
ArrayLengthFromUniform::Config::Config(sem::BindingPoint ubo_bp)
|
||||
: ubo_binding(ubo_bp) {}
|
||||
ArrayLengthFromUniform::Config::Config(const Config&) = default;
|
||||
ArrayLengthFromUniform::Config& ArrayLengthFromUniform::Config::operator=(
|
||||
const Config&) = default;
|
||||
ArrayLengthFromUniform::Config::~Config() = default;
|
||||
|
||||
ArrayLengthFromUniform::Result::Result(bool needs_sizes)
|
||||
: needs_buffer_sizes(needs_sizes) {}
|
||||
ArrayLengthFromUniform::Result::Result(
|
||||
std::unordered_set<uint32_t> used_size_indices_in)
|
||||
: used_size_indices(std::move(used_size_indices_in)) {}
|
||||
ArrayLengthFromUniform::Result::Result(const Result&) = default;
|
||||
ArrayLengthFromUniform::Result::~Result() = default;
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#define SRC_TRANSFORM_ARRAY_LENGTH_FROM_UNIFORM_H_
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "src/sem/binding_point.h"
|
||||
#include "src/transform/transform.h"
|
||||
@@ -66,6 +67,10 @@ class ArrayLengthFromUniform
|
||||
/// Copy constructor
|
||||
Config(const Config&);
|
||||
|
||||
/// Copy assignment
|
||||
/// @return this Config
|
||||
Config& operator=(const Config&);
|
||||
|
||||
/// Destructor
|
||||
~Config() override;
|
||||
|
||||
@@ -79,8 +84,8 @@ class ArrayLengthFromUniform
|
||||
/// Information produced about what the transform did.
|
||||
struct Result : public Castable<Result, transform::Data> {
|
||||
/// Constructor
|
||||
/// @param needs_sizes True if the transform generated the buffer sizes UBO.
|
||||
explicit Result(bool needs_sizes);
|
||||
/// @param used_size_indices Indices into the UBO that are statically used.
|
||||
explicit Result(std::unordered_set<uint32_t> used_size_indices);
|
||||
|
||||
/// Copy constructor
|
||||
Result(const Result&);
|
||||
@@ -88,8 +93,8 @@ class ArrayLengthFromUniform
|
||||
/// Destructor
|
||||
~Result() override;
|
||||
|
||||
/// True if the transform generated the buffer sizes UBO.
|
||||
const bool needs_buffer_sizes;
|
||||
/// Indices into the UBO that are statically used.
|
||||
const std::unordered_set<uint32_t> used_size_indices;
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
@@ -110,8 +110,8 @@ fn main() {
|
||||
Run<InlinePointerLets, Simplify, ArrayLengthFromUniform>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
EXPECT_TRUE(
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->needs_buffer_sizes);
|
||||
EXPECT_EQ(std::unordered_set<uint32_t>({0}),
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->used_size_indices);
|
||||
}
|
||||
|
||||
TEST_F(ArrayLengthFromUniformTest, WithStride) {
|
||||
@@ -164,8 +164,8 @@ fn main() {
|
||||
Run<InlinePointerLets, Simplify, ArrayLengthFromUniform>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
EXPECT_TRUE(
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->needs_buffer_sizes);
|
||||
EXPECT_EQ(std::unordered_set<uint32_t>({0}),
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->used_size_indices);
|
||||
}
|
||||
|
||||
TEST_F(ArrayLengthFromUniformTest, MultipleStorageBuffers) {
|
||||
@@ -286,8 +286,124 @@ fn main() {
|
||||
Run<InlinePointerLets, Simplify, ArrayLengthFromUniform>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
EXPECT_TRUE(
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->needs_buffer_sizes);
|
||||
EXPECT_EQ(std::unordered_set<uint32_t>({0, 1, 2, 3, 4}),
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->used_size_indices);
|
||||
}
|
||||
|
||||
TEST_F(ArrayLengthFromUniformTest, MultipleUnusedStorageBuffers) {
|
||||
auto* src = R"(
|
||||
[[block]]
|
||||
struct SB1 {
|
||||
x : i32;
|
||||
arr1 : array<i32>;
|
||||
};
|
||||
[[block]]
|
||||
struct SB2 {
|
||||
x : i32;
|
||||
arr2 : array<vec4<f32>>;
|
||||
};
|
||||
[[block]]
|
||||
struct SB3 {
|
||||
x : i32;
|
||||
arr3 : array<vec4<f32>>;
|
||||
};
|
||||
[[block]]
|
||||
struct SB4 {
|
||||
x : i32;
|
||||
arr4 : array<vec4<f32>>;
|
||||
};
|
||||
[[block]]
|
||||
struct SB5 {
|
||||
x : i32;
|
||||
arr5 : array<vec4<f32>>;
|
||||
};
|
||||
|
||||
[[group(0), binding(2)]] var<storage, read> sb1 : SB1;
|
||||
[[group(1), binding(2)]] var<storage, read> sb2 : SB2;
|
||||
[[group(2), binding(2)]] var<storage, read> sb3 : SB3;
|
||||
[[group(3), binding(2)]] var<storage, read> sb4 : SB4;
|
||||
[[group(4), binding(2)]] var<storage, read> sb5 : SB5;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn main() {
|
||||
var len1 : u32 = arrayLength(&(sb1.arr1));
|
||||
var len3 : u32 = arrayLength(&(sb3.arr3));
|
||||
var x : u32 = (len1 + len3);
|
||||
}
|
||||
)";
|
||||
|
||||
auto* expect = R"(
|
||||
[[block]]
|
||||
struct tint_symbol {
|
||||
buffer_size : array<vec4<u32>, 1u>;
|
||||
};
|
||||
|
||||
[[group(0), binding(30)]] var<uniform> tint_symbol_1 : tint_symbol;
|
||||
|
||||
[[block]]
|
||||
struct SB1 {
|
||||
x : i32;
|
||||
arr1 : array<i32>;
|
||||
};
|
||||
|
||||
[[block]]
|
||||
struct SB2 {
|
||||
x : i32;
|
||||
arr2 : array<vec4<f32>>;
|
||||
};
|
||||
|
||||
[[block]]
|
||||
struct SB3 {
|
||||
x : i32;
|
||||
arr3 : array<vec4<f32>>;
|
||||
};
|
||||
|
||||
[[block]]
|
||||
struct SB4 {
|
||||
x : i32;
|
||||
arr4 : array<vec4<f32>>;
|
||||
};
|
||||
|
||||
[[block]]
|
||||
struct SB5 {
|
||||
x : i32;
|
||||
arr5 : array<vec4<f32>>;
|
||||
};
|
||||
|
||||
[[group(0), binding(2)]] var<storage, read> sb1 : SB1;
|
||||
|
||||
[[group(1), binding(2)]] var<storage, read> sb2 : SB2;
|
||||
|
||||
[[group(2), binding(2)]] var<storage, read> sb3 : SB3;
|
||||
|
||||
[[group(3), binding(2)]] var<storage, read> sb4 : SB4;
|
||||
|
||||
[[group(4), binding(2)]] var<storage, read> sb5 : SB5;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn main() {
|
||||
var len1 : u32 = ((tint_symbol_1.buffer_size[0u][0u] - 4u) / 4u);
|
||||
var len3 : u32 = ((tint_symbol_1.buffer_size[0u][2u] - 16u) / 16u);
|
||||
var x : u32 = (len1 + len3);
|
||||
}
|
||||
)";
|
||||
|
||||
ArrayLengthFromUniform::Config cfg({0, 30u});
|
||||
cfg.bindpoint_to_size_index.emplace(sem::BindingPoint{0, 2u}, 0);
|
||||
cfg.bindpoint_to_size_index.emplace(sem::BindingPoint{1u, 2u}, 1);
|
||||
cfg.bindpoint_to_size_index.emplace(sem::BindingPoint{2u, 2u}, 2);
|
||||
cfg.bindpoint_to_size_index.emplace(sem::BindingPoint{3u, 2u}, 3);
|
||||
cfg.bindpoint_to_size_index.emplace(sem::BindingPoint{4u, 2u}, 4);
|
||||
|
||||
DataMap data;
|
||||
data.Add<ArrayLengthFromUniform::Config>(std::move(cfg));
|
||||
|
||||
auto got =
|
||||
Run<InlinePointerLets, Simplify, ArrayLengthFromUniform>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
EXPECT_EQ(std::unordered_set<uint32_t>({0, 2}),
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->used_size_indices);
|
||||
}
|
||||
|
||||
TEST_F(ArrayLengthFromUniformTest, NoArrayLengthCalls) {
|
||||
@@ -316,8 +432,8 @@ fn main() {
|
||||
Run<InlinePointerLets, Simplify, ArrayLengthFromUniform>(src, data);
|
||||
|
||||
EXPECT_EQ(src, str(got));
|
||||
EXPECT_FALSE(
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->needs_buffer_sizes);
|
||||
EXPECT_EQ(std::unordered_set<uint32_t>(),
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->used_size_indices);
|
||||
}
|
||||
|
||||
TEST_F(ArrayLengthFromUniformTest, MissingBindingPointToIndexMapping) {
|
||||
@@ -346,7 +462,37 @@ fn main() {
|
||||
}
|
||||
)";
|
||||
|
||||
auto* expect = "error: missing size index mapping for binding point (1,2)";
|
||||
auto* expect = R"(
|
||||
[[block]]
|
||||
struct tint_symbol {
|
||||
buffer_size : array<vec4<u32>, 1u>;
|
||||
};
|
||||
|
||||
[[group(0), binding(30)]] var<uniform> tint_symbol_1 : tint_symbol;
|
||||
|
||||
[[block]]
|
||||
struct SB1 {
|
||||
x : i32;
|
||||
arr1 : array<i32>;
|
||||
};
|
||||
|
||||
[[block]]
|
||||
struct SB2 {
|
||||
x : i32;
|
||||
arr2 : array<vec4<f32>>;
|
||||
};
|
||||
|
||||
[[group(0), binding(2)]] var<storage, read> sb1 : SB1;
|
||||
|
||||
[[group(1), binding(2)]] var<storage, read> sb2 : SB2;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn main() {
|
||||
var len1 : u32 = ((tint_symbol_1.buffer_size[0u][0u] - 4u) / 4u);
|
||||
var len2 : u32 = arrayLength(&(sb2.arr2));
|
||||
var x : u32 = (len1 + len2);
|
||||
}
|
||||
)";
|
||||
|
||||
ArrayLengthFromUniform::Config cfg({0, 30u});
|
||||
cfg.bindpoint_to_size_index.emplace(sem::BindingPoint{0, 2}, 0);
|
||||
@@ -358,6 +504,8 @@ fn main() {
|
||||
Run<InlinePointerLets, Simplify, ArrayLengthFromUniform>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
EXPECT_EQ(std::unordered_set<uint32_t>({0}),
|
||||
got.data.Get<ArrayLengthFromUniform::Result>()->used_size_indices);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Reference in New Issue
Block a user