mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-12-11 22:44:04 +00:00
[hlsl] transform: Zero init arrays with a loop
If the array size is greater than a threshold. This is a work around for FXC stalling when initializing large arrays with a single zero-init assignment. Bug: tint:936 Fixed: tint:943 Fixed: tint:942 Change-Id: Ie93c8f373874b8d6d020d041fa48b38fb1352f71 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/56775 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>
This commit is contained in:
committed by
Tint LUCI CQ
parent
b0455217fa
commit
b4ff73e250
@@ -66,6 +66,10 @@ Output Hlsl::Run(const Program* in, const DataMap&) {
|
||||
manager.Add<ExternalTextureTransform>();
|
||||
manager.Add<PromoteInitializersToConstVar>();
|
||||
manager.Add<PadArrayElements>();
|
||||
|
||||
ZeroInitWorkgroupMemory::Config zero_init_cfg;
|
||||
zero_init_cfg.init_arrays_with_loop_size_threshold = 32; // 8 scalars
|
||||
data.Add<ZeroInitWorkgroupMemory::Config>(zero_init_cfg);
|
||||
data.Add<CanonicalizeEntryPointIO::Config>(
|
||||
CanonicalizeEntryPointIO::BuiltinStyle::kStructMember);
|
||||
auto out = manager.Run(in, data);
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "src/utils/get_or_create.h"
|
||||
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory);
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory::Config);
|
||||
|
||||
namespace tint {
|
||||
namespace transform {
|
||||
@@ -32,14 +33,16 @@ namespace transform {
|
||||
struct ZeroInitWorkgroupMemory::State {
|
||||
/// The clone context
|
||||
CloneContext& ctx;
|
||||
/// The built statements
|
||||
ast::StatementList& stmts;
|
||||
/// The config
|
||||
Config cfg;
|
||||
|
||||
/// Zero() generates the statements required to zero initialize the workgroup
|
||||
/// storage expression of type `ty`.
|
||||
/// @param ty the expression type
|
||||
/// @param stmts the built statements
|
||||
/// @param get_expr a function that builds the AST nodes for the expression
|
||||
void Zero(const sem::Type* ty,
|
||||
ast::StatementList& stmts,
|
||||
const std::function<ast::Expression*()>& get_expr) {
|
||||
if (CanZero(ty)) {
|
||||
auto* var = get_expr();
|
||||
@@ -61,21 +64,32 @@ struct ZeroInitWorkgroupMemory::State {
|
||||
if (auto* str = ty->As<sem::Struct>()) {
|
||||
for (auto* member : str->Members()) {
|
||||
auto name = ctx.Clone(member->Declaration()->symbol());
|
||||
Zero(member->Type(),
|
||||
Zero(member->Type(), stmts,
|
||||
[&] { return ctx.dst->MemberAccessor(get_expr(), name); });
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto* arr = ty->As<sem::Array>()) {
|
||||
// TODO(bclayton): If array sizes become pipeline-overridable then this
|
||||
// will need to emit code for a loop.
|
||||
// See https://github.com/gpuweb/gpuweb/pull/1792
|
||||
for (size_t i = 0; i < arr->Count(); i++) {
|
||||
Zero(arr->ElemType(), [&] {
|
||||
return ctx.dst->IndexAccessor(get_expr(),
|
||||
static_cast<ProgramBuilder::u32>(i));
|
||||
});
|
||||
if (ShouldEmitForLoop(arr)) {
|
||||
auto i = ctx.dst->Symbols().New("i");
|
||||
auto* i_decl = ctx.dst->Decl(ctx.dst->Var(i, ctx.dst->ty.i32()));
|
||||
auto* cond = ctx.dst->create<ast::BinaryExpression>(
|
||||
ast::BinaryOp::kLessThan, ctx.dst->Expr(i),
|
||||
ctx.dst->Expr(static_cast<int>(arr->Count())));
|
||||
auto* inc = ctx.dst->Assign(i, ctx.dst->Add(i, 1));
|
||||
ast::StatementList for_stmts;
|
||||
Zero(arr->ElemType(), for_stmts,
|
||||
[&] { return ctx.dst->IndexAccessor(get_expr(), i); });
|
||||
auto* body = ctx.dst->Block(for_stmts);
|
||||
stmts.emplace_back(ctx.dst->For(i_decl, cond, inc, body));
|
||||
} else {
|
||||
for (size_t i = 0; i < arr->Count(); i++) {
|
||||
Zero(arr->ElemType(), stmts, [&] {
|
||||
return ctx.dst->IndexAccessor(get_expr(),
|
||||
static_cast<ProgramBuilder::u32>(i));
|
||||
});
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -89,7 +103,7 @@ struct ZeroInitWorkgroupMemory::State {
|
||||
/// CanZero() returns false, then the type needs to be initialized by
|
||||
/// decomposing the initialization into multiple sub-initializations.
|
||||
/// @param ty the type to inspect
|
||||
static bool CanZero(const sem::Type* ty) {
|
||||
bool CanZero(const sem::Type* ty) {
|
||||
if (ty->Is<sem::Atomic>()) {
|
||||
return false;
|
||||
}
|
||||
@@ -101,21 +115,39 @@ struct ZeroInitWorkgroupMemory::State {
|
||||
}
|
||||
}
|
||||
if (auto* arr = ty->As<sem::Array>()) {
|
||||
if (!CanZero(arr->ElemType())) {
|
||||
if (ShouldEmitForLoop(arr) || !CanZero(arr->ElemType())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// @returns true if the array should be emitted as a for-loop instead of
|
||||
/// using zero-initializer statements.
|
||||
/// @param array the array
|
||||
bool ShouldEmitForLoop(const sem::Array* array) {
|
||||
// TODO(bclayton): If array sizes become pipeline-overridable then this
|
||||
// we need to return true for these arrays.
|
||||
// See https://github.com/gpuweb/gpuweb/pull/1792
|
||||
return (cfg.init_arrays_with_loop_size_threshold != 0) &&
|
||||
(array->SizeInBytes() >= cfg.init_arrays_with_loop_size_threshold);
|
||||
}
|
||||
};
|
||||
|
||||
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default;
|
||||
|
||||
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default;
|
||||
|
||||
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, const DataMap&, DataMap&) {
|
||||
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
|
||||
const DataMap& inputs,
|
||||
DataMap&) {
|
||||
auto& sem = ctx.src->Sem();
|
||||
|
||||
Config cfg;
|
||||
if (auto* c = inputs.Get<Config>()) {
|
||||
cfg = *c;
|
||||
}
|
||||
|
||||
for (auto* ast_func : ctx.src->AST().Functions()) {
|
||||
if (!ast_func->IsEntryPoint()) {
|
||||
continue;
|
||||
@@ -129,7 +161,7 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, const DataMap&, DataMap&) {
|
||||
if (var->StorageClass() != ast::StorageClass::kWorkgroup) {
|
||||
continue;
|
||||
}
|
||||
State{ctx, stmts}.Zero(var->Type()->UnwrapRef(), [&] {
|
||||
State{ctx, cfg}.Zero(var->Type()->UnwrapRef(), stmts, [&] {
|
||||
auto var_name = ctx.Clone(var->Declaration()->symbol());
|
||||
return ctx.dst->Expr(var_name);
|
||||
});
|
||||
@@ -193,5 +225,11 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, const DataMap&, DataMap&) {
|
||||
ctx.Clone();
|
||||
}
|
||||
|
||||
ZeroInitWorkgroupMemory::Config::Config() = default;
|
||||
ZeroInitWorkgroupMemory::Config::Config(const Config&) = default;
|
||||
ZeroInitWorkgroupMemory::Config::~Config() = default;
|
||||
ZeroInitWorkgroupMemory::Config& ZeroInitWorkgroupMemory::Config::operator=(
|
||||
const Config&) = default;
|
||||
|
||||
} // namespace transform
|
||||
} // namespace tint
|
||||
|
||||
@@ -26,6 +26,27 @@ namespace transform {
|
||||
class ZeroInitWorkgroupMemory
|
||||
: public Castable<ZeroInitWorkgroupMemory, Transform> {
|
||||
public:
|
||||
/// Configuration options for the transform
|
||||
struct Config : public Castable<Config, Data> {
|
||||
/// Constructor
|
||||
Config();
|
||||
|
||||
/// Copy constructor
|
||||
Config(const Config&);
|
||||
|
||||
/// Destructor
|
||||
~Config() override;
|
||||
|
||||
/// Assignment operator
|
||||
/// @returns this Config
|
||||
Config& operator=(const Config&);
|
||||
|
||||
/// If greater than 0, then arrays of at least this size in bytes will be
|
||||
/// zero initialized using a for loop. If 0, then the array is assigned a
|
||||
/// zero initialized array with a single statement.
|
||||
uint32_t init_arrays_with_loop_size_threshold = 0;
|
||||
};
|
||||
|
||||
/// Constructor
|
||||
ZeroInitWorkgroupMemory();
|
||||
|
||||
|
||||
@@ -558,6 +558,56 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest, WorkgroupArray_InitWithLoop) {
|
||||
auto* src = R"(
|
||||
struct S {
|
||||
a : array<i32, 3>; // size: 12, less than the loop threshold
|
||||
b : array<i32, 4>; // size: 16, equal to the loop threshold
|
||||
c : array<i32, 5>; // size: 20, greater than the loop threshold
|
||||
};
|
||||
|
||||
var<workgroup> w : S;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f() {
|
||||
ignore(w); // Initialization should be inserted above this statement
|
||||
}
|
||||
)";
|
||||
auto* expect = R"(
|
||||
struct S {
|
||||
a : array<i32, 3>;
|
||||
b : array<i32, 4>;
|
||||
c : array<i32, 5>;
|
||||
};
|
||||
|
||||
var<workgroup> w : S;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
w.a = array<i32, 3>();
|
||||
for(var i : i32; (i < 4); i = (i + 1)) {
|
||||
w.b[i] = i32();
|
||||
}
|
||||
for(var i_1 : i32; (i_1 < 5); i_1 = (i_1 + 1)) {
|
||||
w.c[i_1] = i32();
|
||||
}
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(w);
|
||||
}
|
||||
)";
|
||||
|
||||
ZeroInitWorkgroupMemory::Config cfg;
|
||||
cfg.init_arrays_with_loop_size_threshold = 16;
|
||||
|
||||
DataMap data;
|
||||
data.Add<ZeroInitWorkgroupMemory::Config>(cfg);
|
||||
auto got = Run<ZeroInitWorkgroupMemory>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace transform
|
||||
} // namespace tint
|
||||
|
||||
Reference in New Issue
Block a user