transform: Optimize ZeroInitWorkgroupMemory for arrays

Spread the array zeroing across as many workgroup invocations as possible.

Bug: tint:910
Change-Id: I1cb5a6aaafd2a0a4093ea3b9797c173378bc5605
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60203
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
Ben Clayton 2021-07-30 14:08:06 +00:00 committed by Tint LUCI CQ
parent 669c57f3d1
commit 89a0bde59c
124 changed files with 1812 additions and 1596 deletions

View File

@ -72,9 +72,6 @@ Output Hlsl::Run(const Program* in, const DataMap& inputs) {
manager.Add<PromoteInitializersToConstVar>(); manager.Add<PromoteInitializersToConstVar>();
manager.Add<PadArrayElements>(); manager.Add<PadArrayElements>();
ZeroInitWorkgroupMemory::Config zero_init_cfg;
zero_init_cfg.init_arrays_with_loop_size_threshold = 32; // 8 scalars
data.Add<ZeroInitWorkgroupMemory::Config>(zero_init_cfg);
data.Add<CanonicalizeEntryPointIO::Config>( data.Add<CanonicalizeEntryPointIO::Config>(
CanonicalizeEntryPointIO::BuiltinStyle::kStructMember); CanonicalizeEntryPointIO::BuiltinStyle::kStructMember);
auto out = manager.Run(in, data); auto out = manager.Run(in, data);

View File

@ -38,7 +38,7 @@ fn main() {
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32; [[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32;
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32; [[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32;
if ((local_invocation_index == 0u)) { {
tint_symbol_1 = f32(); tint_symbol_1 = f32();
} }
workgroupBarrier(); workgroupBarrier();
@ -95,7 +95,7 @@ fn foo(a : f32, tint_symbol_3 : ptr<private, f32>, tint_symbol_4 : ptr<workgroup
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_5 : f32; [[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_5 : f32;
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_6 : f32; [[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_6 : f32;
if ((local_invocation_index == 0u)) { {
tint_symbol_5 = f32(); tint_symbol_5 = f32();
} }
workgroupBarrier(); workgroupBarrier();
@ -152,7 +152,7 @@ fn main() {
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32; [[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32;
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32; [[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32;
if ((local_invocation_index == 0u)) { {
tint_symbol_1 = f32(); tint_symbol_1 = f32();
} }
workgroupBarrier(); workgroupBarrier();

View File

@ -14,17 +14,21 @@
#include "src/transform/zero_init_workgroup_memory.h" #include "src/transform/zero_init_workgroup_memory.h"
#include <algorithm>
#include <map>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <vector>
#include "src/ast/workgroup_decoration.h"
#include "src/program_builder.h" #include "src/program_builder.h"
#include "src/sem/atomic_type.h" #include "src/sem/atomic_type.h"
#include "src/sem/function.h" #include "src/sem/function.h"
#include "src/sem/variable.h" #include "src/sem/variable.h"
#include "src/utils/get_or_create.h" #include "src/utils/get_or_create.h"
#include "src/utils/unique_vector.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory); TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory);
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory::Config);
namespace tint { namespace tint {
namespace transform { namespace transform {
@ -33,152 +37,112 @@ namespace transform {
struct ZeroInitWorkgroupMemory::State { struct ZeroInitWorkgroupMemory::State {
/// The clone context /// The clone context
CloneContext& ctx; CloneContext& ctx;
/// The config
Config cfg;
/// Zero() generates the statements required to zero initialize the workgroup /// An alias to *ctx.dst
/// storage expression of type `ty`. ProgramBuilder& b = *ctx.dst;
/// @param ty the expression type
/// @param stmts the built statements /// The constant size of the workgroup. If 0, then #workgroup_size_expr should
/// @param get_expr a function that builds the AST nodes for the expression /// be used instead.
void Zero(const sem::Type* ty, uint32_t workgroup_size_const = 0;
ast::StatementList& stmts, /// The size of the workgroup as an expression generator. Use if
const std::function<ast::Expression*()>& get_expr) { /// #workgroup_size_const is 0.
if (CanZero(ty)) { std::function<ast::Expression*()> workgroup_size_expr;
auto* var = get_expr();
auto* zero_init = ctx.dst->Construct(CreateASTTypeFor(ctx, ty)); /// ArrayIndex represents a function on the local invocation index, of
stmts.emplace_back( /// the form: `array_index = (local_invocation_index % modulo) / division`
ctx.dst->create<ast::AssignmentStatement>(var, zero_init)); struct ArrayIndex {
return; /// The RHS of the modulus part of the expression
uint32_t modulo = 1;
/// The RHS of the division part of the expression
uint32_t division = 1;
/// Equality operator
/// @param i the ArrayIndex to compare to this ArrayIndex
/// @returns true if `i` and this ArrayIndex are equal
bool operator==(const ArrayIndex& i) const {
return modulo == i.modulo && division == i.division;
} }
if (auto* atomic = ty->As<sem::Atomic>()) { /// Hash function for the ArrayIndex type
auto* zero_init = struct Hasher {
ctx.dst->Construct(CreateASTTypeFor(ctx, atomic->Type())); /// @param i the ArrayIndex to calculate a hash for
auto* store = ctx.dst->Call("atomicStore", ctx.dst->AddressOf(get_expr()), /// @returns the hash value for the ArrayIndex `i`
zero_init); size_t operator()(const ArrayIndex& i) const {
stmts.emplace_back(ctx.dst->create<ast::CallStatement>(store)); return utils::Hash(i.modulo, i.division);
return;
}
if (auto* str = ty->As<sem::Struct>()) {
for (auto* member : str->Members()) {
auto name = ctx.Clone(member->Declaration()->symbol());
Zero(member->Type(), stmts,
[&] { return ctx.dst->MemberAccessor(get_expr(), name); });
} }
return; };
} };
if (auto* arr = ty->As<sem::Array>()) { /// A list of unique ArrayIndex
if (ShouldEmitForLoop(arr)) { using ArrayIndices = UniqueVector<ArrayIndex, ArrayIndex::Hasher>;
auto i = ctx.dst->Symbols().New("i");
auto* i_decl = ctx.dst->Decl(ctx.dst->Var(i, ctx.dst->ty.i32()));
auto* cond = ctx.dst->create<ast::BinaryExpression>(
ast::BinaryOp::kLessThan, ctx.dst->Expr(i),
ctx.dst->Expr(static_cast<int>(arr->Count())));
auto* inc = ctx.dst->Assign(i, ctx.dst->Add(i, 1));
ast::StatementList for_stmts;
Zero(arr->ElemType(), for_stmts,
[&] { return ctx.dst->IndexAccessor(get_expr(), i); });
auto* body = ctx.dst->Block(for_stmts);
stmts.emplace_back(ctx.dst->For(i_decl, cond, inc, body));
} else {
for (size_t i = 0; i < arr->Count(); i++) {
Zero(arr->ElemType(), stmts, [&] {
return ctx.dst->IndexAccessor(get_expr(),
static_cast<ProgramBuilder::u32>(i));
});
}
}
return;
}
TINT_UNREACHABLE(Transform, ctx.dst->Diagnostics()) /// Expression holds information about an expression that is being built for a
<< "could not zero workgroup type: " << ty->type_name(); /// statement that will zero workgroup values.
} struct Expression {
/// The AST expression node
ast::Expression* expr = nullptr;
/// The number of iterations required to zero the value
uint32_t num_iterations = 0;
/// All array indices used by this expression
ArrayIndices array_indices;
};
/// @returns true if the type `ty` can be zeroed with a simple zero-value /// Statement holds information about a statement that will zero workgroup
/// expression in the form of a type constructor without operands. If /// values.
/// CanZero() returns false, then the type needs to be initialized by struct Statement {
/// decomposing the initialization into multiple sub-initializations. /// The AST statement node
/// @param ty the type to inspect ast::Statement* stmt;
bool CanZero(const sem::Type* ty) { /// The number of iterations required to zero the value
if (ty->Is<sem::Atomic>()) { uint32_t num_iterations;
return false; /// All array indices used by this statement
} ArrayIndices array_indices;
if (auto* str = ty->As<sem::Struct>()) { };
for (auto* member : str->Members()) {
if (!CanZero(member->Type())) {
return false;
}
}
}
if (auto* arr = ty->As<sem::Array>()) {
if (ShouldEmitForLoop(arr) || !CanZero(arr->ElemType())) {
return false;
}
}
return true;
}
/// @returns true if the array should be emitted as a for-loop instead of /// All statements that zero workgroup memory
/// using zero-initializer statements. std::vector<Statement> statements;
/// @param array the array
bool ShouldEmitForLoop(const sem::Array* array) {
// TODO(bclayton): If array sizes become pipeline-overridable then
// we need to return true for these arrays.
// See https://github.com/gpuweb/gpuweb/pull/1792
return (cfg.init_arrays_with_loop_size_threshold != 0) &&
(array->Size() >= cfg.init_arrays_with_loop_size_threshold);
}
};
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default; /// A map of ArrayIndex to the name reserved for the `let` declaration of that
/// index.
std::unordered_map<ArrayIndex, Symbol, ArrayIndex::Hasher> array_index_names;
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default; /// Constructor
/// @param c the CloneContext used for the transform
explicit State(CloneContext& c) : ctx(c) {}
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, /// Run inserts the workgroup memory zero-initialization logic at the top of
const DataMap& inputs, /// the given function
DataMap&) { /// @param fn a compute shader entry point function
auto& sem = ctx.src->Sem(); void Run(ast::Function* fn) {
auto& sem = ctx.src->Sem();
Config cfg; CalculateWorkgroupSize(
if (auto* c = inputs.Get<Config>()) { ast::GetDecoration<ast::WorkgroupDecoration>(fn->decorations()));
cfg = *c;
}
for (auto* ast_func : ctx.src->AST().Functions()) { // Generate a list of statements to zero initialize each of the
if (!ast_func->IsEntryPoint()) { // workgroup storage variables used by `fn`. This will populate #statements.
continue; auto* func = sem.Get(fn);
}
// Generate a list of statements to zero initialize each of the workgroup
// storage variables.
ast::StatementList stmts;
auto* func = sem.Get(ast_func);
for (auto* var : func->ReferencedModuleVariables()) { for (auto* var : func->ReferencedModuleVariables()) {
if (var->StorageClass() != ast::StorageClass::kWorkgroup) { if (var->StorageClass() == ast::StorageClass::kWorkgroup) {
continue; BuildZeroingStatements(
var->Type()->UnwrapRef(), [&](uint32_t num_values) {
auto var_name = ctx.Clone(var->Declaration()->symbol());
return Expression{b.Expr(var_name), num_values, ArrayIndices{}};
});
} }
State{ctx, cfg}.Zero(var->Type()->UnwrapRef(), stmts, [&] {
auto var_name = ctx.Clone(var->Declaration()->symbol());
return ctx.dst->Expr(var_name);
});
} }
if (stmts.empty()) { if (statements.empty()) {
continue; // No workgroup variables to initialize. return; // No workgroup variables to initialize.
} }
// Scan the entry point for an existing local_invocation_index builtin // Scan the entry point for an existing local_invocation_index builtin
// parameter // parameter
ast::Expression* local_index = nullptr; std::function<ast::Expression*()> local_index;
for (auto* param : ast_func->params()) { for (auto* param : fn->params()) {
if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>( if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>(
param->decorations())) { param->decorations())) {
if (builtin->value() == ast::Builtin::kLocalInvocationIndex) { if (builtin->value() == ast::Builtin::kLocalInvocationIndex) {
local_index = ctx.dst->Expr(ctx.Clone(param->symbol())); local_index = [=] { return b.Expr(ctx.Clone(param->symbol())); };
break; break;
} }
} }
@ -188,9 +152,11 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>( if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>(
member->Declaration()->decorations())) { member->Declaration()->decorations())) {
if (builtin->value() == ast::Builtin::kLocalInvocationIndex) { if (builtin->value() == ast::Builtin::kLocalInvocationIndex) {
auto* param_expr = ctx.dst->Expr(ctx.Clone(param->symbol())); local_index = [=] {
auto member_name = ctx.Clone(member->Declaration()->symbol()); auto* param_expr = b.Expr(ctx.Clone(param->symbol()));
local_index = ctx.dst->MemberAccessor(param_expr, member_name); auto member_name = ctx.Clone(member->Declaration()->symbol());
return b.MemberAccessor(param_expr, member_name);
};
break; break;
} }
} }
@ -199,37 +165,283 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
} }
if (!local_index) { if (!local_index) {
// No existing local index parameter. Append one to the entry point. // No existing local index parameter. Append one to the entry point.
auto* param = ctx.dst->Param( auto* param =
ctx.dst->Symbols().New("local_invocation_index"), ctx.dst->ty.u32(), b.Param(b.Symbols().New("local_invocation_index"), b.ty.u32(),
{ctx.dst->Builtin(ast::Builtin::kLocalInvocationIndex)}); {b.Builtin(ast::Builtin::kLocalInvocationIndex)});
ctx.InsertBack(ast_func->params(), param); ctx.InsertBack(fn->params(), param);
local_index = ctx.dst->Expr(param->symbol()); local_index = [=] { return b.Expr(param->symbol()); };
} }
// We only want to zero-initialize the workgroup memory with the first // Take the zeroing statements and bin them by the number of iterations
// shader invocation. Construct an if statement that holds stmts. // required to zero the workgroup data. We then emit these in blocks,
// TODO(crbug.com/tint/910): We should attempt to optimize this for arrays. // possibly wrapped in if-statements or for-loops.
auto* if_zero_local_index = ctx.dst->create<ast::BinaryExpression>( std::unordered_map<uint32_t, std::vector<Statement>>
ast::BinaryOp::kEqual, local_index, ctx.dst->Expr(0u)); stmts_by_num_iterations;
auto* if_stmt = ctx.dst->If(if_zero_local_index, ctx.dst->Block(stmts)); std::vector<uint32_t> num_sorted_iterations;
for (auto& s : statements) {
auto& stmts = stmts_by_num_iterations[s.num_iterations];
if (stmts.empty()) {
num_sorted_iterations.emplace_back(s.num_iterations);
}
stmts.emplace_back(s);
}
std::sort(num_sorted_iterations.begin(), num_sorted_iterations.end());
// Insert this if-statement at the top of the entry point. // Loop over the statements, grouped by num_iterations.
ctx.InsertFront(ast_func->body()->statements(), if_stmt); for (auto num_iterations : num_sorted_iterations) {
auto& stmts = stmts_by_num_iterations[num_iterations];
// Append a single workgroup barrier after the if statement. // Gather all the array indices used by all the statements in the block.
ctx.InsertFront( ArrayIndices array_indices;
ast_func->body()->statements(), for (auto& s : stmts) {
ctx.dst->create<ast::CallStatement>(ctx.dst->Call("workgroupBarrier"))); for (auto& idx : s.array_indices) {
array_indices.add(idx);
}
}
// Determine the block type used to emit these statements.
if (workgroup_size_const == 0 || num_iterations > workgroup_size_const) {
// Either the workgroup size is dynamic, or smaller than num_iterations.
// In either case, we need to generate a for loop to ensure we
// initialize all the array elements.
//
// for (var idx : u32 = local_index;
// idx < num_iterations;
// idx += workgroup_size) {
// ...
// }
auto idx = b.Symbols().New("idx");
auto* init = b.Decl(b.Var(idx, b.ty.u32(), local_index()));
auto* cond = b.create<ast::BinaryExpression>(
ast::BinaryOp::kLessThan, b.Expr(idx), b.Expr(num_iterations));
auto* cont = b.Assign(
idx, b.Add(idx, workgroup_size_const ? b.Expr(workgroup_size_const)
: workgroup_size_expr()));
auto block = DeclareArrayIndices(num_iterations, array_indices,
[&] { return b.Expr(idx); });
for (auto& s : stmts) {
block.emplace_back(s.stmt);
}
auto* for_loop = b.For(init, cond, cont, b.Block(block));
ctx.InsertFront(fn->body()->statements(), for_loop);
} else if (num_iterations < workgroup_size_const) {
// Workgroup size is a known constant, but is greater than
// num_iterations. Emit an if statement:
//
// if (local_index < num_iterations) {
// ...
// }
auto* cond = b.create<ast::BinaryExpression>(
ast::BinaryOp::kLessThan, local_index(), b.Expr(num_iterations));
auto block = DeclareArrayIndices(num_iterations, array_indices,
[&] { return b.Expr(local_index()); });
for (auto& s : stmts) {
block.emplace_back(s.stmt);
}
auto* if_stmt = b.If(cond, b.Block(block));
ctx.InsertFront(fn->body()->statements(), if_stmt);
} else {
// Workgroup size exactly equals num_iterations.
// No need for any conditionals. Just emit a basic block:
//
// {
// ...
// }
auto block = DeclareArrayIndices(num_iterations, array_indices,
[&] { return b.Expr(local_index()); });
for (auto& s : stmts) {
block.emplace_back(s.stmt);
}
ctx.InsertFront(fn->body()->statements(), b.Block(block));
}
}
// Append a single workgroup barrier after the zero initialization.
ctx.InsertFront(fn->body()->statements(),
b.create<ast::CallStatement>(b.Call("workgroupBarrier")));
} }
/// BuildZeroingExpr is a function that builds a sub-expression used to zero
/// workgroup values.
/// @param num_values the number of elements that the expression will be used
/// to zero
/// @returns the built expression
using BuildZeroingExpr = std::function<Expression(uint32_t num_values)>;
/// BuildZeroingStatements() generates the statements required to zero
/// initialize the workgroup storage expression of type `ty`.
/// The generated statements are appended to #statements; nothing is returned.
/// The function recurses through structure members and array elements until
/// it reaches a type that can be zeroed with a single statement.
/// @param ty the expression type
/// @param get_expr a function that builds the AST nodes for the expression.
void BuildZeroingStatements(const sem::Type* ty,
const BuildZeroingExpr& get_expr) {
// Leaf case: the whole value is zeroed by assigning a zero-value type
// constructor. A single value is requested from get_expr.
if (CanTriviallyZero(ty)) {
auto var = get_expr(1u);
auto* zero_init = b.Construct(CreateASTTypeFor(ctx, ty));
statements.emplace_back(Statement{b.Assign(var.expr, zero_init),
var.num_iterations, var.array_indices});
return;
}
// Atomics are zeroed with a call to atomicStore() on the address of the
// expression, passing a zero value of the atomic's inner type.
if (auto* atomic = ty->As<sem::Atomic>()) {
auto* zero_init = b.Construct(CreateASTTypeFor(ctx, atomic->Type()));
auto expr = get_expr(1u);
auto* store = b.Call("atomicStore", b.AddressOf(expr.expr), zero_init);
statements.emplace_back(Statement{b.create<ast::CallStatement>(store),
expr.num_iterations,
expr.array_indices});
return;
}
// Structures that are not trivially zeroable are zeroed member by member.
// Each member expression is the parent expression wrapped in a member
// accessor; the iteration count and array indices of the parent are passed
// through unchanged.
if (auto* str = ty->As<sem::Struct>()) {
for (auto* member : str->Members()) {
auto name = ctx.Clone(member->Declaration()->symbol());
BuildZeroingStatements(member->Type(), [&](uint32_t num_values) {
auto s = get_expr(num_values);
return Expression{b.MemberAccessor(s.expr, name), s.num_iterations,
s.array_indices};
});
}
return;
}
// Arrays are zeroed by recursing into the element type with an expression
// that indexes the array.
if (auto* arr = ty->As<sem::Array>()) {
BuildZeroingStatements(arr->ElemType(), [&](uint32_t num_values) {
// num_values is the number of values to zero for the element type.
// The number of iterations required to zero the array and its elements
// is:
// `num_values * arr->Count()`
// The index for this array is:
// `(idx % modulo) / division`
auto modulo = num_values * arr->Count();
auto division = num_values;
auto a = get_expr(modulo);
auto array_indices = a.array_indices;
array_indices.add(ArrayIndex{modulo, division});
// Look up the symbol for this (modulo, division) pair in
// #array_index_names, creating a new "i" symbol if there is none yet.
// NOTE(review): presumably GetOrCreate only invokes the lambda on a miss,
// so equal ArrayIndex values share one name -- confirm against the helper.
auto index =
utils::GetOrCreate(array_index_names, ArrayIndex{modulo, division},
[&] { return b.Symbols().New("i"); });
return Expression{b.IndexAccessor(a.expr, index), a.num_iterations,
array_indices};
});
return;
}
// None of the cases above matched `ty`.
TINT_UNREACHABLE(Transform, b.Diagnostics())
<< "could not zero workgroup type: " << ty->type_name();
}
/// DeclareArrayIndices returns a list of statements that contain the `let`
/// declarations for all of the ArrayIndices.
/// Each declaration has the form `let <name> : u32 = (iteration % modulo) /
/// division`. The modulo is omitted when `num_iterations` does not exceed
/// it, and the division is omitted when the divisor is 1.
/// @param num_iterations the number of iterations for the block
/// @param array_indices the list of array indices to generate `let`
/// declarations for
/// @param iteration a function that returns the index of the current
/// iteration.
/// @returns the list of `let` statements that declare the array indices
ast::StatementList DeclareArrayIndices(
    uint32_t num_iterations,
    const ArrayIndices& array_indices,
    const std::function<ast::Expression*()>& iteration) {
  ast::StatementList stmts;
  for (auto index : array_indices) {
    // #array_index_names must already hold a name for every index passed in;
    // at() throws if it does not.
    auto name = array_index_names.at(index);
    // The modulo is only needed when the iteration value can reach or exceed
    // index.modulo.
    auto* mod =
        (num_iterations > index.modulo)
            ? b.create<ast::BinaryExpression>(
                  ast::BinaryOp::kModulo, iteration(), b.Expr(index.modulo))
            : iteration();
    // Dividing by 1 is a no-op, so skip emitting it.
    auto* div = (index.division != 1u) ? b.Div(mod, index.division) : mod;
    auto* decl = b.Decl(b.Const(name, b.ty.u32(), div));
    stmts.emplace_back(decl);
  }
  return stmts;
}
/// CalculateWorkgroupSize initializes the members #workgroup_size_const and
/// #workgroup_size_expr with the linear workgroup size.
/// Dimensions with a constant value are multiplied into
/// #workgroup_size_const. Any other dimension is multiplied into
/// #workgroup_size_expr, in which case the constant part is folded into that
/// expression and #workgroup_size_const is set to 0.
/// @param deco the workgroup decoration applied to the entry point function
void CalculateWorkgroupSize(const ast::WorkgroupDecoration* deco) {
  workgroup_size_const = 1u;
  workgroup_size_expr = nullptr;
  for (auto* expr : deco->values()) {
    if (!expr) {
      continue;  // Dimension not specified.
    }
    auto* sem = ctx.src->Sem().Get(expr);
    if (auto c = sem->ConstantValue()) {
      if (c.ElementType()->Is<sem::I32>()) {
        workgroup_size_const *= static_cast<uint32_t>(c.Elements()[0].i32);
        continue;
      } else if (c.ElementType()->Is<sem::U32>()) {
        workgroup_size_const *= c.Elements()[0].u32;
        continue;
      }
    }
    // Constant value could not be found. Build expression instead.
    // The previous generator (if any) is captured by value so the
    // expressions chain into a product of all non-constant dimensions.
    workgroup_size_expr = [this, expr, size = workgroup_size_expr] {
      auto* e = ctx.Clone(expr);
      // Signed dimensions are converted to u32 so the product is unsigned.
      if (ctx.src->TypeOf(expr)->UnwrapRef()->Is<sem::I32>()) {
        e = b.Construct<ProgramBuilder::u32>(e);
      }
      return size ? b.Mul(size(), e) : e;
    };
  }
  if (workgroup_size_expr) {
    if (workgroup_size_const != 1) {
      // Fold workgroup_size_const in to workgroup_size_expr. Every
      // non-constant dimension was converted to u32 above, so the constant
      // is always multiplied in as an unsigned value.
      workgroup_size_expr = [this, const_size = workgroup_size_const,
                             expr_size = workgroup_size_expr] {
        return b.Mul(expr_size(), const_size);
      };
    }
    // Indicate that workgroup_size_expr should be used instead of the
    // constant.
    workgroup_size_const = 0;
  }
}
/// Reports whether a variable with store type `ty` can be zeroed with one
/// assignment of an operand-less type constructor. When this is false the
/// caller has to break the initialization up into several
/// sub-initializations.
/// @param ty the type to inspect
/// @returns true if `ty` is neither an atomic nor an array, and, for
/// structures, every member is itself trivially zeroable
bool CanTriviallyZero(const sem::Type* ty) {
  // Atomics and arrays are never zeroed with a single assignment.
  if (ty->Is<sem::Atomic>() || ty->Is<sem::Array>()) {
    return false;
  }
  auto* structure = ty->As<sem::Struct>();
  if (!structure) {
    // True for all other storable types
    return true;
  }
  // A structure qualifies only when all of its members do.
  for (auto* field : structure->Members()) {
    if (!CanTriviallyZero(field->Type())) {
      return false;
    }
  }
  return true;
}
};
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default;
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default;
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, const DataMap&, DataMap&) {
for (auto* fn : ctx.src->AST().Functions()) {
if (fn->pipeline_stage() == ast::PipelineStage::kCompute) {
State{ctx}.Run(fn);
}
}
ctx.Clone(); ctx.Clone();
} }
ZeroInitWorkgroupMemory::Config::Config() = default;
ZeroInitWorkgroupMemory::Config::Config(const Config&) = default;
ZeroInitWorkgroupMemory::Config::~Config() = default;
ZeroInitWorkgroupMemory::Config& ZeroInitWorkgroupMemory::Config::operator=(
const Config&) = default;
} // namespace transform } // namespace transform
} // namespace tint } // namespace tint

View File

@ -26,27 +26,6 @@ namespace transform {
class ZeroInitWorkgroupMemory class ZeroInitWorkgroupMemory
: public Castable<ZeroInitWorkgroupMemory, Transform> { : public Castable<ZeroInitWorkgroupMemory, Transform> {
public: public:
/// Configuration options for the transform
struct Config : public Castable<Config, Data> {
/// Constructor
Config();
/// Copy constructor
Config(const Config&);
/// Destructor
~Config() override;
/// Assignment operator
/// @returns this Config
Config& operator=(const Config&);
/// If greater than 0, then arrays of at least this size in bytes will be
/// zero initialized using a for loop. If 0, then the array is assigned a
/// zero initialized array with a single statement.
uint32_t init_arrays_with_loop_size_threshold = 0;
};
/// Constructor /// Constructor
ZeroInitWorkgroupMemory(); ZeroInitWorkgroupMemory();

View File

@ -76,7 +76,7 @@ TEST_F(ZeroInitWorkgroupMemoryTest, SingleWorkgroupVar_ExistingLocalIndex) {
var<workgroup> v : i32; var<workgroup> v : i32;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) { fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(v); // Initialization should be inserted above this statement ignore(v); // Initialization should be inserted above this statement
} }
)"; )";
@ -84,8 +84,8 @@ fn f([[builtin(local_invocation_index)]] idx : u32) {
var<workgroup> v : i32; var<workgroup> v : i32;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) { fn f([[builtin(local_invocation_index)]] local_idx : u32) {
if ((idx == 0u)) { {
v = i32(); v = i32();
} }
workgroupBarrier(); workgroupBarrier();
@ -104,7 +104,7 @@ TEST_F(ZeroInitWorkgroupMemoryTest,
var<workgroup> v : i32; var<workgroup> v : i32;
struct Params { struct Params {
[[builtin(local_invocation_index)]] idx : u32; [[builtin(local_invocation_index)]] local_idx : u32;
}; };
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
@ -117,12 +117,12 @@ var<workgroup> v : i32;
struct Params { struct Params {
[[builtin(local_invocation_index)]] [[builtin(local_invocation_index)]]
idx : u32; local_idx : u32;
}; };
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f(params : Params) { fn f(params : Params) {
if ((params.idx == 0u)) { {
v = i32(); v = i32();
} }
workgroupBarrier(); workgroupBarrier();
@ -149,7 +149,7 @@ var<workgroup> v : i32;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { {
v = i32(); v = i32();
} }
workgroupBarrier(); workgroupBarrier();
@ -162,7 +162,8 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
EXPECT_EQ(expect, str(got)); EXPECT_EQ(expect, str(got));
} }
TEST_F(ZeroInitWorkgroupMemoryTest, MultipleWorkgroupVar_ExistingLocalIndex) { TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size1) {
auto* src = R"( auto* src = R"(
struct S { struct S {
x : i32; x : i32;
@ -176,7 +177,7 @@ var<workgroup> b : S;
var<workgroup> c : array<S, 32>; var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) { fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement ignore(a); // Initialization should be inserted above this statement
ignore(b); ignore(b);
ignore(c); ignore(c);
@ -195,11 +196,246 @@ var<workgroup> b : S;
var<workgroup> c : array<S, 32>; var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) { fn f([[builtin(local_invocation_index)]] local_idx : u32) {
if ((idx == 0u)) { {
a = i32(); a = i32();
b = S(); b.x = i32();
c = array<S, 32>(); }
for(var idx : u32 = local_idx; (idx < 8u); idx = (idx + 1u)) {
let i : u32 = idx;
b.y[i] = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 32u); idx_1 = (idx_1 + 1u)) {
let i_1 : u32 = idx_1;
c[i_1].x = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 256u); idx_2 = (idx_2 + 1u)) {
let i_2 : u32 = (idx_2 / 8u);
let i : u32 = (idx_2 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
ignore(b);
ignore(c);
}
)";
auto got = Run<ZeroInitWorkgroupMemory>(src);
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size_2_3) {
auto* src = R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(2, 3)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
}
)";
auto* expect = R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(2, 3)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
if ((local_idx < 1u)) {
a = i32();
b.x = i32();
}
for(var idx : u32 = local_idx; (idx < 8u); idx = (idx + 6u)) {
let i : u32 = idx;
b.y[i] = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 32u); idx_1 = (idx_1 + 6u)) {
let i_1 : u32 = idx_1;
c[i_1].x = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 256u); idx_2 = (idx_2 + 6u)) {
let i_2 : u32 = (idx_2 / 8u);
let i : u32 = (idx_2 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
ignore(b);
ignore(c);
}
)";
auto got = Run<ZeroInitWorkgroupMemory>(src);
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size_2_3_X) {
auto* src = R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : i32;
[[stage(compute), workgroup_size(2, 3, X)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
}
)";
auto* expect =
R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : i32;
[[stage(compute), workgroup_size(2, 3, X)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
for(var idx : u32 = local_idx; (idx < 1u); idx = (idx + (u32(X) * 6u))) {
a = i32();
b.x = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 8u); idx_1 = (idx_1 + (u32(X) * 6u))) {
let i : u32 = idx_1;
b.y[i] = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 32u); idx_2 = (idx_2 + (u32(X) * 6u))) {
let i_1 : u32 = idx_2;
c[i_1].x = i32();
}
for(var idx_3 : u32 = local_idx; (idx_3 < 256u); idx_3 = (idx_3 + (u32(X) * 6u))) {
let i_2 : u32 = (idx_3 / 8u);
let i : u32 = (idx_3 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
ignore(b);
ignore(c);
}
)";
auto got = Run<ZeroInitWorkgroupMemory>(src);
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size_5u_X_10u) {
auto* src = R"(
struct S {
x : array<array<i32, 8>, 10>;
y : array<i32, 8>;
z : array<array<array<i32, 8>, 10>, 20>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : u32;
[[stage(compute), workgroup_size(5u, X, 10u)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
}
)";
auto* expect =
R"(
struct S {
x : array<array<i32, 8>, 10>;
y : array<i32, 8>;
z : array<array<array<i32, 8>, 10>, 20>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : u32;
[[stage(compute), workgroup_size(5u, X, 10u)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
for(var idx : u32 = local_idx; (idx < 1u); idx = (idx + (X * 50u))) {
a = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 8u); idx_1 = (idx_1 + (X * 50u))) {
let i_1 : u32 = idx_1;
b.y[i_1] = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 80u); idx_2 = (idx_2 + (X * 50u))) {
let i : u32 = (idx_2 / 8u);
let i_1 : u32 = (idx_2 % 8u);
b.x[i][i_1] = i32();
}
for(var idx_3 : u32 = local_idx; (idx_3 < 256u); idx_3 = (idx_3 + (X * 50u))) {
let i_4 : u32 = (idx_3 / 8u);
let i_1 : u32 = (idx_3 % 8u);
c[i_4].y[i_1] = i32();
}
for(var idx_4 : u32 = local_idx; (idx_4 < 1600u); idx_4 = (idx_4 + (X * 50u))) {
let i_2 : u32 = (idx_4 / 80u);
let i : u32 = ((idx_4 % 80u) / 8u);
let i_1 : u32 = (idx_4 % 8u);
b.z[i_2][i][i_1] = i32();
}
for(var idx_5 : u32 = local_idx; (idx_5 < 2560u); idx_5 = (idx_5 + (X * 50u))) {
let i_3 : u32 = (idx_5 / 80u);
let i : u32 = ((idx_5 % 80u) / 8u);
let i_1 : u32 = (idx_5 % 8u);
c[i_3].x[i][i_1] = i32();
}
for(var idx_6 : u32 = local_idx; (idx_6 < 51200u); idx_6 = (idx_6 + (X * 50u))) {
let i_5 : u32 = (idx_6 / 1600u);
let i_2 : u32 = ((idx_6 % 1600u) / 80u);
let i : u32 = ((idx_6 % 80u) / 8u);
let i_1 : u32 = (idx_6 % 8u);
c[i_5].z[i_2][i][i_1] = i32();
} }
workgroupBarrier(); workgroupBarrier();
ignore(a); ignore(a);
@ -247,10 +483,22 @@ var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { {
a = i32(); a = i32();
b = S(); b.x = i32();
c = array<S, 32>(); }
for(var idx : u32 = local_invocation_index; (idx < 8u); idx = (idx + 1u)) {
let i : u32 = idx;
b.y[i] = i32();
}
for(var idx_1 : u32 = local_invocation_index; (idx_1 < 32u); idx_1 = (idx_1 + 1u)) {
let i_1 : u32 = idx_1;
c[i_1].x = i32();
}
for(var idx_2 : u32 = local_invocation_index; (idx_2 < 256u); idx_2 = (idx_2 + 1u)) {
let i_2 : u32 = (idx_2 / 8u);
let i : u32 = (idx_2 % 8u);
c[i_2].y[i] = i32();
} }
workgroupBarrier(); workgroupBarrier();
ignore(a); ignore(a);
@ -283,12 +531,12 @@ fn f1() {
ignore(c); ignore(c);
} }
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1, 2, 3)]]
fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>) { fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>) {
ignore(b); // Initialization should be inserted above this statement ignore(b); // Initialization should be inserted above this statement
} }
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(4, 5, 6)]]
fn f3() { fn f3() {
ignore(c); // Initialization should be inserted above this statement ignore(c); // Initialization should be inserted above this statement
ignore(a); ignore(a);
@ -308,30 +556,50 @@ var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f1([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f1([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { {
a = i32(); a = i32();
c = array<S, 32>(); }
for(var idx : u32 = local_invocation_index; (idx < 32u); idx = (idx + 1u)) {
let i : u32 = idx;
c[i].x = i32();
}
for(var idx_1 : u32 = local_invocation_index; (idx_1 < 256u); idx_1 = (idx_1 + 1u)) {
let i_1 : u32 = (idx_1 / 8u);
let i_2 : u32 = (idx_1 % 8u);
c[i_1].y[i_2] = i32();
} }
workgroupBarrier(); workgroupBarrier();
ignore(a); ignore(a);
ignore(c); ignore(c);
} }
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1, 2, 3)]]
fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index_1 : u32) { fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index_1 : u32) {
if ((local_invocation_index_1 == 0u)) { if ((local_invocation_index_1 < 1u)) {
b = S(); b.x = i32();
}
for(var idx_2 : u32 = local_invocation_index_1; (idx_2 < 8u); idx_2 = (idx_2 + 6u)) {
let i_3 : u32 = idx_2;
b.y[i_3] = i32();
} }
workgroupBarrier(); workgroupBarrier();
ignore(b); ignore(b);
} }
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(4, 5, 6)]]
fn f3([[builtin(local_invocation_index)]] local_invocation_index_2 : u32) { fn f3([[builtin(local_invocation_index)]] local_invocation_index_2 : u32) {
if ((local_invocation_index_2 == 0u)) { if ((local_invocation_index_2 < 1u)) {
c = array<S, 32>();
a = i32(); a = i32();
} }
if ((local_invocation_index_2 < 32u)) {
let i_4 : u32 = local_invocation_index_2;
c[i_4].x = i32();
}
for(var idx_3 : u32 = local_invocation_index_2; (idx_3 < 256u); idx_3 = (idx_3 + 120u)) {
let i_5 : u32 = (idx_3 / 8u);
let i_6 : u32 = (idx_3 % 8u);
c[i_5].y[i_6] = i32();
}
workgroupBarrier(); workgroupBarrier();
ignore(c); ignore(c);
ignore(a); ignore(a);
@ -356,7 +624,7 @@ fn call_use_v() {
} }
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) { fn f([[builtin(local_invocation_index)]] local_idx : u32) {
call_use_v(); // Initialization should be inserted above this statement call_use_v(); // Initialization should be inserted above this statement
} }
)"; )";
@ -372,8 +640,8 @@ fn call_use_v() {
} }
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) { fn f([[builtin(local_invocation_index)]] local_idx : u32) {
if ((idx == 0u)) { {
v = i32(); v = i32();
} }
workgroupBarrier(); workgroupBarrier();
@ -404,7 +672,7 @@ var<workgroup> u : atomic<u32>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { {
atomicStore(&(i), i32()); atomicStore(&(i), i32());
atomicStore(&(u), u32()); atomicStore(&(u), u32());
} }
@ -449,7 +717,7 @@ var<workgroup> w : S;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { {
w.a = i32(); w.a = i32();
atomicStore(&(w.i), i32()); atomicStore(&(w.i), i32());
w.b = f32(); w.b = f32();
@ -480,11 +748,9 @@ var<workgroup> w : array<atomic<u32>, 4>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { for(var idx : u32 = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
atomicStore(&(w[0u]), u32()); let i : u32 = idx;
atomicStore(&(w[1u]), u32()); atomicStore(&(w[i]), u32());
atomicStore(&(w[2u]), u32());
atomicStore(&(w[3u]), u32());
} }
workgroupBarrier(); workgroupBarrier();
ignore(w); ignore(w);
@ -526,27 +792,13 @@ var<workgroup> w : array<S, 4>;
[[stage(compute), workgroup_size(1)]] [[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) { fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) { for(var idx : u32 = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
w[0u].a = i32(); let i_1 : u32 = idx;
atomicStore(&(w[0u].i), i32()); w[i_1].a = i32();
w[0u].b = f32(); atomicStore(&(w[i_1].i), i32());
atomicStore(&(w[0u].u), u32()); w[i_1].b = f32();
w[0u].c = u32(); atomicStore(&(w[i_1].u), u32());
w[1u].a = i32(); w[i_1].c = u32();
atomicStore(&(w[1u].i), i32());
w[1u].b = f32();
atomicStore(&(w[1u].u), u32());
w[1u].c = u32();
w[2u].a = i32();
atomicStore(&(w[2u].i), i32());
w[2u].b = f32();
atomicStore(&(w[2u].u), u32());
w[2u].c = u32();
w[3u].a = i32();
atomicStore(&(w[3u].i), i32());
w[3u].b = f32();
atomicStore(&(w[3u].u), u32());
w[3u].c = u32();
} }
workgroupBarrier(); workgroupBarrier();
ignore(w); ignore(w);
@ -558,56 +810,6 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
EXPECT_EQ(expect, str(got)); EXPECT_EQ(expect, str(got));
} }
// Exercises ZeroInitWorkgroupMemory::Config::init_arrays_with_loop_size_threshold.
// The threshold is set to 16 below and the struct holds three arrays whose
// sizes (as quoted in the fixture comments: 12, 16 and 20) sit below, at and
// above that threshold. The expected output shows:
//   * `w.a` (below the threshold) zeroed with a single array constructor
//     assignment;
//   * `w.b` (equal to the threshold) and `w.c` (above it) zeroed element by
//     element in a for-loop.
// The entry point in `src` takes no parameters; the expected output has a
// `local_invocation_index` builtin parameter added, the zeroing guarded by
// `local_invocation_index == 0u`, and a workgroupBarrier() before the original
// body.
TEST_F(ZeroInitWorkgroupMemoryTest, WorkgroupArray_InitWithLoop) {
auto* src = R"(
struct S {
a : array<i32, 3>; // size: 12, less than the loop threshold
b : array<i32, 4>; // size: 16, equal to the loop threshold
c : array<i32, 5>; // size: 20, greater than the loop threshold
};
var<workgroup> w : S;
[[stage(compute), workgroup_size(1)]]
fn f() {
ignore(w); // Initialization should be inserted above this statement
}
)";
auto* expect = R"(
struct S {
a : array<i32, 3>;
b : array<i32, 4>;
c : array<i32, 5>;
};
var<workgroup> w : S;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
w.a = array<i32, 3>();
for(var i : i32; (i < 4); i = (i + 1)) {
w.b[i] = i32();
}
for(var i_1 : i32; (i_1 < 5); i_1 = (i_1 + 1)) {
w.c[i_1] = i32();
}
}
workgroupBarrier();
ignore(w);
}
)";
// Pass the transform a config with an explicit threshold of 16 through the
// DataMap, so the test does not depend on the transform's default value.
ZeroInitWorkgroupMemory::Config cfg;
cfg.init_arrays_with_loop_size_threshold = 16;
DataMap data;
data.Add<ZeroInitWorkgroupMemory::Config>(cfg);
// Run the transform and compare the printed result against `expect`.
auto got = Run<ZeroInitWorkgroupMemory>(src, data);
EXPECT_EQ(expect, str(got));
}
} // namespace } // namespace
} // namespace transform } // namespace transform
} // namespace tint } // namespace tint

View File

@ -16,11 +16,10 @@ struct tint_symbol_2 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void f(tint_symbol_2 tint_symbol_1) { void f(tint_symbol_2 tint_symbol_1) {
const uint local_invocation_index = tint_symbol_1.local_invocation_index; const uint local_invocation_index = tint_symbol_1.local_invocation_index;
if ((local_invocation_index == 0u)) { {
{ for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
for(int i = 0; (i < 64); i = (i + 1)) { const uint i = idx;
s.data[i] = 0; s.data[i] = 0;
}
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -15,13 +15,13 @@ struct Result {
}; };
kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) { kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) {
threadgroup S tint_symbol_2; threadgroup S tint_symbol_1;
if ((local_invocation_index == 0u)) { for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
S const tint_symbol_1 = {}; uint const i = idx;
tint_symbol_2 = tint_symbol_1; tint_symbol_1.data.arr[i] = int();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
result.out = tint_symbol_2.data.arr[ubo.dynamic_idx]; result.out = tint_symbol_1.data.arr[ubo.dynamic_idx];
return; return;
} }

View File

@ -1,7 +1,7 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 38 ; Bound: 52
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
@ -18,6 +18,7 @@
OpName %s "s" OpName %s "s"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
OpName %f "f" OpName %f "f"
OpName %idx "idx"
OpDecorate %UBO Block OpDecorate %UBO Block
OpMemberDecorate %UBO 0 Offset 0 OpMemberDecorate %UBO 0 Offset 0
OpDecorate %ubo NonWritable OpDecorate %ubo NonWritable
@ -47,30 +48,51 @@
%tint_symbol = OpVariable %_ptr_Input_uint Input %tint_symbol = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid %void = OpTypeVoid
%16 = OpTypeFunction %void %16 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0 %_ptr_Function_uint = OpTypePointer Function %uint
%23 = OpConstantNull %uint
%bool = OpTypeBool %bool = OpTypeBool
%26 = OpConstantNull %S %uint_0 = OpConstant %uint 0
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%38 = OpConstantNull %int
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%_ptr_Uniform_int = OpTypePointer Uniform %int %_ptr_Uniform_int = OpTypePointer Uniform %int
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%f = OpFunction %void None %16 %f = OpFunction %void None %16
%19 = OpLabel %19 = OpLabel
%idx = OpVariable %_ptr_Function_uint Function %23
%20 = OpLoad %uint %tint_symbol %20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0 OpStore %idx %20
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %s %26
OpBranch %24 OpBranch %24
%24 = OpLabel %24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%29 = OpLoad %uint %idx
%30 = OpULessThan %bool %29 %uint_64
%28 = OpLogicalNot %bool %30
OpSelectionMerge %32 None
OpBranchConditional %28 %33 %32
%33 = OpLabel
OpBranch %25
%32 = OpLabel
%34 = OpLoad %uint %idx
%37 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
OpStore %37 %38
OpBranch %26
%26 = OpLabel
%39 = OpLoad %uint %idx
%41 = OpIAdd %uint %39 %uint_1
OpStore %idx %41
OpBranch %24
%25 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%31 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0 %46 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
%33 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0 %48 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
%34 = OpLoad %int %33 %49 = OpLoad %int %48
%36 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34 %50 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %49
%37 = OpLoad %int %36 %51 = OpLoad %int %50
OpStore %31 %37 OpStore %46 %51
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -16,11 +16,10 @@ struct tint_symbol_2 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void f(tint_symbol_2 tint_symbol_1) { void f(tint_symbol_2 tint_symbol_1) {
const uint local_invocation_index = tint_symbol_1.local_invocation_index; const uint local_invocation_index = tint_symbol_1.local_invocation_index;
if ((local_invocation_index == 0u)) { {
{ for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
for(int i = 0; (i < 64); i = (i + 1)) { const uint i = idx;
s.data[i] = 0; s.data[i] = 0;
}
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -15,14 +15,14 @@ struct Result {
}; };
kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) { kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) {
threadgroup S tint_symbol_2; threadgroup S tint_symbol_1;
if ((local_invocation_index == 0u)) { for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
S const tint_symbol_1 = {}; uint const i = idx;
tint_symbol_2 = tint_symbol_1; tint_symbol_1.data.arr[i] = int();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
tint_symbol_2.data.arr[ubo.dynamic_idx] = 1; tint_symbol_1.data.arr[ubo.dynamic_idx] = 1;
result.out = tint_symbol_2.data.arr[3]; result.out = tint_symbol_1.data.arr[3];
return; return;
} }

View File

@ -1,7 +1,7 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 41 ; Bound: 55
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
@ -18,6 +18,7 @@
OpName %s "s" OpName %s "s"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
OpName %f "f" OpName %f "f"
OpName %idx "idx"
OpDecorate %UBO Block OpDecorate %UBO Block
OpMemberDecorate %UBO 0 Offset 0 OpMemberDecorate %UBO 0 Offset 0
OpDecorate %ubo NonWritable OpDecorate %ubo NonWritable
@ -47,34 +48,55 @@
%tint_symbol = OpVariable %_ptr_Input_uint Input %tint_symbol = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid %void = OpTypeVoid
%16 = OpTypeFunction %void %16 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0 %_ptr_Function_uint = OpTypePointer Function %uint
%23 = OpConstantNull %uint
%bool = OpTypeBool %bool = OpTypeBool
%26 = OpConstantNull %S %uint_0 = OpConstant %uint 0
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%38 = OpConstantNull %int
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%_ptr_Uniform_int = OpTypePointer Uniform %int %_ptr_Uniform_int = OpTypePointer Uniform %int
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%int_3 = OpConstant %int 3 %int_3 = OpConstant %int 3
%f = OpFunction %void None %16 %f = OpFunction %void None %16
%19 = OpLabel %19 = OpLabel
%idx = OpVariable %_ptr_Function_uint Function %23
%20 = OpLoad %uint %tint_symbol %20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0 OpStore %idx %20
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %s %26
OpBranch %24 OpBranch %24
%24 = OpLabel %24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%29 = OpLoad %uint %idx
%30 = OpULessThan %bool %29 %uint_64
%28 = OpLogicalNot %bool %30
OpSelectionMerge %32 None
OpBranchConditional %28 %33 %32
%33 = OpLabel
OpBranch %25
%32 = OpLabel
%34 = OpLoad %uint %idx
%37 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
OpStore %37 %38
OpBranch %26
%26 = OpLabel
%39 = OpLoad %uint %idx
%41 = OpIAdd %uint %39 %uint_1
OpStore %idx %41
OpBranch %24
%25 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%31 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0 %46 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
%32 = OpLoad %int %31 %47 = OpLoad %int %46
%34 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %32 %48 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %47
OpStore %34 %int_1 OpStore %48 %int_1
%37 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0 %51 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
%39 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %int_3 %53 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %int_3
%40 = OpLoad %int %39 %54 = OpLoad %int %53
OpStore %37 %40 OpStore %51 %54
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -59,24 +59,12 @@ void main(tint_symbol_1 tint_symbol) {
const uint3 local_id = tint_symbol.local_id; const uint3 local_id = tint_symbol.local_id;
const uint3 global_id = tint_symbol.global_id; const uint3 global_id = tint_symbol.global_id;
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
{ for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
for(int i = 0; (i < 64); i = (i + 1)) { const uint i = (idx / 64u);
{ const uint i_1 = (idx % 64u);
for(int i_1 = 0; (i_1 < 64); i_1 = (i_1 + 1)) { mm_Asub[i][i_1] = 0.0f;
mm_Asub[i][i_1] = 0.0f; mm_Bsub[i][i_1] = 0.0f;
}
}
}
}
{
for(int i_2 = 0; (i_2 < 64); i_2 = (i_2 + 1)) {
{
for(int i_3 = 0; (i_3 < 64); i_3 = (i_3 + 1)) {
mm_Bsub[i_2][i_3] = 0.0f;
}
}
}
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -51,13 +51,13 @@ void mm_write(constant Uniforms& uniforms, device Matrix& resultMatrix, uint row
} }
kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3 global_id [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& uniforms [[buffer(3)]], const device Matrix& firstMatrix [[buffer(0)]], const device Matrix& secondMatrix [[buffer(1)]], device Matrix& resultMatrix [[buffer(2)]]) { kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3 global_id [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& uniforms [[buffer(3)]], const device Matrix& firstMatrix [[buffer(0)]], const device Matrix& secondMatrix [[buffer(1)]], device Matrix& resultMatrix [[buffer(2)]]) {
threadgroup tint_array_wrapper tint_symbol_4; threadgroup tint_array_wrapper tint_symbol_2;
threadgroup tint_array_wrapper tint_symbol_5; threadgroup tint_array_wrapper tint_symbol_3;
if ((local_invocation_index == 0u)) { for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
tint_array_wrapper const tint_symbol_2 = {.arr={}}; uint const i = (idx / 64u);
tint_symbol_4 = tint_symbol_2; uint const i_1 = (idx % 64u);
tint_array_wrapper const tint_symbol_3 = {.arr={}}; tint_symbol_2.arr[i].arr[i_1] = float();
tint_symbol_5 = tint_symbol_3; tint_symbol_3.arr[i].arr[i_1] = float();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint const tileRow = (local_id.y * RowPerThread); uint const tileRow = (local_id.y * RowPerThread);
@ -80,23 +80,23 @@ kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3
for(uint innerCol = 0u; (innerCol < ColPerThreadA); innerCol = (innerCol + 1u)) { for(uint innerCol = 0u; (innerCol < ColPerThreadA); innerCol = (innerCol + 1u)) {
uint const inputRow = (tileRow + innerRow); uint const inputRow = (tileRow + innerRow);
uint const inputCol = (tileColA + innerCol); uint const inputCol = (tileColA + innerCol);
tint_symbol_4.arr[inputRow].arr[inputCol] = mm_readA(uniforms, firstMatrix, (globalRow + innerRow), ((t * TileInner) + inputCol)); tint_symbol_2.arr[inputRow].arr[inputCol] = mm_readA(uniforms, firstMatrix, (globalRow + innerRow), ((t * TileInner) + inputCol));
} }
} }
for(uint innerRow = 0u; (innerRow < RowPerThreadB); innerRow = (innerRow + 1u)) { for(uint innerRow = 0u; (innerRow < RowPerThreadB); innerRow = (innerRow + 1u)) {
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) { for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
uint const inputRow = (tileRowB + innerRow); uint const inputRow = (tileRowB + innerRow);
uint const inputCol = (tileCol + innerCol); uint const inputCol = (tileCol + innerCol);
tint_symbol_5.arr[innerCol].arr[inputCol] = mm_readB(uniforms, secondMatrix, ((t * TileInner) + inputRow), (globalCol + innerCol)); tint_symbol_3.arr[innerCol].arr[inputCol] = mm_readB(uniforms, secondMatrix, ((t * TileInner) + inputRow), (globalCol + innerCol));
} }
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
for(uint k = 0u; (k < TileInner); k = (k + 1u)) { for(uint k = 0u; (k < TileInner); k = (k + 1u)) {
for(uint inner = 0u; (inner < ColPerThread); inner = (inner + 1u)) { for(uint inner = 0u; (inner < ColPerThread); inner = (inner + 1u)) {
BCached.arr[inner] = tint_symbol_5.arr[k].arr[(tileCol + inner)]; BCached.arr[inner] = tint_symbol_3.arr[k].arr[(tileCol + inner)];
} }
for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) { for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
ACached = tint_symbol_4.arr[(tileRow + innerRow)].arr[k]; ACached = tint_symbol_2.arr[(tileRow + innerRow)].arr[k];
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) { for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
uint const index = ((innerRow * ColPerThread) + innerCol); uint const index = ((innerRow * ColPerThread) + innerCol);
acc.arr[index] = (acc.arr[index] + (ACached * BCached.arr[innerCol])); acc.arr[index] = (acc.arr[index] + (ACached * BCached.arr[innerCol]));

View File

@ -1,7 +1,7 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 356 ; Bound: 372
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
@ -38,6 +38,7 @@
OpName %col_1 "col" OpName %col_1 "col"
OpName %value "value" OpName %value "value"
OpName %main "main" OpName %main "main"
OpName %idx "idx"
OpName %acc "acc" OpName %acc "acc"
OpName %ACached "ACached" OpName %ACached "ACached"
OpName %BCached "BCached" OpName %BCached "BCached"
@ -114,20 +115,21 @@
%void = OpTypeVoid %void = OpTypeVoid
%75 = OpTypeFunction %void %uint %uint %float %75 = OpTypeFunction %void %uint %uint %float
%98 = OpTypeFunction %void %98 = OpTypeFunction %void
%105 = OpConstantNull %_arr__arr_float_TileAOuter_TileAOuter %_ptr_Function_uint = OpTypePointer Function %uint
%104 = OpConstantNull %uint
%uint_4096 = OpConstant %uint 4096
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%121 = OpConstantNull %float
%uint_256 = OpConstant %uint 256
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%uint_16 = OpConstant %uint 16 %uint_16 = OpConstant %uint 16
%_arr_float_uint_16 = OpTypeArray %float %uint_16 %_arr_float_uint_16 = OpTypeArray %float %uint_16
%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 %_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16
%129 = OpConstantNull %_arr_float_uint_16 %149 = OpConstantNull %_arr_float_uint_16
%_ptr_Function_float = OpTypePointer Function %float %_ptr_Function_float = OpTypePointer Function %float
%132 = OpConstantNull %float
%_arr_float_RowPerThread = OpTypeArray %float %RowPerThread %_arr_float_RowPerThread = OpTypeArray %float %RowPerThread
%_ptr_Function__arr_float_RowPerThread = OpTypePointer Function %_arr_float_RowPerThread %_ptr_Function__arr_float_RowPerThread = OpTypePointer Function %_arr_float_RowPerThread
%136 = OpConstantNull %_arr_float_RowPerThread %155 = OpConstantNull %_arr_float_RowPerThread
%_ptr_Function_uint = OpTypePointer Function %uint
%139 = OpConstantNull %uint
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%mm_readA = OpFunction %float None %25 %mm_readA = OpFunction %float None %25
%row = OpFunctionParameter %uint %row = OpFunctionParameter %uint
%col = OpFunctionParameter %uint %col = OpFunctionParameter %uint
@ -218,377 +220,399 @@
OpFunctionEnd OpFunctionEnd
%main = OpFunction %void None %98 %main = OpFunction %void None %98
%100 = OpLabel %100 = OpLabel
%acc = OpVariable %_ptr_Function__arr_float_uint_16 Function %129 %idx = OpVariable %_ptr_Function_uint Function %104
%ACached = OpVariable %_ptr_Function_float Function %132 %acc = OpVariable %_ptr_Function__arr_float_uint_16 Function %149
%BCached = OpVariable %_ptr_Function__arr_float_RowPerThread Function %136 %ACached = OpVariable %_ptr_Function_float Function %121
%index = OpVariable %_ptr_Function_uint Function %139 %BCached = OpVariable %_ptr_Function__arr_float_RowPerThread Function %155
%t = OpVariable %_ptr_Function_uint Function %139 %index = OpVariable %_ptr_Function_uint Function %104
%innerRow = OpVariable %_ptr_Function_uint Function %139 %t = OpVariable %_ptr_Function_uint Function %104
%innerCol = OpVariable %_ptr_Function_uint Function %139 %innerRow = OpVariable %_ptr_Function_uint Function %104
%innerRow_0 = OpVariable %_ptr_Function_uint Function %139 %innerCol = OpVariable %_ptr_Function_uint Function %104
%innerCol_0 = OpVariable %_ptr_Function_uint Function %139 %innerRow_0 = OpVariable %_ptr_Function_uint Function %104
%k = OpVariable %_ptr_Function_uint Function %139 %innerCol_0 = OpVariable %_ptr_Function_uint Function %104
%inner = OpVariable %_ptr_Function_uint Function %139 %k = OpVariable %_ptr_Function_uint Function %104
%innerRow_1 = OpVariable %_ptr_Function_uint Function %139 %inner = OpVariable %_ptr_Function_uint Function %104
%innerCol_1 = OpVariable %_ptr_Function_uint Function %139 %innerRow_1 = OpVariable %_ptr_Function_uint Function %104
%innerRow_2 = OpVariable %_ptr_Function_uint Function %139 %innerCol_1 = OpVariable %_ptr_Function_uint Function %104
%innerCol_2 = OpVariable %_ptr_Function_uint Function %139 %innerRow_2 = OpVariable %_ptr_Function_uint Function %104
%innerCol_2 = OpVariable %_ptr_Function_uint Function %104
%101 = OpLoad %uint %tint_symbol_2 %101 = OpLoad %uint %tint_symbol_2
%102 = OpIEqual %bool %101 %uint_0 OpStore %idx %101
OpSelectionMerge %103 None OpBranch %105
OpBranchConditional %102 %104 %103 %105 = OpLabel
%104 = OpLabel OpLoopMerge %106 %107 None
OpStore %mm_Asub %105 OpBranch %108
OpStore %mm_Bsub %105 %108 = OpLabel
OpBranch %103 %110 = OpLoad %uint %idx
%103 = OpLabel %112 = OpULessThan %bool %110 %uint_4096
%109 = OpLogicalNot %bool %112
OpSelectionMerge %113 None
OpBranchConditional %109 %114 %113
%114 = OpLabel
OpBranch %106
%113 = OpLabel
%115 = OpLoad %uint %idx
%116 = OpUDiv %uint %115 %TileAOuter
%117 = OpLoad %uint %idx
%118 = OpUMod %uint %117 %TileAOuter
%120 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %116 %118
OpStore %120 %121
%122 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %116 %118
OpStore %122 %121
OpBranch %107
%107 = OpLabel
%123 = OpLoad %uint %idx
%125 = OpIAdd %uint %123 %uint_256
OpStore %idx %125
OpBranch %105
%106 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%108 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1 %128 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
%109 = OpLoad %uint %108 %129 = OpLoad %uint %128
%110 = OpIMul %uint %109 %RowPerThread %130 = OpIMul %uint %129 %RowPerThread
%111 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0 %131 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%112 = OpLoad %uint %111 %132 = OpLoad %uint %131
%113 = OpIMul %uint %112 %RowPerThread %133 = OpIMul %uint %132 %RowPerThread
%114 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_1 %134 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_1
%115 = OpLoad %uint %114 %135 = OpLoad %uint %134
%116 = OpIMul %uint %115 %RowPerThread %136 = OpIMul %uint %135 %RowPerThread
%117 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0 %137 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%118 = OpLoad %uint %117 %138 = OpLoad %uint %137
%119 = OpIMul %uint %118 %RowPerThread %139 = OpIMul %uint %138 %RowPerThread
%120 = OpAccessChain %_ptr_Uniform_uint %uniforms %uint_1 %140 = OpAccessChain %_ptr_Uniform_uint %uniforms %uint_1
%121 = OpLoad %uint %120 %141 = OpLoad %uint %140
%122 = OpISub %uint %121 %uint_1 %142 = OpISub %uint %141 %uint_1
%123 = OpUDiv %uint %122 %TileAOuter %143 = OpUDiv %uint %142 %TileAOuter
%124 = OpIAdd %uint %123 %uint_1 %144 = OpIAdd %uint %143 %uint_1
OpStore %index %uint_0 OpStore %index %uint_0
OpBranch %140 OpBranch %157
%140 = OpLabel %157 = OpLabel
OpLoopMerge %141 %142 None OpLoopMerge %158 %159 None
OpBranch %143 OpBranch %160
%143 = OpLabel %160 = OpLabel
%145 = OpLoad %uint %index %162 = OpLoad %uint %index
%146 = OpIMul %uint %RowPerThread %RowPerThread %163 = OpIMul %uint %RowPerThread %RowPerThread
%147 = OpULessThan %bool %145 %146 %164 = OpULessThan %bool %162 %163
%144 = OpLogicalNot %bool %147 %161 = OpLogicalNot %bool %164
OpSelectionMerge %148 None OpSelectionMerge %165 None
OpBranchConditional %144 %149 %148 OpBranchConditional %161 %166 %165
%149 = OpLabel
OpBranch %141
%148 = OpLabel
%150 = OpLoad %uint %index
%151 = OpAccessChain %_ptr_Function_float %acc %150
OpStore %151 %float_0
OpBranch %142
%142 = OpLabel
%152 = OpLoad %uint %index
%153 = OpIAdd %uint %152 %uint_1
OpStore %index %153
OpBranch %140
%141 = OpLabel
%154 = OpUDiv %uint %TileAOuter %uint_16
%155 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%156 = OpLoad %uint %155
%157 = OpIMul %uint %156 %154
%158 = OpUDiv %uint %TileAOuter %uint_16
%159 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
%160 = OpLoad %uint %159
%161 = OpIMul %uint %160 %158
OpStore %t %uint_0
OpBranch %163
%163 = OpLabel
OpLoopMerge %164 %165 None
OpBranch %166
%166 = OpLabel %166 = OpLabel
%168 = OpLoad %uint %t OpBranch %158
%169 = OpULessThan %bool %168 %124 %165 = OpLabel
%167 = OpLogicalNot %bool %169 %167 = OpLoad %uint %index
OpSelectionMerge %170 None %168 = OpAccessChain %_ptr_Function_float %acc %167
OpBranchConditional %167 %171 %170 OpStore %168 %float_0
%171 = OpLabel OpBranch %159
OpBranch %164 %159 = OpLabel
%170 = OpLabel %169 = OpLoad %uint %index
OpStore %innerRow %uint_0 %170 = OpIAdd %uint %169 %uint_1
OpBranch %173 OpStore %index %170
%173 = OpLabel OpBranch %157
OpLoopMerge %174 %175 None %158 = OpLabel
OpBranch %176 %171 = OpUDiv %uint %TileAOuter %uint_16
%176 = OpLabel %172 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%178 = OpLoad %uint %innerRow %173 = OpLoad %uint %172
%179 = OpULessThan %bool %178 %RowPerThread %174 = OpIMul %uint %173 %171
%177 = OpLogicalNot %bool %179 %175 = OpUDiv %uint %TileAOuter %uint_16
OpSelectionMerge %180 None %176 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
OpBranchConditional %177 %181 %180 %177 = OpLoad %uint %176
%181 = OpLabel %178 = OpIMul %uint %177 %175
OpBranch %174 OpStore %t %uint_0
OpBranch %180
%180 = OpLabel %180 = OpLabel
OpStore %innerCol %uint_0 OpLoopMerge %181 %182 None
OpBranch %183 OpBranch %183
%183 = OpLabel %183 = OpLabel
OpLoopMerge %184 %185 None %185 = OpLoad %uint %t
OpBranch %186 %186 = OpULessThan %bool %185 %144
%186 = OpLabel %184 = OpLogicalNot %bool %186
%188 = OpLoad %uint %innerCol OpSelectionMerge %187 None
%189 = OpULessThan %bool %188 %154 OpBranchConditional %184 %188 %187
%187 = OpLogicalNot %bool %189 %188 = OpLabel
OpSelectionMerge %190 None OpBranch %181
OpBranchConditional %187 %191 %190 %187 = OpLabel
%191 = OpLabel OpStore %innerRow %uint_0
OpBranch %184 OpBranch %190
%190 = OpLabel %190 = OpLabel
%192 = OpLoad %uint %innerRow OpLoopMerge %191 %192 None
%193 = OpIAdd %uint %110 %192 OpBranch %193
%194 = OpLoad %uint %innerCol %193 = OpLabel
%195 = OpIAdd %uint %157 %194 %195 = OpLoad %uint %innerRow
%197 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %193 %195 %196 = OpULessThan %bool %195 %RowPerThread
%199 = OpLoad %uint %innerRow %194 = OpLogicalNot %bool %196
%200 = OpIAdd %uint %116 %199 OpSelectionMerge %197 None
%201 = OpLoad %uint %t OpBranchConditional %194 %198 %197
%202 = OpIMul %uint %201 %TileAOuter %198 = OpLabel
%203 = OpIAdd %uint %202 %195 OpBranch %191
%198 = OpFunctionCall %float %mm_readA %200 %203 %197 = OpLabel
OpStore %197 %198 OpStore %innerCol %uint_0
OpBranch %185 OpBranch %200
%185 = OpLabel %200 = OpLabel
%204 = OpLoad %uint %innerCol OpLoopMerge %201 %202 None
%205 = OpIAdd %uint %204 %uint_1 OpBranch %203
OpStore %innerCol %205 %203 = OpLabel
OpBranch %183 %205 = OpLoad %uint %innerCol
%184 = OpLabel %206 = OpULessThan %bool %205 %171
OpBranch %175 %204 = OpLogicalNot %bool %206
%175 = OpLabel OpSelectionMerge %207 None
%206 = OpLoad %uint %innerRow OpBranchConditional %204 %208 %207
%207 = OpIAdd %uint %206 %uint_1 %208 = OpLabel
OpStore %innerRow %207 OpBranch %201
OpBranch %173 %207 = OpLabel
%174 = OpLabel %209 = OpLoad %uint %innerRow
%210 = OpIAdd %uint %130 %209
%211 = OpLoad %uint %innerCol
%212 = OpIAdd %uint %174 %211
%213 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %210 %212
%215 = OpLoad %uint %innerRow
%216 = OpIAdd %uint %136 %215
%217 = OpLoad %uint %t
%218 = OpIMul %uint %217 %TileAOuter
%219 = OpIAdd %uint %218 %212
%214 = OpFunctionCall %float %mm_readA %216 %219
OpStore %213 %214
OpBranch %202
%202 = OpLabel
%220 = OpLoad %uint %innerCol
%221 = OpIAdd %uint %220 %uint_1
OpStore %innerCol %221
OpBranch %200
%201 = OpLabel
OpBranch %192
%192 = OpLabel
%222 = OpLoad %uint %innerRow
%223 = OpIAdd %uint %222 %uint_1
OpStore %innerRow %223
OpBranch %190
%191 = OpLabel
OpStore %innerRow_0 %uint_0 OpStore %innerRow_0 %uint_0
OpBranch %209 OpBranch %225
%209 = OpLabel %225 = OpLabel
OpLoopMerge %210 %211 None OpLoopMerge %226 %227 None
OpBranch %212 OpBranch %228
%212 = OpLabel %228 = OpLabel
%214 = OpLoad %uint %innerRow_0 %230 = OpLoad %uint %innerRow_0
%215 = OpULessThan %bool %214 %158 %231 = OpULessThan %bool %230 %175
%213 = OpLogicalNot %bool %215 %229 = OpLogicalNot %bool %231
OpSelectionMerge %216 None OpSelectionMerge %232 None
OpBranchConditional %213 %217 %216 OpBranchConditional %229 %233 %232
%217 = OpLabel %233 = OpLabel
OpBranch %210 OpBranch %226
%216 = OpLabel %232 = OpLabel
OpStore %innerCol_0 %uint_0 OpStore %innerCol_0 %uint_0
OpBranch %219 OpBranch %235
%219 = OpLabel %235 = OpLabel
OpLoopMerge %220 %221 None OpLoopMerge %236 %237 None
OpBranch %222 OpBranch %238
%222 = OpLabel %238 = OpLabel
%224 = OpLoad %uint %innerCol_0
%225 = OpULessThan %bool %224 %RowPerThread
%223 = OpLogicalNot %bool %225
OpSelectionMerge %226 None
OpBranchConditional %223 %227 %226
%227 = OpLabel
OpBranch %220
%226 = OpLabel
%228 = OpLoad %uint %innerRow_0
%229 = OpIAdd %uint %161 %228
%230 = OpLoad %uint %innerCol_0
%231 = OpIAdd %uint %113 %230
%232 = OpLoad %uint %innerCol_0
%233 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %232 %231
%235 = OpLoad %uint %t
%236 = OpIMul %uint %235 %TileAOuter
%237 = OpIAdd %uint %236 %229
%238 = OpLoad %uint %innerCol_0
%239 = OpIAdd %uint %119 %238
%234 = OpFunctionCall %float %mm_readB %237 %239
OpStore %233 %234
OpBranch %221
%221 = OpLabel
%240 = OpLoad %uint %innerCol_0 %240 = OpLoad %uint %innerCol_0
%241 = OpIAdd %uint %240 %uint_1 %241 = OpULessThan %bool %240 %RowPerThread
OpStore %innerCol_0 %241 %239 = OpLogicalNot %bool %241
OpBranch %219 OpSelectionMerge %242 None
%220 = OpLabel OpBranchConditional %239 %243 %242
OpBranch %211 %243 = OpLabel
%211 = OpLabel OpBranch %236
%242 = OpLoad %uint %innerRow_0 %242 = OpLabel
%243 = OpIAdd %uint %242 %uint_1 %244 = OpLoad %uint %innerRow_0
OpStore %innerRow_0 %243 %245 = OpIAdd %uint %178 %244
OpBranch %209 %246 = OpLoad %uint %innerCol_0
%210 = OpLabel %247 = OpIAdd %uint %133 %246
%248 = OpLoad %uint %innerCol_0
%249 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %248 %247
%251 = OpLoad %uint %t
%252 = OpIMul %uint %251 %TileAOuter
%253 = OpIAdd %uint %252 %245
%254 = OpLoad %uint %innerCol_0
%255 = OpIAdd %uint %139 %254
%250 = OpFunctionCall %float %mm_readB %253 %255
OpStore %249 %250
OpBranch %237
%237 = OpLabel
%256 = OpLoad %uint %innerCol_0
%257 = OpIAdd %uint %256 %uint_1
OpStore %innerCol_0 %257
OpBranch %235
%236 = OpLabel
OpBranch %227
%227 = OpLabel
%258 = OpLoad %uint %innerRow_0
%259 = OpIAdd %uint %258 %uint_1
OpStore %innerRow_0 %259
OpBranch %225
%226 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
OpStore %k %uint_0 OpStore %k %uint_0
OpBranch %246 OpBranch %262
%246 = OpLabel %262 = OpLabel
OpLoopMerge %247 %248 None OpLoopMerge %263 %264 None
OpBranch %249 OpBranch %265
%249 = OpLabel %265 = OpLabel
%251 = OpLoad %uint %k
%252 = OpULessThan %bool %251 %TileAOuter
%250 = OpLogicalNot %bool %252
OpSelectionMerge %253 None
OpBranchConditional %250 %254 %253
%254 = OpLabel
OpBranch %247
%253 = OpLabel
OpStore %inner %uint_0
OpBranch %256
%256 = OpLabel
OpLoopMerge %257 %258 None
OpBranch %259
%259 = OpLabel
%261 = OpLoad %uint %inner
%262 = OpULessThan %bool %261 %RowPerThread
%260 = OpLogicalNot %bool %262
OpSelectionMerge %263 None
OpBranchConditional %260 %264 %263
%264 = OpLabel
OpBranch %257
%263 = OpLabel
%265 = OpLoad %uint %inner
%266 = OpAccessChain %_ptr_Function_float %BCached %265
%267 = OpLoad %uint %k %267 = OpLoad %uint %k
%268 = OpLoad %uint %inner %268 = OpULessThan %bool %267 %TileAOuter
%269 = OpIAdd %uint %113 %268 %266 = OpLogicalNot %bool %268
%270 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %267 %269 OpSelectionMerge %269 None
%271 = OpLoad %float %270 OpBranchConditional %266 %270 %269
OpStore %266 %271 %270 = OpLabel
OpBranch %258 OpBranch %263
%258 = OpLabel %269 = OpLabel
%272 = OpLoad %uint %inner OpStore %inner %uint_0
%273 = OpIAdd %uint %272 %uint_1 OpBranch %272
OpStore %inner %273 %272 = OpLabel
OpBranch %256 OpLoopMerge %273 %274 None
%257 = OpLabel
OpStore %innerRow_1 %uint_0
OpBranch %275 OpBranch %275
%275 = OpLabel %275 = OpLabel
OpLoopMerge %276 %277 None %277 = OpLoad %uint %inner
OpBranch %278 %278 = OpULessThan %bool %277 %RowPerThread
%278 = OpLabel %276 = OpLogicalNot %bool %278
%280 = OpLoad %uint %innerRow_1 OpSelectionMerge %279 None
%281 = OpULessThan %bool %280 %RowPerThread OpBranchConditional %276 %280 %279
%279 = OpLogicalNot %bool %281 %280 = OpLabel
OpSelectionMerge %282 None OpBranch %273
OpBranchConditional %279 %283 %282 %279 = OpLabel
%283 = OpLabel %281 = OpLoad %uint %inner
OpBranch %276 %282 = OpAccessChain %_ptr_Function_float %BCached %281
%282 = OpLabel %283 = OpLoad %uint %k
%284 = OpLoad %uint %innerRow_1 %284 = OpLoad %uint %inner
%285 = OpIAdd %uint %110 %284 %285 = OpIAdd %uint %133 %284
%286 = OpLoad %uint %k %286 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %283 %285
%287 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %285 %286 %287 = OpLoad %float %286
%288 = OpLoad %float %287 OpStore %282 %287
OpStore %ACached %288 OpBranch %274
%274 = OpLabel
%288 = OpLoad %uint %inner
%289 = OpIAdd %uint %288 %uint_1
OpStore %inner %289
OpBranch %272
%273 = OpLabel
OpStore %innerRow_1 %uint_0
OpBranch %291
%291 = OpLabel
OpLoopMerge %292 %293 None
OpBranch %294
%294 = OpLabel
%296 = OpLoad %uint %innerRow_1
%297 = OpULessThan %bool %296 %RowPerThread
%295 = OpLogicalNot %bool %297
OpSelectionMerge %298 None
OpBranchConditional %295 %299 %298
%299 = OpLabel
OpBranch %292
%298 = OpLabel
%300 = OpLoad %uint %innerRow_1
%301 = OpIAdd %uint %130 %300
%302 = OpLoad %uint %k
%303 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %301 %302
%304 = OpLoad %float %303
OpStore %ACached %304
OpStore %innerCol_1 %uint_0 OpStore %innerCol_1 %uint_0
OpBranch %290 OpBranch %306
%290 = OpLabel %306 = OpLabel
OpLoopMerge %291 %292 None OpLoopMerge %307 %308 None
OpBranch %309
%309 = OpLabel
%311 = OpLoad %uint %innerCol_1
%312 = OpULessThan %bool %311 %RowPerThread
%310 = OpLogicalNot %bool %312
OpSelectionMerge %313 None
OpBranchConditional %310 %314 %313
%314 = OpLabel
OpBranch %307
%313 = OpLabel
%315 = OpLoad %uint %innerRow_1
%316 = OpIMul %uint %315 %RowPerThread
%317 = OpLoad %uint %innerCol_1
%318 = OpIAdd %uint %316 %317
%319 = OpAccessChain %_ptr_Function_float %acc %318
%320 = OpAccessChain %_ptr_Function_float %acc %318
%321 = OpLoad %float %320
%322 = OpLoad %float %ACached
%323 = OpLoad %uint %innerCol_1
%324 = OpAccessChain %_ptr_Function_float %BCached %323
%325 = OpLoad %float %324
%326 = OpFMul %float %322 %325
%327 = OpFAdd %float %321 %326
OpStore %319 %327
OpBranch %308
%308 = OpLabel
%328 = OpLoad %uint %innerCol_1
%329 = OpIAdd %uint %328 %uint_1
OpStore %innerCol_1 %329
OpBranch %306
%307 = OpLabel
OpBranch %293 OpBranch %293
%293 = OpLabel %293 = OpLabel
%295 = OpLoad %uint %innerCol_1 %330 = OpLoad %uint %innerRow_1
%296 = OpULessThan %bool %295 %RowPerThread %331 = OpIAdd %uint %330 %uint_1
%294 = OpLogicalNot %bool %296 OpStore %innerRow_1 %331
OpSelectionMerge %297 None
OpBranchConditional %294 %298 %297
%298 = OpLabel
OpBranch %291 OpBranch %291
%297 = OpLabel
%299 = OpLoad %uint %innerRow_1
%300 = OpIMul %uint %299 %RowPerThread
%301 = OpLoad %uint %innerCol_1
%302 = OpIAdd %uint %300 %301
%303 = OpAccessChain %_ptr_Function_float %acc %302
%304 = OpAccessChain %_ptr_Function_float %acc %302
%305 = OpLoad %float %304
%306 = OpLoad %float %ACached
%307 = OpLoad %uint %innerCol_1
%308 = OpAccessChain %_ptr_Function_float %BCached %307
%309 = OpLoad %float %308
%310 = OpFMul %float %306 %309
%311 = OpFAdd %float %305 %310
OpStore %303 %311
OpBranch %292
%292 = OpLabel %292 = OpLabel
%312 = OpLoad %uint %innerCol_1 OpBranch %264
%313 = OpIAdd %uint %312 %uint_1 %264 = OpLabel
OpStore %innerCol_1 %313 %332 = OpLoad %uint %k
OpBranch %290 %333 = OpIAdd %uint %332 %uint_1
%291 = OpLabel OpStore %k %333
OpBranch %277 OpBranch %262
%277 = OpLabel %263 = OpLabel
%314 = OpLoad %uint %innerRow_1
%315 = OpIAdd %uint %314 %uint_1
OpStore %innerRow_1 %315
OpBranch %275
%276 = OpLabel
OpBranch %248
%248 = OpLabel
%316 = OpLoad %uint %k
%317 = OpIAdd %uint %316 %uint_1
OpStore %k %317
OpBranch %246
%247 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
OpBranch %165 OpBranch %182
%165 = OpLabel %182 = OpLabel
%319 = OpLoad %uint %t %335 = OpLoad %uint %t
%320 = OpIAdd %uint %319 %uint_1 %336 = OpIAdd %uint %335 %uint_1
OpStore %t %320 OpStore %t %336
OpBranch %163 OpBranch %180
%164 = OpLabel %181 = OpLabel
OpStore %innerRow_2 %uint_0 OpStore %innerRow_2 %uint_0
OpBranch %322 OpBranch %338
%322 = OpLabel %338 = OpLabel
OpLoopMerge %323 %324 None OpLoopMerge %339 %340 None
OpBranch %325 OpBranch %341
%325 = OpLabel %341 = OpLabel
%327 = OpLoad %uint %innerRow_2 %343 = OpLoad %uint %innerRow_2
%328 = OpULessThan %bool %327 %RowPerThread %344 = OpULessThan %bool %343 %RowPerThread
%326 = OpLogicalNot %bool %328 %342 = OpLogicalNot %bool %344
OpSelectionMerge %329 None OpSelectionMerge %345 None
OpBranchConditional %326 %330 %329 OpBranchConditional %342 %346 %345
%330 = OpLabel %346 = OpLabel
OpBranch %323 OpBranch %339
%329 = OpLabel %345 = OpLabel
OpStore %innerCol_2 %uint_0 OpStore %innerCol_2 %uint_0
OpBranch %332 OpBranch %348
%332 = OpLabel %348 = OpLabel
OpLoopMerge %333 %334 None OpLoopMerge %349 %350 None
OpBranch %335 OpBranch %351
%335 = OpLabel %351 = OpLabel
%337 = OpLoad %uint %innerCol_2 %353 = OpLoad %uint %innerCol_2
%338 = OpULessThan %bool %337 %RowPerThread %354 = OpULessThan %bool %353 %RowPerThread
%336 = OpLogicalNot %bool %338 %352 = OpLogicalNot %bool %354
OpSelectionMerge %339 None OpSelectionMerge %355 None
OpBranchConditional %336 %340 %339 OpBranchConditional %352 %356 %355
%356 = OpLabel
OpBranch %349
%355 = OpLabel
%357 = OpLoad %uint %innerRow_2
%358 = OpIMul %uint %357 %RowPerThread
%359 = OpLoad %uint %innerCol_2
%360 = OpIAdd %uint %358 %359
%362 = OpLoad %uint %innerRow_2
%363 = OpIAdd %uint %136 %362
%364 = OpLoad %uint %innerCol_2
%365 = OpIAdd %uint %139 %364
%366 = OpAccessChain %_ptr_Function_float %acc %360
%367 = OpLoad %float %366
%361 = OpFunctionCall %void %mm_write %363 %365 %367
OpBranch %350
%350 = OpLabel
%368 = OpLoad %uint %innerCol_2
%369 = OpIAdd %uint %368 %uint_1
OpStore %innerCol_2 %369
OpBranch %348
%349 = OpLabel
OpBranch %340
%340 = OpLabel %340 = OpLabel
OpBranch %333 %370 = OpLoad %uint %innerRow_2
%371 = OpIAdd %uint %370 %uint_1
OpStore %innerRow_2 %371
OpBranch %338
%339 = OpLabel %339 = OpLabel
%341 = OpLoad %uint %innerRow_2
%342 = OpIMul %uint %341 %RowPerThread
%343 = OpLoad %uint %innerCol_2
%344 = OpIAdd %uint %342 %343
%346 = OpLoad %uint %innerRow_2
%347 = OpIAdd %uint %116 %346
%348 = OpLoad %uint %innerCol_2
%349 = OpIAdd %uint %119 %348
%350 = OpAccessChain %_ptr_Function_float %acc %344
%351 = OpLoad %float %350
%345 = OpFunctionCall %void %mm_write %347 %349 %351
OpBranch %334
%334 = OpLabel
%352 = OpLoad %uint %innerCol_2
%353 = OpIAdd %uint %352 %uint_1
OpStore %innerCol_2 %353
OpBranch %332
%333 = OpLabel
OpBranch %324
%324 = OpLabel
%354 = OpLoad %uint %innerRow_2
%355 = OpIAdd %uint %354 %uint_1
OpStore %innerRow_2 %355
OpBranch %322
%323 = OpLabel
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -21,15 +21,11 @@ void main(tint_symbol_1 tint_symbol) {
const uint3 WorkGroupID = tint_symbol.WorkGroupID; const uint3 WorkGroupID = tint_symbol.WorkGroupID;
const uint3 LocalInvocationID = tint_symbol.LocalInvocationID; const uint3 LocalInvocationID = tint_symbol.LocalInvocationID;
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
{ for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
for(int i_1 = 0; (i_1 < 4); i_1 = (i_1 + 1)) { const uint i_1 = (idx / 256u);
{ const uint i_2 = (idx % 256u);
for(int i_2 = 0; (i_2 < 256); i_2 = (i_2 + 1)) { tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
}
}
}
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -15,15 +15,16 @@ struct tint_array_wrapper {
tint_array_wrapper_1 arr[4]; tint_array_wrapper_1 arr[4];
}; };
kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture(1)]], sampler tint_symbol_5 [[sampler(0)]], texture2d<float, access::write> tint_symbol_6 [[texture(2)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Params& params [[buffer(1)]], constant Flip& flip [[buffer(3)]]) { kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_3 [[texture(1)]], sampler tint_symbol_4 [[sampler(0)]], texture2d<float, access::write> tint_symbol_5 [[texture(2)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Params& params [[buffer(1)]], constant Flip& flip [[buffer(3)]]) {
threadgroup tint_array_wrapper tint_symbol_3; threadgroup tint_array_wrapper tint_symbol_2;
if ((local_invocation_index == 0u)) { for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
tint_array_wrapper const tint_symbol_2 = {.arr={}}; uint const i_1 = (idx / 256u);
tint_symbol_3 = tint_symbol_2; uint const i_2 = (idx % 256u);
tint_symbol_2.arr[i_1].arr[i_2] = float3();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint const filterOffset = ((params.filterDim - 1u) / 2u); uint const filterOffset = ((params.filterDim - 1u) / 2u);
int2 const dims = int2(tint_symbol_4.get_width(0), tint_symbol_4.get_height(0)); int2 const dims = int2(tint_symbol_3.get_width(0), tint_symbol_3.get_height(0));
int2 const baseIndex = (int2(((WorkGroupID.xy * uint2(params.blockDim, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u)))) - int2(int(filterOffset), 0)); int2 const baseIndex = (int2(((WorkGroupID.xy * uint2(params.blockDim, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u)))) - int2(int(filterOffset), 0));
for(uint r = 0u; (r < 4u); r = (r + 1u)) { for(uint r = 0u; (r < 4u); r = (r + 1u)) {
for(uint c = 0u; (c < 4u); c = (c + 1u)) { for(uint c = 0u; (c < 4u); c = (c + 1u)) {
@ -31,7 +32,7 @@ kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture
if ((flip.value != 0u)) { if ((flip.value != 0u)) {
loadIndex = loadIndex.yx; loadIndex = loadIndex.yx;
} }
tint_symbol_3.arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_4.sample(tint_symbol_5, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb; tint_symbol_2.arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_3.sample(tint_symbol_4, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb;
} }
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
@ -46,9 +47,9 @@ kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture
float3 acc = float3(0.0f, 0.0f, 0.0f); float3 acc = float3(0.0f, 0.0f, 0.0f);
for(uint f = 0u; (f < params.filterDim); f = (f + 1u)) { for(uint f = 0u; (f < params.filterDim); f = (f + 1u)) {
uint i = ((center + f) - filterOffset); uint i = ((center + f) - filterOffset);
acc = (acc + ((1.0f / float(params.filterDim)) * tint_symbol_3.arr[r].arr[i])); acc = (acc + ((1.0f / float(params.filterDim)) * tint_symbol_2.arr[r].arr[i]));
} }
tint_symbol_6.write(float4(acc, 1.0f), uint2(writeIndex)); tint_symbol_5.write(float4(acc, 1.0f), uint2(writeIndex));
} }
} }
} }

View File

@ -1,7 +1,7 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 239 ; Bound: 254
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpCapability ImageQuery OpCapability ImageQuery
@ -23,6 +23,7 @@
OpName %tint_symbol_1 "tint_symbol_1" OpName %tint_symbol_1 "tint_symbol_1"
OpName %tint_symbol_2 "tint_symbol_2" OpName %tint_symbol_2 "tint_symbol_2"
OpName %main "main" OpName %main "main"
OpName %idx "idx"
OpName %r "r" OpName %r "r"
OpName %c "c" OpName %c "c"
OpName %loadIndex "loadIndex" OpName %loadIndex "loadIndex"
@ -87,288 +88,310 @@
%tint_symbol_2 = OpVariable %_ptr_Input_uint Input %tint_symbol_2 = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid %void = OpTypeVoid
%31 = OpTypeFunction %void %31 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0 %_ptr_Function_uint = OpTypePointer Function %uint
%38 = OpConstantNull %uint
%uint_1024 = OpConstant %uint 1024
%bool = OpTypeBool %bool = OpTypeBool
%41 = OpConstantNull %_arr__arr_v3float_uint_256_uint_4 %_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
%56 = OpConstantNull %v3float
%uint_64 = OpConstant %uint 64
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint %_ptr_Uniform_uint = OpTypePointer Uniform %uint
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%int = OpTypeInt 32 1 %int = OpTypeInt 32 1
%v2int = OpTypeVector %int 2 %v2int = OpTypeVector %int 2
%int_0 = OpConstant %int 0 %int_0 = OpConstant %int 0
%v2uint = OpTypeVector %uint 2 %v2uint = OpTypeVector %uint 2
%66 = OpConstantComposite %v2uint %uint_4 %uint_1 %85 = OpConstantComposite %v2uint %uint_4 %uint_1
%_ptr_Function_uint = OpTypePointer Function %uint
%74 = OpConstantNull %uint
%_ptr_Function_v2int = OpTypePointer Function %v2int %_ptr_Function_v2int = OpTypePointer Function %v2int
%102 = OpConstantNull %v2int %119 = OpConstantNull %v2int
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
%v4float = OpTypeVector %float 4 %v4float = OpTypeVector %float 4
%122 = OpTypeSampledImage %10 %138 = OpTypeSampledImage %10
%v2float = OpTypeVector %float 2 %v2float = OpTypeVector %float 2
%float_0_25 = OpConstant %float 0.25 %float_0_25 = OpConstant %float 0.25
%128 = OpConstantComposite %v2float %float_0_25 %float_0_25 %144 = OpConstantComposite %v2float %float_0_25 %float_0_25
%float_0 = OpConstant %float 0 %float_0 = OpConstant %float 0
%v2bool = OpTypeVector %bool 2 %v2bool = OpTypeVector %bool 2
%193 = OpConstantComposite %v3float %float_0 %float_0 %float_0 %209 = OpConstantComposite %v3float %float_0 %float_0 %float_0
%_ptr_Function_v3float = OpTypePointer Function %v3float %_ptr_Function_v3float = OpTypePointer Function %v3float
%196 = OpConstantNull %v3float
%float_1 = OpConstant %float 1 %float_1 = OpConstant %float 1
%main = OpFunction %void None %31 %main = OpFunction %void None %31
%34 = OpLabel %34 = OpLabel
%r = OpVariable %_ptr_Function_uint Function %74 %idx = OpVariable %_ptr_Function_uint Function %38
%c = OpVariable %_ptr_Function_uint Function %74 %r = OpVariable %_ptr_Function_uint Function %38
%loadIndex = OpVariable %_ptr_Function_v2int Function %102 %c = OpVariable %_ptr_Function_uint Function %38
%r_0 = OpVariable %_ptr_Function_uint Function %74 %loadIndex = OpVariable %_ptr_Function_v2int Function %119
%c_0 = OpVariable %_ptr_Function_uint Function %74 %r_0 = OpVariable %_ptr_Function_uint Function %38
%writeIndex = OpVariable %_ptr_Function_v2int Function %102 %c_0 = OpVariable %_ptr_Function_uint Function %38
%acc = OpVariable %_ptr_Function_v3float Function %196 %writeIndex = OpVariable %_ptr_Function_v2int Function %119
%f = OpVariable %_ptr_Function_uint Function %74 %acc = OpVariable %_ptr_Function_v3float Function %56
%i = OpVariable %_ptr_Function_uint Function %74 %f = OpVariable %_ptr_Function_uint Function %38
%i = OpVariable %_ptr_Function_uint Function %38
%35 = OpLoad %uint %tint_symbol_2 %35 = OpLoad %uint %tint_symbol_2
%37 = OpIEqual %bool %35 %uint_0 OpStore %idx %35
OpSelectionMerge %39 None
OpBranchConditional %37 %40 %39
%40 = OpLabel
OpStore %tile %41
OpBranch %39 OpBranch %39
%39 = OpLabel %39 = OpLabel
OpLoopMerge %40 %41 None
OpBranch %42
%42 = OpLabel
%44 = OpLoad %uint %idx
%46 = OpULessThan %bool %44 %uint_1024
%43 = OpLogicalNot %bool %46
OpSelectionMerge %48 None
OpBranchConditional %43 %49 %48
%49 = OpLabel
OpBranch %40
%48 = OpLabel
%50 = OpLoad %uint %idx
%51 = OpUDiv %uint %50 %uint_256
%52 = OpLoad %uint %idx
%53 = OpUMod %uint %52 %uint_256
%55 = OpAccessChain %_ptr_Workgroup_v3float %tile %51 %53
OpStore %55 %56
OpBranch %41
%41 = OpLabel
%57 = OpLoad %uint %idx
%59 = OpIAdd %uint %57 %uint_64
OpStore %idx %59
OpBranch %39
%40 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%46 = OpAccessChain %_ptr_Uniform_uint %params %uint_0 %65 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%47 = OpLoad %uint %46 %66 = OpLoad %uint %65
%49 = OpISub %uint %47 %uint_1 %68 = OpISub %uint %66 %uint_1
%50 = OpUDiv %uint %49 %uint_2 %69 = OpUDiv %uint %68 %uint_2
%54 = OpLoad %10 %inputTex %73 = OpLoad %10 %inputTex
%51 = OpImageQuerySizeLod %v2int %54 %int_0 %70 = OpImageQuerySizeLod %v2int %73 %int_0
%58 = OpLoad %v3uint %tint_symbol %77 = OpLoad %v3uint %tint_symbol
%59 = OpVectorShuffle %v2uint %58 %58 0 1 %78 = OpVectorShuffle %v2uint %77 %77 0 1
%60 = OpAccessChain %_ptr_Uniform_uint %params %uint_1 %79 = OpAccessChain %_ptr_Uniform_uint %params %uint_1
%61 = OpLoad %uint %60 %80 = OpLoad %uint %79
%62 = OpCompositeConstruct %v2uint %61 %uint_4 %81 = OpCompositeConstruct %v2uint %80 %uint_4
%63 = OpIMul %v2uint %59 %62 %82 = OpIMul %v2uint %78 %81
%64 = OpLoad %v3uint %tint_symbol_1 %83 = OpLoad %v3uint %tint_symbol_1
%65 = OpVectorShuffle %v2uint %64 %64 0 1 %84 = OpVectorShuffle %v2uint %83 %83 0 1
%67 = OpIMul %v2uint %65 %66 %86 = OpIMul %v2uint %84 %85
%68 = OpIAdd %v2uint %63 %67 %87 = OpIAdd %v2uint %82 %86
%56 = OpBitcast %v2int %68 %75 = OpBitcast %v2int %87
%69 = OpBitcast %int %50 %88 = OpBitcast %int %69
%70 = OpCompositeConstruct %v2int %69 %int_0 %89 = OpCompositeConstruct %v2int %88 %int_0
%71 = OpISub %v2int %56 %70 %90 = OpISub %v2int %75 %89
OpStore %r %uint_0 OpStore %r %uint_0
OpBranch %75 OpBranch %92
%75 = OpLabel
OpLoopMerge %76 %77 None
OpBranch %78
%78 = OpLabel
%80 = OpLoad %uint %r
%81 = OpULessThan %bool %80 %uint_4
%79 = OpLogicalNot %bool %81
OpSelectionMerge %82 None
OpBranchConditional %79 %83 %82
%83 = OpLabel
OpBranch %76
%82 = OpLabel
OpStore %c %uint_0
OpBranch %85
%85 = OpLabel
OpLoopMerge %86 %87 None
OpBranch %88
%88 = OpLabel
%90 = OpLoad %uint %c
%91 = OpULessThan %bool %90 %uint_4
%89 = OpLogicalNot %bool %91
OpSelectionMerge %92 None
OpBranchConditional %89 %93 %92
%93 = OpLabel
OpBranch %86
%92 = OpLabel %92 = OpLabel
%95 = OpLoad %uint %c OpLoopMerge %93 %94 None
%94 = OpBitcast %int %95 OpBranch %95
%95 = OpLabel
%97 = OpLoad %uint %r %97 = OpLoad %uint %r
%96 = OpBitcast %int %97 %98 = OpULessThan %bool %97 %uint_4
%98 = OpCompositeConstruct %v2int %94 %96 %96 = OpLogicalNot %bool %98
%99 = OpIAdd %v2int %71 %98 OpSelectionMerge %99 None
OpStore %loadIndex %99 OpBranchConditional %96 %100 %99
%103 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0 %100 = OpLabel
%104 = OpLoad %uint %103 OpBranch %93
%105 = OpINotEqual %bool %104 %uint_0 %99 = OpLabel
OpSelectionMerge %106 None OpStore %c %uint_0
OpBranchConditional %105 %107 %106 OpBranch %102
%107 = OpLabel %102 = OpLabel
%108 = OpLoad %v2int %loadIndex OpLoopMerge %103 %104 None
%109 = OpVectorShuffle %v2int %108 %108 1 0 OpBranch %105
OpStore %loadIndex %109 %105 = OpLabel
OpBranch %106 %107 = OpLoad %uint %c
%106 = OpLabel %108 = OpULessThan %bool %107 %uint_4
%110 = OpLoad %uint %r %106 = OpLogicalNot %bool %108
%111 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0 OpSelectionMerge %109 None
%112 = OpLoad %uint %111 OpBranchConditional %106 %110 %109
%113 = OpIMul %uint %uint_4 %112 %110 = OpLabel
%114 = OpLoad %uint %c OpBranch %103
%115 = OpIAdd %uint %113 %114 %109 = OpLabel
%117 = OpAccessChain %_ptr_Workgroup_v3float %tile %110 %115 %112 = OpLoad %uint %c
%120 = OpLoad %3 %samp %111 = OpBitcast %int %112
%121 = OpLoad %10 %inputTex %114 = OpLoad %uint %r
%123 = OpSampledImage %122 %121 %120 %113 = OpBitcast %int %114
%126 = OpLoad %v2int %loadIndex %115 = OpCompositeConstruct %v2int %111 %113
%124 = OpConvertSToF %v2float %126 %116 = OpIAdd %v2int %90 %115
%129 = OpFAdd %v2float %124 %128 OpStore %loadIndex %116
%130 = OpConvertSToF %v2float %51 %120 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
%131 = OpFDiv %v2float %129 %130 %121 = OpLoad %uint %120
%118 = OpImageSampleExplicitLod %v4float %123 %131 Lod %float_0 %122 = OpINotEqual %bool %121 %uint_0
%133 = OpVectorShuffle %v3float %118 %118 0 1 2 OpSelectionMerge %123 None
OpStore %117 %133 OpBranchConditional %122 %124 %123
OpBranch %87 %124 = OpLabel
%87 = OpLabel %125 = OpLoad %v2int %loadIndex
%134 = OpLoad %uint %c %126 = OpVectorShuffle %v2int %125 %125 1 0
%135 = OpIAdd %uint %134 %uint_1 OpStore %loadIndex %126
OpStore %c %135 OpBranch %123
OpBranch %85 %123 = OpLabel
%86 = OpLabel %127 = OpLoad %uint %r
OpBranch %77 %128 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%77 = OpLabel %129 = OpLoad %uint %128
%136 = OpLoad %uint %r %130 = OpIMul %uint %uint_4 %129
%137 = OpIAdd %uint %136 %uint_1 %131 = OpLoad %uint %c
OpStore %r %137 %132 = OpIAdd %uint %130 %131
OpBranch %75 %133 = OpAccessChain %_ptr_Workgroup_v3float %tile %127 %132
%76 = OpLabel %136 = OpLoad %3 %samp
%137 = OpLoad %10 %inputTex
%139 = OpSampledImage %138 %137 %136
%142 = OpLoad %v2int %loadIndex
%140 = OpConvertSToF %v2float %142
%145 = OpFAdd %v2float %140 %144
%146 = OpConvertSToF %v2float %70
%147 = OpFDiv %v2float %145 %146
%134 = OpImageSampleExplicitLod %v4float %139 %147 Lod %float_0
%149 = OpVectorShuffle %v3float %134 %134 0 1 2
OpStore %133 %149
OpBranch %104
%104 = OpLabel
%150 = OpLoad %uint %c
%151 = OpIAdd %uint %150 %uint_1
OpStore %c %151
OpBranch %102
%103 = OpLabel
OpBranch %94
%94 = OpLabel
%152 = OpLoad %uint %r
%153 = OpIAdd %uint %152 %uint_1
OpStore %r %153
OpBranch %92
%93 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
OpStore %r_0 %uint_0 OpStore %r_0 %uint_0
OpBranch %140 OpBranch %156
%140 = OpLabel %156 = OpLabel
OpLoopMerge %141 %142 None OpLoopMerge %157 %158 None
OpBranch %143 OpBranch %159
%143 = OpLabel %159 = OpLabel
%145 = OpLoad %uint %r_0 %161 = OpLoad %uint %r_0
%146 = OpULessThan %bool %145 %uint_4 %162 = OpULessThan %bool %161 %uint_4
%144 = OpLogicalNot %bool %146 %160 = OpLogicalNot %bool %162
OpSelectionMerge %147 None OpSelectionMerge %163 None
OpBranchConditional %144 %148 %147 OpBranchConditional %160 %164 %163
%148 = OpLabel %164 = OpLabel
OpBranch %141 OpBranch %157
%147 = OpLabel %163 = OpLabel
OpStore %c_0 %uint_0 OpStore %c_0 %uint_0
OpBranch %150 OpBranch %166
%150 = OpLabel %166 = OpLabel
OpLoopMerge %151 %152 None OpLoopMerge %167 %168 None
OpBranch %153
%153 = OpLabel
%155 = OpLoad %uint %c_0
%156 = OpULessThan %bool %155 %uint_4
%154 = OpLogicalNot %bool %156
OpSelectionMerge %157 None
OpBranchConditional %154 %158 %157
%158 = OpLabel
OpBranch %151
%157 = OpLabel
%160 = OpLoad %uint %c_0
%159 = OpBitcast %int %160
%162 = OpLoad %uint %r_0
%161 = OpBitcast %int %162
%163 = OpCompositeConstruct %v2int %159 %161
%164 = OpIAdd %v2int %71 %163
OpStore %writeIndex %164
%166 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
%167 = OpLoad %uint %166
%168 = OpINotEqual %bool %167 %uint_0
OpSelectionMerge %169 None
OpBranchConditional %168 %170 %169
%170 = OpLabel
%171 = OpLoad %v2int %writeIndex
%172 = OpVectorShuffle %v2int %171 %171 1 0
OpStore %writeIndex %172
OpBranch %169 OpBranch %169
%169 = OpLabel %169 = OpLabel
%173 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0 %171 = OpLoad %uint %c_0
%174 = OpLoad %uint %173 %172 = OpULessThan %bool %171 %uint_4
%175 = OpIMul %uint %uint_4 %174 %170 = OpLogicalNot %bool %172
OpSelectionMerge %173 None
OpBranchConditional %170 %174 %173
%174 = OpLabel
OpBranch %167
%173 = OpLabel
%176 = OpLoad %uint %c_0 %176 = OpLoad %uint %c_0
%177 = OpIAdd %uint %175 %176 %175 = OpBitcast %int %176
%178 = OpUGreaterThanEqual %bool %177 %50 %178 = OpLoad %uint %r_0
OpSelectionMerge %179 None %177 = OpBitcast %int %178
OpBranchConditional %178 %180 %179 %179 = OpCompositeConstruct %v2int %175 %177
%180 = OpLabel %180 = OpIAdd %v2int %90 %179
%181 = OpISub %uint %uint_256 %50 OpStore %writeIndex %180
%182 = OpULessThan %bool %177 %181 %182 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
OpBranch %179 %183 = OpLoad %uint %182
%179 = OpLabel %184 = OpINotEqual %bool %183 %uint_0
%183 = OpPhi %bool %178 %169 %182 %180 OpSelectionMerge %185 None
OpSelectionMerge %184 None OpBranchConditional %184 %186 %185
OpBranchConditional %183 %185 %184 %186 = OpLabel
%185 = OpLabel
%187 = OpLoad %v2int %writeIndex %187 = OpLoad %v2int %writeIndex
%188 = OpSLessThan %v2bool %187 %51 %188 = OpVectorShuffle %v2int %187 %187 1 0
%186 = OpAll %bool %188 OpStore %writeIndex %188
OpBranch %184 OpBranch %185
%184 = OpLabel %185 = OpLabel
%190 = OpPhi %bool %183 %179 %186 %185 %189 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
OpSelectionMerge %191 None %190 = OpLoad %uint %189
OpBranchConditional %190 %192 %191 %191 = OpIMul %uint %uint_4 %190
%192 = OpLabel %192 = OpLoad %uint %c_0
OpStore %acc %193 %193 = OpIAdd %uint %191 %192
OpStore %f %uint_0 %194 = OpUGreaterThanEqual %bool %193 %69
OpBranch %198 OpSelectionMerge %195 None
%198 = OpLabel OpBranchConditional %194 %196 %195
OpLoopMerge %199 %200 None %196 = OpLabel
OpBranch %201 %197 = OpISub %uint %uint_256 %69
%198 = OpULessThan %bool %193 %197
OpBranch %195
%195 = OpLabel
%199 = OpPhi %bool %194 %185 %198 %196
OpSelectionMerge %200 None
OpBranchConditional %199 %201 %200
%201 = OpLabel %201 = OpLabel
%203 = OpLoad %uint %f %203 = OpLoad %v2int %writeIndex
%204 = OpAccessChain %_ptr_Uniform_uint %params %uint_0 %204 = OpSLessThan %v2bool %203 %70
%205 = OpLoad %uint %204 %202 = OpAll %bool %204
%206 = OpULessThan %bool %203 %205
%202 = OpLogicalNot %bool %206
OpSelectionMerge %207 None
OpBranchConditional %202 %208 %207
%208 = OpLabel
OpBranch %199
%207 = OpLabel
%209 = OpLoad %uint %f
%210 = OpIAdd %uint %177 %209
%211 = OpISub %uint %210 %50
OpStore %i %211
%213 = OpLoad %v3float %acc
%216 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%217 = OpLoad %uint %216
%215 = OpConvertUToF %float %217
%218 = OpFDiv %float %float_1 %215
%219 = OpLoad %uint %r_0
%220 = OpLoad %uint %i
%221 = OpAccessChain %_ptr_Workgroup_v3float %tile %219 %220
%222 = OpLoad %v3float %221
%223 = OpVectorTimesScalar %v3float %222 %218
%224 = OpFAdd %v3float %213 %223
OpStore %acc %224
OpBranch %200 OpBranch %200
%200 = OpLabel %200 = OpLabel
%225 = OpLoad %uint %f %206 = OpPhi %bool %199 %195 %202 %201
%226 = OpIAdd %uint %225 %uint_1 OpSelectionMerge %207 None
OpStore %f %226 OpBranchConditional %206 %208 %207
OpBranch %198 %208 = OpLabel
%199 = OpLabel OpStore %acc %209
%228 = OpLoad %14 %outputTex OpStore %f %uint_0
%229 = OpLoad %v2int %writeIndex OpBranch %213
%230 = OpLoad %v3float %acc %213 = OpLabel
%231 = OpCompositeExtract %float %230 0 OpLoopMerge %214 %215 None
%232 = OpCompositeExtract %float %230 1 OpBranch %216
%233 = OpCompositeExtract %float %230 2 %216 = OpLabel
%234 = OpCompositeConstruct %v4float %231 %232 %233 %float_1 %218 = OpLoad %uint %f
OpImageWrite %228 %229 %234 %219 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
OpBranch %191 %220 = OpLoad %uint %219
%191 = OpLabel %221 = OpULessThan %bool %218 %220
OpBranch %152 %217 = OpLogicalNot %bool %221
%152 = OpLabel OpSelectionMerge %222 None
%235 = OpLoad %uint %c_0 OpBranchConditional %217 %223 %222
%236 = OpIAdd %uint %235 %uint_1 %223 = OpLabel
OpStore %c_0 %236 OpBranch %214
OpBranch %150 %222 = OpLabel
%151 = OpLabel %224 = OpLoad %uint %f
OpBranch %142 %225 = OpIAdd %uint %193 %224
%142 = OpLabel %226 = OpISub %uint %225 %69
%237 = OpLoad %uint %r_0 OpStore %i %226
%238 = OpIAdd %uint %237 %uint_1 %228 = OpLoad %v3float %acc
OpStore %r_0 %238 %231 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
OpBranch %140 %232 = OpLoad %uint %231
%141 = OpLabel %230 = OpConvertUToF %float %232
%233 = OpFDiv %float %float_1 %230
%234 = OpLoad %uint %r_0
%235 = OpLoad %uint %i
%236 = OpAccessChain %_ptr_Workgroup_v3float %tile %234 %235
%237 = OpLoad %v3float %236
%238 = OpVectorTimesScalar %v3float %237 %233
%239 = OpFAdd %v3float %228 %238
OpStore %acc %239
OpBranch %215
%215 = OpLabel
%240 = OpLoad %uint %f
%241 = OpIAdd %uint %240 %uint_1
OpStore %f %241
OpBranch %213
%214 = OpLabel
%243 = OpLoad %14 %outputTex
%244 = OpLoad %v2int %writeIndex
%245 = OpLoad %v3float %acc
%246 = OpCompositeExtract %float %245 0
%247 = OpCompositeExtract %float %245 1
%248 = OpCompositeExtract %float %245 2
%249 = OpCompositeConstruct %v4float %246 %247 %248 %float_1
OpImageWrite %243 %244 %249
OpBranch %207
%207 = OpLabel
OpBranch %168
%168 = OpLabel
%250 = OpLoad %uint %c_0
%251 = OpIAdd %uint %250 %uint_1
OpStore %c_0 %251
OpBranch %166
%167 = OpLabel
OpBranch %158
%158 = OpLabel
%252 = OpLoad %uint %r_0
%253 = OpIAdd %uint %252 %uint_1
OpStore %r_0 %253
OpBranch %156
%157 = OpLabel
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -350,21 +350,16 @@ void main(tint_symbol_1 tint_symbol) {
const uint3 gl_LocalInvocationID_param = tint_symbol.gl_LocalInvocationID_param; const uint3 gl_LocalInvocationID_param = tint_symbol.gl_LocalInvocationID_param;
const uint3 gl_GlobalInvocationID_param = tint_symbol.gl_GlobalInvocationID_param; const uint3 gl_GlobalInvocationID_param = tint_symbol.gl_GlobalInvocationID_param;
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
{ const uint i_1 = local_invocation_index;
for(int i = 0; (i < 64); i = (i + 1)) { const uint i_2 = (local_invocation_index % 1u);
{ mm_Bsub[i_1][i_2] = 0.0f;
for(int i_1 = 0; (i_1 < 64); i_1 = (i_1 + 1)) { }
mm_Asub[i][i_1] = 0.0f; {
} for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
} const uint i = (idx / 64u);
} const uint i_1 = (idx % 64u);
} mm_Asub[i][i_1] = 0.0f;
{
for(int i_2 = 0; (i_2 < 64); i_2 = (i_2 + 1)) {
const float tint_symbol_6[1] = (float[1])0;
mm_Bsub[i_2] = tint_symbol_6;
}
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -53,7 +53,7 @@ bool coordsInBounds_vi2_vi2_(thread int2* const coord, thread int2* const shape)
return x_88; return x_88;
} }
float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread int* const row, thread int* const col, thread int* const tint_symbol_5, thread int* const tint_symbol_6, thread int* const tint_symbol_7) { float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread int* const row, thread int* const col, thread int* const tint_symbol_3, thread int* const tint_symbol_4, thread int* const tint_symbol_5) {
int batchASize = 0; int batchASize = 0;
int2 param_10 = 0; int2 param_10 = 0;
int2 param_11 = 0; int2 param_11 = 0;
@ -63,16 +63,16 @@ float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread
batchASize = (x_417 * x_419); batchASize = (x_417 * x_419);
int const x_421 = *(row); int const x_421 = *(row);
int const x_422 = *(col); int const x_422 = *(col);
int const x_424 = *(tint_symbol_5); int const x_424 = *(tint_symbol_3);
int const x_425 = *(tint_symbol_6); int const x_425 = *(tint_symbol_4);
param_10 = int2(x_421, x_422); param_10 = int2(x_421, x_422);
param_11 = int2(x_424, x_425); param_11 = int2(x_424, x_425);
bool const x_429 = coordsInBounds_vi2_vi2_(&(param_10), &(param_11)); bool const x_429 = coordsInBounds_vi2_vi2_(&(param_10), &(param_11));
if (x_429) { if (x_429) {
int const x_438 = *(tint_symbol_7); int const x_438 = *(tint_symbol_5);
int const x_439 = batchASize; int const x_439 = batchASize;
int const x_441 = *(row); int const x_441 = *(row);
int const x_442 = *(tint_symbol_6); int const x_442 = *(tint_symbol_4);
int const x_445 = *(col); int const x_445 = *(col);
float const x_448 = x_165.A[(((x_438 * x_439) + (x_441 * x_442)) + x_445)]; float const x_448 = x_165.A[(((x_438 * x_439) + (x_441 * x_442)) + x_445)];
x_430 = x_448; x_430 = x_448;
@ -83,7 +83,7 @@ float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread
return x_450; return x_450;
} }
float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread int* const row_1, thread int* const col_1, thread int* const tint_symbol_8, thread int* const tint_symbol_9, thread int* const tint_symbol_10) { float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread int* const row_1, thread int* const col_1, thread int* const tint_symbol_6, thread int* const tint_symbol_7, thread int* const tint_symbol_8) {
int batchBSize = 0; int batchBSize = 0;
int2 param_12 = 0; int2 param_12 = 0;
int2 param_13 = 0; int2 param_13 = 0;
@ -93,16 +93,16 @@ float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread
batchBSize = (x_455 * x_457); batchBSize = (x_455 * x_457);
int const x_459 = *(row_1); int const x_459 = *(row_1);
int const x_460 = *(col_1); int const x_460 = *(col_1);
int const x_462 = *(tint_symbol_8); int const x_462 = *(tint_symbol_6);
int const x_463 = *(tint_symbol_9); int const x_463 = *(tint_symbol_7);
param_12 = int2(x_459, x_460); param_12 = int2(x_459, x_460);
param_13 = int2(x_462, x_463); param_13 = int2(x_462, x_463);
bool const x_467 = coordsInBounds_vi2_vi2_(&(param_12), &(param_13)); bool const x_467 = coordsInBounds_vi2_vi2_(&(param_12), &(param_13));
if (x_467) { if (x_467) {
int const x_475 = *(tint_symbol_10); int const x_475 = *(tint_symbol_8);
int const x_476 = batchBSize; int const x_476 = batchBSize;
int const x_478 = *(row_1); int const x_478 = *(row_1);
int const x_479 = *(tint_symbol_9); int const x_479 = *(tint_symbol_7);
int const x_482 = *(col_1); int const x_482 = *(col_1);
float const x_485 = x_185.B[(((x_475 * x_476) + (x_478 * x_479)) + x_482)]; float const x_485 = x_185.B[(((x_475 * x_476) + (x_478 * x_479)) + x_482)];
x_468 = x_485; x_468 = x_485;
@ -146,17 +146,17 @@ void setOutput_i1_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread
return; return;
} }
void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int* const row_2, thread int* const col_2, thread float* const value_2, thread int* const tint_symbol_11) { void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int* const row_2, thread int* const col_2, thread float* const value_2, thread int* const tint_symbol_9) {
int3 outCoord = 0; int3 outCoord = 0;
int param_14 = 0; int param_14 = 0;
int param_15 = 0; int param_15 = 0;
int param_16 = 0; int param_16 = 0;
float param_17 = 0.0f; float param_17 = 0.0f;
int const x_491 = *(tint_symbol_11); int const x_491 = *(tint_symbol_9);
int const x_492 = *(row_2); int const x_492 = *(row_2);
int const x_493 = *(col_2); int const x_493 = *(col_2);
outCoord = int3(x_491, x_492, x_493); outCoord = int3(x_491, x_492, x_493);
int const x_496 = *(tint_symbol_11); int const x_496 = *(tint_symbol_9);
param_14 = x_496; param_14 = x_496;
int const x_498 = *(row_2); int const x_498 = *(row_2);
param_15 = x_498; param_15 = x_498;
@ -168,7 +168,7 @@ void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int
return; return;
} }
void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const dimAOuter, thread int* const dimInner, thread int* const dimBOuter, thread uint3* const tint_symbol_12, thread uint3* const tint_symbol_13, thread int* const tint_symbol_14, thread int* const tint_symbol_15, thread int* const tint_symbol_16, threadgroup tint_array_wrapper* const tint_symbol_17, thread int* const tint_symbol_18, threadgroup tint_array_wrapper_2* const tint_symbol_19) { void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const dimAOuter, thread int* const dimInner, thread int* const dimBOuter, thread uint3* const tint_symbol_10, thread uint3* const tint_symbol_11, thread int* const tint_symbol_12, thread int* const tint_symbol_13, thread int* const tint_symbol_14, threadgroup tint_array_wrapper* const tint_symbol_15, thread int* const tint_symbol_16, threadgroup tint_array_wrapper_2* const tint_symbol_17) {
int tileRow = 0; int tileRow = 0;
int tileCol = 0; int tileCol = 0;
int globalRow = 0; int globalRow = 0;
@ -203,13 +203,13 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int param_7 = 0; int param_7 = 0;
int param_8 = 0; int param_8 = 0;
float param_9 = 0.0f; float param_9 = 0.0f;
uint const x_132 = (*(tint_symbol_12)).y; uint const x_132 = (*(tint_symbol_10)).y;
tileRow = (as_type<int>(x_132) * 1); tileRow = (as_type<int>(x_132) * 1);
uint const x_137 = (*(tint_symbol_12)).x; uint const x_137 = (*(tint_symbol_10)).x;
tileCol = (as_type<int>(x_137) * 1); tileCol = (as_type<int>(x_137) * 1);
uint const x_143 = (*(tint_symbol_13)).y; uint const x_143 = (*(tint_symbol_11)).y;
globalRow = (as_type<int>(x_143) * 1); globalRow = (as_type<int>(x_143) * 1);
uint const x_148 = (*(tint_symbol_13)).x; uint const x_148 = (*(tint_symbol_11)).x;
globalCol = (as_type<int>(x_148) * 1); globalCol = (as_type<int>(x_148) * 1);
int const x_152 = *(dimInner); int const x_152 = *(dimInner);
numTiles = (((x_152 - 1) / 64) + 1); numTiles = (((x_152 - 1) / 64) + 1);
@ -240,9 +240,9 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
innerRow = (x_183 + 1); innerRow = (x_183 + 1);
} }
} }
uint const x_187 = (*(tint_symbol_12)).x; uint const x_187 = (*(tint_symbol_10)).x;
tileColA = (as_type<int>(x_187) * 64); tileColA = (as_type<int>(x_187) * 64);
uint const x_192 = (*(tint_symbol_12)).y; uint const x_192 = (*(tint_symbol_10)).y;
tileRowB = (as_type<int>(x_192) * 1); tileRowB = (as_type<int>(x_192) * 1);
t = 0; t = 0;
while (true) { while (true) {
@ -280,8 +280,8 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_240 = inputCol; int const x_240 = inputCol;
param_3 = (x_235 + x_236); param_3 = (x_235 + x_236);
param_4 = ((x_238 * 64) + x_240); param_4 = ((x_238 * 64) + x_240);
float const x_244 = mm_readA_i1_i1_(x_48, x_165, &(param_3), &(param_4), tint_symbol_14, tint_symbol_15, tint_symbol_16); float const x_244 = mm_readA_i1_i1_(x_48, x_165, &(param_3), &(param_4), tint_symbol_12, tint_symbol_13, tint_symbol_14);
(*(tint_symbol_17)).arr[x_233].arr[x_234] = x_244; (*(tint_symbol_15)).arr[x_233].arr[x_234] = x_244;
{ {
int const x_247 = innerCol_1; int const x_247 = innerCol_1;
innerCol_1 = (x_247 + 1); innerCol_1 = (x_247 + 1);
@ -320,8 +320,8 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_285 = innerCol_2; int const x_285 = innerCol_2;
param_5 = ((x_280 * 64) + x_282); param_5 = ((x_280 * 64) + x_282);
param_6 = (x_284 + x_285); param_6 = (x_284 + x_285);
float const x_289 = mm_readB_i1_i1_(x_48, x_185, &(param_5), &(param_6), tint_symbol_15, tint_symbol_18, tint_symbol_16); float const x_289 = mm_readB_i1_i1_(x_48, x_185, &(param_5), &(param_6), tint_symbol_13, tint_symbol_16, tint_symbol_14);
(*(tint_symbol_19)).arr[x_278].arr[x_279] = x_289; (*(tint_symbol_17)).arr[x_278].arr[x_279] = x_289;
{ {
int const x_291 = innerCol_2; int const x_291 = innerCol_2;
innerCol_2 = (x_291 + 1); innerCol_2 = (x_291 + 1);
@ -351,7 +351,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_315 = k; int const x_315 = k;
int const x_316 = tileCol; int const x_316 = tileCol;
int const x_317 = inner; int const x_317 = inner;
float const x_320 = (*(tint_symbol_19)).arr[x_315].arr[(x_316 + x_317)]; float const x_320 = (*(tint_symbol_17)).arr[x_315].arr[(x_316 + x_317)];
BCached.arr[x_314] = x_320; BCached.arr[x_314] = x_320;
{ {
int const x_322 = inner; int const x_322 = inner;
@ -368,7 +368,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_333 = tileRow; int const x_333 = tileRow;
int const x_334 = innerRow_3; int const x_334 = innerRow_3;
int const x_336 = k; int const x_336 = k;
float const x_338 = (*(tint_symbol_17)).arr[(x_333 + x_334)].arr[x_336]; float const x_338 = (*(tint_symbol_15)).arr[(x_333 + x_334)].arr[x_336];
ACached = x_338; ACached = x_338;
innerCol_3 = 0; innerCol_3 = 0;
while (true) { while (true) {
@ -445,7 +445,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
param_8 = (x_400 + x_401); param_8 = (x_400 + x_401);
float const x_409 = acc.arr[x_403].arr[x_404]; float const x_409 = acc.arr[x_403].arr[x_404];
param_9 = x_409; param_9 = x_409;
mm_write_i1_i1_f1_(x_48, x_54, &(param_7), &(param_8), &(param_9), tint_symbol_16); mm_write_i1_i1_f1_(x_48, x_54, &(param_7), &(param_8), &(param_9), tint_symbol_14);
} }
{ {
int const x_411 = innerCol_4; int const x_411 = innerCol_4;
@ -460,47 +460,51 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
return; return;
} }
void main_1(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const tint_symbol_20, thread int* const tint_symbol_21, thread int* const tint_symbol_22, thread uint3* const tint_symbol_23, thread int* const tint_symbol_24, thread uint3* const tint_symbol_25, threadgroup tint_array_wrapper* const tint_symbol_26, threadgroup tint_array_wrapper_2* const tint_symbol_27) { void main_1(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const tint_symbol_18, thread int* const tint_symbol_19, thread int* const tint_symbol_20, thread uint3* const tint_symbol_21, thread int* const tint_symbol_22, thread uint3* const tint_symbol_23, threadgroup tint_array_wrapper* const tint_symbol_24, threadgroup tint_array_wrapper_2* const tint_symbol_25) {
int param_18 = 0; int param_18 = 0;
int param_19 = 0; int param_19 = 0;
int param_20 = 0; int param_20 = 0;
int const x_67 = x_48.aShape.y; int const x_67 = x_48.aShape.y;
*(tint_symbol_20) = x_67; *(tint_symbol_18) = x_67;
int const x_71 = x_48.aShape.z; int const x_71 = x_48.aShape.z;
*(tint_symbol_21) = x_71; *(tint_symbol_19) = x_71;
int const x_75 = x_48.bShape.z; int const x_75 = x_48.bShape.z;
*(tint_symbol_22) = x_75; *(tint_symbol_20) = x_75;
uint const x_505 = (*(tint_symbol_23)).z; uint const x_505 = (*(tint_symbol_21)).z;
*(tint_symbol_24) = as_type<int>(x_505); *(tint_symbol_22) = as_type<int>(x_505);
int const x_508 = *(tint_symbol_20); int const x_508 = *(tint_symbol_18);
param_18 = x_508; param_18 = x_508;
int const x_510 = *(tint_symbol_21); int const x_510 = *(tint_symbol_19);
param_19 = x_510; param_19 = x_510;
int const x_512 = *(tint_symbol_22); int const x_512 = *(tint_symbol_20);
param_20 = x_512; param_20 = x_512;
mm_matMul_i1_i1_i1_(x_48, x_165, x_185, x_54, &(param_18), &(param_19), &(param_20), tint_symbol_25, tint_symbol_23, tint_symbol_20, tint_symbol_21, tint_symbol_24, tint_symbol_26, tint_symbol_22, tint_symbol_27); mm_matMul_i1_i1_i1_(x_48, x_165, x_185, x_54, &(param_18), &(param_19), &(param_20), tint_symbol_23, tint_symbol_21, tint_symbol_18, tint_symbol_19, tint_symbol_22, tint_symbol_24, tint_symbol_20, tint_symbol_25);
return; return;
} }
kernel void tint_symbol_1(uint3 gl_LocalInvocationID_param [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID_param [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& x_48 [[buffer(3)]], const device ssbA& x_165 [[buffer(1)]], const device ssbB& x_185 [[buffer(2)]], device ssbOut& x_54 [[buffer(0)]]) { kernel void tint_symbol_1(uint3 gl_LocalInvocationID_param [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID_param [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& x_48 [[buffer(3)]], const device ssbA& x_165 [[buffer(1)]], const device ssbB& x_185 [[buffer(2)]], device ssbOut& x_54 [[buffer(0)]]) {
threadgroup tint_array_wrapper tint_symbol_28; threadgroup tint_array_wrapper_2 tint_symbol_26;
threadgroup tint_array_wrapper_2 tint_symbol_29; threadgroup tint_array_wrapper tint_symbol_27;
thread uint3 tint_symbol_30 = 0u; thread uint3 tint_symbol_28 = 0u;
thread uint3 tint_symbol_31 = 0u; thread uint3 tint_symbol_29 = 0u;
thread int tint_symbol_30 = 0;
thread int tint_symbol_31 = 0;
thread int tint_symbol_32 = 0; thread int tint_symbol_32 = 0;
thread int tint_symbol_33 = 0; thread int tint_symbol_33 = 0;
thread int tint_symbol_34 = 0; {
thread int tint_symbol_35 = 0; uint const i_1 = local_invocation_index;
if ((local_invocation_index == 0u)) { uint const i_2 = (local_invocation_index % 1u);
tint_array_wrapper const tint_symbol_3 = {.arr={}}; tint_symbol_26.arr[i_1].arr[i_2] = float();
tint_symbol_28 = tint_symbol_3; }
tint_array_wrapper_2 const tint_symbol_4 = {.arr={}}; for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
tint_symbol_29 = tint_symbol_4; uint const i = (idx / 64u);
uint const i_1 = (idx % 64u);
tint_symbol_27.arr[i].arr[i_1] = float();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
tint_symbol_30 = gl_LocalInvocationID_param; tint_symbol_28 = gl_LocalInvocationID_param;
tint_symbol_31 = gl_GlobalInvocationID_param; tint_symbol_29 = gl_GlobalInvocationID_param;
main_1(x_48, x_165, x_185, x_54, &(tint_symbol_32), &(tint_symbol_33), &(tint_symbol_34), &(tint_symbol_31), &(tint_symbol_35), &(tint_symbol_30), &(tint_symbol_28), &(tint_symbol_29)); main_1(x_48, x_165, x_185, x_54, &(tint_symbol_30), &(tint_symbol_31), &(tint_symbol_32), &(tint_symbol_29), &(tint_symbol_33), &(tint_symbol_28), &(tint_symbol_27), &(tint_symbol_26));
return; return;
} }

View File

@ -1,7 +1,7 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 609 ; Bound: 628
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
@ -121,6 +121,7 @@
OpName %param_19 "param_19" OpName %param_19 "param_19"
OpName %param_20 "param_20" OpName %param_20 "param_20"
OpName %main "main" OpName %main "main"
OpName %idx "idx"
OpDecorate %Uniforms Block OpDecorate %Uniforms Block
OpMemberDecorate %Uniforms 0 Offset 0 OpMemberDecorate %Uniforms 0 Offset 0
OpMemberDecorate %Uniforms 1 Offset 16 OpMemberDecorate %Uniforms 1 Offset 16
@ -236,8 +237,9 @@
%_ptr_Workgroup_float = OpTypePointer Workgroup %float %_ptr_Workgroup_float = OpTypePointer Workgroup %float
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%575 = OpTypeFunction %void %575 = OpTypeFunction %void
%603 = OpConstantNull %_arr__arr_float_uint_64_uint_64 %_ptr_Function_uint = OpTypePointer Function %uint
%604 = OpConstantNull %_arr__arr_float_uint_1_uint_64 %606 = OpConstantNull %uint
%uint_4096 = OpConstant %uint 4096
%coordsInBounds_vi2_vi2_ = OpFunction %bool None %45 %coordsInBounds_vi2_vi2_ = OpFunction %bool None %45
%coord = OpFunctionParameter %_ptr_Function_v2int %coord = OpFunctionParameter %_ptr_Function_v2int
%shape = OpFunctionParameter %_ptr_Function_v2int %shape = OpFunctionParameter %_ptr_Function_v2int
@ -941,20 +943,45 @@
OpFunctionEnd OpFunctionEnd
%main = OpFunction %void None %575 %main = OpFunction %void None %575
%598 = OpLabel %598 = OpLabel
%idx = OpVariable %_ptr_Function_uint Function %606
%599 = OpLoad %uint %tint_symbol_2 %599 = OpLoad %uint %tint_symbol_2
%600 = OpIEqual %bool %599 %uint_0 %600 = OpLoad %uint %tint_symbol_2
OpSelectionMerge %601 None %601 = OpUMod %uint %600 %uint_1
OpBranchConditional %600 %602 %601 %602 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %599 %601
%602 = OpLabel OpStore %602 %85
OpStore %mm_Asub %603 %603 = OpLoad %uint %tint_symbol_2
OpStore %mm_Bsub %604 OpStore %idx %603
OpBranch %601 OpBranch %607
%601 = OpLabel %607 = OpLabel
OpLoopMerge %608 %609 None
OpBranch %610
%610 = OpLabel
%612 = OpLoad %uint %idx
%614 = OpULessThan %bool %612 %uint_4096
%611 = OpLogicalNot %bool %614
OpSelectionMerge %615 None
OpBranchConditional %611 %616 %615
%616 = OpLabel
OpBranch %608
%615 = OpLabel
%617 = OpLoad %uint %idx
%618 = OpUDiv %uint %617 %uint_64
%619 = OpLoad %uint %idx
%620 = OpUMod %uint %619 %uint_64
%621 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %618 %620
OpStore %621 %85
OpBranch %609
%609 = OpLabel
%622 = OpLoad %uint %idx
%623 = OpIAdd %uint %622 %uint_64
OpStore %idx %623
OpBranch %607
%608 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%606 = OpLoad %v3uint %tint_symbol %625 = OpLoad %v3uint %tint_symbol
OpStore %gl_LocalInvocationID %606 OpStore %gl_LocalInvocationID %625
%607 = OpLoad %v3uint %tint_symbol_1 %626 = OpLoad %v3uint %tint_symbol_1
OpStore %gl_GlobalInvocationID %607 OpStore %gl_GlobalInvocationID %626
%608 = OpFunctionCall %void %main_1 %627 = OpFunctionCall %void %main_1
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicAdd_794055(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicAdd_794055 = OpFunction %void None %7 %atomicAdd_794055 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicAdd_794055 %25 = OpFunctionCall %void %atomicAdd_794055
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicAdd_d5db1d(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicAdd_d5db1d = OpFunction %void None %6 %atomicAdd_d5db1d = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicAdd_d5db1d %24 = OpFunctionCall %void %atomicAdd_d5db1d
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicAnd_34edd3(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicAnd_34edd3 = OpFunction %void None %6 %atomicAnd_34edd3 = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicAnd_34edd3 %24 = OpFunctionCall %void %atomicAnd_34edd3
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicAnd_45a819(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicAnd_45a819 = OpFunction %void None %7 %atomicAnd_45a819 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicAnd_45a819 %25 = OpFunctionCall %void %atomicAnd_45a819
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -15,7 +15,7 @@ void atomicCompareExchangeWeak_89ea3b(threadgroup atomic_int* const tint_symbol_
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 37 ; Bound: 33
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -29,7 +29,7 @@
%int_0 = OpConstant %int 0 %int_0 = OpConstant %int 0
%_ptr_Function_v2int = OpTypePointer Function %v2int %_ptr_Function_v2int = OpTypePointer Function %v2int
%24 = OpConstantNull %v2int %24 = OpConstantNull %v2int
%33 = OpConstantNull %int %29 = OpConstantNull %int
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicCompareExchangeWeak_89ea3b = OpFunction %void None %7 %atomicCompareExchangeWeak_89ea3b = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -43,15 +43,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%26 = OpLabel %26 = OpLabel
%27 = OpLoad %uint %tint_symbol OpAtomicStore %arg_0 %uint_2 %uint_0 %29
%28 = OpIEqual %bool %27 %uint_0
OpSelectionMerge %29 None
OpBranchConditional %28 %30 %29
%30 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %33
OpBranch %29
%29 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%36 = OpFunctionCall %void %atomicCompareExchangeWeak_89ea3b %32 = OpFunctionCall %void %atomicCompareExchangeWeak_89ea3b
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -15,7 +15,7 @@ void atomicCompareExchangeWeak_b2ab2c(threadgroup atomic_uint* const tint_symbol
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 35 ; Bound: 31
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -27,7 +27,7 @@
%bool = OpTypeBool %bool = OpTypeBool
%_ptr_Function_v2uint = OpTypePointer Function %v2uint %_ptr_Function_v2uint = OpTypePointer Function %v2uint
%22 = OpConstantNull %v2uint %22 = OpConstantNull %v2uint
%31 = OpConstantNull %uint %27 = OpConstantNull %uint
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicCompareExchangeWeak_b2ab2c = OpFunction %void None %6 %atomicCompareExchangeWeak_b2ab2c = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -41,15 +41,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%24 = OpLabel %24 = OpLabel
%25 = OpLoad %uint %tint_symbol OpAtomicStore %arg_0 %uint_2 %uint_0 %27
%26 = OpIEqual %bool %25 %uint_0
OpSelectionMerge %27 None
OpBranchConditional %26 %28 %27
%28 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %31
OpBranch %27
%27 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%34 = OpFunctionCall %void %atomicCompareExchangeWeak_b2ab2c %30 = OpFunctionCall %void %atomicCompareExchangeWeak_b2ab2c
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicExchange_0a5dca(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicExchange_0a5dca = OpFunction %void None %6 %atomicExchange_0a5dca = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicExchange_0a5dca %24 = OpFunctionCall %void %atomicExchange_0a5dca
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicExchange_e114ba(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicExchange_e114ba = OpFunction %void None %7 %atomicExchange_e114ba = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicExchange_e114ba %25 = OpFunctionCall %void %atomicExchange_e114ba
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicLoad_361bf1(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 29 ; Bound: 24
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -24,7 +24,6 @@
%uint_0 = OpConstant %uint 0 %uint_0 = OpConstant %uint 0
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%16 = OpConstantNull %uint %16 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicLoad_361bf1 = OpFunction %void None %6 %atomicLoad_361bf1 = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -35,15 +34,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%18 = OpLabel %18 = OpLabel
%19 = OpLoad %uint %tint_symbol
%20 = OpIEqual %bool %19 %uint_0
OpSelectionMerge %22 None
OpBranchConditional %20 %23 %22
%23 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %16 OpAtomicStore %arg_0 %uint_2 %uint_0 %16
OpBranch %22
%22 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%28 = OpFunctionCall %void %atomicLoad_361bf1 %23 = OpFunctionCall %void %atomicLoad_361bf1
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicLoad_afcc03(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_0 = OpConstant %uint 0 %uint_0 = OpConstant %uint 0
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%17 = OpConstantNull %int %17 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicLoad_afcc03 = OpFunction %void None %7 %atomicLoad_afcc03 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicLoad_afcc03 %24 = OpFunctionCall %void %atomicLoad_afcc03
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicMax_a89cc3(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicMax_a89cc3 = OpFunction %void None %7 %atomicMax_a89cc3 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicMax_a89cc3 %25 = OpFunctionCall %void %atomicMax_a89cc3
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicMax_beccfc(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicMax_beccfc = OpFunction %void None %6 %atomicMax_beccfc = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicMax_beccfc %24 = OpFunctionCall %void %atomicMax_beccfc
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicMin_278235(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicMin_278235 = OpFunction %void None %7 %atomicMin_278235 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicMin_278235 %25 = OpFunctionCall %void %atomicMin_278235
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicMin_69d383(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicMin_69d383 = OpFunction %void None %6 %atomicMin_69d383 = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicMin_69d383 %24 = OpFunctionCall %void %atomicMin_69d383
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicOr_5e3d61(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicOr_5e3d61 = OpFunction %void None %6 %atomicOr_5e3d61 = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicOr_5e3d61 %24 = OpFunctionCall %void %atomicOr_5e3d61
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicOr_d09248(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicOr_d09248 = OpFunction %void None %7 %atomicOr_d09248 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicOr_d09248 %25 = OpFunctionCall %void %atomicOr_d09248
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -12,7 +12,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicStore_726882(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 28 ; Bound: 23
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -22,8 +22,7 @@
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_0 = OpConstant %uint 0 %uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%bool = OpTypeBool %19 = OpConstantNull %uint
%24 = OpConstantNull %uint
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicStore_726882 = OpFunction %void None %6 %atomicStore_726882 = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -32,15 +31,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%16 = OpLabel %16 = OpLabel
%17 = OpLoad %uint %tint_symbol OpAtomicStore %arg_0 %uint_2 %uint_0 %19
%18 = OpIEqual %bool %17 %uint_0
OpSelectionMerge %20 None
OpBranchConditional %18 %21 %20
%21 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %24
OpBranch %20
%20 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%27 = OpFunctionCall %void %atomicStore_726882 %22 = OpFunctionCall %void %atomicStore_726882
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -12,7 +12,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicStore_8bea94(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 29 ; Bound: 24
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -23,8 +23,7 @@
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_0 = OpConstant %uint 0 %uint_0 = OpConstant %uint 0
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%bool = OpTypeBool %20 = OpConstantNull %int
%25 = OpConstantNull %int
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicStore_8bea94 = OpFunction %void None %7 %atomicStore_8bea94 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -33,15 +32,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%17 = OpLabel %17 = OpLabel
%18 = OpLoad %uint %tint_symbol OpAtomicStore %arg_0 %uint_2 %uint_0 %20
%19 = OpIEqual %bool %18 %uint_0
OpSelectionMerge %21 None
OpBranchConditional %19 %22 %21
%22 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %25
OpBranch %21
%21 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%28 = OpFunctionCall %void %atomicStore_8bea94 %23 = OpFunctionCall %void %atomicStore_8bea94
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
int atomic_result_1 = 0; int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1); InterlockedExchange(arg_0, 0, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicXor_75dc95(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2; threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 31 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1 %int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int %_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int %18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicXor_75dc95 = OpFunction %void None %7 %atomicXor_75dc95 = OpFunction %void None %7
%10 = OpLabel %10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18 OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicXor_75dc95 %25 = OpFunctionCall %void %atomicXor_75dc95
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
uint atomic_result_1 = 0u; uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1); InterlockedExchange(arg_0, 0u, atomic_result_1);
} }

View File

@ -7,7 +7,7 @@ void atomicXor_c8e6be(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2; threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) { {
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed); atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 25
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0" OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1 %uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint %_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint %17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%atomicXor_c8e6be = OpFunction %void None %6 %atomicXor_c8e6be = OpFunction %void None %6
%9 = OpLabel %9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %6 %compute_main = OpFunction %void None %6
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17 OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicXor_c8e6be %24 = OpFunctionCall %void %atomicXor_c8e6be
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = 0; arg_1 = 0;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_0da285(threadgroup int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int tint_symbol_2; threadgroup int tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = int(); tint_symbol_2 = int();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 32 ; Bound: 26
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
%13 = OpExtInstImport "GLSL.std.450" %13 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1" OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -30,9 +30,7 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
%float_1 = OpConstant %float 1 %float_1 = OpConstant %float 1
%_ptr_Function_float = OpTypePointer Function %float %_ptr_Function_float = OpTypePointer Function %float
%18 = OpConstantNull %float %18 = OpConstantNull %float
%uint_0 = OpConstant %uint 0 %21 = OpConstantNull %int
%bool = OpTypeBool
%27 = OpConstantNull %int
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%frexp_0da285 = OpFunction %void None %7 %frexp_0da285 = OpFunction %void None %7
@ -44,15 +42,8 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %7 %compute_main = OpFunction %void None %7
%20 = OpLabel %20 = OpLabel
%21 = OpLoad %uint %tint_symbol OpStore %arg_1 %21
%23 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %25 None
OpBranchConditional %23 %26 %25
%26 = OpLabel
OpStore %arg_1 %27
OpBranch %25
%25 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%31 = OpFunctionCall %void %frexp_0da285 %25 = OpFunctionCall %void %frexp_0da285
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = int3(0, 0, 0); arg_1 = int3(0, 0, 0);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_40fc9b(threadgroup int3* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int3 tint_symbol_2; threadgroup int3 tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = int3(); tint_symbol_2 = int3();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 33 ; Bound: 27
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
%15 = OpExtInstImport "GLSL.std.450" %15 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1" OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -31,9 +31,7 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
%v3float = OpTypeVector %float 3 %v3float = OpTypeVector %float 3
%16 = OpConstantNull %v3float %16 = OpConstantNull %v3float
%_ptr_Function_v3float = OpTypePointer Function %v3float %_ptr_Function_v3float = OpTypePointer Function %v3float
%uint_0 = OpConstant %uint 0 %22 = OpConstantNull %v3int
%bool = OpTypeBool
%28 = OpConstantNull %v3int
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%frexp_40fc9b = OpFunction %void None %8 %frexp_40fc9b = OpFunction %void None %8
@ -45,15 +43,8 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %8 %compute_main = OpFunction %void None %8
%21 = OpLabel %21 = OpLabel
%22 = OpLoad %uint %tint_symbol OpStore %arg_1 %22
%24 = OpIEqual %bool %22 %uint_0
OpSelectionMerge %26 None
OpBranchConditional %24 %27 %26
%27 = OpLabel
OpStore %arg_1 %28
OpBranch %26
%26 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%32 = OpFunctionCall %void %frexp_40fc9b %26 = OpFunctionCall %void %frexp_40fc9b
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = int2(0, 0); arg_1 = int2(0, 0);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_a3f940(threadgroup int2* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int2 tint_symbol_2; threadgroup int2 tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = int2(); tint_symbol_2 = int2();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 33 ; Bound: 27
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
%15 = OpExtInstImport "GLSL.std.450" %15 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1" OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -31,9 +31,7 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
%v2float = OpTypeVector %float 2 %v2float = OpTypeVector %float 2
%16 = OpConstantNull %v2float %16 = OpConstantNull %v2float
%_ptr_Function_v2float = OpTypePointer Function %v2float %_ptr_Function_v2float = OpTypePointer Function %v2float
%uint_0 = OpConstant %uint 0 %22 = OpConstantNull %v2int
%bool = OpTypeBool
%28 = OpConstantNull %v2int
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%frexp_a3f940 = OpFunction %void None %8 %frexp_a3f940 = OpFunction %void None %8
@ -45,15 +43,8 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %8 %compute_main = OpFunction %void None %8
%21 = OpLabel %21 = OpLabel
%22 = OpLoad %uint %tint_symbol OpStore %arg_1 %22
%24 = OpIEqual %bool %22 %uint_0
OpSelectionMerge %26 None
OpBranchConditional %24 %27 %26
%27 = OpLabel
OpStore %arg_1 %28
OpBranch %26
%26 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%32 = OpFunctionCall %void %frexp_a3f940 %26 = OpFunctionCall %void %frexp_a3f940
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = int4(0, 0, 0, 0); arg_1 = int4(0, 0, 0, 0);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_b87f4e(threadgroup int4* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int4 tint_symbol_2; threadgroup int4 tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = int4(); tint_symbol_2 = int4();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 33 ; Bound: 27
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
%15 = OpExtInstImport "GLSL.std.450" %15 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1" OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -31,9 +31,7 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
%v4float = OpTypeVector %float 4 %v4float = OpTypeVector %float 4
%16 = OpConstantNull %v4float %16 = OpConstantNull %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float %_ptr_Function_v4float = OpTypePointer Function %v4float
%uint_0 = OpConstant %uint 0 %22 = OpConstantNull %v4int
%bool = OpTypeBool
%28 = OpConstantNull %v4int
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%frexp_b87f4e = OpFunction %void None %8 %frexp_b87f4e = OpFunction %void None %8
@ -45,15 +43,8 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %8 %compute_main = OpFunction %void None %8
%21 = OpLabel %21 = OpLabel
%22 = OpLoad %uint %tint_symbol OpStore %arg_1 %22
%24 = OpIEqual %bool %22 %uint_0
OpSelectionMerge %26 None
OpBranchConditional %24 %27 %26
%27 = OpLabel
OpStore %arg_1 %28
OpBranch %26
%26 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%32 = OpFunctionCall %void %frexp_b87f4e %26 = OpFunctionCall %void %frexp_b87f4e
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = float4(0.0f, 0.0f, 0.0f, 0.0f); arg_1 = float4(0.0f, 0.0f, 0.0f, 0.0f);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void modf_1d59e5(threadgroup float4* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup float4 tint_symbol_2; threadgroup float4 tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = float4(); tint_symbol_2 = float4();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 24
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
%13 = OpExtInstImport "GLSL.std.450" %13 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1" OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -29,8 +29,6 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
%8 = OpTypeFunction %void %8 = OpTypeFunction %void
%14 = OpConstantNull %v4float %14 = OpConstantNull %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float %_ptr_Function_v4float = OpTypePointer Function %v4float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%modf_1d59e5 = OpFunction %void None %8 %modf_1d59e5 = OpFunction %void None %8
@ -42,15 +40,8 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %8 %compute_main = OpFunction %void None %8
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %arg_1 %14 OpStore %arg_1 %14
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %modf_1d59e5 %23 = OpFunctionCall %void %modf_1d59e5
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = float2(0.0f, 0.0f); arg_1 = float2(0.0f, 0.0f);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void modf_a128ab(threadgroup float2* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup float2 tint_symbol_2; threadgroup float2 tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = float2(); tint_symbol_2 = float2();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V ; SPIR-V
; Version: 1.3 ; Version: 1.3
; Generator: Google Tint Compiler; 0 ; Generator: Google Tint Compiler; 0
; Bound: 30 ; Bound: 24
; Schema: 0 ; Schema: 0
OpCapability Shader OpCapability Shader
%13 = OpExtInstImport "GLSL.std.450" %13 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1 OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1" OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol" OpName %tint_symbol "tint_symbol"
@ -29,8 +29,6 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
%8 = OpTypeFunction %void %8 = OpTypeFunction %void
%14 = OpConstantNull %v2float %14 = OpConstantNull %v2float
%_ptr_Function_v2float = OpTypePointer Function %v2float %_ptr_Function_v2float = OpTypePointer Function %v2float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%uint_2 = OpConstant %uint 2 %uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264 %uint_264 = OpConstant %uint 264
%modf_a128ab = OpFunction %void None %8 %modf_a128ab = OpFunction %void None %8
@ -42,15 +40,8 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd OpFunctionEnd
%compute_main = OpFunction %void None %8 %compute_main = OpFunction %void None %8
%19 = OpLabel %19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %arg_1 %14 OpStore %arg_1 %14
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264 OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %modf_a128ab %23 = OpFunctionCall %void %modf_a128ab
OpReturn OpReturn
OpFunctionEnd OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) { void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index; const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) { {
arg_1 = float3(0.0f, 0.0f, 0.0f); arg_1 = float3(0.0f, 0.0f, 0.0f);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void modf_bb9088(threadgroup float3* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) { kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup float3 tint_symbol_2; threadgroup float3 tint_symbol_2;
if ((local_invocation_index == 0u)) { {
tint_symbol_2 = float3(); tint_symbol_2 = float3();
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);

Some files were not shown because too many files have changed in this diff Show More