transform: Optimize ZeroInitWorkgroupMemory for arrays

Spread the array zeroing across as many workgroup invocations as possible.

Bug: tint:910
Change-Id: I1cb5a6aaafd2a0a4093ea3b9797c173378bc5605
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60203
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
Ben Clayton 2021-07-30 14:08:06 +00:00 committed by Tint LUCI CQ
parent 669c57f3d1
commit 89a0bde59c
124 changed files with 1812 additions and 1596 deletions

View File

@ -72,9 +72,6 @@ Output Hlsl::Run(const Program* in, const DataMap& inputs) {
manager.Add<PromoteInitializersToConstVar>();
manager.Add<PadArrayElements>();
ZeroInitWorkgroupMemory::Config zero_init_cfg;
zero_init_cfg.init_arrays_with_loop_size_threshold = 32; // 8 scalars
data.Add<ZeroInitWorkgroupMemory::Config>(zero_init_cfg);
data.Add<CanonicalizeEntryPointIO::Config>(
CanonicalizeEntryPointIO::BuiltinStyle::kStructMember);
auto out = manager.Run(in, data);

View File

@ -38,7 +38,7 @@ fn main() {
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32;
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32;
if ((local_invocation_index == 0u)) {
{
tint_symbol_1 = f32();
}
workgroupBarrier();
@ -95,7 +95,7 @@ fn foo(a : f32, tint_symbol_3 : ptr<private, f32>, tint_symbol_4 : ptr<workgroup
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_5 : f32;
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_6 : f32;
if ((local_invocation_index == 0u)) {
{
tint_symbol_5 = f32();
}
workgroupBarrier();
@ -152,7 +152,7 @@ fn main() {
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32;
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32;
if ((local_invocation_index == 0u)) {
{
tint_symbol_1 = f32();
}
workgroupBarrier();

View File

@ -14,17 +14,21 @@
#include "src/transform/zero_init_workgroup_memory.h"
#include <algorithm>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>
#include "src/ast/workgroup_decoration.h"
#include "src/program_builder.h"
#include "src/sem/atomic_type.h"
#include "src/sem/function.h"
#include "src/sem/variable.h"
#include "src/utils/get_or_create.h"
#include "src/utils/unique_vector.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory);
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory::Config);
namespace tint {
namespace transform {
@ -33,152 +37,112 @@ namespace transform {
struct ZeroInitWorkgroupMemory::State {
/// The clone context
CloneContext& ctx;
/// The config
Config cfg;
/// Zero() generates the statements required to zero initialize the workgroup
/// storage expression of type `ty`.
/// @param ty the expression type
/// @param stmts the built statements
/// @param get_expr a function that builds the AST nodes for the expression
void Zero(const sem::Type* ty,
ast::StatementList& stmts,
const std::function<ast::Expression*()>& get_expr) {
if (CanZero(ty)) {
auto* var = get_expr();
auto* zero_init = ctx.dst->Construct(CreateASTTypeFor(ctx, ty));
stmts.emplace_back(
ctx.dst->create<ast::AssignmentStatement>(var, zero_init));
return;
/// An alias to *ctx.dst
ProgramBuilder& b = *ctx.dst;
/// The constant size of the workgroup. If 0, then #workgroup_size_expr should
/// be used instead.
uint32_t workgroup_size_const = 0;
/// The size of the workgroup as an expression generator. Use if
/// #workgroup_size_const is 0.
std::function<ast::Expression*()> workgroup_size_expr;
/// ArrayIndex represents a function on the local invocation index, of
/// the form: `array_index = (local_invocation_index % modulo) / division`
struct ArrayIndex {
/// The RHS of the modulus part of the expression
uint32_t modulo = 1;
/// The RHS of the division part of the expression
uint32_t division = 1;
/// Equality operator
/// @param i the ArrayIndex to compare to this ArrayIndex
/// @returns true if `i` and this ArrayIndex are equal
bool operator==(const ArrayIndex& i) const {
return modulo == i.modulo && division == i.division;
}
if (auto* atomic = ty->As<sem::Atomic>()) {
auto* zero_init =
ctx.dst->Construct(CreateASTTypeFor(ctx, atomic->Type()));
auto* store = ctx.dst->Call("atomicStore", ctx.dst->AddressOf(get_expr()),
zero_init);
stmts.emplace_back(ctx.dst->create<ast::CallStatement>(store));
return;
}
if (auto* str = ty->As<sem::Struct>()) {
for (auto* member : str->Members()) {
auto name = ctx.Clone(member->Declaration()->symbol());
Zero(member->Type(), stmts,
[&] { return ctx.dst->MemberAccessor(get_expr(), name); });
/// Hash function for the ArrayIndex type
struct Hasher {
/// @param i the ArrayIndex to calculate a hash for
/// @returns the hash value for the ArrayIndex `i`
size_t operator()(const ArrayIndex& i) const {
return utils::Hash(i.modulo, i.division);
}
return;
}
};
};
if (auto* arr = ty->As<sem::Array>()) {
if (ShouldEmitForLoop(arr)) {
auto i = ctx.dst->Symbols().New("i");
auto* i_decl = ctx.dst->Decl(ctx.dst->Var(i, ctx.dst->ty.i32()));
auto* cond = ctx.dst->create<ast::BinaryExpression>(
ast::BinaryOp::kLessThan, ctx.dst->Expr(i),
ctx.dst->Expr(static_cast<int>(arr->Count())));
auto* inc = ctx.dst->Assign(i, ctx.dst->Add(i, 1));
ast::StatementList for_stmts;
Zero(arr->ElemType(), for_stmts,
[&] { return ctx.dst->IndexAccessor(get_expr(), i); });
auto* body = ctx.dst->Block(for_stmts);
stmts.emplace_back(ctx.dst->For(i_decl, cond, inc, body));
} else {
for (size_t i = 0; i < arr->Count(); i++) {
Zero(arr->ElemType(), stmts, [&] {
return ctx.dst->IndexAccessor(get_expr(),
static_cast<ProgramBuilder::u32>(i));
});
}
}
return;
}
/// A list of unique ArrayIndex
using ArrayIndices = UniqueVector<ArrayIndex, ArrayIndex::Hasher>;
TINT_UNREACHABLE(Transform, ctx.dst->Diagnostics())
<< "could not zero workgroup type: " << ty->type_name();
}
/// Expression holds information about an expression that is being built for a
/// statement will zero workgroup values.
struct Expression {
/// The AST expression node
ast::Expression* expr = nullptr;
/// The number of iterations required to zero the value
uint32_t num_iterations = 0;
/// All array indices used by this expression
ArrayIndices array_indices;
};
/// @returns true if the type `ty` can be zeroed with a simple zero-value
/// expression in the form of a type constructor without operands. If
/// CanZero() returns false, then the type needs to be initialized by
/// decomposing the initialization into multiple sub-initializations.
/// @param ty the type to inspect
bool CanZero(const sem::Type* ty) {
if (ty->Is<sem::Atomic>()) {
return false;
}
if (auto* str = ty->As<sem::Struct>()) {
for (auto* member : str->Members()) {
if (!CanZero(member->Type())) {
return false;
}
}
}
if (auto* arr = ty->As<sem::Array>()) {
if (ShouldEmitForLoop(arr) || !CanZero(arr->ElemType())) {
return false;
}
}
return true;
}
/// Statement holds information about a statement that will zero workgroup
/// values.
struct Statement {
/// The AST statement node
ast::Statement* stmt;
/// The number of iterations required to zero the value
uint32_t num_iterations;
/// All array indices used by this statement
ArrayIndices array_indices;
};
/// @returns true if the array should be emitted as a for-loop instead of
/// using zero-initializer statements.
/// @param array the array
bool ShouldEmitForLoop(const sem::Array* array) {
// TODO(bclayton): If array sizes become pipeline-overridable then this
// we need to return true for these arrays.
// See https://github.com/gpuweb/gpuweb/pull/1792
return (cfg.init_arrays_with_loop_size_threshold != 0) &&
(array->Size() >= cfg.init_arrays_with_loop_size_threshold);
}
};
/// All statements that zero workgroup memory
std::vector<Statement> statements;
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default;
/// A map of ArrayIndex to the name reserved for the `let` declaration of that
/// index.
std::unordered_map<ArrayIndex, Symbol, ArrayIndex::Hasher> array_index_names;
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default;
/// Constructor
/// @param c the CloneContext used for the transform
explicit State(CloneContext& c) : ctx(c) {}
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
const DataMap& inputs,
DataMap&) {
auto& sem = ctx.src->Sem();
/// Run inserts the workgroup memory zero-initialization logic at the top of
/// the given function
/// @param fn a compute shader entry point function
void Run(ast::Function* fn) {
auto& sem = ctx.src->Sem();
Config cfg;
if (auto* c = inputs.Get<Config>()) {
cfg = *c;
}
CalculateWorkgroupSize(
ast::GetDecoration<ast::WorkgroupDecoration>(fn->decorations()));
for (auto* ast_func : ctx.src->AST().Functions()) {
if (!ast_func->IsEntryPoint()) {
continue;
}
// Generate a list of statements to zero initialize each of the workgroup
// storage variables.
ast::StatementList stmts;
auto* func = sem.Get(ast_func);
// Generate a list of statements to zero initialize each of the
// workgroup storage variables used by `fn`. This will populate #statements.
auto* func = sem.Get(fn);
for (auto* var : func->ReferencedModuleVariables()) {
if (var->StorageClass() != ast::StorageClass::kWorkgroup) {
continue;
if (var->StorageClass() == ast::StorageClass::kWorkgroup) {
BuildZeroingStatements(
var->Type()->UnwrapRef(), [&](uint32_t num_values) {
auto var_name = ctx.Clone(var->Declaration()->symbol());
return Expression{b.Expr(var_name), num_values, ArrayIndices{}};
});
}
State{ctx, cfg}.Zero(var->Type()->UnwrapRef(), stmts, [&] {
auto var_name = ctx.Clone(var->Declaration()->symbol());
return ctx.dst->Expr(var_name);
});
}
if (stmts.empty()) {
continue; // No workgroup variables to initialize.
if (statements.empty()) {
return; // No workgroup variables to initialize.
}
// Scan the entry point for an existing local_invocation_index builtin
// parameter
ast::Expression* local_index = nullptr;
for (auto* param : ast_func->params()) {
std::function<ast::Expression*()> local_index;
for (auto* param : fn->params()) {
if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>(
param->decorations())) {
if (builtin->value() == ast::Builtin::kLocalInvocationIndex) {
local_index = ctx.dst->Expr(ctx.Clone(param->symbol()));
local_index = [=] { return b.Expr(ctx.Clone(param->symbol())); };
break;
}
}
@ -188,9 +152,11 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>(
member->Declaration()->decorations())) {
if (builtin->value() == ast::Builtin::kLocalInvocationIndex) {
auto* param_expr = ctx.dst->Expr(ctx.Clone(param->symbol()));
auto member_name = ctx.Clone(member->Declaration()->symbol());
local_index = ctx.dst->MemberAccessor(param_expr, member_name);
local_index = [=] {
auto* param_expr = b.Expr(ctx.Clone(param->symbol()));
auto member_name = ctx.Clone(member->Declaration()->symbol());
return b.MemberAccessor(param_expr, member_name);
};
break;
}
}
@ -199,37 +165,283 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
}
if (!local_index) {
// No existing local index parameter. Append one to the entry point.
auto* param = ctx.dst->Param(
ctx.dst->Symbols().New("local_invocation_index"), ctx.dst->ty.u32(),
{ctx.dst->Builtin(ast::Builtin::kLocalInvocationIndex)});
ctx.InsertBack(ast_func->params(), param);
local_index = ctx.dst->Expr(param->symbol());
auto* param =
b.Param(b.Symbols().New("local_invocation_index"), b.ty.u32(),
{b.Builtin(ast::Builtin::kLocalInvocationIndex)});
ctx.InsertBack(fn->params(), param);
local_index = [=] { return b.Expr(param->symbol()); };
}
// We only want to zero-initialize the workgroup memory with the first
// shader invocation. Construct an if statement that holds stmts.
// TODO(crbug.com/tint/910): We should attempt to optimize this for arrays.
auto* if_zero_local_index = ctx.dst->create<ast::BinaryExpression>(
ast::BinaryOp::kEqual, local_index, ctx.dst->Expr(0u));
auto* if_stmt = ctx.dst->If(if_zero_local_index, ctx.dst->Block(stmts));
// Take the zeroing statements and bin them by the number of iterations
// required to zero the workgroup data. We then emit these in blocks,
// possibly wrapped in if-statements or for-loops.
std::unordered_map<uint32_t, std::vector<Statement>>
stmts_by_num_iterations;
std::vector<uint32_t> num_sorted_iterations;
for (auto& s : statements) {
auto& stmts = stmts_by_num_iterations[s.num_iterations];
if (stmts.empty()) {
num_sorted_iterations.emplace_back(s.num_iterations);
}
stmts.emplace_back(s);
}
std::sort(num_sorted_iterations.begin(), num_sorted_iterations.end());
// Insert this if-statement at the top of the entry point.
ctx.InsertFront(ast_func->body()->statements(), if_stmt);
// Loop over the statements, grouped by num_iterations.
for (auto num_iterations : num_sorted_iterations) {
auto& stmts = stmts_by_num_iterations[num_iterations];
// Append a single workgroup barrier after the if statement.
ctx.InsertFront(
ast_func->body()->statements(),
ctx.dst->create<ast::CallStatement>(ctx.dst->Call("workgroupBarrier")));
// Gather all the array indices used by all the statements in the block.
ArrayIndices array_indices;
for (auto& s : stmts) {
for (auto& idx : s.array_indices) {
array_indices.add(idx);
}
}
// Determine the block type used to emit these statements.
if (workgroup_size_const == 0 || num_iterations > workgroup_size_const) {
// Either the workgroup size is dynamic, or smaller than num_iterations.
// In either case, we need to generate a for loop to ensure we
// initialize all the array elements.
//
// for (var idx : u32 = local_index;
// idx < num_iterations;
// idx += workgroup_size) {
// ...
// }
auto idx = b.Symbols().New("idx");
auto* init = b.Decl(b.Var(idx, b.ty.u32(), local_index()));
auto* cond = b.create<ast::BinaryExpression>(
ast::BinaryOp::kLessThan, b.Expr(idx), b.Expr(num_iterations));
auto* cont = b.Assign(
idx, b.Add(idx, workgroup_size_const ? b.Expr(workgroup_size_const)
: workgroup_size_expr()));
auto block = DeclareArrayIndices(num_iterations, array_indices,
[&] { return b.Expr(idx); });
for (auto& s : stmts) {
block.emplace_back(s.stmt);
}
auto* for_loop = b.For(init, cond, cont, b.Block(block));
ctx.InsertFront(fn->body()->statements(), for_loop);
} else if (num_iterations < workgroup_size_const) {
// Workgroup size is a known constant, but is greater than
// num_iterations. Emit an if statement:
//
// if (local_index < num_iterations) {
// ...
// }
auto* cond = b.create<ast::BinaryExpression>(
ast::BinaryOp::kLessThan, local_index(), b.Expr(num_iterations));
auto block = DeclareArrayIndices(num_iterations, array_indices,
[&] { return b.Expr(local_index()); });
for (auto& s : stmts) {
block.emplace_back(s.stmt);
}
auto* if_stmt = b.If(cond, b.Block(block));
ctx.InsertFront(fn->body()->statements(), if_stmt);
} else {
// Workgroup size exactly equals num_iterations.
// No need for any conditionals. Just emit a basic block:
//
// {
// ...
// }
auto block = DeclareArrayIndices(num_iterations, array_indices,
[&] { return b.Expr(local_index()); });
for (auto& s : stmts) {
block.emplace_back(s.stmt);
}
ctx.InsertFront(fn->body()->statements(), b.Block(block));
}
}
// Append a single workgroup barrier after the zero initialization.
ctx.InsertFront(fn->body()->statements(),
b.create<ast::CallStatement>(b.Call("workgroupBarrier")));
}
/// BuildZeroingExpr is a function that builds a sub-expression used to zero
/// workgroup values. `num_values` is the number of elements that the
/// expression will be used to zero. Returns the expression.
using BuildZeroingExpr = std::function<Expression(uint32_t num_values)>;
/// BuildZeroingStatements() generates the statements required to zero
/// initialize the workgroup storage expression of type `ty`.
/// @param ty the expression type
/// @param get_expr a function that builds the AST nodes for the expression.
void BuildZeroingStatements(const sem::Type* ty,
const BuildZeroingExpr& get_expr) {
if (CanTriviallyZero(ty)) {
auto var = get_expr(1u);
auto* zero_init = b.Construct(CreateASTTypeFor(ctx, ty));
statements.emplace_back(Statement{b.Assign(var.expr, zero_init),
var.num_iterations, var.array_indices});
return;
}
if (auto* atomic = ty->As<sem::Atomic>()) {
auto* zero_init = b.Construct(CreateASTTypeFor(ctx, atomic->Type()));
auto expr = get_expr(1u);
auto* store = b.Call("atomicStore", b.AddressOf(expr.expr), zero_init);
statements.emplace_back(Statement{b.create<ast::CallStatement>(store),
expr.num_iterations,
expr.array_indices});
return;
}
if (auto* str = ty->As<sem::Struct>()) {
for (auto* member : str->Members()) {
auto name = ctx.Clone(member->Declaration()->symbol());
BuildZeroingStatements(member->Type(), [&](uint32_t num_values) {
auto s = get_expr(num_values);
return Expression{b.MemberAccessor(s.expr, name), s.num_iterations,
s.array_indices};
});
}
return;
}
if (auto* arr = ty->As<sem::Array>()) {
BuildZeroingStatements(arr->ElemType(), [&](uint32_t num_values) {
// num_values is the number of values to zero for the element type.
// The number of iterations required to zero the array and its elements
// is:
// `num_values * arr->Count()`
// The index for this array is:
// `(idx % modulo) / division`
auto modulo = num_values * arr->Count();
auto division = num_values;
auto a = get_expr(modulo);
auto array_indices = a.array_indices;
array_indices.add(ArrayIndex{modulo, division});
auto index =
utils::GetOrCreate(array_index_names, ArrayIndex{modulo, division},
[&] { return b.Symbols().New("i"); });
return Expression{b.IndexAccessor(a.expr, index), a.num_iterations,
array_indices};
});
return;
}
TINT_UNREACHABLE(Transform, b.Diagnostics())
<< "could not zero workgroup type: " << ty->type_name();
}
/// DeclareArrayIndices returns a list of statements that contain the `let`
/// declarations for all of the ArrayIndices.
/// @param num_iterations the number of iterations for the block
/// @param array_indices the list of array indices to generate `let`
/// declarations for
/// @param iteration a function that returns the index of the current
/// iteration.
/// @returns the list of `let` statements that declare the array indices
ast::StatementList DeclareArrayIndices(
uint32_t num_iterations,
const ArrayIndices& array_indices,
const std::function<ast::Expression*()>& iteration) {
ast::StatementList stmts;
std::map<Symbol, ArrayIndex> indices_by_name;
for (auto index : array_indices) {
auto name = array_index_names.at(index);
auto* mod =
(num_iterations > index.modulo)
? b.create<ast::BinaryExpression>(
ast::BinaryOp::kModulo, iteration(), b.Expr(index.modulo))
: iteration();
auto* div = (index.division != 1u) ? b.Div(mod, index.division) : mod;
auto* decl = b.Decl(b.Const(name, b.ty.u32(), div));
stmts.emplace_back(decl);
}
return stmts;
}
/// CalculateWorkgroupSize initializes the members #workgroup_size_const and
/// #workgroup_size_expr with the linear workgroup size.
/// @param deco the workgroup decoration applied to the entry point function
void CalculateWorkgroupSize(const ast::WorkgroupDecoration* deco) {
bool is_signed = false;
workgroup_size_const = 1u;
workgroup_size_expr = nullptr;
for (auto* expr : deco->values()) {
if (!expr) {
continue;
}
auto* sem = ctx.src->Sem().Get(expr);
if (auto c = sem->ConstantValue()) {
if (c.ElementType()->Is<sem::I32>()) {
workgroup_size_const *= static_cast<uint32_t>(c.Elements()[0].i32);
continue;
} else if (c.ElementType()->Is<sem::U32>()) {
workgroup_size_const *= c.Elements()[0].u32;
continue;
}
}
// Constant value could not be found. Build expression instead.
workgroup_size_expr = [this, expr, size = workgroup_size_expr] {
auto* e = ctx.Clone(expr);
if (ctx.src->TypeOf(expr)->UnwrapRef()->Is<sem::I32>()) {
e = b.Construct<ProgramBuilder::u32>(e);
}
return size ? b.Mul(size(), e) : e;
};
}
if (workgroup_size_expr) {
if (workgroup_size_const != 1) {
// Fold workgroup_size_const in to workgroup_size_expr
workgroup_size_expr = [this, is_signed,
const_size = workgroup_size_const,
expr_size = workgroup_size_expr] {
return is_signed
? b.Mul(expr_size(), static_cast<int32_t>(const_size))
: b.Mul(expr_size(), const_size);
};
}
// Indicate that workgroup_size_expr should be used instead of the
// constant.
workgroup_size_const = 0;
}
}
/// @returns true if a variable with store type `ty` can be efficiently zeroed
/// by assignment of a type constructor without operands. If
/// CanTriviallyZero() returns false, then the type needs to be
/// initialized by decomposing the initialization into multiple
/// sub-initializations.
/// @param ty the type to inspect
bool CanTriviallyZero(const sem::Type* ty) {
if (ty->Is<sem::Atomic>()) {
return false;
}
if (auto* str = ty->As<sem::Struct>()) {
for (auto* member : str->Members()) {
if (!CanTriviallyZero(member->Type())) {
return false;
}
}
}
if (ty->Is<sem::Array>()) {
return false;
}
// True for all other storable types
return true;
}
};
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default;
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default;
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, const DataMap&, DataMap&) {
for (auto* fn : ctx.src->AST().Functions()) {
if (fn->pipeline_stage() == ast::PipelineStage::kCompute) {
State{ctx}.Run(fn);
}
}
ctx.Clone();
}
ZeroInitWorkgroupMemory::Config::Config() = default;
ZeroInitWorkgroupMemory::Config::Config(const Config&) = default;
ZeroInitWorkgroupMemory::Config::~Config() = default;
ZeroInitWorkgroupMemory::Config& ZeroInitWorkgroupMemory::Config::operator=(
const Config&) = default;
} // namespace transform
} // namespace tint

View File

@ -26,27 +26,6 @@ namespace transform {
class ZeroInitWorkgroupMemory
: public Castable<ZeroInitWorkgroupMemory, Transform> {
public:
/// Configuration options for the transform
struct Config : public Castable<Config, Data> {
/// Constructor
Config();
/// Copy constructor
Config(const Config&);
/// Destructor
~Config() override;
/// Assignment operator
/// @returns this Config
Config& operator=(const Config&);
/// If greater than 0, then arrays of at least this size in bytes will be
/// zero initialized using a for loop. If 0, then the array is assigned a
/// zero initialized array with a single statement.
uint32_t init_arrays_with_loop_size_threshold = 0;
};
/// Constructor
ZeroInitWorkgroupMemory();

View File

@ -76,7 +76,7 @@ TEST_F(ZeroInitWorkgroupMemoryTest, SingleWorkgroupVar_ExistingLocalIndex) {
var<workgroup> v : i32;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) {
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(v); // Initialization should be inserted above this statement
}
)";
@ -84,8 +84,8 @@ fn f([[builtin(local_invocation_index)]] idx : u32) {
var<workgroup> v : i32;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) {
if ((idx == 0u)) {
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
{
v = i32();
}
workgroupBarrier();
@ -104,7 +104,7 @@ TEST_F(ZeroInitWorkgroupMemoryTest,
var<workgroup> v : i32;
struct Params {
[[builtin(local_invocation_index)]] idx : u32;
[[builtin(local_invocation_index)]] local_idx : u32;
};
[[stage(compute), workgroup_size(1)]]
@ -117,12 +117,12 @@ var<workgroup> v : i32;
struct Params {
[[builtin(local_invocation_index)]]
idx : u32;
local_idx : u32;
};
[[stage(compute), workgroup_size(1)]]
fn f(params : Params) {
if ((params.idx == 0u)) {
{
v = i32();
}
workgroupBarrier();
@ -149,7 +149,7 @@ var<workgroup> v : i32;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
{
v = i32();
}
workgroupBarrier();
@ -162,7 +162,8 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest, MultipleWorkgroupVar_ExistingLocalIndex) {
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size1) {
auto* src = R"(
struct S {
x : i32;
@ -176,7 +177,7 @@ var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) {
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
@ -195,11 +196,246 @@ var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) {
if ((idx == 0u)) {
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
{
a = i32();
b = S();
c = array<S, 32>();
b.x = i32();
}
for(var idx : u32 = local_idx; (idx < 8u); idx = (idx + 1u)) {
let i : u32 = idx;
b.y[i] = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 32u); idx_1 = (idx_1 + 1u)) {
let i_1 : u32 = idx_1;
c[i_1].x = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 256u); idx_2 = (idx_2 + 1u)) {
let i_2 : u32 = (idx_2 / 8u);
let i : u32 = (idx_2 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
ignore(b);
ignore(c);
}
)";
auto got = Run<ZeroInitWorkgroupMemory>(src);
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size_2_3) {
auto* src = R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(2, 3)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
}
)";
auto* expect = R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(2, 3)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
if ((local_idx < 1u)) {
a = i32();
b.x = i32();
}
for(var idx : u32 = local_idx; (idx < 8u); idx = (idx + 6u)) {
let i : u32 = idx;
b.y[i] = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 32u); idx_1 = (idx_1 + 6u)) {
let i_1 : u32 = idx_1;
c[i_1].x = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 256u); idx_2 = (idx_2 + 6u)) {
let i_2 : u32 = (idx_2 / 8u);
let i : u32 = (idx_2 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
ignore(b);
ignore(c);
}
)";
auto got = Run<ZeroInitWorkgroupMemory>(src);
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size_2_3_X) {
auto* src = R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : i32;
[[stage(compute), workgroup_size(2, 3, X)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
}
)";
auto* expect =
R"(
struct S {
x : i32;
y : array<i32, 8>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : i32;
[[stage(compute), workgroup_size(2, 3, X)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
for(var idx : u32 = local_idx; (idx < 1u); idx = (idx + (u32(X) * 6u))) {
a = i32();
b.x = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 8u); idx_1 = (idx_1 + (u32(X) * 6u))) {
let i : u32 = idx_1;
b.y[i] = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 32u); idx_2 = (idx_2 + (u32(X) * 6u))) {
let i_1 : u32 = idx_2;
c[i_1].x = i32();
}
for(var idx_3 : u32 = local_idx; (idx_3 < 256u); idx_3 = (idx_3 + (u32(X) * 6u))) {
let i_2 : u32 = (idx_3 / 8u);
let i : u32 = (idx_3 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
ignore(b);
ignore(c);
}
)";
auto got = Run<ZeroInitWorkgroupMemory>(src);
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest,
MultipleWorkgroupVar_ExistingLocalIndex_Size_5u_X_10u) {
auto* src = R"(
struct S {
x : array<array<i32, 8>, 10>;
y : array<i32, 8>;
z : array<array<array<i32, 8>, 10>, 20>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : u32;
[[stage(compute), workgroup_size(5u, X, 10u)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
ignore(a); // Initialization should be inserted above this statement
ignore(b);
ignore(c);
}
)";
auto* expect =
R"(
struct S {
x : array<array<i32, 8>, 10>;
y : array<i32, 8>;
z : array<array<array<i32, 8>, 10>, 20>;
};
var<workgroup> a : i32;
var<workgroup> b : S;
var<workgroup> c : array<S, 32>;
[[override(1)]] let X : u32;
[[stage(compute), workgroup_size(5u, X, 10u)]]
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
for(var idx : u32 = local_idx; (idx < 1u); idx = (idx + (X * 50u))) {
a = i32();
}
for(var idx_1 : u32 = local_idx; (idx_1 < 8u); idx_1 = (idx_1 + (X * 50u))) {
let i_1 : u32 = idx_1;
b.y[i_1] = i32();
}
for(var idx_2 : u32 = local_idx; (idx_2 < 80u); idx_2 = (idx_2 + (X * 50u))) {
let i : u32 = (idx_2 / 8u);
let i_1 : u32 = (idx_2 % 8u);
b.x[i][i_1] = i32();
}
for(var idx_3 : u32 = local_idx; (idx_3 < 256u); idx_3 = (idx_3 + (X * 50u))) {
let i_4 : u32 = (idx_3 / 8u);
let i_1 : u32 = (idx_3 % 8u);
c[i_4].y[i_1] = i32();
}
for(var idx_4 : u32 = local_idx; (idx_4 < 1600u); idx_4 = (idx_4 + (X * 50u))) {
let i_2 : u32 = (idx_4 / 80u);
let i : u32 = ((idx_4 % 80u) / 8u);
let i_1 : u32 = (idx_4 % 8u);
b.z[i_2][i][i_1] = i32();
}
for(var idx_5 : u32 = local_idx; (idx_5 < 2560u); idx_5 = (idx_5 + (X * 50u))) {
let i_3 : u32 = (idx_5 / 80u);
let i : u32 = ((idx_5 % 80u) / 8u);
let i_1 : u32 = (idx_5 % 8u);
c[i_3].x[i][i_1] = i32();
}
for(var idx_6 : u32 = local_idx; (idx_6 < 51200u); idx_6 = (idx_6 + (X * 50u))) {
let i_5 : u32 = (idx_6 / 1600u);
let i_2 : u32 = ((idx_6 % 1600u) / 80u);
let i : u32 = ((idx_6 % 80u) / 8u);
let i_1 : u32 = (idx_6 % 8u);
c[i_5].z[i_2][i][i_1] = i32();
}
workgroupBarrier();
ignore(a);
@ -247,10 +483,22 @@ var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
{
a = i32();
b = S();
c = array<S, 32>();
b.x = i32();
}
for(var idx : u32 = local_invocation_index; (idx < 8u); idx = (idx + 1u)) {
let i : u32 = idx;
b.y[i] = i32();
}
for(var idx_1 : u32 = local_invocation_index; (idx_1 < 32u); idx_1 = (idx_1 + 1u)) {
let i_1 : u32 = idx_1;
c[i_1].x = i32();
}
for(var idx_2 : u32 = local_invocation_index; (idx_2 < 256u); idx_2 = (idx_2 + 1u)) {
let i_2 : u32 = (idx_2 / 8u);
let i : u32 = (idx_2 % 8u);
c[i_2].y[i] = i32();
}
workgroupBarrier();
ignore(a);
@ -283,12 +531,12 @@ fn f1() {
ignore(c);
}
[[stage(compute), workgroup_size(1)]]
[[stage(compute), workgroup_size(1, 2, 3)]]
fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>) {
ignore(b); // Initialization should be inserted above this statement
}
[[stage(compute), workgroup_size(1)]]
[[stage(compute), workgroup_size(4, 5, 6)]]
fn f3() {
ignore(c); // Initialization should be inserted above this statement
ignore(a);
@ -308,30 +556,50 @@ var<workgroup> c : array<S, 32>;
[[stage(compute), workgroup_size(1)]]
fn f1([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
{
a = i32();
c = array<S, 32>();
}
for(var idx : u32 = local_invocation_index; (idx < 32u); idx = (idx + 1u)) {
let i : u32 = idx;
c[i].x = i32();
}
for(var idx_1 : u32 = local_invocation_index; (idx_1 < 256u); idx_1 = (idx_1 + 1u)) {
let i_1 : u32 = (idx_1 / 8u);
let i_2 : u32 = (idx_1 % 8u);
c[i_1].y[i_2] = i32();
}
workgroupBarrier();
ignore(a);
ignore(c);
}
[[stage(compute), workgroup_size(1)]]
[[stage(compute), workgroup_size(1, 2, 3)]]
fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index_1 : u32) {
if ((local_invocation_index_1 == 0u)) {
b = S();
if ((local_invocation_index_1 < 1u)) {
b.x = i32();
}
for(var idx_2 : u32 = local_invocation_index_1; (idx_2 < 8u); idx_2 = (idx_2 + 6u)) {
let i_3 : u32 = idx_2;
b.y[i_3] = i32();
}
workgroupBarrier();
ignore(b);
}
[[stage(compute), workgroup_size(1)]]
[[stage(compute), workgroup_size(4, 5, 6)]]
fn f3([[builtin(local_invocation_index)]] local_invocation_index_2 : u32) {
if ((local_invocation_index_2 == 0u)) {
c = array<S, 32>();
if ((local_invocation_index_2 < 1u)) {
a = i32();
}
if ((local_invocation_index_2 < 32u)) {
let i_4 : u32 = local_invocation_index_2;
c[i_4].x = i32();
}
for(var idx_3 : u32 = local_invocation_index_2; (idx_3 < 256u); idx_3 = (idx_3 + 120u)) {
let i_5 : u32 = (idx_3 / 8u);
let i_6 : u32 = (idx_3 % 8u);
c[i_5].y[i_6] = i32();
}
workgroupBarrier();
ignore(c);
ignore(a);
@ -356,7 +624,7 @@ fn call_use_v() {
}
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) {
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
call_use_v(); // Initialization should be inserted above this statement
}
)";
@ -372,8 +640,8 @@ fn call_use_v() {
}
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] idx : u32) {
if ((idx == 0u)) {
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
{
v = i32();
}
workgroupBarrier();
@ -404,7 +672,7 @@ var<workgroup> u : atomic<u32>;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
{
atomicStore(&(i), i32());
atomicStore(&(u), u32());
}
@ -449,7 +717,7 @@ var<workgroup> w : S;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
{
w.a = i32();
atomicStore(&(w.i), i32());
w.b = f32();
@ -480,11 +748,9 @@ var<workgroup> w : array<atomic<u32>, 4>;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
atomicStore(&(w[0u]), u32());
atomicStore(&(w[1u]), u32());
atomicStore(&(w[2u]), u32());
atomicStore(&(w[3u]), u32());
for(var idx : u32 = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
let i : u32 = idx;
atomicStore(&(w[i]), u32());
}
workgroupBarrier();
ignore(w);
@ -526,27 +792,13 @@ var<workgroup> w : array<S, 4>;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
w[0u].a = i32();
atomicStore(&(w[0u].i), i32());
w[0u].b = f32();
atomicStore(&(w[0u].u), u32());
w[0u].c = u32();
w[1u].a = i32();
atomicStore(&(w[1u].i), i32());
w[1u].b = f32();
atomicStore(&(w[1u].u), u32());
w[1u].c = u32();
w[2u].a = i32();
atomicStore(&(w[2u].i), i32());
w[2u].b = f32();
atomicStore(&(w[2u].u), u32());
w[2u].c = u32();
w[3u].a = i32();
atomicStore(&(w[3u].i), i32());
w[3u].b = f32();
atomicStore(&(w[3u].u), u32());
w[3u].c = u32();
for(var idx : u32 = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
let i_1 : u32 = idx;
w[i_1].a = i32();
atomicStore(&(w[i_1].i), i32());
w[i_1].b = f32();
atomicStore(&(w[i_1].u), u32());
w[i_1].c = u32();
}
workgroupBarrier();
ignore(w);
@ -558,56 +810,6 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
EXPECT_EQ(expect, str(got));
}
TEST_F(ZeroInitWorkgroupMemoryTest, WorkgroupArray_InitWithLoop) {
auto* src = R"(
struct S {
a : array<i32, 3>; // size: 12, less than the loop threshold
b : array<i32, 4>; // size: 16, equal to the loop threshold
c : array<i32, 5>; // size: 20, greater than the loop threshold
};
var<workgroup> w : S;
[[stage(compute), workgroup_size(1)]]
fn f() {
ignore(w); // Initialization should be inserted above this statement
}
)";
auto* expect = R"(
struct S {
a : array<i32, 3>;
b : array<i32, 4>;
c : array<i32, 5>;
};
var<workgroup> w : S;
[[stage(compute), workgroup_size(1)]]
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
if ((local_invocation_index == 0u)) {
w.a = array<i32, 3>();
for(var i : i32; (i < 4); i = (i + 1)) {
w.b[i] = i32();
}
for(var i_1 : i32; (i_1 < 5); i_1 = (i_1 + 1)) {
w.c[i_1] = i32();
}
}
workgroupBarrier();
ignore(w);
}
)";
ZeroInitWorkgroupMemory::Config cfg;
cfg.init_arrays_with_loop_size_threshold = 16;
DataMap data;
data.Add<ZeroInitWorkgroupMemory::Config>(cfg);
auto got = Run<ZeroInitWorkgroupMemory>(src, data);
EXPECT_EQ(expect, str(got));
}
} // namespace
} // namespace transform
} // namespace tint

View File

@ -16,11 +16,10 @@ struct tint_symbol_2 {
[numthreads(1, 1, 1)]
void f(tint_symbol_2 tint_symbol_1) {
const uint local_invocation_index = tint_symbol_1.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
for(int i = 0; (i < 64); i = (i + 1)) {
s.data[i] = 0;
}
{
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
const uint i = idx;
s.data[i] = 0;
}
}
GroupMemoryBarrierWithGroupSync();

View File

@ -15,13 +15,13 @@ struct Result {
};
kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) {
threadgroup S tint_symbol_2;
if ((local_invocation_index == 0u)) {
S const tint_symbol_1 = {};
tint_symbol_2 = tint_symbol_1;
threadgroup S tint_symbol_1;
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
uint const i = idx;
tint_symbol_1.data.arr[i] = int();
}
threadgroup_barrier(mem_flags::mem_threadgroup);
result.out = tint_symbol_2.data.arr[ubo.dynamic_idx];
result.out = tint_symbol_1.data.arr[ubo.dynamic_idx];
return;
}

View File

@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 38
; Bound: 52
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
@ -18,6 +18,7 @@
OpName %s "s"
OpName %tint_symbol "tint_symbol"
OpName %f "f"
OpName %idx "idx"
OpDecorate %UBO Block
OpMemberDecorate %UBO 0 Offset 0
OpDecorate %ubo NonWritable
@ -47,30 +48,51 @@
%tint_symbol = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid
%16 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0
%_ptr_Function_uint = OpTypePointer Function %uint
%23 = OpConstantNull %uint
%bool = OpTypeBool
%26 = OpConstantNull %S
%uint_0 = OpConstant %uint 0
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%38 = OpConstantNull %int
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%_ptr_Uniform_int = OpTypePointer Uniform %int
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%f = OpFunction %void None %16
%19 = OpLabel
%idx = OpVariable %_ptr_Function_uint Function %23
%20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %s %26
OpStore %idx %20
OpBranch %24
%24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%29 = OpLoad %uint %idx
%30 = OpULessThan %bool %29 %uint_64
%28 = OpLogicalNot %bool %30
OpSelectionMerge %32 None
OpBranchConditional %28 %33 %32
%33 = OpLabel
OpBranch %25
%32 = OpLabel
%34 = OpLoad %uint %idx
%37 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
OpStore %37 %38
OpBranch %26
%26 = OpLabel
%39 = OpLoad %uint %idx
%41 = OpIAdd %uint %39 %uint_1
OpStore %idx %41
OpBranch %24
%25 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%31 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
%33 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
%34 = OpLoad %int %33
%36 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
%37 = OpLoad %int %36
OpStore %31 %37
%46 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
%48 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
%49 = OpLoad %int %48
%50 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %49
%51 = OpLoad %int %50
OpStore %46 %51
OpReturn
OpFunctionEnd

View File

@ -16,11 +16,10 @@ struct tint_symbol_2 {
[numthreads(1, 1, 1)]
void f(tint_symbol_2 tint_symbol_1) {
const uint local_invocation_index = tint_symbol_1.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
for(int i = 0; (i < 64); i = (i + 1)) {
s.data[i] = 0;
}
{
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
const uint i = idx;
s.data[i] = 0;
}
}
GroupMemoryBarrierWithGroupSync();

View File

@ -15,14 +15,14 @@ struct Result {
};
kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) {
threadgroup S tint_symbol_2;
if ((local_invocation_index == 0u)) {
S const tint_symbol_1 = {};
tint_symbol_2 = tint_symbol_1;
threadgroup S tint_symbol_1;
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
uint const i = idx;
tint_symbol_1.data.arr[i] = int();
}
threadgroup_barrier(mem_flags::mem_threadgroup);
tint_symbol_2.data.arr[ubo.dynamic_idx] = 1;
result.out = tint_symbol_2.data.arr[3];
tint_symbol_1.data.arr[ubo.dynamic_idx] = 1;
result.out = tint_symbol_1.data.arr[3];
return;
}

View File

@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 41
; Bound: 55
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
@ -18,6 +18,7 @@
OpName %s "s"
OpName %tint_symbol "tint_symbol"
OpName %f "f"
OpName %idx "idx"
OpDecorate %UBO Block
OpMemberDecorate %UBO 0 Offset 0
OpDecorate %ubo NonWritable
@ -47,34 +48,55 @@
%tint_symbol = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid
%16 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0
%_ptr_Function_uint = OpTypePointer Function %uint
%23 = OpConstantNull %uint
%bool = OpTypeBool
%26 = OpConstantNull %S
%uint_0 = OpConstant %uint 0
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%38 = OpConstantNull %int
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%_ptr_Uniform_int = OpTypePointer Uniform %int
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
%int_1 = OpConstant %int 1
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%int_3 = OpConstant %int 3
%f = OpFunction %void None %16
%19 = OpLabel
%idx = OpVariable %_ptr_Function_uint Function %23
%20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %s %26
OpStore %idx %20
OpBranch %24
%24 = OpLabel
OpLoopMerge %25 %26 None
OpBranch %27
%27 = OpLabel
%29 = OpLoad %uint %idx
%30 = OpULessThan %bool %29 %uint_64
%28 = OpLogicalNot %bool %30
OpSelectionMerge %32 None
OpBranchConditional %28 %33 %32
%33 = OpLabel
OpBranch %25
%32 = OpLabel
%34 = OpLoad %uint %idx
%37 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
OpStore %37 %38
OpBranch %26
%26 = OpLabel
%39 = OpLoad %uint %idx
%41 = OpIAdd %uint %39 %uint_1
OpStore %idx %41
OpBranch %24
%25 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%31 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
%32 = OpLoad %int %31
%34 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %32
OpStore %34 %int_1
%37 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
%39 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %int_3
%40 = OpLoad %int %39
OpStore %37 %40
%46 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
%47 = OpLoad %int %46
%48 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %47
OpStore %48 %int_1
%51 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
%53 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %int_3
%54 = OpLoad %int %53
OpStore %51 %54
OpReturn
OpFunctionEnd

View File

@ -59,24 +59,12 @@ void main(tint_symbol_1 tint_symbol) {
const uint3 local_id = tint_symbol.local_id;
const uint3 global_id = tint_symbol.global_id;
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
for(int i = 0; (i < 64); i = (i + 1)) {
{
for(int i_1 = 0; (i_1 < 64); i_1 = (i_1 + 1)) {
mm_Asub[i][i_1] = 0.0f;
}
}
}
}
{
for(int i_2 = 0; (i_2 < 64); i_2 = (i_2 + 1)) {
{
for(int i_3 = 0; (i_3 < 64); i_3 = (i_3 + 1)) {
mm_Bsub[i_2][i_3] = 0.0f;
}
}
}
{
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
const uint i = (idx / 64u);
const uint i_1 = (idx % 64u);
mm_Asub[i][i_1] = 0.0f;
mm_Bsub[i][i_1] = 0.0f;
}
}
GroupMemoryBarrierWithGroupSync();

View File

@ -51,13 +51,13 @@ void mm_write(constant Uniforms& uniforms, device Matrix& resultMatrix, uint row
}
kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3 global_id [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& uniforms [[buffer(3)]], const device Matrix& firstMatrix [[buffer(0)]], const device Matrix& secondMatrix [[buffer(1)]], device Matrix& resultMatrix [[buffer(2)]]) {
threadgroup tint_array_wrapper tint_symbol_4;
threadgroup tint_array_wrapper tint_symbol_5;
if ((local_invocation_index == 0u)) {
tint_array_wrapper const tint_symbol_2 = {.arr={}};
tint_symbol_4 = tint_symbol_2;
tint_array_wrapper const tint_symbol_3 = {.arr={}};
tint_symbol_5 = tint_symbol_3;
threadgroup tint_array_wrapper tint_symbol_2;
threadgroup tint_array_wrapper tint_symbol_3;
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
uint const i = (idx / 64u);
uint const i_1 = (idx % 64u);
tint_symbol_2.arr[i].arr[i_1] = float();
tint_symbol_3.arr[i].arr[i_1] = float();
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint const tileRow = (local_id.y * RowPerThread);
@ -80,23 +80,23 @@ kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3
for(uint innerCol = 0u; (innerCol < ColPerThreadA); innerCol = (innerCol + 1u)) {
uint const inputRow = (tileRow + innerRow);
uint const inputCol = (tileColA + innerCol);
tint_symbol_4.arr[inputRow].arr[inputCol] = mm_readA(uniforms, firstMatrix, (globalRow + innerRow), ((t * TileInner) + inputCol));
tint_symbol_2.arr[inputRow].arr[inputCol] = mm_readA(uniforms, firstMatrix, (globalRow + innerRow), ((t * TileInner) + inputCol));
}
}
for(uint innerRow = 0u; (innerRow < RowPerThreadB); innerRow = (innerRow + 1u)) {
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
uint const inputRow = (tileRowB + innerRow);
uint const inputCol = (tileCol + innerCol);
tint_symbol_5.arr[innerCol].arr[inputCol] = mm_readB(uniforms, secondMatrix, ((t * TileInner) + inputRow), (globalCol + innerCol));
tint_symbol_3.arr[innerCol].arr[inputCol] = mm_readB(uniforms, secondMatrix, ((t * TileInner) + inputRow), (globalCol + innerCol));
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
for(uint k = 0u; (k < TileInner); k = (k + 1u)) {
for(uint inner = 0u; (inner < ColPerThread); inner = (inner + 1u)) {
BCached.arr[inner] = tint_symbol_5.arr[k].arr[(tileCol + inner)];
BCached.arr[inner] = tint_symbol_3.arr[k].arr[(tileCol + inner)];
}
for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
ACached = tint_symbol_4.arr[(tileRow + innerRow)].arr[k];
ACached = tint_symbol_2.arr[(tileRow + innerRow)].arr[k];
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
uint const index = ((innerRow * ColPerThread) + innerCol);
acc.arr[index] = (acc.arr[index] + (ACached * BCached.arr[innerCol]));

View File

@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 356
; Bound: 372
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
@ -38,6 +38,7 @@
OpName %col_1 "col"
OpName %value "value"
OpName %main "main"
OpName %idx "idx"
OpName %acc "acc"
OpName %ACached "ACached"
OpName %BCached "BCached"
@ -114,20 +115,21 @@
%void = OpTypeVoid
%75 = OpTypeFunction %void %uint %uint %float
%98 = OpTypeFunction %void
%105 = OpConstantNull %_arr__arr_float_TileAOuter_TileAOuter
%_ptr_Function_uint = OpTypePointer Function %uint
%104 = OpConstantNull %uint
%uint_4096 = OpConstant %uint 4096
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%121 = OpConstantNull %float
%uint_256 = OpConstant %uint 256
%uint_264 = OpConstant %uint 264
%uint_16 = OpConstant %uint 16
%_arr_float_uint_16 = OpTypeArray %float %uint_16
%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16
%129 = OpConstantNull %_arr_float_uint_16
%149 = OpConstantNull %_arr_float_uint_16
%_ptr_Function_float = OpTypePointer Function %float
%132 = OpConstantNull %float
%_arr_float_RowPerThread = OpTypeArray %float %RowPerThread
%_ptr_Function__arr_float_RowPerThread = OpTypePointer Function %_arr_float_RowPerThread
%136 = OpConstantNull %_arr_float_RowPerThread
%_ptr_Function_uint = OpTypePointer Function %uint
%139 = OpConstantNull %uint
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%155 = OpConstantNull %_arr_float_RowPerThread
%mm_readA = OpFunction %float None %25
%row = OpFunctionParameter %uint
%col = OpFunctionParameter %uint
@ -218,377 +220,399 @@
OpFunctionEnd
%main = OpFunction %void None %98
%100 = OpLabel
%acc = OpVariable %_ptr_Function__arr_float_uint_16 Function %129
%ACached = OpVariable %_ptr_Function_float Function %132
%BCached = OpVariable %_ptr_Function__arr_float_RowPerThread Function %136
%index = OpVariable %_ptr_Function_uint Function %139
%t = OpVariable %_ptr_Function_uint Function %139
%innerRow = OpVariable %_ptr_Function_uint Function %139
%innerCol = OpVariable %_ptr_Function_uint Function %139
%innerRow_0 = OpVariable %_ptr_Function_uint Function %139
%innerCol_0 = OpVariable %_ptr_Function_uint Function %139
%k = OpVariable %_ptr_Function_uint Function %139
%inner = OpVariable %_ptr_Function_uint Function %139
%innerRow_1 = OpVariable %_ptr_Function_uint Function %139
%innerCol_1 = OpVariable %_ptr_Function_uint Function %139
%innerRow_2 = OpVariable %_ptr_Function_uint Function %139
%innerCol_2 = OpVariable %_ptr_Function_uint Function %139
%idx = OpVariable %_ptr_Function_uint Function %104
%acc = OpVariable %_ptr_Function__arr_float_uint_16 Function %149
%ACached = OpVariable %_ptr_Function_float Function %121
%BCached = OpVariable %_ptr_Function__arr_float_RowPerThread Function %155
%index = OpVariable %_ptr_Function_uint Function %104
%t = OpVariable %_ptr_Function_uint Function %104
%innerRow = OpVariable %_ptr_Function_uint Function %104
%innerCol = OpVariable %_ptr_Function_uint Function %104
%innerRow_0 = OpVariable %_ptr_Function_uint Function %104
%innerCol_0 = OpVariable %_ptr_Function_uint Function %104
%k = OpVariable %_ptr_Function_uint Function %104
%inner = OpVariable %_ptr_Function_uint Function %104
%innerRow_1 = OpVariable %_ptr_Function_uint Function %104
%innerCol_1 = OpVariable %_ptr_Function_uint Function %104
%innerRow_2 = OpVariable %_ptr_Function_uint Function %104
%innerCol_2 = OpVariable %_ptr_Function_uint Function %104
%101 = OpLoad %uint %tint_symbol_2
%102 = OpIEqual %bool %101 %uint_0
OpSelectionMerge %103 None
OpBranchConditional %102 %104 %103
%104 = OpLabel
OpStore %mm_Asub %105
OpStore %mm_Bsub %105
OpBranch %103
%103 = OpLabel
OpStore %idx %101
OpBranch %105
%105 = OpLabel
OpLoopMerge %106 %107 None
OpBranch %108
%108 = OpLabel
%110 = OpLoad %uint %idx
%112 = OpULessThan %bool %110 %uint_4096
%109 = OpLogicalNot %bool %112
OpSelectionMerge %113 None
OpBranchConditional %109 %114 %113
%114 = OpLabel
OpBranch %106
%113 = OpLabel
%115 = OpLoad %uint %idx
%116 = OpUDiv %uint %115 %TileAOuter
%117 = OpLoad %uint %idx
%118 = OpUMod %uint %117 %TileAOuter
%120 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %116 %118
OpStore %120 %121
%122 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %116 %118
OpStore %122 %121
OpBranch %107
%107 = OpLabel
%123 = OpLoad %uint %idx
%125 = OpIAdd %uint %123 %uint_256
OpStore %idx %125
OpBranch %105
%106 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%108 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
%109 = OpLoad %uint %108
%110 = OpIMul %uint %109 %RowPerThread
%111 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%112 = OpLoad %uint %111
%113 = OpIMul %uint %112 %RowPerThread
%114 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_1
%115 = OpLoad %uint %114
%116 = OpIMul %uint %115 %RowPerThread
%117 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%118 = OpLoad %uint %117
%119 = OpIMul %uint %118 %RowPerThread
%120 = OpAccessChain %_ptr_Uniform_uint %uniforms %uint_1
%121 = OpLoad %uint %120
%122 = OpISub %uint %121 %uint_1
%123 = OpUDiv %uint %122 %TileAOuter
%124 = OpIAdd %uint %123 %uint_1
%128 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
%129 = OpLoad %uint %128
%130 = OpIMul %uint %129 %RowPerThread
%131 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%132 = OpLoad %uint %131
%133 = OpIMul %uint %132 %RowPerThread
%134 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_1
%135 = OpLoad %uint %134
%136 = OpIMul %uint %135 %RowPerThread
%137 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%138 = OpLoad %uint %137
%139 = OpIMul %uint %138 %RowPerThread
%140 = OpAccessChain %_ptr_Uniform_uint %uniforms %uint_1
%141 = OpLoad %uint %140
%142 = OpISub %uint %141 %uint_1
%143 = OpUDiv %uint %142 %TileAOuter
%144 = OpIAdd %uint %143 %uint_1
OpStore %index %uint_0
OpBranch %140
%140 = OpLabel
OpLoopMerge %141 %142 None
OpBranch %143
%143 = OpLabel
%145 = OpLoad %uint %index
%146 = OpIMul %uint %RowPerThread %RowPerThread
%147 = OpULessThan %bool %145 %146
%144 = OpLogicalNot %bool %147
OpSelectionMerge %148 None
OpBranchConditional %144 %149 %148
%149 = OpLabel
OpBranch %141
%148 = OpLabel
%150 = OpLoad %uint %index
%151 = OpAccessChain %_ptr_Function_float %acc %150
OpStore %151 %float_0
OpBranch %142
%142 = OpLabel
%152 = OpLoad %uint %index
%153 = OpIAdd %uint %152 %uint_1
OpStore %index %153
OpBranch %140
%141 = OpLabel
%154 = OpUDiv %uint %TileAOuter %uint_16
%155 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%156 = OpLoad %uint %155
%157 = OpIMul %uint %156 %154
%158 = OpUDiv %uint %TileAOuter %uint_16
%159 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
%160 = OpLoad %uint %159
%161 = OpIMul %uint %160 %158
OpStore %t %uint_0
OpBranch %163
%163 = OpLabel
OpLoopMerge %164 %165 None
OpBranch %166
OpBranch %157
%157 = OpLabel
OpLoopMerge %158 %159 None
OpBranch %160
%160 = OpLabel
%162 = OpLoad %uint %index
%163 = OpIMul %uint %RowPerThread %RowPerThread
%164 = OpULessThan %bool %162 %163
%161 = OpLogicalNot %bool %164
OpSelectionMerge %165 None
OpBranchConditional %161 %166 %165
%166 = OpLabel
%168 = OpLoad %uint %t
%169 = OpULessThan %bool %168 %124
%167 = OpLogicalNot %bool %169
OpSelectionMerge %170 None
OpBranchConditional %167 %171 %170
%171 = OpLabel
OpBranch %164
%170 = OpLabel
OpStore %innerRow %uint_0
OpBranch %173
%173 = OpLabel
OpLoopMerge %174 %175 None
OpBranch %176
%176 = OpLabel
%178 = OpLoad %uint %innerRow
%179 = OpULessThan %bool %178 %RowPerThread
%177 = OpLogicalNot %bool %179
OpSelectionMerge %180 None
OpBranchConditional %177 %181 %180
%181 = OpLabel
OpBranch %174
OpBranch %158
%165 = OpLabel
%167 = OpLoad %uint %index
%168 = OpAccessChain %_ptr_Function_float %acc %167
OpStore %168 %float_0
OpBranch %159
%159 = OpLabel
%169 = OpLoad %uint %index
%170 = OpIAdd %uint %169 %uint_1
OpStore %index %170
OpBranch %157
%158 = OpLabel
%171 = OpUDiv %uint %TileAOuter %uint_16
%172 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
%173 = OpLoad %uint %172
%174 = OpIMul %uint %173 %171
%175 = OpUDiv %uint %TileAOuter %uint_16
%176 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
%177 = OpLoad %uint %176
%178 = OpIMul %uint %177 %175
OpStore %t %uint_0
OpBranch %180
%180 = OpLabel
OpStore %innerCol %uint_0
OpLoopMerge %181 %182 None
OpBranch %183
%183 = OpLabel
OpLoopMerge %184 %185 None
OpBranch %186
%186 = OpLabel
%188 = OpLoad %uint %innerCol
%189 = OpULessThan %bool %188 %154
%187 = OpLogicalNot %bool %189
OpSelectionMerge %190 None
OpBranchConditional %187 %191 %190
%191 = OpLabel
OpBranch %184
%185 = OpLoad %uint %t
%186 = OpULessThan %bool %185 %144
%184 = OpLogicalNot %bool %186
OpSelectionMerge %187 None
OpBranchConditional %184 %188 %187
%188 = OpLabel
OpBranch %181
%187 = OpLabel
OpStore %innerRow %uint_0
OpBranch %190
%190 = OpLabel
%192 = OpLoad %uint %innerRow
%193 = OpIAdd %uint %110 %192
%194 = OpLoad %uint %innerCol
%195 = OpIAdd %uint %157 %194
%197 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %193 %195
%199 = OpLoad %uint %innerRow
%200 = OpIAdd %uint %116 %199
%201 = OpLoad %uint %t
%202 = OpIMul %uint %201 %TileAOuter
%203 = OpIAdd %uint %202 %195
%198 = OpFunctionCall %float %mm_readA %200 %203
OpStore %197 %198
OpBranch %185
%185 = OpLabel
%204 = OpLoad %uint %innerCol
%205 = OpIAdd %uint %204 %uint_1
OpStore %innerCol %205
OpBranch %183
%184 = OpLabel
OpBranch %175
%175 = OpLabel
%206 = OpLoad %uint %innerRow
%207 = OpIAdd %uint %206 %uint_1
OpStore %innerRow %207
OpBranch %173
%174 = OpLabel
OpLoopMerge %191 %192 None
OpBranch %193
%193 = OpLabel
%195 = OpLoad %uint %innerRow
%196 = OpULessThan %bool %195 %RowPerThread
%194 = OpLogicalNot %bool %196
OpSelectionMerge %197 None
OpBranchConditional %194 %198 %197
%198 = OpLabel
OpBranch %191
%197 = OpLabel
OpStore %innerCol %uint_0
OpBranch %200
%200 = OpLabel
OpLoopMerge %201 %202 None
OpBranch %203
%203 = OpLabel
%205 = OpLoad %uint %innerCol
%206 = OpULessThan %bool %205 %171
%204 = OpLogicalNot %bool %206
OpSelectionMerge %207 None
OpBranchConditional %204 %208 %207
%208 = OpLabel
OpBranch %201
%207 = OpLabel
%209 = OpLoad %uint %innerRow
%210 = OpIAdd %uint %130 %209
%211 = OpLoad %uint %innerCol
%212 = OpIAdd %uint %174 %211
%213 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %210 %212
%215 = OpLoad %uint %innerRow
%216 = OpIAdd %uint %136 %215
%217 = OpLoad %uint %t
%218 = OpIMul %uint %217 %TileAOuter
%219 = OpIAdd %uint %218 %212
%214 = OpFunctionCall %float %mm_readA %216 %219
OpStore %213 %214
OpBranch %202
%202 = OpLabel
%220 = OpLoad %uint %innerCol
%221 = OpIAdd %uint %220 %uint_1
OpStore %innerCol %221
OpBranch %200
%201 = OpLabel
OpBranch %192
%192 = OpLabel
%222 = OpLoad %uint %innerRow
%223 = OpIAdd %uint %222 %uint_1
OpStore %innerRow %223
OpBranch %190
%191 = OpLabel
OpStore %innerRow_0 %uint_0
OpBranch %209
%209 = OpLabel
OpLoopMerge %210 %211 None
OpBranch %212
%212 = OpLabel
%214 = OpLoad %uint %innerRow_0
%215 = OpULessThan %bool %214 %158
%213 = OpLogicalNot %bool %215
OpSelectionMerge %216 None
OpBranchConditional %213 %217 %216
%217 = OpLabel
OpBranch %210
%216 = OpLabel
OpBranch %225
%225 = OpLabel
OpLoopMerge %226 %227 None
OpBranch %228
%228 = OpLabel
%230 = OpLoad %uint %innerRow_0
%231 = OpULessThan %bool %230 %175
%229 = OpLogicalNot %bool %231
OpSelectionMerge %232 None
OpBranchConditional %229 %233 %232
%233 = OpLabel
OpBranch %226
%232 = OpLabel
OpStore %innerCol_0 %uint_0
OpBranch %219
%219 = OpLabel
OpLoopMerge %220 %221 None
OpBranch %222
%222 = OpLabel
%224 = OpLoad %uint %innerCol_0
%225 = OpULessThan %bool %224 %RowPerThread
%223 = OpLogicalNot %bool %225
OpSelectionMerge %226 None
OpBranchConditional %223 %227 %226
%227 = OpLabel
OpBranch %220
%226 = OpLabel
%228 = OpLoad %uint %innerRow_0
%229 = OpIAdd %uint %161 %228
%230 = OpLoad %uint %innerCol_0
%231 = OpIAdd %uint %113 %230
%232 = OpLoad %uint %innerCol_0
%233 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %232 %231
%235 = OpLoad %uint %t
%236 = OpIMul %uint %235 %TileAOuter
%237 = OpIAdd %uint %236 %229
%238 = OpLoad %uint %innerCol_0
%239 = OpIAdd %uint %119 %238
%234 = OpFunctionCall %float %mm_readB %237 %239
OpStore %233 %234
OpBranch %221
%221 = OpLabel
OpBranch %235
%235 = OpLabel
OpLoopMerge %236 %237 None
OpBranch %238
%238 = OpLabel
%240 = OpLoad %uint %innerCol_0
%241 = OpIAdd %uint %240 %uint_1
OpStore %innerCol_0 %241
OpBranch %219
%220 = OpLabel
OpBranch %211
%211 = OpLabel
%242 = OpLoad %uint %innerRow_0
%243 = OpIAdd %uint %242 %uint_1
OpStore %innerRow_0 %243
OpBranch %209
%210 = OpLabel
%241 = OpULessThan %bool %240 %RowPerThread
%239 = OpLogicalNot %bool %241
OpSelectionMerge %242 None
OpBranchConditional %239 %243 %242
%243 = OpLabel
OpBranch %236
%242 = OpLabel
%244 = OpLoad %uint %innerRow_0
%245 = OpIAdd %uint %178 %244
%246 = OpLoad %uint %innerCol_0
%247 = OpIAdd %uint %133 %246
%248 = OpLoad %uint %innerCol_0
%249 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %248 %247
%251 = OpLoad %uint %t
%252 = OpIMul %uint %251 %TileAOuter
%253 = OpIAdd %uint %252 %245
%254 = OpLoad %uint %innerCol_0
%255 = OpIAdd %uint %139 %254
%250 = OpFunctionCall %float %mm_readB %253 %255
OpStore %249 %250
OpBranch %237
%237 = OpLabel
%256 = OpLoad %uint %innerCol_0
%257 = OpIAdd %uint %256 %uint_1
OpStore %innerCol_0 %257
OpBranch %235
%236 = OpLabel
OpBranch %227
%227 = OpLabel
%258 = OpLoad %uint %innerRow_0
%259 = OpIAdd %uint %258 %uint_1
OpStore %innerRow_0 %259
OpBranch %225
%226 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
OpStore %k %uint_0
OpBranch %246
%246 = OpLabel
OpLoopMerge %247 %248 None
OpBranch %249
%249 = OpLabel
%251 = OpLoad %uint %k
%252 = OpULessThan %bool %251 %TileAOuter
%250 = OpLogicalNot %bool %252
OpSelectionMerge %253 None
OpBranchConditional %250 %254 %253
%254 = OpLabel
OpBranch %247
%253 = OpLabel
OpStore %inner %uint_0
OpBranch %256
%256 = OpLabel
OpLoopMerge %257 %258 None
OpBranch %259
%259 = OpLabel
%261 = OpLoad %uint %inner
%262 = OpULessThan %bool %261 %RowPerThread
%260 = OpLogicalNot %bool %262
OpSelectionMerge %263 None
OpBranchConditional %260 %264 %263
%264 = OpLabel
OpBranch %257
%263 = OpLabel
%265 = OpLoad %uint %inner
%266 = OpAccessChain %_ptr_Function_float %BCached %265
OpBranch %262
%262 = OpLabel
OpLoopMerge %263 %264 None
OpBranch %265
%265 = OpLabel
%267 = OpLoad %uint %k
%268 = OpLoad %uint %inner
%269 = OpIAdd %uint %113 %268
%270 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %267 %269
%271 = OpLoad %float %270
OpStore %266 %271
OpBranch %258
%258 = OpLabel
%272 = OpLoad %uint %inner
%273 = OpIAdd %uint %272 %uint_1
OpStore %inner %273
OpBranch %256
%257 = OpLabel
OpStore %innerRow_1 %uint_0
%268 = OpULessThan %bool %267 %TileAOuter
%266 = OpLogicalNot %bool %268
OpSelectionMerge %269 None
OpBranchConditional %266 %270 %269
%270 = OpLabel
OpBranch %263
%269 = OpLabel
OpStore %inner %uint_0
OpBranch %272
%272 = OpLabel
OpLoopMerge %273 %274 None
OpBranch %275
%275 = OpLabel
OpLoopMerge %276 %277 None
OpBranch %278
%278 = OpLabel
%280 = OpLoad %uint %innerRow_1
%281 = OpULessThan %bool %280 %RowPerThread
%279 = OpLogicalNot %bool %281
OpSelectionMerge %282 None
OpBranchConditional %279 %283 %282
%283 = OpLabel
OpBranch %276
%282 = OpLabel
%284 = OpLoad %uint %innerRow_1
%285 = OpIAdd %uint %110 %284
%286 = OpLoad %uint %k
%287 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %285 %286
%288 = OpLoad %float %287
OpStore %ACached %288
%277 = OpLoad %uint %inner
%278 = OpULessThan %bool %277 %RowPerThread
%276 = OpLogicalNot %bool %278
OpSelectionMerge %279 None
OpBranchConditional %276 %280 %279
%280 = OpLabel
OpBranch %273
%279 = OpLabel
%281 = OpLoad %uint %inner
%282 = OpAccessChain %_ptr_Function_float %BCached %281
%283 = OpLoad %uint %k
%284 = OpLoad %uint %inner
%285 = OpIAdd %uint %133 %284
%286 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %283 %285
%287 = OpLoad %float %286
OpStore %282 %287
OpBranch %274
%274 = OpLabel
%288 = OpLoad %uint %inner
%289 = OpIAdd %uint %288 %uint_1
OpStore %inner %289
OpBranch %272
%273 = OpLabel
OpStore %innerRow_1 %uint_0
OpBranch %291
%291 = OpLabel
OpLoopMerge %292 %293 None
OpBranch %294
%294 = OpLabel
%296 = OpLoad %uint %innerRow_1
%297 = OpULessThan %bool %296 %RowPerThread
%295 = OpLogicalNot %bool %297
OpSelectionMerge %298 None
OpBranchConditional %295 %299 %298
%299 = OpLabel
OpBranch %292
%298 = OpLabel
%300 = OpLoad %uint %innerRow_1
%301 = OpIAdd %uint %130 %300
%302 = OpLoad %uint %k
%303 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %301 %302
%304 = OpLoad %float %303
OpStore %ACached %304
OpStore %innerCol_1 %uint_0
OpBranch %290
%290 = OpLabel
OpLoopMerge %291 %292 None
OpBranch %306
%306 = OpLabel
OpLoopMerge %307 %308 None
OpBranch %309
%309 = OpLabel
%311 = OpLoad %uint %innerCol_1
%312 = OpULessThan %bool %311 %RowPerThread
%310 = OpLogicalNot %bool %312
OpSelectionMerge %313 None
OpBranchConditional %310 %314 %313
%314 = OpLabel
OpBranch %307
%313 = OpLabel
%315 = OpLoad %uint %innerRow_1
%316 = OpIMul %uint %315 %RowPerThread
%317 = OpLoad %uint %innerCol_1
%318 = OpIAdd %uint %316 %317
%319 = OpAccessChain %_ptr_Function_float %acc %318
%320 = OpAccessChain %_ptr_Function_float %acc %318
%321 = OpLoad %float %320
%322 = OpLoad %float %ACached
%323 = OpLoad %uint %innerCol_1
%324 = OpAccessChain %_ptr_Function_float %BCached %323
%325 = OpLoad %float %324
%326 = OpFMul %float %322 %325
%327 = OpFAdd %float %321 %326
OpStore %319 %327
OpBranch %308
%308 = OpLabel
%328 = OpLoad %uint %innerCol_1
%329 = OpIAdd %uint %328 %uint_1
OpStore %innerCol_1 %329
OpBranch %306
%307 = OpLabel
OpBranch %293
%293 = OpLabel
%295 = OpLoad %uint %innerCol_1
%296 = OpULessThan %bool %295 %RowPerThread
%294 = OpLogicalNot %bool %296
OpSelectionMerge %297 None
OpBranchConditional %294 %298 %297
%298 = OpLabel
%330 = OpLoad %uint %innerRow_1
%331 = OpIAdd %uint %330 %uint_1
OpStore %innerRow_1 %331
OpBranch %291
%297 = OpLabel
%299 = OpLoad %uint %innerRow_1
%300 = OpIMul %uint %299 %RowPerThread
%301 = OpLoad %uint %innerCol_1
%302 = OpIAdd %uint %300 %301
%303 = OpAccessChain %_ptr_Function_float %acc %302
%304 = OpAccessChain %_ptr_Function_float %acc %302
%305 = OpLoad %float %304
%306 = OpLoad %float %ACached
%307 = OpLoad %uint %innerCol_1
%308 = OpAccessChain %_ptr_Function_float %BCached %307
%309 = OpLoad %float %308
%310 = OpFMul %float %306 %309
%311 = OpFAdd %float %305 %310
OpStore %303 %311
OpBranch %292
%292 = OpLabel
%312 = OpLoad %uint %innerCol_1
%313 = OpIAdd %uint %312 %uint_1
OpStore %innerCol_1 %313
OpBranch %290
%291 = OpLabel
OpBranch %277
%277 = OpLabel
%314 = OpLoad %uint %innerRow_1
%315 = OpIAdd %uint %314 %uint_1
OpStore %innerRow_1 %315
OpBranch %275
%276 = OpLabel
OpBranch %248
%248 = OpLabel
%316 = OpLoad %uint %k
%317 = OpIAdd %uint %316 %uint_1
OpStore %k %317
OpBranch %246
%247 = OpLabel
OpBranch %264
%264 = OpLabel
%332 = OpLoad %uint %k
%333 = OpIAdd %uint %332 %uint_1
OpStore %k %333
OpBranch %262
%263 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
OpBranch %165
%165 = OpLabel
%319 = OpLoad %uint %t
%320 = OpIAdd %uint %319 %uint_1
OpStore %t %320
OpBranch %163
%164 = OpLabel
OpBranch %182
%182 = OpLabel
%335 = OpLoad %uint %t
%336 = OpIAdd %uint %335 %uint_1
OpStore %t %336
OpBranch %180
%181 = OpLabel
OpStore %innerRow_2 %uint_0
OpBranch %322
%322 = OpLabel
OpLoopMerge %323 %324 None
OpBranch %325
%325 = OpLabel
%327 = OpLoad %uint %innerRow_2
%328 = OpULessThan %bool %327 %RowPerThread
%326 = OpLogicalNot %bool %328
OpSelectionMerge %329 None
OpBranchConditional %326 %330 %329
%330 = OpLabel
OpBranch %323
%329 = OpLabel
OpBranch %338
%338 = OpLabel
OpLoopMerge %339 %340 None
OpBranch %341
%341 = OpLabel
%343 = OpLoad %uint %innerRow_2
%344 = OpULessThan %bool %343 %RowPerThread
%342 = OpLogicalNot %bool %344
OpSelectionMerge %345 None
OpBranchConditional %342 %346 %345
%346 = OpLabel
OpBranch %339
%345 = OpLabel
OpStore %innerCol_2 %uint_0
OpBranch %332
%332 = OpLabel
OpLoopMerge %333 %334 None
OpBranch %335
%335 = OpLabel
%337 = OpLoad %uint %innerCol_2
%338 = OpULessThan %bool %337 %RowPerThread
%336 = OpLogicalNot %bool %338
OpSelectionMerge %339 None
OpBranchConditional %336 %340 %339
OpBranch %348
%348 = OpLabel
OpLoopMerge %349 %350 None
OpBranch %351
%351 = OpLabel
%353 = OpLoad %uint %innerCol_2
%354 = OpULessThan %bool %353 %RowPerThread
%352 = OpLogicalNot %bool %354
OpSelectionMerge %355 None
OpBranchConditional %352 %356 %355
%356 = OpLabel
OpBranch %349
%355 = OpLabel
%357 = OpLoad %uint %innerRow_2
%358 = OpIMul %uint %357 %RowPerThread
%359 = OpLoad %uint %innerCol_2
%360 = OpIAdd %uint %358 %359
%362 = OpLoad %uint %innerRow_2
%363 = OpIAdd %uint %136 %362
%364 = OpLoad %uint %innerCol_2
%365 = OpIAdd %uint %139 %364
%366 = OpAccessChain %_ptr_Function_float %acc %360
%367 = OpLoad %float %366
%361 = OpFunctionCall %void %mm_write %363 %365 %367
OpBranch %350
%350 = OpLabel
%368 = OpLoad %uint %innerCol_2
%369 = OpIAdd %uint %368 %uint_1
OpStore %innerCol_2 %369
OpBranch %348
%349 = OpLabel
OpBranch %340
%340 = OpLabel
OpBranch %333
%370 = OpLoad %uint %innerRow_2
%371 = OpIAdd %uint %370 %uint_1
OpStore %innerRow_2 %371
OpBranch %338
%339 = OpLabel
%341 = OpLoad %uint %innerRow_2
%342 = OpIMul %uint %341 %RowPerThread
%343 = OpLoad %uint %innerCol_2
%344 = OpIAdd %uint %342 %343
%346 = OpLoad %uint %innerRow_2
%347 = OpIAdd %uint %116 %346
%348 = OpLoad %uint %innerCol_2
%349 = OpIAdd %uint %119 %348
%350 = OpAccessChain %_ptr_Function_float %acc %344
%351 = OpLoad %float %350
%345 = OpFunctionCall %void %mm_write %347 %349 %351
OpBranch %334
%334 = OpLabel
%352 = OpLoad %uint %innerCol_2
%353 = OpIAdd %uint %352 %uint_1
OpStore %innerCol_2 %353
OpBranch %332
%333 = OpLabel
OpBranch %324
%324 = OpLabel
%354 = OpLoad %uint %innerRow_2
%355 = OpIAdd %uint %354 %uint_1
OpStore %innerRow_2 %355
OpBranch %322
%323 = OpLabel
OpReturn
OpFunctionEnd

View File

@ -21,15 +21,11 @@ void main(tint_symbol_1 tint_symbol) {
const uint3 WorkGroupID = tint_symbol.WorkGroupID;
const uint3 LocalInvocationID = tint_symbol.LocalInvocationID;
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
for(int i_1 = 0; (i_1 < 4); i_1 = (i_1 + 1)) {
{
for(int i_2 = 0; (i_2 < 256); i_2 = (i_2 + 1)) {
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
}
}
}
{
for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
const uint i_1 = (idx / 256u);
const uint i_2 = (idx % 256u);
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
}
}
GroupMemoryBarrierWithGroupSync();

View File

@ -15,15 +15,16 @@ struct tint_array_wrapper {
tint_array_wrapper_1 arr[4];
};
kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture(1)]], sampler tint_symbol_5 [[sampler(0)]], texture2d<float, access::write> tint_symbol_6 [[texture(2)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Params& params [[buffer(1)]], constant Flip& flip [[buffer(3)]]) {
threadgroup tint_array_wrapper tint_symbol_3;
if ((local_invocation_index == 0u)) {
tint_array_wrapper const tint_symbol_2 = {.arr={}};
tint_symbol_3 = tint_symbol_2;
kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_3 [[texture(1)]], sampler tint_symbol_4 [[sampler(0)]], texture2d<float, access::write> tint_symbol_5 [[texture(2)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Params& params [[buffer(1)]], constant Flip& flip [[buffer(3)]]) {
threadgroup tint_array_wrapper tint_symbol_2;
for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
uint const i_1 = (idx / 256u);
uint const i_2 = (idx % 256u);
tint_symbol_2.arr[i_1].arr[i_2] = float3();
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint const filterOffset = ((params.filterDim - 1u) / 2u);
int2 const dims = int2(tint_symbol_4.get_width(0), tint_symbol_4.get_height(0));
int2 const dims = int2(tint_symbol_3.get_width(0), tint_symbol_3.get_height(0));
int2 const baseIndex = (int2(((WorkGroupID.xy * uint2(params.blockDim, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u)))) - int2(int(filterOffset), 0));
for(uint r = 0u; (r < 4u); r = (r + 1u)) {
for(uint c = 0u; (c < 4u); c = (c + 1u)) {
@ -31,7 +32,7 @@ kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture
if ((flip.value != 0u)) {
loadIndex = loadIndex.yx;
}
tint_symbol_3.arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_4.sample(tint_symbol_5, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb;
tint_symbol_2.arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_3.sample(tint_symbol_4, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
@ -46,9 +47,9 @@ kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture
float3 acc = float3(0.0f, 0.0f, 0.0f);
for(uint f = 0u; (f < params.filterDim); f = (f + 1u)) {
uint i = ((center + f) - filterOffset);
acc = (acc + ((1.0f / float(params.filterDim)) * tint_symbol_3.arr[r].arr[i]));
acc = (acc + ((1.0f / float(params.filterDim)) * tint_symbol_2.arr[r].arr[i]));
}
tint_symbol_6.write(float4(acc, 1.0f), uint2(writeIndex));
tint_symbol_5.write(float4(acc, 1.0f), uint2(writeIndex));
}
}
}

View File

@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 239
; Bound: 254
; Schema: 0
OpCapability Shader
OpCapability ImageQuery
@ -23,6 +23,7 @@
OpName %tint_symbol_1 "tint_symbol_1"
OpName %tint_symbol_2 "tint_symbol_2"
OpName %main "main"
OpName %idx "idx"
OpName %r "r"
OpName %c "c"
OpName %loadIndex "loadIndex"
@ -87,288 +88,310 @@
%tint_symbol_2 = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid
%31 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0
%_ptr_Function_uint = OpTypePointer Function %uint
%38 = OpConstantNull %uint
%uint_1024 = OpConstant %uint 1024
%bool = OpTypeBool
%41 = OpConstantNull %_arr__arr_v3float_uint_256_uint_4
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
%56 = OpConstantNull %v3float
%uint_64 = OpConstant %uint 64
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%uint_0 = OpConstant %uint 0
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%uint_1 = OpConstant %uint 1
%int = OpTypeInt 32 1
%v2int = OpTypeVector %int 2
%int_0 = OpConstant %int 0
%v2uint = OpTypeVector %uint 2
%66 = OpConstantComposite %v2uint %uint_4 %uint_1
%_ptr_Function_uint = OpTypePointer Function %uint
%74 = OpConstantNull %uint
%85 = OpConstantComposite %v2uint %uint_4 %uint_1
%_ptr_Function_v2int = OpTypePointer Function %v2int
%102 = OpConstantNull %v2int
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
%119 = OpConstantNull %v2int
%v4float = OpTypeVector %float 4
%122 = OpTypeSampledImage %10
%138 = OpTypeSampledImage %10
%v2float = OpTypeVector %float 2
%float_0_25 = OpConstant %float 0.25
%128 = OpConstantComposite %v2float %float_0_25 %float_0_25
%144 = OpConstantComposite %v2float %float_0_25 %float_0_25
%float_0 = OpConstant %float 0
%v2bool = OpTypeVector %bool 2
%193 = OpConstantComposite %v3float %float_0 %float_0 %float_0
%209 = OpConstantComposite %v3float %float_0 %float_0 %float_0
%_ptr_Function_v3float = OpTypePointer Function %v3float
%196 = OpConstantNull %v3float
%float_1 = OpConstant %float 1
%main = OpFunction %void None %31
%34 = OpLabel
%r = OpVariable %_ptr_Function_uint Function %74
%c = OpVariable %_ptr_Function_uint Function %74
%loadIndex = OpVariable %_ptr_Function_v2int Function %102
%r_0 = OpVariable %_ptr_Function_uint Function %74
%c_0 = OpVariable %_ptr_Function_uint Function %74
%writeIndex = OpVariable %_ptr_Function_v2int Function %102
%acc = OpVariable %_ptr_Function_v3float Function %196
%f = OpVariable %_ptr_Function_uint Function %74
%i = OpVariable %_ptr_Function_uint Function %74
%idx = OpVariable %_ptr_Function_uint Function %38
%r = OpVariable %_ptr_Function_uint Function %38
%c = OpVariable %_ptr_Function_uint Function %38
%loadIndex = OpVariable %_ptr_Function_v2int Function %119
%r_0 = OpVariable %_ptr_Function_uint Function %38
%c_0 = OpVariable %_ptr_Function_uint Function %38
%writeIndex = OpVariable %_ptr_Function_v2int Function %119
%acc = OpVariable %_ptr_Function_v3float Function %56
%f = OpVariable %_ptr_Function_uint Function %38
%i = OpVariable %_ptr_Function_uint Function %38
%35 = OpLoad %uint %tint_symbol_2
%37 = OpIEqual %bool %35 %uint_0
OpSelectionMerge %39 None
OpBranchConditional %37 %40 %39
%40 = OpLabel
OpStore %tile %41
OpStore %idx %35
OpBranch %39
%39 = OpLabel
OpLoopMerge %40 %41 None
OpBranch %42
%42 = OpLabel
%44 = OpLoad %uint %idx
%46 = OpULessThan %bool %44 %uint_1024
%43 = OpLogicalNot %bool %46
OpSelectionMerge %48 None
OpBranchConditional %43 %49 %48
%49 = OpLabel
OpBranch %40
%48 = OpLabel
%50 = OpLoad %uint %idx
%51 = OpUDiv %uint %50 %uint_256
%52 = OpLoad %uint %idx
%53 = OpUMod %uint %52 %uint_256
%55 = OpAccessChain %_ptr_Workgroup_v3float %tile %51 %53
OpStore %55 %56
OpBranch %41
%41 = OpLabel
%57 = OpLoad %uint %idx
%59 = OpIAdd %uint %57 %uint_64
OpStore %idx %59
OpBranch %39
%40 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%46 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%47 = OpLoad %uint %46
%49 = OpISub %uint %47 %uint_1
%50 = OpUDiv %uint %49 %uint_2
%54 = OpLoad %10 %inputTex
%51 = OpImageQuerySizeLod %v2int %54 %int_0
%58 = OpLoad %v3uint %tint_symbol
%59 = OpVectorShuffle %v2uint %58 %58 0 1
%60 = OpAccessChain %_ptr_Uniform_uint %params %uint_1
%61 = OpLoad %uint %60
%62 = OpCompositeConstruct %v2uint %61 %uint_4
%63 = OpIMul %v2uint %59 %62
%64 = OpLoad %v3uint %tint_symbol_1
%65 = OpVectorShuffle %v2uint %64 %64 0 1
%67 = OpIMul %v2uint %65 %66
%68 = OpIAdd %v2uint %63 %67
%56 = OpBitcast %v2int %68
%69 = OpBitcast %int %50
%70 = OpCompositeConstruct %v2int %69 %int_0
%71 = OpISub %v2int %56 %70
%65 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%66 = OpLoad %uint %65
%68 = OpISub %uint %66 %uint_1
%69 = OpUDiv %uint %68 %uint_2
%73 = OpLoad %10 %inputTex
%70 = OpImageQuerySizeLod %v2int %73 %int_0
%77 = OpLoad %v3uint %tint_symbol
%78 = OpVectorShuffle %v2uint %77 %77 0 1
%79 = OpAccessChain %_ptr_Uniform_uint %params %uint_1
%80 = OpLoad %uint %79
%81 = OpCompositeConstruct %v2uint %80 %uint_4
%82 = OpIMul %v2uint %78 %81
%83 = OpLoad %v3uint %tint_symbol_1
%84 = OpVectorShuffle %v2uint %83 %83 0 1
%86 = OpIMul %v2uint %84 %85
%87 = OpIAdd %v2uint %82 %86
%75 = OpBitcast %v2int %87
%88 = OpBitcast %int %69
%89 = OpCompositeConstruct %v2int %88 %int_0
%90 = OpISub %v2int %75 %89
OpStore %r %uint_0
OpBranch %75
%75 = OpLabel
OpLoopMerge %76 %77 None
OpBranch %78
%78 = OpLabel
%80 = OpLoad %uint %r
%81 = OpULessThan %bool %80 %uint_4
%79 = OpLogicalNot %bool %81
OpSelectionMerge %82 None
OpBranchConditional %79 %83 %82
%83 = OpLabel
OpBranch %76
%82 = OpLabel
OpStore %c %uint_0
OpBranch %85
%85 = OpLabel
OpLoopMerge %86 %87 None
OpBranch %88
%88 = OpLabel
%90 = OpLoad %uint %c
%91 = OpULessThan %bool %90 %uint_4
%89 = OpLogicalNot %bool %91
OpSelectionMerge %92 None
OpBranchConditional %89 %93 %92
%93 = OpLabel
OpBranch %86
OpBranch %92
%92 = OpLabel
%95 = OpLoad %uint %c
%94 = OpBitcast %int %95
OpLoopMerge %93 %94 None
OpBranch %95
%95 = OpLabel
%97 = OpLoad %uint %r
%96 = OpBitcast %int %97
%98 = OpCompositeConstruct %v2int %94 %96
%99 = OpIAdd %v2int %71 %98
OpStore %loadIndex %99
%103 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
%104 = OpLoad %uint %103
%105 = OpINotEqual %bool %104 %uint_0
OpSelectionMerge %106 None
OpBranchConditional %105 %107 %106
%107 = OpLabel
%108 = OpLoad %v2int %loadIndex
%109 = OpVectorShuffle %v2int %108 %108 1 0
OpStore %loadIndex %109
OpBranch %106
%106 = OpLabel
%110 = OpLoad %uint %r
%111 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%112 = OpLoad %uint %111
%113 = OpIMul %uint %uint_4 %112
%114 = OpLoad %uint %c
%115 = OpIAdd %uint %113 %114
%117 = OpAccessChain %_ptr_Workgroup_v3float %tile %110 %115
%120 = OpLoad %3 %samp
%121 = OpLoad %10 %inputTex
%123 = OpSampledImage %122 %121 %120
%126 = OpLoad %v2int %loadIndex
%124 = OpConvertSToF %v2float %126
%129 = OpFAdd %v2float %124 %128
%130 = OpConvertSToF %v2float %51
%131 = OpFDiv %v2float %129 %130
%118 = OpImageSampleExplicitLod %v4float %123 %131 Lod %float_0
%133 = OpVectorShuffle %v3float %118 %118 0 1 2
OpStore %117 %133
OpBranch %87
%87 = OpLabel
%134 = OpLoad %uint %c
%135 = OpIAdd %uint %134 %uint_1
OpStore %c %135
OpBranch %85
%86 = OpLabel
OpBranch %77
%77 = OpLabel
%136 = OpLoad %uint %r
%137 = OpIAdd %uint %136 %uint_1
OpStore %r %137
OpBranch %75
%76 = OpLabel
%98 = OpULessThan %bool %97 %uint_4
%96 = OpLogicalNot %bool %98
OpSelectionMerge %99 None
OpBranchConditional %96 %100 %99
%100 = OpLabel
OpBranch %93
%99 = OpLabel
OpStore %c %uint_0
OpBranch %102
%102 = OpLabel
OpLoopMerge %103 %104 None
OpBranch %105
%105 = OpLabel
%107 = OpLoad %uint %c
%108 = OpULessThan %bool %107 %uint_4
%106 = OpLogicalNot %bool %108
OpSelectionMerge %109 None
OpBranchConditional %106 %110 %109
%110 = OpLabel
OpBranch %103
%109 = OpLabel
%112 = OpLoad %uint %c
%111 = OpBitcast %int %112
%114 = OpLoad %uint %r
%113 = OpBitcast %int %114
%115 = OpCompositeConstruct %v2int %111 %113
%116 = OpIAdd %v2int %90 %115
OpStore %loadIndex %116
%120 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
%121 = OpLoad %uint %120
%122 = OpINotEqual %bool %121 %uint_0
OpSelectionMerge %123 None
OpBranchConditional %122 %124 %123
%124 = OpLabel
%125 = OpLoad %v2int %loadIndex
%126 = OpVectorShuffle %v2int %125 %125 1 0
OpStore %loadIndex %126
OpBranch %123
%123 = OpLabel
%127 = OpLoad %uint %r
%128 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%129 = OpLoad %uint %128
%130 = OpIMul %uint %uint_4 %129
%131 = OpLoad %uint %c
%132 = OpIAdd %uint %130 %131
%133 = OpAccessChain %_ptr_Workgroup_v3float %tile %127 %132
%136 = OpLoad %3 %samp
%137 = OpLoad %10 %inputTex
%139 = OpSampledImage %138 %137 %136
%142 = OpLoad %v2int %loadIndex
%140 = OpConvertSToF %v2float %142
%145 = OpFAdd %v2float %140 %144
%146 = OpConvertSToF %v2float %70
%147 = OpFDiv %v2float %145 %146
%134 = OpImageSampleExplicitLod %v4float %139 %147 Lod %float_0
%149 = OpVectorShuffle %v3float %134 %134 0 1 2
OpStore %133 %149
OpBranch %104
%104 = OpLabel
%150 = OpLoad %uint %c
%151 = OpIAdd %uint %150 %uint_1
OpStore %c %151
OpBranch %102
%103 = OpLabel
OpBranch %94
%94 = OpLabel
%152 = OpLoad %uint %r
%153 = OpIAdd %uint %152 %uint_1
OpStore %r %153
OpBranch %92
%93 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
OpStore %r_0 %uint_0
OpBranch %140
%140 = OpLabel
OpLoopMerge %141 %142 None
OpBranch %143
%143 = OpLabel
%145 = OpLoad %uint %r_0
%146 = OpULessThan %bool %145 %uint_4
%144 = OpLogicalNot %bool %146
OpSelectionMerge %147 None
OpBranchConditional %144 %148 %147
%148 = OpLabel
OpBranch %141
%147 = OpLabel
OpBranch %156
%156 = OpLabel
OpLoopMerge %157 %158 None
OpBranch %159
%159 = OpLabel
%161 = OpLoad %uint %r_0
%162 = OpULessThan %bool %161 %uint_4
%160 = OpLogicalNot %bool %162
OpSelectionMerge %163 None
OpBranchConditional %160 %164 %163
%164 = OpLabel
OpBranch %157
%163 = OpLabel
OpStore %c_0 %uint_0
OpBranch %150
%150 = OpLabel
OpLoopMerge %151 %152 None
OpBranch %153
%153 = OpLabel
%155 = OpLoad %uint %c_0
%156 = OpULessThan %bool %155 %uint_4
%154 = OpLogicalNot %bool %156
OpSelectionMerge %157 None
OpBranchConditional %154 %158 %157
%158 = OpLabel
OpBranch %151
%157 = OpLabel
%160 = OpLoad %uint %c_0
%159 = OpBitcast %int %160
%162 = OpLoad %uint %r_0
%161 = OpBitcast %int %162
%163 = OpCompositeConstruct %v2int %159 %161
%164 = OpIAdd %v2int %71 %163
OpStore %writeIndex %164
%166 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
%167 = OpLoad %uint %166
%168 = OpINotEqual %bool %167 %uint_0
OpSelectionMerge %169 None
OpBranchConditional %168 %170 %169
%170 = OpLabel
%171 = OpLoad %v2int %writeIndex
%172 = OpVectorShuffle %v2int %171 %171 1 0
OpStore %writeIndex %172
OpBranch %166
%166 = OpLabel
OpLoopMerge %167 %168 None
OpBranch %169
%169 = OpLabel
%173 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%174 = OpLoad %uint %173
%175 = OpIMul %uint %uint_4 %174
%171 = OpLoad %uint %c_0
%172 = OpULessThan %bool %171 %uint_4
%170 = OpLogicalNot %bool %172
OpSelectionMerge %173 None
OpBranchConditional %170 %174 %173
%174 = OpLabel
OpBranch %167
%173 = OpLabel
%176 = OpLoad %uint %c_0
%177 = OpIAdd %uint %175 %176
%178 = OpUGreaterThanEqual %bool %177 %50
OpSelectionMerge %179 None
OpBranchConditional %178 %180 %179
%180 = OpLabel
%181 = OpISub %uint %uint_256 %50
%182 = OpULessThan %bool %177 %181
OpBranch %179
%179 = OpLabel
%183 = OpPhi %bool %178 %169 %182 %180
OpSelectionMerge %184 None
OpBranchConditional %183 %185 %184
%185 = OpLabel
%175 = OpBitcast %int %176
%178 = OpLoad %uint %r_0
%177 = OpBitcast %int %178
%179 = OpCompositeConstruct %v2int %175 %177
%180 = OpIAdd %v2int %90 %179
OpStore %writeIndex %180
%182 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
%183 = OpLoad %uint %182
%184 = OpINotEqual %bool %183 %uint_0
OpSelectionMerge %185 None
OpBranchConditional %184 %186 %185
%186 = OpLabel
%187 = OpLoad %v2int %writeIndex
%188 = OpSLessThan %v2bool %187 %51
%186 = OpAll %bool %188
OpBranch %184
%184 = OpLabel
%190 = OpPhi %bool %183 %179 %186 %185
OpSelectionMerge %191 None
OpBranchConditional %190 %192 %191
%192 = OpLabel
OpStore %acc %193
OpStore %f %uint_0
OpBranch %198
%198 = OpLabel
OpLoopMerge %199 %200 None
OpBranch %201
%188 = OpVectorShuffle %v2int %187 %187 1 0
OpStore %writeIndex %188
OpBranch %185
%185 = OpLabel
%189 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
%190 = OpLoad %uint %189
%191 = OpIMul %uint %uint_4 %190
%192 = OpLoad %uint %c_0
%193 = OpIAdd %uint %191 %192
%194 = OpUGreaterThanEqual %bool %193 %69
OpSelectionMerge %195 None
OpBranchConditional %194 %196 %195
%196 = OpLabel
%197 = OpISub %uint %uint_256 %69
%198 = OpULessThan %bool %193 %197
OpBranch %195
%195 = OpLabel
%199 = OpPhi %bool %194 %185 %198 %196
OpSelectionMerge %200 None
OpBranchConditional %199 %201 %200
%201 = OpLabel
%203 = OpLoad %uint %f
%204 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%205 = OpLoad %uint %204
%206 = OpULessThan %bool %203 %205
%202 = OpLogicalNot %bool %206
OpSelectionMerge %207 None
OpBranchConditional %202 %208 %207
%208 = OpLabel
OpBranch %199
%207 = OpLabel
%209 = OpLoad %uint %f
%210 = OpIAdd %uint %177 %209
%211 = OpISub %uint %210 %50
OpStore %i %211
%213 = OpLoad %v3float %acc
%216 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%217 = OpLoad %uint %216
%215 = OpConvertUToF %float %217
%218 = OpFDiv %float %float_1 %215
%219 = OpLoad %uint %r_0
%220 = OpLoad %uint %i
%221 = OpAccessChain %_ptr_Workgroup_v3float %tile %219 %220
%222 = OpLoad %v3float %221
%223 = OpVectorTimesScalar %v3float %222 %218
%224 = OpFAdd %v3float %213 %223
OpStore %acc %224
%203 = OpLoad %v2int %writeIndex
%204 = OpSLessThan %v2bool %203 %70
%202 = OpAll %bool %204
OpBranch %200
%200 = OpLabel
%225 = OpLoad %uint %f
%226 = OpIAdd %uint %225 %uint_1
OpStore %f %226
OpBranch %198
%199 = OpLabel
%228 = OpLoad %14 %outputTex
%229 = OpLoad %v2int %writeIndex
%230 = OpLoad %v3float %acc
%231 = OpCompositeExtract %float %230 0
%232 = OpCompositeExtract %float %230 1
%233 = OpCompositeExtract %float %230 2
%234 = OpCompositeConstruct %v4float %231 %232 %233 %float_1
OpImageWrite %228 %229 %234
OpBranch %191
%191 = OpLabel
OpBranch %152
%152 = OpLabel
%235 = OpLoad %uint %c_0
%236 = OpIAdd %uint %235 %uint_1
OpStore %c_0 %236
OpBranch %150
%151 = OpLabel
OpBranch %142
%142 = OpLabel
%237 = OpLoad %uint %r_0
%238 = OpIAdd %uint %237 %uint_1
OpStore %r_0 %238
OpBranch %140
%141 = OpLabel
%206 = OpPhi %bool %199 %195 %202 %201
OpSelectionMerge %207 None
OpBranchConditional %206 %208 %207
%208 = OpLabel
OpStore %acc %209
OpStore %f %uint_0
OpBranch %213
%213 = OpLabel
OpLoopMerge %214 %215 None
OpBranch %216
%216 = OpLabel
%218 = OpLoad %uint %f
%219 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%220 = OpLoad %uint %219
%221 = OpULessThan %bool %218 %220
%217 = OpLogicalNot %bool %221
OpSelectionMerge %222 None
OpBranchConditional %217 %223 %222
%223 = OpLabel
OpBranch %214
%222 = OpLabel
%224 = OpLoad %uint %f
%225 = OpIAdd %uint %193 %224
%226 = OpISub %uint %225 %69
OpStore %i %226
%228 = OpLoad %v3float %acc
%231 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
%232 = OpLoad %uint %231
%230 = OpConvertUToF %float %232
%233 = OpFDiv %float %float_1 %230
%234 = OpLoad %uint %r_0
%235 = OpLoad %uint %i
%236 = OpAccessChain %_ptr_Workgroup_v3float %tile %234 %235
%237 = OpLoad %v3float %236
%238 = OpVectorTimesScalar %v3float %237 %233
%239 = OpFAdd %v3float %228 %238
OpStore %acc %239
OpBranch %215
%215 = OpLabel
%240 = OpLoad %uint %f
%241 = OpIAdd %uint %240 %uint_1
OpStore %f %241
OpBranch %213
%214 = OpLabel
%243 = OpLoad %14 %outputTex
%244 = OpLoad %v2int %writeIndex
%245 = OpLoad %v3float %acc
%246 = OpCompositeExtract %float %245 0
%247 = OpCompositeExtract %float %245 1
%248 = OpCompositeExtract %float %245 2
%249 = OpCompositeConstruct %v4float %246 %247 %248 %float_1
OpImageWrite %243 %244 %249
OpBranch %207
%207 = OpLabel
OpBranch %168
%168 = OpLabel
%250 = OpLoad %uint %c_0
%251 = OpIAdd %uint %250 %uint_1
OpStore %c_0 %251
OpBranch %166
%167 = OpLabel
OpBranch %158
%158 = OpLabel
%252 = OpLoad %uint %r_0
%253 = OpIAdd %uint %252 %uint_1
OpStore %r_0 %253
OpBranch %156
%157 = OpLabel
OpReturn
OpFunctionEnd

View File

@ -350,21 +350,16 @@ void main(tint_symbol_1 tint_symbol) {
const uint3 gl_LocalInvocationID_param = tint_symbol.gl_LocalInvocationID_param;
const uint3 gl_GlobalInvocationID_param = tint_symbol.gl_GlobalInvocationID_param;
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
for(int i = 0; (i < 64); i = (i + 1)) {
{
for(int i_1 = 0; (i_1 < 64); i_1 = (i_1 + 1)) {
mm_Asub[i][i_1] = 0.0f;
}
}
}
}
{
for(int i_2 = 0; (i_2 < 64); i_2 = (i_2 + 1)) {
const float tint_symbol_6[1] = (float[1])0;
mm_Bsub[i_2] = tint_symbol_6;
}
{
const uint i_1 = local_invocation_index;
const uint i_2 = (local_invocation_index % 1u);
mm_Bsub[i_1][i_2] = 0.0f;
}
{
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
const uint i = (idx / 64u);
const uint i_1 = (idx % 64u);
mm_Asub[i][i_1] = 0.0f;
}
}
GroupMemoryBarrierWithGroupSync();

View File

@ -53,7 +53,7 @@ bool coordsInBounds_vi2_vi2_(thread int2* const coord, thread int2* const shape)
return x_88;
}
float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread int* const row, thread int* const col, thread int* const tint_symbol_5, thread int* const tint_symbol_6, thread int* const tint_symbol_7) {
float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread int* const row, thread int* const col, thread int* const tint_symbol_3, thread int* const tint_symbol_4, thread int* const tint_symbol_5) {
int batchASize = 0;
int2 param_10 = 0;
int2 param_11 = 0;
@ -63,16 +63,16 @@ float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread
batchASize = (x_417 * x_419);
int const x_421 = *(row);
int const x_422 = *(col);
int const x_424 = *(tint_symbol_5);
int const x_425 = *(tint_symbol_6);
int const x_424 = *(tint_symbol_3);
int const x_425 = *(tint_symbol_4);
param_10 = int2(x_421, x_422);
param_11 = int2(x_424, x_425);
bool const x_429 = coordsInBounds_vi2_vi2_(&(param_10), &(param_11));
if (x_429) {
int const x_438 = *(tint_symbol_7);
int const x_438 = *(tint_symbol_5);
int const x_439 = batchASize;
int const x_441 = *(row);
int const x_442 = *(tint_symbol_6);
int const x_442 = *(tint_symbol_4);
int const x_445 = *(col);
float const x_448 = x_165.A[(((x_438 * x_439) + (x_441 * x_442)) + x_445)];
x_430 = x_448;
@ -83,7 +83,7 @@ float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread
return x_450;
}
float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread int* const row_1, thread int* const col_1, thread int* const tint_symbol_8, thread int* const tint_symbol_9, thread int* const tint_symbol_10) {
float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread int* const row_1, thread int* const col_1, thread int* const tint_symbol_6, thread int* const tint_symbol_7, thread int* const tint_symbol_8) {
int batchBSize = 0;
int2 param_12 = 0;
int2 param_13 = 0;
@ -93,16 +93,16 @@ float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread
batchBSize = (x_455 * x_457);
int const x_459 = *(row_1);
int const x_460 = *(col_1);
int const x_462 = *(tint_symbol_8);
int const x_463 = *(tint_symbol_9);
int const x_462 = *(tint_symbol_6);
int const x_463 = *(tint_symbol_7);
param_12 = int2(x_459, x_460);
param_13 = int2(x_462, x_463);
bool const x_467 = coordsInBounds_vi2_vi2_(&(param_12), &(param_13));
if (x_467) {
int const x_475 = *(tint_symbol_10);
int const x_475 = *(tint_symbol_8);
int const x_476 = batchBSize;
int const x_478 = *(row_1);
int const x_479 = *(tint_symbol_9);
int const x_479 = *(tint_symbol_7);
int const x_482 = *(col_1);
float const x_485 = x_185.B[(((x_475 * x_476) + (x_478 * x_479)) + x_482)];
x_468 = x_485;
@ -146,17 +146,17 @@ void setOutput_i1_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread
return;
}
void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int* const row_2, thread int* const col_2, thread float* const value_2, thread int* const tint_symbol_11) {
void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int* const row_2, thread int* const col_2, thread float* const value_2, thread int* const tint_symbol_9) {
int3 outCoord = 0;
int param_14 = 0;
int param_15 = 0;
int param_16 = 0;
float param_17 = 0.0f;
int const x_491 = *(tint_symbol_11);
int const x_491 = *(tint_symbol_9);
int const x_492 = *(row_2);
int const x_493 = *(col_2);
outCoord = int3(x_491, x_492, x_493);
int const x_496 = *(tint_symbol_11);
int const x_496 = *(tint_symbol_9);
param_14 = x_496;
int const x_498 = *(row_2);
param_15 = x_498;
@ -168,7 +168,7 @@ void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int
return;
}
void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const dimAOuter, thread int* const dimInner, thread int* const dimBOuter, thread uint3* const tint_symbol_12, thread uint3* const tint_symbol_13, thread int* const tint_symbol_14, thread int* const tint_symbol_15, thread int* const tint_symbol_16, threadgroup tint_array_wrapper* const tint_symbol_17, thread int* const tint_symbol_18, threadgroup tint_array_wrapper_2* const tint_symbol_19) {
void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const dimAOuter, thread int* const dimInner, thread int* const dimBOuter, thread uint3* const tint_symbol_10, thread uint3* const tint_symbol_11, thread int* const tint_symbol_12, thread int* const tint_symbol_13, thread int* const tint_symbol_14, threadgroup tint_array_wrapper* const tint_symbol_15, thread int* const tint_symbol_16, threadgroup tint_array_wrapper_2* const tint_symbol_17) {
int tileRow = 0;
int tileCol = 0;
int globalRow = 0;
@ -203,13 +203,13 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int param_7 = 0;
int param_8 = 0;
float param_9 = 0.0f;
uint const x_132 = (*(tint_symbol_12)).y;
uint const x_132 = (*(tint_symbol_10)).y;
tileRow = (as_type<int>(x_132) * 1);
uint const x_137 = (*(tint_symbol_12)).x;
uint const x_137 = (*(tint_symbol_10)).x;
tileCol = (as_type<int>(x_137) * 1);
uint const x_143 = (*(tint_symbol_13)).y;
uint const x_143 = (*(tint_symbol_11)).y;
globalRow = (as_type<int>(x_143) * 1);
uint const x_148 = (*(tint_symbol_13)).x;
uint const x_148 = (*(tint_symbol_11)).x;
globalCol = (as_type<int>(x_148) * 1);
int const x_152 = *(dimInner);
numTiles = (((x_152 - 1) / 64) + 1);
@ -240,9 +240,9 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
innerRow = (x_183 + 1);
}
}
uint const x_187 = (*(tint_symbol_12)).x;
uint const x_187 = (*(tint_symbol_10)).x;
tileColA = (as_type<int>(x_187) * 64);
uint const x_192 = (*(tint_symbol_12)).y;
uint const x_192 = (*(tint_symbol_10)).y;
tileRowB = (as_type<int>(x_192) * 1);
t = 0;
while (true) {
@ -280,8 +280,8 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_240 = inputCol;
param_3 = (x_235 + x_236);
param_4 = ((x_238 * 64) + x_240);
float const x_244 = mm_readA_i1_i1_(x_48, x_165, &(param_3), &(param_4), tint_symbol_14, tint_symbol_15, tint_symbol_16);
(*(tint_symbol_17)).arr[x_233].arr[x_234] = x_244;
float const x_244 = mm_readA_i1_i1_(x_48, x_165, &(param_3), &(param_4), tint_symbol_12, tint_symbol_13, tint_symbol_14);
(*(tint_symbol_15)).arr[x_233].arr[x_234] = x_244;
{
int const x_247 = innerCol_1;
innerCol_1 = (x_247 + 1);
@ -320,8 +320,8 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_285 = innerCol_2;
param_5 = ((x_280 * 64) + x_282);
param_6 = (x_284 + x_285);
float const x_289 = mm_readB_i1_i1_(x_48, x_185, &(param_5), &(param_6), tint_symbol_15, tint_symbol_18, tint_symbol_16);
(*(tint_symbol_19)).arr[x_278].arr[x_279] = x_289;
float const x_289 = mm_readB_i1_i1_(x_48, x_185, &(param_5), &(param_6), tint_symbol_13, tint_symbol_16, tint_symbol_14);
(*(tint_symbol_17)).arr[x_278].arr[x_279] = x_289;
{
int const x_291 = innerCol_2;
innerCol_2 = (x_291 + 1);
@ -351,7 +351,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_315 = k;
int const x_316 = tileCol;
int const x_317 = inner;
float const x_320 = (*(tint_symbol_19)).arr[x_315].arr[(x_316 + x_317)];
float const x_320 = (*(tint_symbol_17)).arr[x_315].arr[(x_316 + x_317)];
BCached.arr[x_314] = x_320;
{
int const x_322 = inner;
@ -368,7 +368,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
int const x_333 = tileRow;
int const x_334 = innerRow_3;
int const x_336 = k;
float const x_338 = (*(tint_symbol_17)).arr[(x_333 + x_334)].arr[x_336];
float const x_338 = (*(tint_symbol_15)).arr[(x_333 + x_334)].arr[x_336];
ACached = x_338;
innerCol_3 = 0;
while (true) {
@ -445,7 +445,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
param_8 = (x_400 + x_401);
float const x_409 = acc.arr[x_403].arr[x_404];
param_9 = x_409;
mm_write_i1_i1_f1_(x_48, x_54, &(param_7), &(param_8), &(param_9), tint_symbol_16);
mm_write_i1_i1_f1_(x_48, x_54, &(param_7), &(param_8), &(param_9), tint_symbol_14);
}
{
int const x_411 = innerCol_4;
@ -460,47 +460,51 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
return;
}
void main_1(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const tint_symbol_20, thread int* const tint_symbol_21, thread int* const tint_symbol_22, thread uint3* const tint_symbol_23, thread int* const tint_symbol_24, thread uint3* const tint_symbol_25, threadgroup tint_array_wrapper* const tint_symbol_26, threadgroup tint_array_wrapper_2* const tint_symbol_27) {
void main_1(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const tint_symbol_18, thread int* const tint_symbol_19, thread int* const tint_symbol_20, thread uint3* const tint_symbol_21, thread int* const tint_symbol_22, thread uint3* const tint_symbol_23, threadgroup tint_array_wrapper* const tint_symbol_24, threadgroup tint_array_wrapper_2* const tint_symbol_25) {
int param_18 = 0;
int param_19 = 0;
int param_20 = 0;
int const x_67 = x_48.aShape.y;
*(tint_symbol_20) = x_67;
*(tint_symbol_18) = x_67;
int const x_71 = x_48.aShape.z;
*(tint_symbol_21) = x_71;
*(tint_symbol_19) = x_71;
int const x_75 = x_48.bShape.z;
*(tint_symbol_22) = x_75;
uint const x_505 = (*(tint_symbol_23)).z;
*(tint_symbol_24) = as_type<int>(x_505);
int const x_508 = *(tint_symbol_20);
*(tint_symbol_20) = x_75;
uint const x_505 = (*(tint_symbol_21)).z;
*(tint_symbol_22) = as_type<int>(x_505);
int const x_508 = *(tint_symbol_18);
param_18 = x_508;
int const x_510 = *(tint_symbol_21);
int const x_510 = *(tint_symbol_19);
param_19 = x_510;
int const x_512 = *(tint_symbol_22);
int const x_512 = *(tint_symbol_20);
param_20 = x_512;
mm_matMul_i1_i1_i1_(x_48, x_165, x_185, x_54, &(param_18), &(param_19), &(param_20), tint_symbol_25, tint_symbol_23, tint_symbol_20, tint_symbol_21, tint_symbol_24, tint_symbol_26, tint_symbol_22, tint_symbol_27);
mm_matMul_i1_i1_i1_(x_48, x_165, x_185, x_54, &(param_18), &(param_19), &(param_20), tint_symbol_23, tint_symbol_21, tint_symbol_18, tint_symbol_19, tint_symbol_22, tint_symbol_24, tint_symbol_20, tint_symbol_25);
return;
}
kernel void tint_symbol_1(uint3 gl_LocalInvocationID_param [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID_param [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& x_48 [[buffer(3)]], const device ssbA& x_165 [[buffer(1)]], const device ssbB& x_185 [[buffer(2)]], device ssbOut& x_54 [[buffer(0)]]) {
threadgroup tint_array_wrapper tint_symbol_28;
threadgroup tint_array_wrapper_2 tint_symbol_29;
thread uint3 tint_symbol_30 = 0u;
thread uint3 tint_symbol_31 = 0u;
threadgroup tint_array_wrapper_2 tint_symbol_26;
threadgroup tint_array_wrapper tint_symbol_27;
thread uint3 tint_symbol_28 = 0u;
thread uint3 tint_symbol_29 = 0u;
thread int tint_symbol_30 = 0;
thread int tint_symbol_31 = 0;
thread int tint_symbol_32 = 0;
thread int tint_symbol_33 = 0;
thread int tint_symbol_34 = 0;
thread int tint_symbol_35 = 0;
if ((local_invocation_index == 0u)) {
tint_array_wrapper const tint_symbol_3 = {.arr={}};
tint_symbol_28 = tint_symbol_3;
tint_array_wrapper_2 const tint_symbol_4 = {.arr={}};
tint_symbol_29 = tint_symbol_4;
{
uint const i_1 = local_invocation_index;
uint const i_2 = (local_invocation_index % 1u);
tint_symbol_26.arr[i_1].arr[i_2] = float();
}
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
uint const i = (idx / 64u);
uint const i_1 = (idx % 64u);
tint_symbol_27.arr[i].arr[i_1] = float();
}
threadgroup_barrier(mem_flags::mem_threadgroup);
tint_symbol_30 = gl_LocalInvocationID_param;
tint_symbol_31 = gl_GlobalInvocationID_param;
main_1(x_48, x_165, x_185, x_54, &(tint_symbol_32), &(tint_symbol_33), &(tint_symbol_34), &(tint_symbol_31), &(tint_symbol_35), &(tint_symbol_30), &(tint_symbol_28), &(tint_symbol_29));
tint_symbol_28 = gl_LocalInvocationID_param;
tint_symbol_29 = gl_GlobalInvocationID_param;
main_1(x_48, x_165, x_185, x_54, &(tint_symbol_30), &(tint_symbol_31), &(tint_symbol_32), &(tint_symbol_29), &(tint_symbol_33), &(tint_symbol_28), &(tint_symbol_27), &(tint_symbol_26));
return;
}

View File

@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 609
; Bound: 628
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
@ -121,6 +121,7 @@
OpName %param_19 "param_19"
OpName %param_20 "param_20"
OpName %main "main"
OpName %idx "idx"
OpDecorate %Uniforms Block
OpMemberDecorate %Uniforms 0 Offset 0
OpMemberDecorate %Uniforms 1 Offset 16
@ -236,8 +237,9 @@
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%uint_264 = OpConstant %uint 264
%575 = OpTypeFunction %void
%603 = OpConstantNull %_arr__arr_float_uint_64_uint_64
%604 = OpConstantNull %_arr__arr_float_uint_1_uint_64
%_ptr_Function_uint = OpTypePointer Function %uint
%606 = OpConstantNull %uint
%uint_4096 = OpConstant %uint 4096
%coordsInBounds_vi2_vi2_ = OpFunction %bool None %45
%coord = OpFunctionParameter %_ptr_Function_v2int
%shape = OpFunctionParameter %_ptr_Function_v2int
@ -941,20 +943,45 @@
OpFunctionEnd
%main = OpFunction %void None %575
%598 = OpLabel
%idx = OpVariable %_ptr_Function_uint Function %606
%599 = OpLoad %uint %tint_symbol_2
%600 = OpIEqual %bool %599 %uint_0
OpSelectionMerge %601 None
OpBranchConditional %600 %602 %601
%602 = OpLabel
OpStore %mm_Asub %603
OpStore %mm_Bsub %604
OpBranch %601
%601 = OpLabel
%600 = OpLoad %uint %tint_symbol_2
%601 = OpUMod %uint %600 %uint_1
%602 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %599 %601
OpStore %602 %85
%603 = OpLoad %uint %tint_symbol_2
OpStore %idx %603
OpBranch %607
%607 = OpLabel
OpLoopMerge %608 %609 None
OpBranch %610
%610 = OpLabel
%612 = OpLoad %uint %idx
%614 = OpULessThan %bool %612 %uint_4096
%611 = OpLogicalNot %bool %614
OpSelectionMerge %615 None
OpBranchConditional %611 %616 %615
%616 = OpLabel
OpBranch %608
%615 = OpLabel
%617 = OpLoad %uint %idx
%618 = OpUDiv %uint %617 %uint_64
%619 = OpLoad %uint %idx
%620 = OpUMod %uint %619 %uint_64
%621 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %618 %620
OpStore %621 %85
OpBranch %609
%609 = OpLabel
%622 = OpLoad %uint %idx
%623 = OpIAdd %uint %622 %uint_64
OpStore %idx %623
OpBranch %607
%608 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%606 = OpLoad %v3uint %tint_symbol
OpStore %gl_LocalInvocationID %606
%607 = OpLoad %v3uint %tint_symbol_1
OpStore %gl_GlobalInvocationID %607
%608 = OpFunctionCall %void %main_1
%625 = OpLoad %v3uint %tint_symbol
OpStore %gl_LocalInvocationID %625
%626 = OpLoad %v3uint %tint_symbol_1
OpStore %gl_GlobalInvocationID %626
%627 = OpFunctionCall %void %main_1
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicAdd_794055(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicAdd_794055 = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicAdd_794055
%25 = OpFunctionCall %void %atomicAdd_794055
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicAdd_d5db1d(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicAdd_d5db1d = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicAdd_d5db1d
%24 = OpFunctionCall %void %atomicAdd_d5db1d
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicAnd_34edd3(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicAnd_34edd3 = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicAnd_34edd3
%24 = OpFunctionCall %void %atomicAnd_34edd3
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicAnd_45a819(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicAnd_45a819 = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicAnd_45a819
%25 = OpFunctionCall %void %atomicAnd_45a819
OpReturn
OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -15,7 +15,7 @@ void atomicCompareExchangeWeak_89ea3b(threadgroup atomic_int* const tint_symbol_
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 37
; Bound: 33
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -29,7 +29,7 @@
%int_0 = OpConstant %int 0
%_ptr_Function_v2int = OpTypePointer Function %v2int
%24 = OpConstantNull %v2int
%33 = OpConstantNull %int
%29 = OpConstantNull %int
%uint_264 = OpConstant %uint 264
%atomicCompareExchangeWeak_89ea3b = OpFunction %void None %7
%10 = OpLabel
@ -43,15 +43,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%26 = OpLabel
%27 = OpLoad %uint %tint_symbol
%28 = OpIEqual %bool %27 %uint_0
OpSelectionMerge %29 None
OpBranchConditional %28 %30 %29
%30 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %33
OpBranch %29
%29 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %29
OpControlBarrier %uint_2 %uint_2 %uint_264
%36 = OpFunctionCall %void %atomicCompareExchangeWeak_89ea3b
%32 = OpFunctionCall %void %atomicCompareExchangeWeak_89ea3b
OpReturn
OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -15,7 +15,7 @@ void atomicCompareExchangeWeak_b2ab2c(threadgroup atomic_uint* const tint_symbol
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 35
; Bound: 31
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -27,7 +27,7 @@
%bool = OpTypeBool
%_ptr_Function_v2uint = OpTypePointer Function %v2uint
%22 = OpConstantNull %v2uint
%31 = OpConstantNull %uint
%27 = OpConstantNull %uint
%uint_264 = OpConstant %uint 264
%atomicCompareExchangeWeak_b2ab2c = OpFunction %void None %6
%9 = OpLabel
@ -41,15 +41,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%24 = OpLabel
%25 = OpLoad %uint %tint_symbol
%26 = OpIEqual %bool %25 %uint_0
OpSelectionMerge %27 None
OpBranchConditional %26 %28 %27
%28 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %31
OpBranch %27
%27 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %27
OpControlBarrier %uint_2 %uint_2 %uint_264
%34 = OpFunctionCall %void %atomicCompareExchangeWeak_b2ab2c
%30 = OpFunctionCall %void %atomicCompareExchangeWeak_b2ab2c
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicExchange_0a5dca(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicExchange_0a5dca = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicExchange_0a5dca
%24 = OpFunctionCall %void %atomicExchange_0a5dca
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicExchange_e114ba(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicExchange_e114ba = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicExchange_e114ba
%25 = OpFunctionCall %void %atomicExchange_e114ba
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicLoad_361bf1(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 29
; Bound: 24
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -24,7 +24,6 @@
%uint_0 = OpConstant %uint 0
%_ptr_Function_uint = OpTypePointer Function %uint
%16 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicLoad_361bf1 = OpFunction %void None %6
%9 = OpLabel
@ -35,15 +34,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%18 = OpLabel
%19 = OpLoad %uint %tint_symbol
%20 = OpIEqual %bool %19 %uint_0
OpSelectionMerge %22 None
OpBranchConditional %20 %23 %22
%23 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %16
OpBranch %22
%22 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%28 = OpFunctionCall %void %atomicLoad_361bf1
%23 = OpFunctionCall %void %atomicLoad_361bf1
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicLoad_afcc03(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_0 = OpConstant %uint 0
%_ptr_Function_int = OpTypePointer Function %int
%17 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicLoad_afcc03 = OpFunction %void None %7
%10 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicLoad_afcc03
%24 = OpFunctionCall %void %atomicLoad_afcc03
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicMax_a89cc3(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicMax_a89cc3 = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicMax_a89cc3
%25 = OpFunctionCall %void %atomicMax_a89cc3
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicMax_beccfc(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicMax_beccfc = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicMax_beccfc
%24 = OpFunctionCall %void %atomicMax_beccfc
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicMin_278235(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicMin_278235 = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicMin_278235
%25 = OpFunctionCall %void %atomicMin_278235
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicMin_69d383(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicMin_69d383 = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicMin_69d383
%24 = OpFunctionCall %void %atomicMin_69d383
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicOr_5e3d61(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicOr_5e3d61 = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicOr_5e3d61
%24 = OpFunctionCall %void %atomicOr_5e3d61
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicOr_d09248(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicOr_d09248 = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicOr_d09248
%25 = OpFunctionCall %void %atomicOr_d09248
OpReturn
OpFunctionEnd

View File

@ -12,7 +12,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicStore_726882(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 28
; Bound: 23
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -22,8 +22,7 @@
%uint_2 = OpConstant %uint 2
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%bool = OpTypeBool
%24 = OpConstantNull %uint
%19 = OpConstantNull %uint
%uint_264 = OpConstant %uint 264
%atomicStore_726882 = OpFunction %void None %6
%9 = OpLabel
@ -32,15 +31,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%16 = OpLabel
%17 = OpLoad %uint %tint_symbol
%18 = OpIEqual %bool %17 %uint_0
OpSelectionMerge %20 None
OpBranchConditional %18 %21 %20
%21 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %24
OpBranch %20
%20 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %19
OpControlBarrier %uint_2 %uint_2 %uint_264
%27 = OpFunctionCall %void %atomicStore_726882
%22 = OpFunctionCall %void %atomicStore_726882
OpReturn
OpFunctionEnd

View File

@ -12,7 +12,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicStore_8bea94(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 29
; Bound: 24
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -23,8 +23,7 @@
%uint_2 = OpConstant %uint 2
%uint_0 = OpConstant %uint 0
%int_1 = OpConstant %int 1
%bool = OpTypeBool
%25 = OpConstantNull %int
%20 = OpConstantNull %int
%uint_264 = OpConstant %uint 264
%atomicStore_8bea94 = OpFunction %void None %7
%10 = OpLabel
@ -33,15 +32,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%17 = OpLabel
%18 = OpLoad %uint %tint_symbol
%19 = OpIEqual %bool %18 %uint_0
OpSelectionMerge %21 None
OpBranchConditional %19 %22 %21
%22 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %25
OpBranch %21
%21 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %20
OpControlBarrier %uint_2 %uint_2 %uint_264
%28 = OpFunctionCall %void %atomicStore_8bea94
%23 = OpFunctionCall %void %atomicStore_8bea94
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
int atomic_result_1 = 0;
InterlockedExchange(arg_0, 0, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicXor_75dc95(threadgroup atomic_int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 31
; Bound: 26
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -26,7 +26,6 @@
%int_1 = OpConstant %int 1
%_ptr_Function_int = OpTypePointer Function %int
%18 = OpConstantNull %int
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicXor_75dc95 = OpFunction %void None %7
%10 = OpLabel
@ -37,15 +36,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%30 = OpFunctionCall %void %atomicXor_75dc95
%25 = OpFunctionCall %void %atomicXor_75dc95
OpReturn
OpFunctionEnd

View File

@ -13,7 +13,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
uint atomic_result_1 = 0u;
InterlockedExchange(arg_0, 0u, atomic_result_1);
}

View File

@ -7,7 +7,7 @@ void atomicXor_c8e6be(threadgroup atomic_uint* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup atomic_uint tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 25
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_0 "arg_0"
OpName %tint_symbol "tint_symbol"
@ -25,7 +25,6 @@
%uint_1 = OpConstant %uint 1
%_ptr_Function_uint = OpTypePointer Function %uint
%17 = OpConstantNull %uint
%bool = OpTypeBool
%uint_264 = OpConstant %uint 264
%atomicXor_c8e6be = OpFunction %void None %6
%9 = OpLabel
@ -36,15 +35,8 @@
OpFunctionEnd
%compute_main = OpFunction %void None %6
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%21 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %23 None
OpBranchConditional %21 %24 %23
%24 = OpLabel
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
OpBranch %23
%23 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %atomicXor_c8e6be
%24 = OpFunctionCall %void %atomicXor_c8e6be
OpReturn
OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = 0;
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_0da285(threadgroup int* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = int();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 32
; Bound: 26
; Schema: 0
OpCapability Shader
%13 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol"
@ -30,9 +30,7 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
%float_1 = OpConstant %float 1
%_ptr_Function_float = OpTypePointer Function %float
%18 = OpConstantNull %float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%27 = OpConstantNull %int
%21 = OpConstantNull %int
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%frexp_0da285 = OpFunction %void None %7
@ -44,15 +42,8 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
OpFunctionEnd
%compute_main = OpFunction %void None %7
%20 = OpLabel
%21 = OpLoad %uint %tint_symbol
%23 = OpIEqual %bool %21 %uint_0
OpSelectionMerge %25 None
OpBranchConditional %23 %26 %25
%26 = OpLabel
OpStore %arg_1 %27
OpBranch %25
%25 = OpLabel
OpStore %arg_1 %21
OpControlBarrier %uint_2 %uint_2 %uint_264
%31 = OpFunctionCall %void %frexp_0da285
%25 = OpFunctionCall %void %frexp_0da285
OpReturn
OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = int3(0, 0, 0);
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_40fc9b(threadgroup int3* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int3 tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = int3();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 33
; Bound: 27
; Schema: 0
OpCapability Shader
%15 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol"
@ -31,9 +31,7 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
%v3float = OpTypeVector %float 3
%16 = OpConstantNull %v3float
%_ptr_Function_v3float = OpTypePointer Function %v3float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%28 = OpConstantNull %v3int
%22 = OpConstantNull %v3int
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%frexp_40fc9b = OpFunction %void None %8
@ -45,15 +43,8 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd
%compute_main = OpFunction %void None %8
%21 = OpLabel
%22 = OpLoad %uint %tint_symbol
%24 = OpIEqual %bool %22 %uint_0
OpSelectionMerge %26 None
OpBranchConditional %24 %27 %26
%27 = OpLabel
OpStore %arg_1 %28
OpBranch %26
%26 = OpLabel
OpStore %arg_1 %22
OpControlBarrier %uint_2 %uint_2 %uint_264
%32 = OpFunctionCall %void %frexp_40fc9b
%26 = OpFunctionCall %void %frexp_40fc9b
OpReturn
OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = int2(0, 0);
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_a3f940(threadgroup int2* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int2 tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = int2();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 33
; Bound: 27
; Schema: 0
OpCapability Shader
%15 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol"
@ -31,9 +31,7 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
%v2float = OpTypeVector %float 2
%16 = OpConstantNull %v2float
%_ptr_Function_v2float = OpTypePointer Function %v2float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%28 = OpConstantNull %v2int
%22 = OpConstantNull %v2int
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%frexp_a3f940 = OpFunction %void None %8
@ -45,15 +43,8 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd
%compute_main = OpFunction %void None %8
%21 = OpLabel
%22 = OpLoad %uint %tint_symbol
%24 = OpIEqual %bool %22 %uint_0
OpSelectionMerge %26 None
OpBranchConditional %24 %27 %26
%27 = OpLabel
OpStore %arg_1 %28
OpBranch %26
%26 = OpLabel
OpStore %arg_1 %22
OpControlBarrier %uint_2 %uint_2 %uint_264
%32 = OpFunctionCall %void %frexp_a3f940
%26 = OpFunctionCall %void %frexp_a3f940
OpReturn
OpFunctionEnd

View File

@ -22,7 +22,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = int4(0, 0, 0, 0);
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void frexp_b87f4e(threadgroup int4* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup int4 tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = int4();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 33
; Bound: 27
; Schema: 0
OpCapability Shader
%15 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol"
@ -31,9 +31,7 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
%v4float = OpTypeVector %float 4
%16 = OpConstantNull %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%28 = OpConstantNull %v4int
%22 = OpConstantNull %v4int
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%frexp_b87f4e = OpFunction %void None %8
@ -45,15 +43,8 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd
%compute_main = OpFunction %void None %8
%21 = OpLabel
%22 = OpLoad %uint %tint_symbol
%24 = OpIEqual %bool %22 %uint_0
OpSelectionMerge %26 None
OpBranchConditional %24 %27 %26
%27 = OpLabel
OpStore %arg_1 %28
OpBranch %26
%26 = OpLabel
OpStore %arg_1 %22
OpControlBarrier %uint_2 %uint_2 %uint_264
%32 = OpFunctionCall %void %frexp_b87f4e
%26 = OpFunctionCall %void %frexp_b87f4e
OpReturn
OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = float4(0.0f, 0.0f, 0.0f, 0.0f);
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void modf_1d59e5(threadgroup float4* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup float4 tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = float4();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 24
; Schema: 0
OpCapability Shader
%13 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol"
@ -29,8 +29,6 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
%8 = OpTypeFunction %void
%14 = OpConstantNull %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%modf_1d59e5 = OpFunction %void None %8
@ -42,15 +40,8 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd
%compute_main = OpFunction %void None %8
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %arg_1 %14
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %modf_1d59e5
%23 = OpFunctionCall %void %modf_1d59e5
OpReturn
OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = float2(0.0f, 0.0f);
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void modf_a128ab(threadgroup float2* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup float2 tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = float2();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

View File

@ -5,12 +5,12 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 30
; Bound: 24
; Schema: 0
OpCapability Shader
%13 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
OpEntryPoint GLCompute %compute_main "compute_main"
OpExecutionMode %compute_main LocalSize 1 1 1
OpName %arg_1 "arg_1"
OpName %tint_symbol "tint_symbol"
@ -29,8 +29,6 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
%8 = OpTypeFunction %void
%14 = OpConstantNull %v2float
%_ptr_Function_v2float = OpTypePointer Function %v2float
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%uint_2 = OpConstant %uint 2
%uint_264 = OpConstant %uint 264
%modf_a128ab = OpFunction %void None %8
@ -42,15 +40,8 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
OpFunctionEnd
%compute_main = OpFunction %void None %8
%19 = OpLabel
%20 = OpLoad %uint %tint_symbol
%22 = OpIEqual %bool %20 %uint_0
OpSelectionMerge %24 None
OpBranchConditional %22 %25 %24
%25 = OpLabel
OpStore %arg_1 %14
OpBranch %24
%24 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%29 = OpFunctionCall %void %modf_a128ab
%23 = OpFunctionCall %void %modf_a128ab
OpReturn
OpFunctionEnd

View File

@ -15,7 +15,7 @@ struct tint_symbol_1 {
[numthreads(1, 1, 1)]
void compute_main(tint_symbol_1 tint_symbol) {
const uint local_invocation_index = tint_symbol.local_invocation_index;
if ((local_invocation_index == 0u)) {
{
arg_1 = float3(0.0f, 0.0f, 0.0f);
}
GroupMemoryBarrierWithGroupSync();

View File

@ -19,7 +19,7 @@ void modf_bb9088(threadgroup float3* const tint_symbol_1) {
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup float3 tint_symbol_2;
if ((local_invocation_index == 0u)) {
{
tint_symbol_2 = float3();
}
threadgroup_barrier(mem_flags::mem_threadgroup);

Some files were not shown because too many files have changed in this diff Show More