mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-10-24 10:40:30 +00:00
transform: Optimize ZeroInitWorkgroupMemory for arrays
Spread the array zeroing across as many workgroup invocations as possible. Bug: tint:910 Change-Id: I1cb5a6aaafd2a0a4093ea3b9797c173378bc5605 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60203 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
parent
669c57f3d1
commit
89a0bde59c
@ -72,9 +72,6 @@ Output Hlsl::Run(const Program* in, const DataMap& inputs) {
|
||||
manager.Add<PromoteInitializersToConstVar>();
|
||||
manager.Add<PadArrayElements>();
|
||||
|
||||
ZeroInitWorkgroupMemory::Config zero_init_cfg;
|
||||
zero_init_cfg.init_arrays_with_loop_size_threshold = 32; // 8 scalars
|
||||
data.Add<ZeroInitWorkgroupMemory::Config>(zero_init_cfg);
|
||||
data.Add<CanonicalizeEntryPointIO::Config>(
|
||||
CanonicalizeEntryPointIO::BuiltinStyle::kStructMember);
|
||||
auto out = manager.Run(in, data);
|
||||
|
@ -38,7 +38,7 @@ fn main() {
|
||||
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32;
|
||||
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_1 = f32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
@ -95,7 +95,7 @@ fn foo(a : f32, tint_symbol_3 : ptr<private, f32>, tint_symbol_4 : ptr<workgroup
|
||||
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_5 : f32;
|
||||
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_6 : f32;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_5 = f32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
@ -152,7 +152,7 @@ fn main() {
|
||||
fn main([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
[[internal(disable_validation__ignore_storage_class)]] var<workgroup> tint_symbol_1 : f32;
|
||||
[[internal(disable_validation__ignore_storage_class)]] var<private> tint_symbol_2 : f32;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_1 = f32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
|
@ -14,17 +14,21 @@
|
||||
|
||||
#include "src/transform/zero_init_workgroup_memory.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ast/workgroup_decoration.h"
|
||||
#include "src/program_builder.h"
|
||||
#include "src/sem/atomic_type.h"
|
||||
#include "src/sem/function.h"
|
||||
#include "src/sem/variable.h"
|
||||
#include "src/utils/get_or_create.h"
|
||||
#include "src/utils/unique_vector.h"
|
||||
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory);
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::ZeroInitWorkgroupMemory::Config);
|
||||
|
||||
namespace tint {
|
||||
namespace transform {
|
||||
@ -33,152 +37,112 @@ namespace transform {
|
||||
struct ZeroInitWorkgroupMemory::State {
|
||||
/// The clone context
|
||||
CloneContext& ctx;
|
||||
/// The config
|
||||
Config cfg;
|
||||
|
||||
/// Zero() generates the statements required to zero initialize the workgroup
|
||||
/// storage expression of type `ty`.
|
||||
/// @param ty the expression type
|
||||
/// @param stmts the built statements
|
||||
/// @param get_expr a function that builds the AST nodes for the expression
|
||||
void Zero(const sem::Type* ty,
|
||||
ast::StatementList& stmts,
|
||||
const std::function<ast::Expression*()>& get_expr) {
|
||||
if (CanZero(ty)) {
|
||||
auto* var = get_expr();
|
||||
auto* zero_init = ctx.dst->Construct(CreateASTTypeFor(ctx, ty));
|
||||
stmts.emplace_back(
|
||||
ctx.dst->create<ast::AssignmentStatement>(var, zero_init));
|
||||
return;
|
||||
/// An alias to *ctx.dst
|
||||
ProgramBuilder& b = *ctx.dst;
|
||||
|
||||
/// The constant size of the workgroup. If 0, then #workgroup_size_expr should
|
||||
/// be used instead.
|
||||
uint32_t workgroup_size_const = 0;
|
||||
/// The size of the workgroup as an expression generator. Use if
|
||||
/// #workgroup_size_const is 0.
|
||||
std::function<ast::Expression*()> workgroup_size_expr;
|
||||
|
||||
/// ArrayIndex represents a function on the local invocation index, of
|
||||
/// the form: `array_index = (local_invocation_index % modulo) / division`
|
||||
struct ArrayIndex {
|
||||
/// The RHS of the modulus part of the expression
|
||||
uint32_t modulo = 1;
|
||||
/// The RHS of the division part of the expression
|
||||
uint32_t division = 1;
|
||||
|
||||
/// Equality operator
|
||||
/// @param i the ArrayIndex to compare to this ArrayIndex
|
||||
/// @returns true if `i` and this ArrayIndex are equal
|
||||
bool operator==(const ArrayIndex& i) const {
|
||||
return modulo == i.modulo && division == i.division;
|
||||
}
|
||||
|
||||
if (auto* atomic = ty->As<sem::Atomic>()) {
|
||||
auto* zero_init =
|
||||
ctx.dst->Construct(CreateASTTypeFor(ctx, atomic->Type()));
|
||||
auto* store = ctx.dst->Call("atomicStore", ctx.dst->AddressOf(get_expr()),
|
||||
zero_init);
|
||||
stmts.emplace_back(ctx.dst->create<ast::CallStatement>(store));
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto* str = ty->As<sem::Struct>()) {
|
||||
for (auto* member : str->Members()) {
|
||||
auto name = ctx.Clone(member->Declaration()->symbol());
|
||||
Zero(member->Type(), stmts,
|
||||
[&] { return ctx.dst->MemberAccessor(get_expr(), name); });
|
||||
/// Hash function for the ArrayIndex type
|
||||
struct Hasher {
|
||||
/// @param i the ArrayIndex to calculate a hash for
|
||||
/// @returns the hash value for the ArrayIndex `i`
|
||||
size_t operator()(const ArrayIndex& i) const {
|
||||
return utils::Hash(i.modulo, i.division);
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
if (auto* arr = ty->As<sem::Array>()) {
|
||||
if (ShouldEmitForLoop(arr)) {
|
||||
auto i = ctx.dst->Symbols().New("i");
|
||||
auto* i_decl = ctx.dst->Decl(ctx.dst->Var(i, ctx.dst->ty.i32()));
|
||||
auto* cond = ctx.dst->create<ast::BinaryExpression>(
|
||||
ast::BinaryOp::kLessThan, ctx.dst->Expr(i),
|
||||
ctx.dst->Expr(static_cast<int>(arr->Count())));
|
||||
auto* inc = ctx.dst->Assign(i, ctx.dst->Add(i, 1));
|
||||
ast::StatementList for_stmts;
|
||||
Zero(arr->ElemType(), for_stmts,
|
||||
[&] { return ctx.dst->IndexAccessor(get_expr(), i); });
|
||||
auto* body = ctx.dst->Block(for_stmts);
|
||||
stmts.emplace_back(ctx.dst->For(i_decl, cond, inc, body));
|
||||
} else {
|
||||
for (size_t i = 0; i < arr->Count(); i++) {
|
||||
Zero(arr->ElemType(), stmts, [&] {
|
||||
return ctx.dst->IndexAccessor(get_expr(),
|
||||
static_cast<ProgramBuilder::u32>(i));
|
||||
});
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
/// A list of unique ArrayIndex
|
||||
using ArrayIndices = UniqueVector<ArrayIndex, ArrayIndex::Hasher>;
|
||||
|
||||
TINT_UNREACHABLE(Transform, ctx.dst->Diagnostics())
|
||||
<< "could not zero workgroup type: " << ty->type_name();
|
||||
}
|
||||
/// Expression holds information about an expression that is being built for a
|
||||
/// statement will zero workgroup values.
|
||||
struct Expression {
|
||||
/// The AST expression node
|
||||
ast::Expression* expr = nullptr;
|
||||
/// The number of iterations required to zero the value
|
||||
uint32_t num_iterations = 0;
|
||||
/// All array indices used by this expression
|
||||
ArrayIndices array_indices;
|
||||
};
|
||||
|
||||
/// @returns true if the type `ty` can be zeroed with a simple zero-value
|
||||
/// expression in the form of a type constructor without operands. If
|
||||
/// CanZero() returns false, then the type needs to be initialized by
|
||||
/// decomposing the initialization into multiple sub-initializations.
|
||||
/// @param ty the type to inspect
|
||||
bool CanZero(const sem::Type* ty) {
|
||||
if (ty->Is<sem::Atomic>()) {
|
||||
return false;
|
||||
}
|
||||
if (auto* str = ty->As<sem::Struct>()) {
|
||||
for (auto* member : str->Members()) {
|
||||
if (!CanZero(member->Type())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (auto* arr = ty->As<sem::Array>()) {
|
||||
if (ShouldEmitForLoop(arr) || !CanZero(arr->ElemType())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
/// Statement holds information about a statement that will zero workgroup
|
||||
/// values.
|
||||
struct Statement {
|
||||
/// The AST statement node
|
||||
ast::Statement* stmt;
|
||||
/// The number of iterations required to zero the value
|
||||
uint32_t num_iterations;
|
||||
/// All array indices used by this statement
|
||||
ArrayIndices array_indices;
|
||||
};
|
||||
|
||||
/// @returns true if the array should be emitted as a for-loop instead of
|
||||
/// using zero-initializer statements.
|
||||
/// @param array the array
|
||||
bool ShouldEmitForLoop(const sem::Array* array) {
|
||||
// TODO(bclayton): If array sizes become pipeline-overridable then this
|
||||
// we need to return true for these arrays.
|
||||
// See https://github.com/gpuweb/gpuweb/pull/1792
|
||||
return (cfg.init_arrays_with_loop_size_threshold != 0) &&
|
||||
(array->Size() >= cfg.init_arrays_with_loop_size_threshold);
|
||||
}
|
||||
};
|
||||
/// All statements that zero workgroup memory
|
||||
std::vector<Statement> statements;
|
||||
|
||||
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default;
|
||||
/// A map of ArrayIndex to the name reserved for the `let` declaration of that
|
||||
/// index.
|
||||
std::unordered_map<ArrayIndex, Symbol, ArrayIndex::Hasher> array_index_names;
|
||||
|
||||
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default;
|
||||
/// Constructor
|
||||
/// @param c the CloneContext used for the transform
|
||||
explicit State(CloneContext& c) : ctx(c) {}
|
||||
|
||||
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
|
||||
const DataMap& inputs,
|
||||
DataMap&) {
|
||||
auto& sem = ctx.src->Sem();
|
||||
/// Run inserts the workgroup memory zero-initialization logic at the top of
|
||||
/// the given function
|
||||
/// @param fn a compute shader entry point function
|
||||
void Run(ast::Function* fn) {
|
||||
auto& sem = ctx.src->Sem();
|
||||
|
||||
Config cfg;
|
||||
if (auto* c = inputs.Get<Config>()) {
|
||||
cfg = *c;
|
||||
}
|
||||
CalculateWorkgroupSize(
|
||||
ast::GetDecoration<ast::WorkgroupDecoration>(fn->decorations()));
|
||||
|
||||
for (auto* ast_func : ctx.src->AST().Functions()) {
|
||||
if (!ast_func->IsEntryPoint()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate a list of statements to zero initialize each of the workgroup
|
||||
// storage variables.
|
||||
ast::StatementList stmts;
|
||||
auto* func = sem.Get(ast_func);
|
||||
// Generate a list of statements to zero initialize each of the
|
||||
// workgroup storage variables used by `fn`. This will populate #statements.
|
||||
auto* func = sem.Get(fn);
|
||||
for (auto* var : func->ReferencedModuleVariables()) {
|
||||
if (var->StorageClass() != ast::StorageClass::kWorkgroup) {
|
||||
continue;
|
||||
if (var->StorageClass() == ast::StorageClass::kWorkgroup) {
|
||||
BuildZeroingStatements(
|
||||
var->Type()->UnwrapRef(), [&](uint32_t num_values) {
|
||||
auto var_name = ctx.Clone(var->Declaration()->symbol());
|
||||
return Expression{b.Expr(var_name), num_values, ArrayIndices{}};
|
||||
});
|
||||
}
|
||||
State{ctx, cfg}.Zero(var->Type()->UnwrapRef(), stmts, [&] {
|
||||
auto var_name = ctx.Clone(var->Declaration()->symbol());
|
||||
return ctx.dst->Expr(var_name);
|
||||
});
|
||||
}
|
||||
|
||||
if (stmts.empty()) {
|
||||
continue; // No workgroup variables to initialize.
|
||||
if (statements.empty()) {
|
||||
return; // No workgroup variables to initialize.
|
||||
}
|
||||
|
||||
// Scan the entry point for an existing local_invocation_index builtin
|
||||
// parameter
|
||||
ast::Expression* local_index = nullptr;
|
||||
for (auto* param : ast_func->params()) {
|
||||
std::function<ast::Expression*()> local_index;
|
||||
for (auto* param : fn->params()) {
|
||||
if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>(
|
||||
param->decorations())) {
|
||||
if (builtin->value() == ast::Builtin::kLocalInvocationIndex) {
|
||||
local_index = ctx.dst->Expr(ctx.Clone(param->symbol()));
|
||||
local_index = [=] { return b.Expr(ctx.Clone(param->symbol())); };
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -188,9 +152,11 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
|
||||
if (auto* builtin = ast::GetDecoration<ast::BuiltinDecoration>(
|
||||
member->Declaration()->decorations())) {
|
||||
if (builtin->value() == ast::Builtin::kLocalInvocationIndex) {
|
||||
auto* param_expr = ctx.dst->Expr(ctx.Clone(param->symbol()));
|
||||
auto member_name = ctx.Clone(member->Declaration()->symbol());
|
||||
local_index = ctx.dst->MemberAccessor(param_expr, member_name);
|
||||
local_index = [=] {
|
||||
auto* param_expr = b.Expr(ctx.Clone(param->symbol()));
|
||||
auto member_name = ctx.Clone(member->Declaration()->symbol());
|
||||
return b.MemberAccessor(param_expr, member_name);
|
||||
};
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -199,37 +165,283 @@ void ZeroInitWorkgroupMemory::Run(CloneContext& ctx,
|
||||
}
|
||||
if (!local_index) {
|
||||
// No existing local index parameter. Append one to the entry point.
|
||||
auto* param = ctx.dst->Param(
|
||||
ctx.dst->Symbols().New("local_invocation_index"), ctx.dst->ty.u32(),
|
||||
{ctx.dst->Builtin(ast::Builtin::kLocalInvocationIndex)});
|
||||
ctx.InsertBack(ast_func->params(), param);
|
||||
local_index = ctx.dst->Expr(param->symbol());
|
||||
auto* param =
|
||||
b.Param(b.Symbols().New("local_invocation_index"), b.ty.u32(),
|
||||
{b.Builtin(ast::Builtin::kLocalInvocationIndex)});
|
||||
ctx.InsertBack(fn->params(), param);
|
||||
local_index = [=] { return b.Expr(param->symbol()); };
|
||||
}
|
||||
|
||||
// We only want to zero-initialize the workgroup memory with the first
|
||||
// shader invocation. Construct an if statement that holds stmts.
|
||||
// TODO(crbug.com/tint/910): We should attempt to optimize this for arrays.
|
||||
auto* if_zero_local_index = ctx.dst->create<ast::BinaryExpression>(
|
||||
ast::BinaryOp::kEqual, local_index, ctx.dst->Expr(0u));
|
||||
auto* if_stmt = ctx.dst->If(if_zero_local_index, ctx.dst->Block(stmts));
|
||||
// Take the zeroing statements and bin them by the number of iterations
|
||||
// required to zero the workgroup data. We then emit these in blocks,
|
||||
// possibly wrapped in if-statements or for-loops.
|
||||
std::unordered_map<uint32_t, std::vector<Statement>>
|
||||
stmts_by_num_iterations;
|
||||
std::vector<uint32_t> num_sorted_iterations;
|
||||
for (auto& s : statements) {
|
||||
auto& stmts = stmts_by_num_iterations[s.num_iterations];
|
||||
if (stmts.empty()) {
|
||||
num_sorted_iterations.emplace_back(s.num_iterations);
|
||||
}
|
||||
stmts.emplace_back(s);
|
||||
}
|
||||
std::sort(num_sorted_iterations.begin(), num_sorted_iterations.end());
|
||||
|
||||
// Insert this if-statement at the top of the entry point.
|
||||
ctx.InsertFront(ast_func->body()->statements(), if_stmt);
|
||||
// Loop over the statements, grouped by num_iterations.
|
||||
for (auto num_iterations : num_sorted_iterations) {
|
||||
auto& stmts = stmts_by_num_iterations[num_iterations];
|
||||
|
||||
// Append a single workgroup barrier after the if statement.
|
||||
ctx.InsertFront(
|
||||
ast_func->body()->statements(),
|
||||
ctx.dst->create<ast::CallStatement>(ctx.dst->Call("workgroupBarrier")));
|
||||
// Gather all the array indices used by all the statements in the block.
|
||||
ArrayIndices array_indices;
|
||||
for (auto& s : stmts) {
|
||||
for (auto& idx : s.array_indices) {
|
||||
array_indices.add(idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Determine the block type used to emit these statements.
|
||||
|
||||
if (workgroup_size_const == 0 || num_iterations > workgroup_size_const) {
|
||||
// Either the workgroup size is dynamic, or smaller than num_iterations.
|
||||
// In either case, we need to generate a for loop to ensure we
|
||||
// initialize all the array elements.
|
||||
//
|
||||
// for (var idx : u32 = local_index;
|
||||
// idx < num_iterations;
|
||||
// idx += workgroup_size) {
|
||||
// ...
|
||||
// }
|
||||
auto idx = b.Symbols().New("idx");
|
||||
auto* init = b.Decl(b.Var(idx, b.ty.u32(), local_index()));
|
||||
auto* cond = b.create<ast::BinaryExpression>(
|
||||
ast::BinaryOp::kLessThan, b.Expr(idx), b.Expr(num_iterations));
|
||||
auto* cont = b.Assign(
|
||||
idx, b.Add(idx, workgroup_size_const ? b.Expr(workgroup_size_const)
|
||||
: workgroup_size_expr()));
|
||||
|
||||
auto block = DeclareArrayIndices(num_iterations, array_indices,
|
||||
[&] { return b.Expr(idx); });
|
||||
for (auto& s : stmts) {
|
||||
block.emplace_back(s.stmt);
|
||||
}
|
||||
auto* for_loop = b.For(init, cond, cont, b.Block(block));
|
||||
ctx.InsertFront(fn->body()->statements(), for_loop);
|
||||
} else if (num_iterations < workgroup_size_const) {
|
||||
// Workgroup size is a known constant, but is greater than
|
||||
// num_iterations. Emit an if statement:
|
||||
//
|
||||
// if (local_index < num_iterations) {
|
||||
// ...
|
||||
// }
|
||||
auto* cond = b.create<ast::BinaryExpression>(
|
||||
ast::BinaryOp::kLessThan, local_index(), b.Expr(num_iterations));
|
||||
auto block = DeclareArrayIndices(num_iterations, array_indices,
|
||||
[&] { return b.Expr(local_index()); });
|
||||
for (auto& s : stmts) {
|
||||
block.emplace_back(s.stmt);
|
||||
}
|
||||
auto* if_stmt = b.If(cond, b.Block(block));
|
||||
ctx.InsertFront(fn->body()->statements(), if_stmt);
|
||||
} else {
|
||||
// Workgroup size exactly equals num_iterations.
|
||||
// No need for any conditionals. Just emit a basic block:
|
||||
//
|
||||
// {
|
||||
// ...
|
||||
// }
|
||||
auto block = DeclareArrayIndices(num_iterations, array_indices,
|
||||
[&] { return b.Expr(local_index()); });
|
||||
for (auto& s : stmts) {
|
||||
block.emplace_back(s.stmt);
|
||||
}
|
||||
ctx.InsertFront(fn->body()->statements(), b.Block(block));
|
||||
}
|
||||
}
|
||||
|
||||
// Append a single workgroup barrier after the zero initialization.
|
||||
ctx.InsertFront(fn->body()->statements(),
|
||||
b.create<ast::CallStatement>(b.Call("workgroupBarrier")));
|
||||
}
|
||||
|
||||
/// BuildZeroingExpr is a function that builds a sub-expression used to zero
|
||||
/// workgroup values. `num_values` is the number of elements that the
|
||||
/// expression will be used to zero. Returns the expression.
|
||||
using BuildZeroingExpr = std::function<Expression(uint32_t num_values)>;
|
||||
|
||||
/// BuildZeroingStatements() generates the statements required to zero
|
||||
/// initialize the workgroup storage expression of type `ty`.
|
||||
/// @param ty the expression type
|
||||
/// @param get_expr a function that builds the AST nodes for the expression.
|
||||
void BuildZeroingStatements(const sem::Type* ty,
|
||||
const BuildZeroingExpr& get_expr) {
|
||||
if (CanTriviallyZero(ty)) {
|
||||
auto var = get_expr(1u);
|
||||
auto* zero_init = b.Construct(CreateASTTypeFor(ctx, ty));
|
||||
statements.emplace_back(Statement{b.Assign(var.expr, zero_init),
|
||||
var.num_iterations, var.array_indices});
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto* atomic = ty->As<sem::Atomic>()) {
|
||||
auto* zero_init = b.Construct(CreateASTTypeFor(ctx, atomic->Type()));
|
||||
auto expr = get_expr(1u);
|
||||
auto* store = b.Call("atomicStore", b.AddressOf(expr.expr), zero_init);
|
||||
statements.emplace_back(Statement{b.create<ast::CallStatement>(store),
|
||||
expr.num_iterations,
|
||||
expr.array_indices});
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto* str = ty->As<sem::Struct>()) {
|
||||
for (auto* member : str->Members()) {
|
||||
auto name = ctx.Clone(member->Declaration()->symbol());
|
||||
BuildZeroingStatements(member->Type(), [&](uint32_t num_values) {
|
||||
auto s = get_expr(num_values);
|
||||
return Expression{b.MemberAccessor(s.expr, name), s.num_iterations,
|
||||
s.array_indices};
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto* arr = ty->As<sem::Array>()) {
|
||||
BuildZeroingStatements(arr->ElemType(), [&](uint32_t num_values) {
|
||||
// num_values is the number of values to zero for the element type.
|
||||
// The number of iterations required to zero the array and its elements
|
||||
// is:
|
||||
// `num_values * arr->Count()`
|
||||
// The index for this array is:
|
||||
// `(idx % modulo) / division`
|
||||
auto modulo = num_values * arr->Count();
|
||||
auto division = num_values;
|
||||
auto a = get_expr(modulo);
|
||||
auto array_indices = a.array_indices;
|
||||
array_indices.add(ArrayIndex{modulo, division});
|
||||
auto index =
|
||||
utils::GetOrCreate(array_index_names, ArrayIndex{modulo, division},
|
||||
[&] { return b.Symbols().New("i"); });
|
||||
return Expression{b.IndexAccessor(a.expr, index), a.num_iterations,
|
||||
array_indices};
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
TINT_UNREACHABLE(Transform, b.Diagnostics())
|
||||
<< "could not zero workgroup type: " << ty->type_name();
|
||||
}
|
||||
|
||||
/// DeclareArrayIndices returns a list of statements that contain the `let`
|
||||
/// declarations for all of the ArrayIndices.
|
||||
/// @param num_iterations the number of iterations for the block
|
||||
/// @param array_indices the list of array indices to generate `let`
|
||||
/// declarations for
|
||||
/// @param iteration a function that returns the index of the current
|
||||
/// iteration.
|
||||
/// @returns the list of `let` statements that declare the array indices
|
||||
ast::StatementList DeclareArrayIndices(
|
||||
uint32_t num_iterations,
|
||||
const ArrayIndices& array_indices,
|
||||
const std::function<ast::Expression*()>& iteration) {
|
||||
ast::StatementList stmts;
|
||||
std::map<Symbol, ArrayIndex> indices_by_name;
|
||||
for (auto index : array_indices) {
|
||||
auto name = array_index_names.at(index);
|
||||
auto* mod =
|
||||
(num_iterations > index.modulo)
|
||||
? b.create<ast::BinaryExpression>(
|
||||
ast::BinaryOp::kModulo, iteration(), b.Expr(index.modulo))
|
||||
: iteration();
|
||||
auto* div = (index.division != 1u) ? b.Div(mod, index.division) : mod;
|
||||
auto* decl = b.Decl(b.Const(name, b.ty.u32(), div));
|
||||
stmts.emplace_back(decl);
|
||||
}
|
||||
return stmts;
|
||||
}
|
||||
|
||||
/// CalculateWorkgroupSize initializes the members #workgroup_size_const and
|
||||
/// #workgroup_size_expr with the linear workgroup size.
|
||||
/// @param deco the workgroup decoration applied to the entry point function
|
||||
void CalculateWorkgroupSize(const ast::WorkgroupDecoration* deco) {
|
||||
bool is_signed = false;
|
||||
workgroup_size_const = 1u;
|
||||
workgroup_size_expr = nullptr;
|
||||
for (auto* expr : deco->values()) {
|
||||
if (!expr) {
|
||||
continue;
|
||||
}
|
||||
auto* sem = ctx.src->Sem().Get(expr);
|
||||
if (auto c = sem->ConstantValue()) {
|
||||
if (c.ElementType()->Is<sem::I32>()) {
|
||||
workgroup_size_const *= static_cast<uint32_t>(c.Elements()[0].i32);
|
||||
continue;
|
||||
} else if (c.ElementType()->Is<sem::U32>()) {
|
||||
workgroup_size_const *= c.Elements()[0].u32;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Constant value could not be found. Build expression instead.
|
||||
workgroup_size_expr = [this, expr, size = workgroup_size_expr] {
|
||||
auto* e = ctx.Clone(expr);
|
||||
if (ctx.src->TypeOf(expr)->UnwrapRef()->Is<sem::I32>()) {
|
||||
e = b.Construct<ProgramBuilder::u32>(e);
|
||||
}
|
||||
return size ? b.Mul(size(), e) : e;
|
||||
};
|
||||
}
|
||||
if (workgroup_size_expr) {
|
||||
if (workgroup_size_const != 1) {
|
||||
// Fold workgroup_size_const in to workgroup_size_expr
|
||||
workgroup_size_expr = [this, is_signed,
|
||||
const_size = workgroup_size_const,
|
||||
expr_size = workgroup_size_expr] {
|
||||
return is_signed
|
||||
? b.Mul(expr_size(), static_cast<int32_t>(const_size))
|
||||
: b.Mul(expr_size(), const_size);
|
||||
};
|
||||
}
|
||||
// Indicate that workgroup_size_expr should be used instead of the
|
||||
// constant.
|
||||
workgroup_size_const = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// @returns true if a variable with store type `ty` can be efficiently zeroed
|
||||
/// by assignment of a type constructor without operands. If
|
||||
/// CanTriviallyZero() returns false, then the type needs to be
|
||||
/// initialized by decomposing the initialization into multiple
|
||||
/// sub-initializations.
|
||||
/// @param ty the type to inspect
|
||||
bool CanTriviallyZero(const sem::Type* ty) {
|
||||
if (ty->Is<sem::Atomic>()) {
|
||||
return false;
|
||||
}
|
||||
if (auto* str = ty->As<sem::Struct>()) {
|
||||
for (auto* member : str->Members()) {
|
||||
if (!CanTriviallyZero(member->Type())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ty->Is<sem::Array>()) {
|
||||
return false;
|
||||
}
|
||||
// True for all other storable types
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
ZeroInitWorkgroupMemory::ZeroInitWorkgroupMemory() = default;
|
||||
|
||||
ZeroInitWorkgroupMemory::~ZeroInitWorkgroupMemory() = default;
|
||||
|
||||
void ZeroInitWorkgroupMemory::Run(CloneContext& ctx, const DataMap&, DataMap&) {
|
||||
for (auto* fn : ctx.src->AST().Functions()) {
|
||||
if (fn->pipeline_stage() == ast::PipelineStage::kCompute) {
|
||||
State{ctx}.Run(fn);
|
||||
}
|
||||
}
|
||||
ctx.Clone();
|
||||
}
|
||||
|
||||
ZeroInitWorkgroupMemory::Config::Config() = default;
|
||||
ZeroInitWorkgroupMemory::Config::Config(const Config&) = default;
|
||||
ZeroInitWorkgroupMemory::Config::~Config() = default;
|
||||
ZeroInitWorkgroupMemory::Config& ZeroInitWorkgroupMemory::Config::operator=(
|
||||
const Config&) = default;
|
||||
|
||||
} // namespace transform
|
||||
} // namespace tint
|
||||
|
@ -26,27 +26,6 @@ namespace transform {
|
||||
class ZeroInitWorkgroupMemory
|
||||
: public Castable<ZeroInitWorkgroupMemory, Transform> {
|
||||
public:
|
||||
/// Configuration options for the transform
|
||||
struct Config : public Castable<Config, Data> {
|
||||
/// Constructor
|
||||
Config();
|
||||
|
||||
/// Copy constructor
|
||||
Config(const Config&);
|
||||
|
||||
/// Destructor
|
||||
~Config() override;
|
||||
|
||||
/// Assignment operator
|
||||
/// @returns this Config
|
||||
Config& operator=(const Config&);
|
||||
|
||||
/// If greater than 0, then arrays of at least this size in bytes will be
|
||||
/// zero initialized using a for loop. If 0, then the array is assigned a
|
||||
/// zero initialized array with a single statement.
|
||||
uint32_t init_arrays_with_loop_size_threshold = 0;
|
||||
};
|
||||
|
||||
/// Constructor
|
||||
ZeroInitWorkgroupMemory();
|
||||
|
||||
|
@ -76,7 +76,7 @@ TEST_F(ZeroInitWorkgroupMemoryTest, SingleWorkgroupVar_ExistingLocalIndex) {
|
||||
var<workgroup> v : i32;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
ignore(v); // Initialization should be inserted above this statement
|
||||
}
|
||||
)";
|
||||
@ -84,8 +84,8 @@ fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
var<workgroup> v : i32;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
if ((idx == 0u)) {
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
{
|
||||
v = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
@ -104,7 +104,7 @@ TEST_F(ZeroInitWorkgroupMemoryTest,
|
||||
var<workgroup> v : i32;
|
||||
|
||||
struct Params {
|
||||
[[builtin(local_invocation_index)]] idx : u32;
|
||||
[[builtin(local_invocation_index)]] local_idx : u32;
|
||||
};
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
@ -117,12 +117,12 @@ var<workgroup> v : i32;
|
||||
|
||||
struct Params {
|
||||
[[builtin(local_invocation_index)]]
|
||||
idx : u32;
|
||||
local_idx : u32;
|
||||
};
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f(params : Params) {
|
||||
if ((params.idx == 0u)) {
|
||||
{
|
||||
v = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
@ -149,7 +149,7 @@ var<workgroup> v : i32;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
v = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
@ -162,7 +162,8 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest, MultipleWorkgroupVar_ExistingLocalIndex) {
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest,
|
||||
MultipleWorkgroupVar_ExistingLocalIndex_Size1) {
|
||||
auto* src = R"(
|
||||
struct S {
|
||||
x : i32;
|
||||
@ -176,7 +177,7 @@ var<workgroup> b : S;
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
ignore(a); // Initialization should be inserted above this statement
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
@ -195,11 +196,246 @@ var<workgroup> b : S;
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
if ((idx == 0u)) {
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
{
|
||||
a = i32();
|
||||
b = S();
|
||||
c = array<S, 32>();
|
||||
b.x = i32();
|
||||
}
|
||||
for(var idx : u32 = local_idx; (idx < 8u); idx = (idx + 1u)) {
|
||||
let i : u32 = idx;
|
||||
b.y[i] = i32();
|
||||
}
|
||||
for(var idx_1 : u32 = local_idx; (idx_1 < 32u); idx_1 = (idx_1 + 1u)) {
|
||||
let i_1 : u32 = idx_1;
|
||||
c[i_1].x = i32();
|
||||
}
|
||||
for(var idx_2 : u32 = local_idx; (idx_2 < 256u); idx_2 = (idx_2 + 1u)) {
|
||||
let i_2 : u32 = (idx_2 / 8u);
|
||||
let i : u32 = (idx_2 % 8u);
|
||||
c[i_2].y[i] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(a);
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
}
|
||||
)";
|
||||
|
||||
auto got = Run<ZeroInitWorkgroupMemory>(src);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest,
|
||||
MultipleWorkgroupVar_ExistingLocalIndex_Size_2_3) {
|
||||
auto* src = R"(
|
||||
struct S {
|
||||
x : i32;
|
||||
y : array<i32, 8>;
|
||||
};
|
||||
|
||||
var<workgroup> a : i32;
|
||||
|
||||
var<workgroup> b : S;
|
||||
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[stage(compute), workgroup_size(2, 3)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
ignore(a); // Initialization should be inserted above this statement
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
}
|
||||
)";
|
||||
auto* expect = R"(
|
||||
struct S {
|
||||
x : i32;
|
||||
y : array<i32, 8>;
|
||||
};
|
||||
|
||||
var<workgroup> a : i32;
|
||||
|
||||
var<workgroup> b : S;
|
||||
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[stage(compute), workgroup_size(2, 3)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
if ((local_idx < 1u)) {
|
||||
a = i32();
|
||||
b.x = i32();
|
||||
}
|
||||
for(var idx : u32 = local_idx; (idx < 8u); idx = (idx + 6u)) {
|
||||
let i : u32 = idx;
|
||||
b.y[i] = i32();
|
||||
}
|
||||
for(var idx_1 : u32 = local_idx; (idx_1 < 32u); idx_1 = (idx_1 + 6u)) {
|
||||
let i_1 : u32 = idx_1;
|
||||
c[i_1].x = i32();
|
||||
}
|
||||
for(var idx_2 : u32 = local_idx; (idx_2 < 256u); idx_2 = (idx_2 + 6u)) {
|
||||
let i_2 : u32 = (idx_2 / 8u);
|
||||
let i : u32 = (idx_2 % 8u);
|
||||
c[i_2].y[i] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(a);
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
}
|
||||
)";
|
||||
|
||||
auto got = Run<ZeroInitWorkgroupMemory>(src);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest,
|
||||
MultipleWorkgroupVar_ExistingLocalIndex_Size_2_3_X) {
|
||||
auto* src = R"(
|
||||
struct S {
|
||||
x : i32;
|
||||
y : array<i32, 8>;
|
||||
};
|
||||
|
||||
var<workgroup> a : i32;
|
||||
|
||||
var<workgroup> b : S;
|
||||
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[override(1)]] let X : i32;
|
||||
|
||||
[[stage(compute), workgroup_size(2, 3, X)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
ignore(a); // Initialization should be inserted above this statement
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
}
|
||||
)";
|
||||
auto* expect =
|
||||
R"(
|
||||
struct S {
|
||||
x : i32;
|
||||
y : array<i32, 8>;
|
||||
};
|
||||
|
||||
var<workgroup> a : i32;
|
||||
|
||||
var<workgroup> b : S;
|
||||
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[override(1)]] let X : i32;
|
||||
|
||||
[[stage(compute), workgroup_size(2, 3, X)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
for(var idx : u32 = local_idx; (idx < 1u); idx = (idx + (u32(X) * 6u))) {
|
||||
a = i32();
|
||||
b.x = i32();
|
||||
}
|
||||
for(var idx_1 : u32 = local_idx; (idx_1 < 8u); idx_1 = (idx_1 + (u32(X) * 6u))) {
|
||||
let i : u32 = idx_1;
|
||||
b.y[i] = i32();
|
||||
}
|
||||
for(var idx_2 : u32 = local_idx; (idx_2 < 32u); idx_2 = (idx_2 + (u32(X) * 6u))) {
|
||||
let i_1 : u32 = idx_2;
|
||||
c[i_1].x = i32();
|
||||
}
|
||||
for(var idx_3 : u32 = local_idx; (idx_3 < 256u); idx_3 = (idx_3 + (u32(X) * 6u))) {
|
||||
let i_2 : u32 = (idx_3 / 8u);
|
||||
let i : u32 = (idx_3 % 8u);
|
||||
c[i_2].y[i] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(a);
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
}
|
||||
)";
|
||||
|
||||
auto got = Run<ZeroInitWorkgroupMemory>(src);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest,
|
||||
MultipleWorkgroupVar_ExistingLocalIndex_Size_5u_X_10u) {
|
||||
auto* src = R"(
|
||||
struct S {
|
||||
x : array<array<i32, 8>, 10>;
|
||||
y : array<i32, 8>;
|
||||
z : array<array<array<i32, 8>, 10>, 20>;
|
||||
};
|
||||
|
||||
var<workgroup> a : i32;
|
||||
|
||||
var<workgroup> b : S;
|
||||
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[override(1)]] let X : u32;
|
||||
|
||||
[[stage(compute), workgroup_size(5u, X, 10u)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
ignore(a); // Initialization should be inserted above this statement
|
||||
ignore(b);
|
||||
ignore(c);
|
||||
}
|
||||
)";
|
||||
auto* expect =
|
||||
R"(
|
||||
struct S {
|
||||
x : array<array<i32, 8>, 10>;
|
||||
y : array<i32, 8>;
|
||||
z : array<array<array<i32, 8>, 10>, 20>;
|
||||
};
|
||||
|
||||
var<workgroup> a : i32;
|
||||
|
||||
var<workgroup> b : S;
|
||||
|
||||
var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[override(1)]] let X : u32;
|
||||
|
||||
[[stage(compute), workgroup_size(5u, X, 10u)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
for(var idx : u32 = local_idx; (idx < 1u); idx = (idx + (X * 50u))) {
|
||||
a = i32();
|
||||
}
|
||||
for(var idx_1 : u32 = local_idx; (idx_1 < 8u); idx_1 = (idx_1 + (X * 50u))) {
|
||||
let i_1 : u32 = idx_1;
|
||||
b.y[i_1] = i32();
|
||||
}
|
||||
for(var idx_2 : u32 = local_idx; (idx_2 < 80u); idx_2 = (idx_2 + (X * 50u))) {
|
||||
let i : u32 = (idx_2 / 8u);
|
||||
let i_1 : u32 = (idx_2 % 8u);
|
||||
b.x[i][i_1] = i32();
|
||||
}
|
||||
for(var idx_3 : u32 = local_idx; (idx_3 < 256u); idx_3 = (idx_3 + (X * 50u))) {
|
||||
let i_4 : u32 = (idx_3 / 8u);
|
||||
let i_1 : u32 = (idx_3 % 8u);
|
||||
c[i_4].y[i_1] = i32();
|
||||
}
|
||||
for(var idx_4 : u32 = local_idx; (idx_4 < 1600u); idx_4 = (idx_4 + (X * 50u))) {
|
||||
let i_2 : u32 = (idx_4 / 80u);
|
||||
let i : u32 = ((idx_4 % 80u) / 8u);
|
||||
let i_1 : u32 = (idx_4 % 8u);
|
||||
b.z[i_2][i][i_1] = i32();
|
||||
}
|
||||
for(var idx_5 : u32 = local_idx; (idx_5 < 2560u); idx_5 = (idx_5 + (X * 50u))) {
|
||||
let i_3 : u32 = (idx_5 / 80u);
|
||||
let i : u32 = ((idx_5 % 80u) / 8u);
|
||||
let i_1 : u32 = (idx_5 % 8u);
|
||||
c[i_3].x[i][i_1] = i32();
|
||||
}
|
||||
for(var idx_6 : u32 = local_idx; (idx_6 < 51200u); idx_6 = (idx_6 + (X * 50u))) {
|
||||
let i_5 : u32 = (idx_6 / 1600u);
|
||||
let i_2 : u32 = ((idx_6 % 1600u) / 80u);
|
||||
let i : u32 = ((idx_6 % 80u) / 8u);
|
||||
let i_1 : u32 = (idx_6 % 8u);
|
||||
c[i_5].z[i_2][i][i_1] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(a);
|
||||
@ -247,10 +483,22 @@ var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
a = i32();
|
||||
b = S();
|
||||
c = array<S, 32>();
|
||||
b.x = i32();
|
||||
}
|
||||
for(var idx : u32 = local_invocation_index; (idx < 8u); idx = (idx + 1u)) {
|
||||
let i : u32 = idx;
|
||||
b.y[i] = i32();
|
||||
}
|
||||
for(var idx_1 : u32 = local_invocation_index; (idx_1 < 32u); idx_1 = (idx_1 + 1u)) {
|
||||
let i_1 : u32 = idx_1;
|
||||
c[i_1].x = i32();
|
||||
}
|
||||
for(var idx_2 : u32 = local_invocation_index; (idx_2 < 256u); idx_2 = (idx_2 + 1u)) {
|
||||
let i_2 : u32 = (idx_2 / 8u);
|
||||
let i : u32 = (idx_2 % 8u);
|
||||
c[i_2].y[i] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(a);
|
||||
@ -283,12 +531,12 @@ fn f1() {
|
||||
ignore(c);
|
||||
}
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
[[stage(compute), workgroup_size(1, 2, 3)]]
|
||||
fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>) {
|
||||
ignore(b); // Initialization should be inserted above this statement
|
||||
}
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
[[stage(compute), workgroup_size(4, 5, 6)]]
|
||||
fn f3() {
|
||||
ignore(c); // Initialization should be inserted above this statement
|
||||
ignore(a);
|
||||
@ -308,30 +556,50 @@ var<workgroup> c : array<S, 32>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f1([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
a = i32();
|
||||
c = array<S, 32>();
|
||||
}
|
||||
for(var idx : u32 = local_invocation_index; (idx < 32u); idx = (idx + 1u)) {
|
||||
let i : u32 = idx;
|
||||
c[i].x = i32();
|
||||
}
|
||||
for(var idx_1 : u32 = local_invocation_index; (idx_1 < 256u); idx_1 = (idx_1 + 1u)) {
|
||||
let i_1 : u32 = (idx_1 / 8u);
|
||||
let i_2 : u32 = (idx_1 % 8u);
|
||||
c[i_1].y[i_2] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(a);
|
||||
ignore(c);
|
||||
}
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
[[stage(compute), workgroup_size(1, 2, 3)]]
|
||||
fn f2([[builtin(local_invocation_id)]] local_invocation_id : vec3<u32>, [[builtin(local_invocation_index)]] local_invocation_index_1 : u32) {
|
||||
if ((local_invocation_index_1 == 0u)) {
|
||||
b = S();
|
||||
if ((local_invocation_index_1 < 1u)) {
|
||||
b.x = i32();
|
||||
}
|
||||
for(var idx_2 : u32 = local_invocation_index_1; (idx_2 < 8u); idx_2 = (idx_2 + 6u)) {
|
||||
let i_3 : u32 = idx_2;
|
||||
b.y[i_3] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(b);
|
||||
}
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
[[stage(compute), workgroup_size(4, 5, 6)]]
|
||||
fn f3([[builtin(local_invocation_index)]] local_invocation_index_2 : u32) {
|
||||
if ((local_invocation_index_2 == 0u)) {
|
||||
c = array<S, 32>();
|
||||
if ((local_invocation_index_2 < 1u)) {
|
||||
a = i32();
|
||||
}
|
||||
if ((local_invocation_index_2 < 32u)) {
|
||||
let i_4 : u32 = local_invocation_index_2;
|
||||
c[i_4].x = i32();
|
||||
}
|
||||
for(var idx_3 : u32 = local_invocation_index_2; (idx_3 < 256u); idx_3 = (idx_3 + 120u)) {
|
||||
let i_5 : u32 = (idx_3 / 8u);
|
||||
let i_6 : u32 = (idx_3 % 8u);
|
||||
c[i_5].y[i_6] = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(c);
|
||||
ignore(a);
|
||||
@ -356,7 +624,7 @@ fn call_use_v() {
|
||||
}
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
call_use_v(); // Initialization should be inserted above this statement
|
||||
}
|
||||
)";
|
||||
@ -372,8 +640,8 @@ fn call_use_v() {
|
||||
}
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] idx : u32) {
|
||||
if ((idx == 0u)) {
|
||||
fn f([[builtin(local_invocation_index)]] local_idx : u32) {
|
||||
{
|
||||
v = i32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
@ -404,7 +672,7 @@ var<workgroup> u : atomic<u32>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomicStore(&(i), i32());
|
||||
atomicStore(&(u), u32());
|
||||
}
|
||||
@ -449,7 +717,7 @@ var<workgroup> w : S;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
w.a = i32();
|
||||
atomicStore(&(w.i), i32());
|
||||
w.b = f32();
|
||||
@ -480,11 +748,9 @@ var<workgroup> w : array<atomic<u32>, 4>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
atomicStore(&(w[0u]), u32());
|
||||
atomicStore(&(w[1u]), u32());
|
||||
atomicStore(&(w[2u]), u32());
|
||||
atomicStore(&(w[3u]), u32());
|
||||
for(var idx : u32 = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
|
||||
let i : u32 = idx;
|
||||
atomicStore(&(w[i]), u32());
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(w);
|
||||
@ -526,27 +792,13 @@ var<workgroup> w : array<S, 4>;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
w[0u].a = i32();
|
||||
atomicStore(&(w[0u].i), i32());
|
||||
w[0u].b = f32();
|
||||
atomicStore(&(w[0u].u), u32());
|
||||
w[0u].c = u32();
|
||||
w[1u].a = i32();
|
||||
atomicStore(&(w[1u].i), i32());
|
||||
w[1u].b = f32();
|
||||
atomicStore(&(w[1u].u), u32());
|
||||
w[1u].c = u32();
|
||||
w[2u].a = i32();
|
||||
atomicStore(&(w[2u].i), i32());
|
||||
w[2u].b = f32();
|
||||
atomicStore(&(w[2u].u), u32());
|
||||
w[2u].c = u32();
|
||||
w[3u].a = i32();
|
||||
atomicStore(&(w[3u].i), i32());
|
||||
w[3u].b = f32();
|
||||
atomicStore(&(w[3u].u), u32());
|
||||
w[3u].c = u32();
|
||||
for(var idx : u32 = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
|
||||
let i_1 : u32 = idx;
|
||||
w[i_1].a = i32();
|
||||
atomicStore(&(w[i_1].i), i32());
|
||||
w[i_1].b = f32();
|
||||
atomicStore(&(w[i_1].u), u32());
|
||||
w[i_1].c = u32();
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(w);
|
||||
@ -558,56 +810,6 @@ fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(ZeroInitWorkgroupMemoryTest, WorkgroupArray_InitWithLoop) {
|
||||
auto* src = R"(
|
||||
struct S {
|
||||
a : array<i32, 3>; // size: 12, less than the loop threshold
|
||||
b : array<i32, 4>; // size: 16, equal to the loop threshold
|
||||
c : array<i32, 5>; // size: 20, greater than the loop threshold
|
||||
};
|
||||
|
||||
var<workgroup> w : S;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f() {
|
||||
ignore(w); // Initialization should be inserted above this statement
|
||||
}
|
||||
)";
|
||||
auto* expect = R"(
|
||||
struct S {
|
||||
a : array<i32, 3>;
|
||||
b : array<i32, 4>;
|
||||
c : array<i32, 5>;
|
||||
};
|
||||
|
||||
var<workgroup> w : S;
|
||||
|
||||
[[stage(compute), workgroup_size(1)]]
|
||||
fn f([[builtin(local_invocation_index)]] local_invocation_index : u32) {
|
||||
if ((local_invocation_index == 0u)) {
|
||||
w.a = array<i32, 3>();
|
||||
for(var i : i32; (i < 4); i = (i + 1)) {
|
||||
w.b[i] = i32();
|
||||
}
|
||||
for(var i_1 : i32; (i_1 < 5); i_1 = (i_1 + 1)) {
|
||||
w.c[i_1] = i32();
|
||||
}
|
||||
}
|
||||
workgroupBarrier();
|
||||
ignore(w);
|
||||
}
|
||||
)";
|
||||
|
||||
ZeroInitWorkgroupMemory::Config cfg;
|
||||
cfg.init_arrays_with_loop_size_threshold = 16;
|
||||
|
||||
DataMap data;
|
||||
data.Add<ZeroInitWorkgroupMemory::Config>(cfg);
|
||||
auto got = Run<ZeroInitWorkgroupMemory>(src, data);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace transform
|
||||
} // namespace tint
|
||||
|
@ -16,11 +16,10 @@ struct tint_symbol_2 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void f(tint_symbol_2 tint_symbol_1) {
|
||||
const uint local_invocation_index = tint_symbol_1.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
for(int i = 0; (i < 64); i = (i + 1)) {
|
||||
s.data[i] = 0;
|
||||
}
|
||||
{
|
||||
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
|
||||
const uint i = idx;
|
||||
s.data[i] = 0;
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -15,13 +15,13 @@ struct Result {
|
||||
};
|
||||
|
||||
kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) {
|
||||
threadgroup S tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
S const tint_symbol_1 = {};
|
||||
tint_symbol_2 = tint_symbol_1;
|
||||
threadgroup S tint_symbol_1;
|
||||
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
|
||||
uint const i = idx;
|
||||
tint_symbol_1.data.arr[i] = int();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
result.out = tint_symbol_2.data.arr[ubo.dynamic_idx];
|
||||
result.out = tint_symbol_1.data.arr[ubo.dynamic_idx];
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 38
|
||||
; Bound: 52
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
@ -18,6 +18,7 @@
|
||||
OpName %s "s"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
OpName %f "f"
|
||||
OpName %idx "idx"
|
||||
OpDecorate %UBO Block
|
||||
OpMemberDecorate %UBO 0 Offset 0
|
||||
OpDecorate %ubo NonWritable
|
||||
@ -47,30 +48,51 @@
|
||||
%tint_symbol = OpVariable %_ptr_Input_uint Input
|
||||
%void = OpTypeVoid
|
||||
%16 = OpTypeFunction %void
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%23 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%26 = OpConstantNull %S
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
|
||||
%38 = OpConstantNull %int
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
|
||||
%_ptr_Uniform_int = OpTypePointer Uniform %int
|
||||
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
|
||||
%f = OpFunction %void None %16
|
||||
%19 = OpLabel
|
||||
%idx = OpVariable %_ptr_Function_uint Function %23
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpStore %s %26
|
||||
OpStore %idx %20
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpLoopMerge %25 %26 None
|
||||
OpBranch %27
|
||||
%27 = OpLabel
|
||||
%29 = OpLoad %uint %idx
|
||||
%30 = OpULessThan %bool %29 %uint_64
|
||||
%28 = OpLogicalNot %bool %30
|
||||
OpSelectionMerge %32 None
|
||||
OpBranchConditional %28 %33 %32
|
||||
%33 = OpLabel
|
||||
OpBranch %25
|
||||
%32 = OpLabel
|
||||
%34 = OpLoad %uint %idx
|
||||
%37 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
|
||||
OpStore %37 %38
|
||||
OpBranch %26
|
||||
%26 = OpLabel
|
||||
%39 = OpLoad %uint %idx
|
||||
%41 = OpIAdd %uint %39 %uint_1
|
||||
OpStore %idx %41
|
||||
OpBranch %24
|
||||
%25 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%31 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
|
||||
%33 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
|
||||
%34 = OpLoad %int %33
|
||||
%36 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
|
||||
%37 = OpLoad %int %36
|
||||
OpStore %31 %37
|
||||
%46 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
|
||||
%48 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
|
||||
%49 = OpLoad %int %48
|
||||
%50 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %49
|
||||
%51 = OpLoad %int %50
|
||||
OpStore %46 %51
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -16,11 +16,10 @@ struct tint_symbol_2 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void f(tint_symbol_2 tint_symbol_1) {
|
||||
const uint local_invocation_index = tint_symbol_1.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
for(int i = 0; (i < 64); i = (i + 1)) {
|
||||
s.data[i] = 0;
|
||||
}
|
||||
{
|
||||
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
|
||||
const uint i = idx;
|
||||
s.data[i] = 0;
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -15,14 +15,14 @@ struct Result {
|
||||
};
|
||||
|
||||
kernel void f(uint local_invocation_index [[thread_index_in_threadgroup]], constant UBO& ubo [[buffer(0)]], device Result& result [[buffer(1)]]) {
|
||||
threadgroup S tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
S const tint_symbol_1 = {};
|
||||
tint_symbol_2 = tint_symbol_1;
|
||||
threadgroup S tint_symbol_1;
|
||||
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
|
||||
uint const i = idx;
|
||||
tint_symbol_1.data.arr[i] = int();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
tint_symbol_2.data.arr[ubo.dynamic_idx] = 1;
|
||||
result.out = tint_symbol_2.data.arr[3];
|
||||
tint_symbol_1.data.arr[ubo.dynamic_idx] = 1;
|
||||
result.out = tint_symbol_1.data.arr[3];
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 41
|
||||
; Bound: 55
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
@ -18,6 +18,7 @@
|
||||
OpName %s "s"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
OpName %f "f"
|
||||
OpName %idx "idx"
|
||||
OpDecorate %UBO Block
|
||||
OpMemberDecorate %UBO 0 Offset 0
|
||||
OpDecorate %ubo NonWritable
|
||||
@ -47,34 +48,55 @@
|
||||
%tint_symbol = OpVariable %_ptr_Input_uint Input
|
||||
%void = OpTypeVoid
|
||||
%16 = OpTypeFunction %void
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%23 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%26 = OpConstantNull %S
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
|
||||
%38 = OpConstantNull %int
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%_ptr_Uniform_int = OpTypePointer Uniform %int
|
||||
%_ptr_Workgroup_int = OpTypePointer Workgroup %int
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
|
||||
%int_3 = OpConstant %int 3
|
||||
%f = OpFunction %void None %16
|
||||
%19 = OpLabel
|
||||
%idx = OpVariable %_ptr_Function_uint Function %23
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpStore %s %26
|
||||
OpStore %idx %20
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpLoopMerge %25 %26 None
|
||||
OpBranch %27
|
||||
%27 = OpLabel
|
||||
%29 = OpLoad %uint %idx
|
||||
%30 = OpULessThan %bool %29 %uint_64
|
||||
%28 = OpLogicalNot %bool %30
|
||||
OpSelectionMerge %32 None
|
||||
OpBranchConditional %28 %33 %32
|
||||
%33 = OpLabel
|
||||
OpBranch %25
|
||||
%32 = OpLabel
|
||||
%34 = OpLoad %uint %idx
|
||||
%37 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %34
|
||||
OpStore %37 %38
|
||||
OpBranch %26
|
||||
%26 = OpLabel
|
||||
%39 = OpLoad %uint %idx
|
||||
%41 = OpIAdd %uint %39 %uint_1
|
||||
OpStore %idx %41
|
||||
OpBranch %24
|
||||
%25 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%31 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
|
||||
%32 = OpLoad %int %31
|
||||
%34 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %32
|
||||
OpStore %34 %int_1
|
||||
%37 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
|
||||
%39 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %int_3
|
||||
%40 = OpLoad %int %39
|
||||
OpStore %37 %40
|
||||
%46 = OpAccessChain %_ptr_Uniform_int %ubo %uint_0
|
||||
%47 = OpLoad %int %46
|
||||
%48 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %47
|
||||
OpStore %48 %int_1
|
||||
%51 = OpAccessChain %_ptr_StorageBuffer_int %result %uint_0
|
||||
%53 = OpAccessChain %_ptr_Workgroup_int %s %uint_0 %int_3
|
||||
%54 = OpLoad %int %53
|
||||
OpStore %51 %54
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -59,24 +59,12 @@ void main(tint_symbol_1 tint_symbol) {
|
||||
const uint3 local_id = tint_symbol.local_id;
|
||||
const uint3 global_id = tint_symbol.global_id;
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
for(int i = 0; (i < 64); i = (i + 1)) {
|
||||
{
|
||||
for(int i_1 = 0; (i_1 < 64); i_1 = (i_1 + 1)) {
|
||||
mm_Asub[i][i_1] = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
for(int i_2 = 0; (i_2 < 64); i_2 = (i_2 + 1)) {
|
||||
{
|
||||
for(int i_3 = 0; (i_3 < 64); i_3 = (i_3 + 1)) {
|
||||
mm_Bsub[i_2][i_3] = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
|
||||
const uint i = (idx / 64u);
|
||||
const uint i_1 = (idx % 64u);
|
||||
mm_Asub[i][i_1] = 0.0f;
|
||||
mm_Bsub[i][i_1] = 0.0f;
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -51,13 +51,13 @@ void mm_write(constant Uniforms& uniforms, device Matrix& resultMatrix, uint row
|
||||
}
|
||||
|
||||
kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3 global_id [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& uniforms [[buffer(3)]], const device Matrix& firstMatrix [[buffer(0)]], const device Matrix& secondMatrix [[buffer(1)]], device Matrix& resultMatrix [[buffer(2)]]) {
|
||||
threadgroup tint_array_wrapper tint_symbol_4;
|
||||
threadgroup tint_array_wrapper tint_symbol_5;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
tint_array_wrapper const tint_symbol_2 = {.arr={}};
|
||||
tint_symbol_4 = tint_symbol_2;
|
||||
tint_array_wrapper const tint_symbol_3 = {.arr={}};
|
||||
tint_symbol_5 = tint_symbol_3;
|
||||
threadgroup tint_array_wrapper tint_symbol_2;
|
||||
threadgroup tint_array_wrapper tint_symbol_3;
|
||||
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
|
||||
uint const i = (idx / 64u);
|
||||
uint const i_1 = (idx % 64u);
|
||||
tint_symbol_2.arr[i].arr[i_1] = float();
|
||||
tint_symbol_3.arr[i].arr[i_1] = float();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint const tileRow = (local_id.y * RowPerThread);
|
||||
@ -80,23 +80,23 @@ kernel void tint_symbol(uint3 local_id [[thread_position_in_threadgroup]], uint3
|
||||
for(uint innerCol = 0u; (innerCol < ColPerThreadA); innerCol = (innerCol + 1u)) {
|
||||
uint const inputRow = (tileRow + innerRow);
|
||||
uint const inputCol = (tileColA + innerCol);
|
||||
tint_symbol_4.arr[inputRow].arr[inputCol] = mm_readA(uniforms, firstMatrix, (globalRow + innerRow), ((t * TileInner) + inputCol));
|
||||
tint_symbol_2.arr[inputRow].arr[inputCol] = mm_readA(uniforms, firstMatrix, (globalRow + innerRow), ((t * TileInner) + inputCol));
|
||||
}
|
||||
}
|
||||
for(uint innerRow = 0u; (innerRow < RowPerThreadB); innerRow = (innerRow + 1u)) {
|
||||
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
|
||||
uint const inputRow = (tileRowB + innerRow);
|
||||
uint const inputCol = (tileCol + innerCol);
|
||||
tint_symbol_5.arr[innerCol].arr[inputCol] = mm_readB(uniforms, secondMatrix, ((t * TileInner) + inputRow), (globalCol + innerCol));
|
||||
tint_symbol_3.arr[innerCol].arr[inputCol] = mm_readB(uniforms, secondMatrix, ((t * TileInner) + inputRow), (globalCol + innerCol));
|
||||
}
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
for(uint k = 0u; (k < TileInner); k = (k + 1u)) {
|
||||
for(uint inner = 0u; (inner < ColPerThread); inner = (inner + 1u)) {
|
||||
BCached.arr[inner] = tint_symbol_5.arr[k].arr[(tileCol + inner)];
|
||||
BCached.arr[inner] = tint_symbol_3.arr[k].arr[(tileCol + inner)];
|
||||
}
|
||||
for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
|
||||
ACached = tint_symbol_4.arr[(tileRow + innerRow)].arr[k];
|
||||
ACached = tint_symbol_2.arr[(tileRow + innerRow)].arr[k];
|
||||
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
|
||||
uint const index = ((innerRow * ColPerThread) + innerCol);
|
||||
acc.arr[index] = (acc.arr[index] + (ACached * BCached.arr[innerCol]));
|
||||
|
@ -1,7 +1,7 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 356
|
||||
; Bound: 372
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
@ -38,6 +38,7 @@
|
||||
OpName %col_1 "col"
|
||||
OpName %value "value"
|
||||
OpName %main "main"
|
||||
OpName %idx "idx"
|
||||
OpName %acc "acc"
|
||||
OpName %ACached "ACached"
|
||||
OpName %BCached "BCached"
|
||||
@ -114,20 +115,21 @@
|
||||
%void = OpTypeVoid
|
||||
%75 = OpTypeFunction %void %uint %uint %float
|
||||
%98 = OpTypeFunction %void
|
||||
%105 = OpConstantNull %_arr__arr_float_TileAOuter_TileAOuter
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%104 = OpConstantNull %uint
|
||||
%uint_4096 = OpConstant %uint 4096
|
||||
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
|
||||
%121 = OpConstantNull %float
|
||||
%uint_256 = OpConstant %uint 256
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_float_uint_16 = OpTypeArray %float %uint_16
|
||||
%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16
|
||||
%129 = OpConstantNull %_arr_float_uint_16
|
||||
%149 = OpConstantNull %_arr_float_uint_16
|
||||
%_ptr_Function_float = OpTypePointer Function %float
|
||||
%132 = OpConstantNull %float
|
||||
%_arr_float_RowPerThread = OpTypeArray %float %RowPerThread
|
||||
%_ptr_Function__arr_float_RowPerThread = OpTypePointer Function %_arr_float_RowPerThread
|
||||
%136 = OpConstantNull %_arr_float_RowPerThread
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%139 = OpConstantNull %uint
|
||||
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
|
||||
%155 = OpConstantNull %_arr_float_RowPerThread
|
||||
%mm_readA = OpFunction %float None %25
|
||||
%row = OpFunctionParameter %uint
|
||||
%col = OpFunctionParameter %uint
|
||||
@ -218,377 +220,399 @@
|
||||
OpFunctionEnd
|
||||
%main = OpFunction %void None %98
|
||||
%100 = OpLabel
|
||||
%acc = OpVariable %_ptr_Function__arr_float_uint_16 Function %129
|
||||
%ACached = OpVariable %_ptr_Function_float Function %132
|
||||
%BCached = OpVariable %_ptr_Function__arr_float_RowPerThread Function %136
|
||||
%index = OpVariable %_ptr_Function_uint Function %139
|
||||
%t = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerRow = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerCol = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerRow_0 = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerCol_0 = OpVariable %_ptr_Function_uint Function %139
|
||||
%k = OpVariable %_ptr_Function_uint Function %139
|
||||
%inner = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerRow_1 = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerCol_1 = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerRow_2 = OpVariable %_ptr_Function_uint Function %139
|
||||
%innerCol_2 = OpVariable %_ptr_Function_uint Function %139
|
||||
%idx = OpVariable %_ptr_Function_uint Function %104
|
||||
%acc = OpVariable %_ptr_Function__arr_float_uint_16 Function %149
|
||||
%ACached = OpVariable %_ptr_Function_float Function %121
|
||||
%BCached = OpVariable %_ptr_Function__arr_float_RowPerThread Function %155
|
||||
%index = OpVariable %_ptr_Function_uint Function %104
|
||||
%t = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerRow = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerCol = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerRow_0 = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerCol_0 = OpVariable %_ptr_Function_uint Function %104
|
||||
%k = OpVariable %_ptr_Function_uint Function %104
|
||||
%inner = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerRow_1 = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerCol_1 = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerRow_2 = OpVariable %_ptr_Function_uint Function %104
|
||||
%innerCol_2 = OpVariable %_ptr_Function_uint Function %104
|
||||
%101 = OpLoad %uint %tint_symbol_2
|
||||
%102 = OpIEqual %bool %101 %uint_0
|
||||
OpSelectionMerge %103 None
|
||||
OpBranchConditional %102 %104 %103
|
||||
%104 = OpLabel
|
||||
OpStore %mm_Asub %105
|
||||
OpStore %mm_Bsub %105
|
||||
OpBranch %103
|
||||
%103 = OpLabel
|
||||
OpStore %idx %101
|
||||
OpBranch %105
|
||||
%105 = OpLabel
|
||||
OpLoopMerge %106 %107 None
|
||||
OpBranch %108
|
||||
%108 = OpLabel
|
||||
%110 = OpLoad %uint %idx
|
||||
%112 = OpULessThan %bool %110 %uint_4096
|
||||
%109 = OpLogicalNot %bool %112
|
||||
OpSelectionMerge %113 None
|
||||
OpBranchConditional %109 %114 %113
|
||||
%114 = OpLabel
|
||||
OpBranch %106
|
||||
%113 = OpLabel
|
||||
%115 = OpLoad %uint %idx
|
||||
%116 = OpUDiv %uint %115 %TileAOuter
|
||||
%117 = OpLoad %uint %idx
|
||||
%118 = OpUMod %uint %117 %TileAOuter
|
||||
%120 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %116 %118
|
||||
OpStore %120 %121
|
||||
%122 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %116 %118
|
||||
OpStore %122 %121
|
||||
OpBranch %107
|
||||
%107 = OpLabel
|
||||
%123 = OpLoad %uint %idx
|
||||
%125 = OpIAdd %uint %123 %uint_256
|
||||
OpStore %idx %125
|
||||
OpBranch %105
|
||||
%106 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%108 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
|
||||
%109 = OpLoad %uint %108
|
||||
%110 = OpIMul %uint %109 %RowPerThread
|
||||
%111 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
|
||||
%112 = OpLoad %uint %111
|
||||
%113 = OpIMul %uint %112 %RowPerThread
|
||||
%114 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_1
|
||||
%115 = OpLoad %uint %114
|
||||
%116 = OpIMul %uint %115 %RowPerThread
|
||||
%117 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
|
||||
%118 = OpLoad %uint %117
|
||||
%119 = OpIMul %uint %118 %RowPerThread
|
||||
%120 = OpAccessChain %_ptr_Uniform_uint %uniforms %uint_1
|
||||
%121 = OpLoad %uint %120
|
||||
%122 = OpISub %uint %121 %uint_1
|
||||
%123 = OpUDiv %uint %122 %TileAOuter
|
||||
%124 = OpIAdd %uint %123 %uint_1
|
||||
%128 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
|
||||
%129 = OpLoad %uint %128
|
||||
%130 = OpIMul %uint %129 %RowPerThread
|
||||
%131 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
|
||||
%132 = OpLoad %uint %131
|
||||
%133 = OpIMul %uint %132 %RowPerThread
|
||||
%134 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_1
|
||||
%135 = OpLoad %uint %134
|
||||
%136 = OpIMul %uint %135 %RowPerThread
|
||||
%137 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
|
||||
%138 = OpLoad %uint %137
|
||||
%139 = OpIMul %uint %138 %RowPerThread
|
||||
%140 = OpAccessChain %_ptr_Uniform_uint %uniforms %uint_1
|
||||
%141 = OpLoad %uint %140
|
||||
%142 = OpISub %uint %141 %uint_1
|
||||
%143 = OpUDiv %uint %142 %TileAOuter
|
||||
%144 = OpIAdd %uint %143 %uint_1
|
||||
OpStore %index %uint_0
|
||||
OpBranch %140
|
||||
%140 = OpLabel
|
||||
OpLoopMerge %141 %142 None
|
||||
OpBranch %143
|
||||
%143 = OpLabel
|
||||
%145 = OpLoad %uint %index
|
||||
%146 = OpIMul %uint %RowPerThread %RowPerThread
|
||||
%147 = OpULessThan %bool %145 %146
|
||||
%144 = OpLogicalNot %bool %147
|
||||
OpSelectionMerge %148 None
|
||||
OpBranchConditional %144 %149 %148
|
||||
%149 = OpLabel
|
||||
OpBranch %141
|
||||
%148 = OpLabel
|
||||
%150 = OpLoad %uint %index
|
||||
%151 = OpAccessChain %_ptr_Function_float %acc %150
|
||||
OpStore %151 %float_0
|
||||
OpBranch %142
|
||||
%142 = OpLabel
|
||||
%152 = OpLoad %uint %index
|
||||
%153 = OpIAdd %uint %152 %uint_1
|
||||
OpStore %index %153
|
||||
OpBranch %140
|
||||
%141 = OpLabel
|
||||
%154 = OpUDiv %uint %TileAOuter %uint_16
|
||||
%155 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
|
||||
%156 = OpLoad %uint %155
|
||||
%157 = OpIMul %uint %156 %154
|
||||
%158 = OpUDiv %uint %TileAOuter %uint_16
|
||||
%159 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
|
||||
%160 = OpLoad %uint %159
|
||||
%161 = OpIMul %uint %160 %158
|
||||
OpStore %t %uint_0
|
||||
OpBranch %163
|
||||
%163 = OpLabel
|
||||
OpLoopMerge %164 %165 None
|
||||
OpBranch %166
|
||||
OpBranch %157
|
||||
%157 = OpLabel
|
||||
OpLoopMerge %158 %159 None
|
||||
OpBranch %160
|
||||
%160 = OpLabel
|
||||
%162 = OpLoad %uint %index
|
||||
%163 = OpIMul %uint %RowPerThread %RowPerThread
|
||||
%164 = OpULessThan %bool %162 %163
|
||||
%161 = OpLogicalNot %bool %164
|
||||
OpSelectionMerge %165 None
|
||||
OpBranchConditional %161 %166 %165
|
||||
%166 = OpLabel
|
||||
%168 = OpLoad %uint %t
|
||||
%169 = OpULessThan %bool %168 %124
|
||||
%167 = OpLogicalNot %bool %169
|
||||
OpSelectionMerge %170 None
|
||||
OpBranchConditional %167 %171 %170
|
||||
%171 = OpLabel
|
||||
OpBranch %164
|
||||
%170 = OpLabel
|
||||
OpStore %innerRow %uint_0
|
||||
OpBranch %173
|
||||
%173 = OpLabel
|
||||
OpLoopMerge %174 %175 None
|
||||
OpBranch %176
|
||||
%176 = OpLabel
|
||||
%178 = OpLoad %uint %innerRow
|
||||
%179 = OpULessThan %bool %178 %RowPerThread
|
||||
%177 = OpLogicalNot %bool %179
|
||||
OpSelectionMerge %180 None
|
||||
OpBranchConditional %177 %181 %180
|
||||
%181 = OpLabel
|
||||
OpBranch %174
|
||||
OpBranch %158
|
||||
%165 = OpLabel
|
||||
%167 = OpLoad %uint %index
|
||||
%168 = OpAccessChain %_ptr_Function_float %acc %167
|
||||
OpStore %168 %float_0
|
||||
OpBranch %159
|
||||
%159 = OpLabel
|
||||
%169 = OpLoad %uint %index
|
||||
%170 = OpIAdd %uint %169 %uint_1
|
||||
OpStore %index %170
|
||||
OpBranch %157
|
||||
%158 = OpLabel
|
||||
%171 = OpUDiv %uint %TileAOuter %uint_16
|
||||
%172 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_0
|
||||
%173 = OpLoad %uint %172
|
||||
%174 = OpIMul %uint %173 %171
|
||||
%175 = OpUDiv %uint %TileAOuter %uint_16
|
||||
%176 = OpAccessChain %_ptr_Input_uint %tint_symbol %uint_1
|
||||
%177 = OpLoad %uint %176
|
||||
%178 = OpIMul %uint %177 %175
|
||||
OpStore %t %uint_0
|
||||
OpBranch %180
|
||||
%180 = OpLabel
|
||||
OpStore %innerCol %uint_0
|
||||
OpLoopMerge %181 %182 None
|
||||
OpBranch %183
|
||||
%183 = OpLabel
|
||||
OpLoopMerge %184 %185 None
|
||||
OpBranch %186
|
||||
%186 = OpLabel
|
||||
%188 = OpLoad %uint %innerCol
|
||||
%189 = OpULessThan %bool %188 %154
|
||||
%187 = OpLogicalNot %bool %189
|
||||
OpSelectionMerge %190 None
|
||||
OpBranchConditional %187 %191 %190
|
||||
%191 = OpLabel
|
||||
OpBranch %184
|
||||
%185 = OpLoad %uint %t
|
||||
%186 = OpULessThan %bool %185 %144
|
||||
%184 = OpLogicalNot %bool %186
|
||||
OpSelectionMerge %187 None
|
||||
OpBranchConditional %184 %188 %187
|
||||
%188 = OpLabel
|
||||
OpBranch %181
|
||||
%187 = OpLabel
|
||||
OpStore %innerRow %uint_0
|
||||
OpBranch %190
|
||||
%190 = OpLabel
|
||||
%192 = OpLoad %uint %innerRow
|
||||
%193 = OpIAdd %uint %110 %192
|
||||
%194 = OpLoad %uint %innerCol
|
||||
%195 = OpIAdd %uint %157 %194
|
||||
%197 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %193 %195
|
||||
%199 = OpLoad %uint %innerRow
|
||||
%200 = OpIAdd %uint %116 %199
|
||||
%201 = OpLoad %uint %t
|
||||
%202 = OpIMul %uint %201 %TileAOuter
|
||||
%203 = OpIAdd %uint %202 %195
|
||||
%198 = OpFunctionCall %float %mm_readA %200 %203
|
||||
OpStore %197 %198
|
||||
OpBranch %185
|
||||
%185 = OpLabel
|
||||
%204 = OpLoad %uint %innerCol
|
||||
%205 = OpIAdd %uint %204 %uint_1
|
||||
OpStore %innerCol %205
|
||||
OpBranch %183
|
||||
%184 = OpLabel
|
||||
OpBranch %175
|
||||
%175 = OpLabel
|
||||
%206 = OpLoad %uint %innerRow
|
||||
%207 = OpIAdd %uint %206 %uint_1
|
||||
OpStore %innerRow %207
|
||||
OpBranch %173
|
||||
%174 = OpLabel
|
||||
OpLoopMerge %191 %192 None
|
||||
OpBranch %193
|
||||
%193 = OpLabel
|
||||
%195 = OpLoad %uint %innerRow
|
||||
%196 = OpULessThan %bool %195 %RowPerThread
|
||||
%194 = OpLogicalNot %bool %196
|
||||
OpSelectionMerge %197 None
|
||||
OpBranchConditional %194 %198 %197
|
||||
%198 = OpLabel
|
||||
OpBranch %191
|
||||
%197 = OpLabel
|
||||
OpStore %innerCol %uint_0
|
||||
OpBranch %200
|
||||
%200 = OpLabel
|
||||
OpLoopMerge %201 %202 None
|
||||
OpBranch %203
|
||||
%203 = OpLabel
|
||||
%205 = OpLoad %uint %innerCol
|
||||
%206 = OpULessThan %bool %205 %171
|
||||
%204 = OpLogicalNot %bool %206
|
||||
OpSelectionMerge %207 None
|
||||
OpBranchConditional %204 %208 %207
|
||||
%208 = OpLabel
|
||||
OpBranch %201
|
||||
%207 = OpLabel
|
||||
%209 = OpLoad %uint %innerRow
|
||||
%210 = OpIAdd %uint %130 %209
|
||||
%211 = OpLoad %uint %innerCol
|
||||
%212 = OpIAdd %uint %174 %211
|
||||
%213 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %210 %212
|
||||
%215 = OpLoad %uint %innerRow
|
||||
%216 = OpIAdd %uint %136 %215
|
||||
%217 = OpLoad %uint %t
|
||||
%218 = OpIMul %uint %217 %TileAOuter
|
||||
%219 = OpIAdd %uint %218 %212
|
||||
%214 = OpFunctionCall %float %mm_readA %216 %219
|
||||
OpStore %213 %214
|
||||
OpBranch %202
|
||||
%202 = OpLabel
|
||||
%220 = OpLoad %uint %innerCol
|
||||
%221 = OpIAdd %uint %220 %uint_1
|
||||
OpStore %innerCol %221
|
||||
OpBranch %200
|
||||
%201 = OpLabel
|
||||
OpBranch %192
|
||||
%192 = OpLabel
|
||||
%222 = OpLoad %uint %innerRow
|
||||
%223 = OpIAdd %uint %222 %uint_1
|
||||
OpStore %innerRow %223
|
||||
OpBranch %190
|
||||
%191 = OpLabel
|
||||
OpStore %innerRow_0 %uint_0
|
||||
OpBranch %209
|
||||
%209 = OpLabel
|
||||
OpLoopMerge %210 %211 None
|
||||
OpBranch %212
|
||||
%212 = OpLabel
|
||||
%214 = OpLoad %uint %innerRow_0
|
||||
%215 = OpULessThan %bool %214 %158
|
||||
%213 = OpLogicalNot %bool %215
|
||||
OpSelectionMerge %216 None
|
||||
OpBranchConditional %213 %217 %216
|
||||
%217 = OpLabel
|
||||
OpBranch %210
|
||||
%216 = OpLabel
|
||||
OpBranch %225
|
||||
%225 = OpLabel
|
||||
OpLoopMerge %226 %227 None
|
||||
OpBranch %228
|
||||
%228 = OpLabel
|
||||
%230 = OpLoad %uint %innerRow_0
|
||||
%231 = OpULessThan %bool %230 %175
|
||||
%229 = OpLogicalNot %bool %231
|
||||
OpSelectionMerge %232 None
|
||||
OpBranchConditional %229 %233 %232
|
||||
%233 = OpLabel
|
||||
OpBranch %226
|
||||
%232 = OpLabel
|
||||
OpStore %innerCol_0 %uint_0
|
||||
OpBranch %219
|
||||
%219 = OpLabel
|
||||
OpLoopMerge %220 %221 None
|
||||
OpBranch %222
|
||||
%222 = OpLabel
|
||||
%224 = OpLoad %uint %innerCol_0
|
||||
%225 = OpULessThan %bool %224 %RowPerThread
|
||||
%223 = OpLogicalNot %bool %225
|
||||
OpSelectionMerge %226 None
|
||||
OpBranchConditional %223 %227 %226
|
||||
%227 = OpLabel
|
||||
OpBranch %220
|
||||
%226 = OpLabel
|
||||
%228 = OpLoad %uint %innerRow_0
|
||||
%229 = OpIAdd %uint %161 %228
|
||||
%230 = OpLoad %uint %innerCol_0
|
||||
%231 = OpIAdd %uint %113 %230
|
||||
%232 = OpLoad %uint %innerCol_0
|
||||
%233 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %232 %231
|
||||
%235 = OpLoad %uint %t
|
||||
%236 = OpIMul %uint %235 %TileAOuter
|
||||
%237 = OpIAdd %uint %236 %229
|
||||
%238 = OpLoad %uint %innerCol_0
|
||||
%239 = OpIAdd %uint %119 %238
|
||||
%234 = OpFunctionCall %float %mm_readB %237 %239
|
||||
OpStore %233 %234
|
||||
OpBranch %221
|
||||
%221 = OpLabel
|
||||
OpBranch %235
|
||||
%235 = OpLabel
|
||||
OpLoopMerge %236 %237 None
|
||||
OpBranch %238
|
||||
%238 = OpLabel
|
||||
%240 = OpLoad %uint %innerCol_0
|
||||
%241 = OpIAdd %uint %240 %uint_1
|
||||
OpStore %innerCol_0 %241
|
||||
OpBranch %219
|
||||
%220 = OpLabel
|
||||
OpBranch %211
|
||||
%211 = OpLabel
|
||||
%242 = OpLoad %uint %innerRow_0
|
||||
%243 = OpIAdd %uint %242 %uint_1
|
||||
OpStore %innerRow_0 %243
|
||||
OpBranch %209
|
||||
%210 = OpLabel
|
||||
%241 = OpULessThan %bool %240 %RowPerThread
|
||||
%239 = OpLogicalNot %bool %241
|
||||
OpSelectionMerge %242 None
|
||||
OpBranchConditional %239 %243 %242
|
||||
%243 = OpLabel
|
||||
OpBranch %236
|
||||
%242 = OpLabel
|
||||
%244 = OpLoad %uint %innerRow_0
|
||||
%245 = OpIAdd %uint %178 %244
|
||||
%246 = OpLoad %uint %innerCol_0
|
||||
%247 = OpIAdd %uint %133 %246
|
||||
%248 = OpLoad %uint %innerCol_0
|
||||
%249 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %248 %247
|
||||
%251 = OpLoad %uint %t
|
||||
%252 = OpIMul %uint %251 %TileAOuter
|
||||
%253 = OpIAdd %uint %252 %245
|
||||
%254 = OpLoad %uint %innerCol_0
|
||||
%255 = OpIAdd %uint %139 %254
|
||||
%250 = OpFunctionCall %float %mm_readB %253 %255
|
||||
OpStore %249 %250
|
||||
OpBranch %237
|
||||
%237 = OpLabel
|
||||
%256 = OpLoad %uint %innerCol_0
|
||||
%257 = OpIAdd %uint %256 %uint_1
|
||||
OpStore %innerCol_0 %257
|
||||
OpBranch %235
|
||||
%236 = OpLabel
|
||||
OpBranch %227
|
||||
%227 = OpLabel
|
||||
%258 = OpLoad %uint %innerRow_0
|
||||
%259 = OpIAdd %uint %258 %uint_1
|
||||
OpStore %innerRow_0 %259
|
||||
OpBranch %225
|
||||
%226 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
OpStore %k %uint_0
|
||||
OpBranch %246
|
||||
%246 = OpLabel
|
||||
OpLoopMerge %247 %248 None
|
||||
OpBranch %249
|
||||
%249 = OpLabel
|
||||
%251 = OpLoad %uint %k
|
||||
%252 = OpULessThan %bool %251 %TileAOuter
|
||||
%250 = OpLogicalNot %bool %252
|
||||
OpSelectionMerge %253 None
|
||||
OpBranchConditional %250 %254 %253
|
||||
%254 = OpLabel
|
||||
OpBranch %247
|
||||
%253 = OpLabel
|
||||
OpStore %inner %uint_0
|
||||
OpBranch %256
|
||||
%256 = OpLabel
|
||||
OpLoopMerge %257 %258 None
|
||||
OpBranch %259
|
||||
%259 = OpLabel
|
||||
%261 = OpLoad %uint %inner
|
||||
%262 = OpULessThan %bool %261 %RowPerThread
|
||||
%260 = OpLogicalNot %bool %262
|
||||
OpSelectionMerge %263 None
|
||||
OpBranchConditional %260 %264 %263
|
||||
%264 = OpLabel
|
||||
OpBranch %257
|
||||
%263 = OpLabel
|
||||
%265 = OpLoad %uint %inner
|
||||
%266 = OpAccessChain %_ptr_Function_float %BCached %265
|
||||
OpBranch %262
|
||||
%262 = OpLabel
|
||||
OpLoopMerge %263 %264 None
|
||||
OpBranch %265
|
||||
%265 = OpLabel
|
||||
%267 = OpLoad %uint %k
|
||||
%268 = OpLoad %uint %inner
|
||||
%269 = OpIAdd %uint %113 %268
|
||||
%270 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %267 %269
|
||||
%271 = OpLoad %float %270
|
||||
OpStore %266 %271
|
||||
OpBranch %258
|
||||
%258 = OpLabel
|
||||
%272 = OpLoad %uint %inner
|
||||
%273 = OpIAdd %uint %272 %uint_1
|
||||
OpStore %inner %273
|
||||
OpBranch %256
|
||||
%257 = OpLabel
|
||||
OpStore %innerRow_1 %uint_0
|
||||
%268 = OpULessThan %bool %267 %TileAOuter
|
||||
%266 = OpLogicalNot %bool %268
|
||||
OpSelectionMerge %269 None
|
||||
OpBranchConditional %266 %270 %269
|
||||
%270 = OpLabel
|
||||
OpBranch %263
|
||||
%269 = OpLabel
|
||||
OpStore %inner %uint_0
|
||||
OpBranch %272
|
||||
%272 = OpLabel
|
||||
OpLoopMerge %273 %274 None
|
||||
OpBranch %275
|
||||
%275 = OpLabel
|
||||
OpLoopMerge %276 %277 None
|
||||
OpBranch %278
|
||||
%278 = OpLabel
|
||||
%280 = OpLoad %uint %innerRow_1
|
||||
%281 = OpULessThan %bool %280 %RowPerThread
|
||||
%279 = OpLogicalNot %bool %281
|
||||
OpSelectionMerge %282 None
|
||||
OpBranchConditional %279 %283 %282
|
||||
%283 = OpLabel
|
||||
OpBranch %276
|
||||
%282 = OpLabel
|
||||
%284 = OpLoad %uint %innerRow_1
|
||||
%285 = OpIAdd %uint %110 %284
|
||||
%286 = OpLoad %uint %k
|
||||
%287 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %285 %286
|
||||
%288 = OpLoad %float %287
|
||||
OpStore %ACached %288
|
||||
%277 = OpLoad %uint %inner
|
||||
%278 = OpULessThan %bool %277 %RowPerThread
|
||||
%276 = OpLogicalNot %bool %278
|
||||
OpSelectionMerge %279 None
|
||||
OpBranchConditional %276 %280 %279
|
||||
%280 = OpLabel
|
||||
OpBranch %273
|
||||
%279 = OpLabel
|
||||
%281 = OpLoad %uint %inner
|
||||
%282 = OpAccessChain %_ptr_Function_float %BCached %281
|
||||
%283 = OpLoad %uint %k
|
||||
%284 = OpLoad %uint %inner
|
||||
%285 = OpIAdd %uint %133 %284
|
||||
%286 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %283 %285
|
||||
%287 = OpLoad %float %286
|
||||
OpStore %282 %287
|
||||
OpBranch %274
|
||||
%274 = OpLabel
|
||||
%288 = OpLoad %uint %inner
|
||||
%289 = OpIAdd %uint %288 %uint_1
|
||||
OpStore %inner %289
|
||||
OpBranch %272
|
||||
%273 = OpLabel
|
||||
OpStore %innerRow_1 %uint_0
|
||||
OpBranch %291
|
||||
%291 = OpLabel
|
||||
OpLoopMerge %292 %293 None
|
||||
OpBranch %294
|
||||
%294 = OpLabel
|
||||
%296 = OpLoad %uint %innerRow_1
|
||||
%297 = OpULessThan %bool %296 %RowPerThread
|
||||
%295 = OpLogicalNot %bool %297
|
||||
OpSelectionMerge %298 None
|
||||
OpBranchConditional %295 %299 %298
|
||||
%299 = OpLabel
|
||||
OpBranch %292
|
||||
%298 = OpLabel
|
||||
%300 = OpLoad %uint %innerRow_1
|
||||
%301 = OpIAdd %uint %130 %300
|
||||
%302 = OpLoad %uint %k
|
||||
%303 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %301 %302
|
||||
%304 = OpLoad %float %303
|
||||
OpStore %ACached %304
|
||||
OpStore %innerCol_1 %uint_0
|
||||
OpBranch %290
|
||||
%290 = OpLabel
|
||||
OpLoopMerge %291 %292 None
|
||||
OpBranch %306
|
||||
%306 = OpLabel
|
||||
OpLoopMerge %307 %308 None
|
||||
OpBranch %309
|
||||
%309 = OpLabel
|
||||
%311 = OpLoad %uint %innerCol_1
|
||||
%312 = OpULessThan %bool %311 %RowPerThread
|
||||
%310 = OpLogicalNot %bool %312
|
||||
OpSelectionMerge %313 None
|
||||
OpBranchConditional %310 %314 %313
|
||||
%314 = OpLabel
|
||||
OpBranch %307
|
||||
%313 = OpLabel
|
||||
%315 = OpLoad %uint %innerRow_1
|
||||
%316 = OpIMul %uint %315 %RowPerThread
|
||||
%317 = OpLoad %uint %innerCol_1
|
||||
%318 = OpIAdd %uint %316 %317
|
||||
%319 = OpAccessChain %_ptr_Function_float %acc %318
|
||||
%320 = OpAccessChain %_ptr_Function_float %acc %318
|
||||
%321 = OpLoad %float %320
|
||||
%322 = OpLoad %float %ACached
|
||||
%323 = OpLoad %uint %innerCol_1
|
||||
%324 = OpAccessChain %_ptr_Function_float %BCached %323
|
||||
%325 = OpLoad %float %324
|
||||
%326 = OpFMul %float %322 %325
|
||||
%327 = OpFAdd %float %321 %326
|
||||
OpStore %319 %327
|
||||
OpBranch %308
|
||||
%308 = OpLabel
|
||||
%328 = OpLoad %uint %innerCol_1
|
||||
%329 = OpIAdd %uint %328 %uint_1
|
||||
OpStore %innerCol_1 %329
|
||||
OpBranch %306
|
||||
%307 = OpLabel
|
||||
OpBranch %293
|
||||
%293 = OpLabel
|
||||
%295 = OpLoad %uint %innerCol_1
|
||||
%296 = OpULessThan %bool %295 %RowPerThread
|
||||
%294 = OpLogicalNot %bool %296
|
||||
OpSelectionMerge %297 None
|
||||
OpBranchConditional %294 %298 %297
|
||||
%298 = OpLabel
|
||||
%330 = OpLoad %uint %innerRow_1
|
||||
%331 = OpIAdd %uint %330 %uint_1
|
||||
OpStore %innerRow_1 %331
|
||||
OpBranch %291
|
||||
%297 = OpLabel
|
||||
%299 = OpLoad %uint %innerRow_1
|
||||
%300 = OpIMul %uint %299 %RowPerThread
|
||||
%301 = OpLoad %uint %innerCol_1
|
||||
%302 = OpIAdd %uint %300 %301
|
||||
%303 = OpAccessChain %_ptr_Function_float %acc %302
|
||||
%304 = OpAccessChain %_ptr_Function_float %acc %302
|
||||
%305 = OpLoad %float %304
|
||||
%306 = OpLoad %float %ACached
|
||||
%307 = OpLoad %uint %innerCol_1
|
||||
%308 = OpAccessChain %_ptr_Function_float %BCached %307
|
||||
%309 = OpLoad %float %308
|
||||
%310 = OpFMul %float %306 %309
|
||||
%311 = OpFAdd %float %305 %310
|
||||
OpStore %303 %311
|
||||
OpBranch %292
|
||||
%292 = OpLabel
|
||||
%312 = OpLoad %uint %innerCol_1
|
||||
%313 = OpIAdd %uint %312 %uint_1
|
||||
OpStore %innerCol_1 %313
|
||||
OpBranch %290
|
||||
%291 = OpLabel
|
||||
OpBranch %277
|
||||
%277 = OpLabel
|
||||
%314 = OpLoad %uint %innerRow_1
|
||||
%315 = OpIAdd %uint %314 %uint_1
|
||||
OpStore %innerRow_1 %315
|
||||
OpBranch %275
|
||||
%276 = OpLabel
|
||||
OpBranch %248
|
||||
%248 = OpLabel
|
||||
%316 = OpLoad %uint %k
|
||||
%317 = OpIAdd %uint %316 %uint_1
|
||||
OpStore %k %317
|
||||
OpBranch %246
|
||||
%247 = OpLabel
|
||||
OpBranch %264
|
||||
%264 = OpLabel
|
||||
%332 = OpLoad %uint %k
|
||||
%333 = OpIAdd %uint %332 %uint_1
|
||||
OpStore %k %333
|
||||
OpBranch %262
|
||||
%263 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
OpBranch %165
|
||||
%165 = OpLabel
|
||||
%319 = OpLoad %uint %t
|
||||
%320 = OpIAdd %uint %319 %uint_1
|
||||
OpStore %t %320
|
||||
OpBranch %163
|
||||
%164 = OpLabel
|
||||
OpBranch %182
|
||||
%182 = OpLabel
|
||||
%335 = OpLoad %uint %t
|
||||
%336 = OpIAdd %uint %335 %uint_1
|
||||
OpStore %t %336
|
||||
OpBranch %180
|
||||
%181 = OpLabel
|
||||
OpStore %innerRow_2 %uint_0
|
||||
OpBranch %322
|
||||
%322 = OpLabel
|
||||
OpLoopMerge %323 %324 None
|
||||
OpBranch %325
|
||||
%325 = OpLabel
|
||||
%327 = OpLoad %uint %innerRow_2
|
||||
%328 = OpULessThan %bool %327 %RowPerThread
|
||||
%326 = OpLogicalNot %bool %328
|
||||
OpSelectionMerge %329 None
|
||||
OpBranchConditional %326 %330 %329
|
||||
%330 = OpLabel
|
||||
OpBranch %323
|
||||
%329 = OpLabel
|
||||
OpBranch %338
|
||||
%338 = OpLabel
|
||||
OpLoopMerge %339 %340 None
|
||||
OpBranch %341
|
||||
%341 = OpLabel
|
||||
%343 = OpLoad %uint %innerRow_2
|
||||
%344 = OpULessThan %bool %343 %RowPerThread
|
||||
%342 = OpLogicalNot %bool %344
|
||||
OpSelectionMerge %345 None
|
||||
OpBranchConditional %342 %346 %345
|
||||
%346 = OpLabel
|
||||
OpBranch %339
|
||||
%345 = OpLabel
|
||||
OpStore %innerCol_2 %uint_0
|
||||
OpBranch %332
|
||||
%332 = OpLabel
|
||||
OpLoopMerge %333 %334 None
|
||||
OpBranch %335
|
||||
%335 = OpLabel
|
||||
%337 = OpLoad %uint %innerCol_2
|
||||
%338 = OpULessThan %bool %337 %RowPerThread
|
||||
%336 = OpLogicalNot %bool %338
|
||||
OpSelectionMerge %339 None
|
||||
OpBranchConditional %336 %340 %339
|
||||
OpBranch %348
|
||||
%348 = OpLabel
|
||||
OpLoopMerge %349 %350 None
|
||||
OpBranch %351
|
||||
%351 = OpLabel
|
||||
%353 = OpLoad %uint %innerCol_2
|
||||
%354 = OpULessThan %bool %353 %RowPerThread
|
||||
%352 = OpLogicalNot %bool %354
|
||||
OpSelectionMerge %355 None
|
||||
OpBranchConditional %352 %356 %355
|
||||
%356 = OpLabel
|
||||
OpBranch %349
|
||||
%355 = OpLabel
|
||||
%357 = OpLoad %uint %innerRow_2
|
||||
%358 = OpIMul %uint %357 %RowPerThread
|
||||
%359 = OpLoad %uint %innerCol_2
|
||||
%360 = OpIAdd %uint %358 %359
|
||||
%362 = OpLoad %uint %innerRow_2
|
||||
%363 = OpIAdd %uint %136 %362
|
||||
%364 = OpLoad %uint %innerCol_2
|
||||
%365 = OpIAdd %uint %139 %364
|
||||
%366 = OpAccessChain %_ptr_Function_float %acc %360
|
||||
%367 = OpLoad %float %366
|
||||
%361 = OpFunctionCall %void %mm_write %363 %365 %367
|
||||
OpBranch %350
|
||||
%350 = OpLabel
|
||||
%368 = OpLoad %uint %innerCol_2
|
||||
%369 = OpIAdd %uint %368 %uint_1
|
||||
OpStore %innerCol_2 %369
|
||||
OpBranch %348
|
||||
%349 = OpLabel
|
||||
OpBranch %340
|
||||
%340 = OpLabel
|
||||
OpBranch %333
|
||||
%370 = OpLoad %uint %innerRow_2
|
||||
%371 = OpIAdd %uint %370 %uint_1
|
||||
OpStore %innerRow_2 %371
|
||||
OpBranch %338
|
||||
%339 = OpLabel
|
||||
%341 = OpLoad %uint %innerRow_2
|
||||
%342 = OpIMul %uint %341 %RowPerThread
|
||||
%343 = OpLoad %uint %innerCol_2
|
||||
%344 = OpIAdd %uint %342 %343
|
||||
%346 = OpLoad %uint %innerRow_2
|
||||
%347 = OpIAdd %uint %116 %346
|
||||
%348 = OpLoad %uint %innerCol_2
|
||||
%349 = OpIAdd %uint %119 %348
|
||||
%350 = OpAccessChain %_ptr_Function_float %acc %344
|
||||
%351 = OpLoad %float %350
|
||||
%345 = OpFunctionCall %void %mm_write %347 %349 %351
|
||||
OpBranch %334
|
||||
%334 = OpLabel
|
||||
%352 = OpLoad %uint %innerCol_2
|
||||
%353 = OpIAdd %uint %352 %uint_1
|
||||
OpStore %innerCol_2 %353
|
||||
OpBranch %332
|
||||
%333 = OpLabel
|
||||
OpBranch %324
|
||||
%324 = OpLabel
|
||||
%354 = OpLoad %uint %innerRow_2
|
||||
%355 = OpIAdd %uint %354 %uint_1
|
||||
OpStore %innerRow_2 %355
|
||||
OpBranch %322
|
||||
%323 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -21,15 +21,11 @@ void main(tint_symbol_1 tint_symbol) {
|
||||
const uint3 WorkGroupID = tint_symbol.WorkGroupID;
|
||||
const uint3 LocalInvocationID = tint_symbol.LocalInvocationID;
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
for(int i_1 = 0; (i_1 < 4); i_1 = (i_1 + 1)) {
|
||||
{
|
||||
for(int i_2 = 0; (i_2 < 256); i_2 = (i_2 + 1)) {
|
||||
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
|
||||
const uint i_1 = (idx / 256u);
|
||||
const uint i_2 = (idx % 256u);
|
||||
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -15,15 +15,16 @@ struct tint_array_wrapper {
|
||||
tint_array_wrapper_1 arr[4];
|
||||
};
|
||||
|
||||
kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture(1)]], sampler tint_symbol_5 [[sampler(0)]], texture2d<float, access::write> tint_symbol_6 [[texture(2)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Params& params [[buffer(1)]], constant Flip& flip [[buffer(3)]]) {
|
||||
threadgroup tint_array_wrapper tint_symbol_3;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
tint_array_wrapper const tint_symbol_2 = {.arr={}};
|
||||
tint_symbol_3 = tint_symbol_2;
|
||||
kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_3 [[texture(1)]], sampler tint_symbol_4 [[sampler(0)]], texture2d<float, access::write> tint_symbol_5 [[texture(2)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Params& params [[buffer(1)]], constant Flip& flip [[buffer(3)]]) {
|
||||
threadgroup tint_array_wrapper tint_symbol_2;
|
||||
for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
|
||||
uint const i_1 = (idx / 256u);
|
||||
uint const i_2 = (idx % 256u);
|
||||
tint_symbol_2.arr[i_1].arr[i_2] = float3();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint const filterOffset = ((params.filterDim - 1u) / 2u);
|
||||
int2 const dims = int2(tint_symbol_4.get_width(0), tint_symbol_4.get_height(0));
|
||||
int2 const dims = int2(tint_symbol_3.get_width(0), tint_symbol_3.get_height(0));
|
||||
int2 const baseIndex = (int2(((WorkGroupID.xy * uint2(params.blockDim, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u)))) - int2(int(filterOffset), 0));
|
||||
for(uint r = 0u; (r < 4u); r = (r + 1u)) {
|
||||
for(uint c = 0u; (c < 4u); c = (c + 1u)) {
|
||||
@ -31,7 +32,7 @@ kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture
|
||||
if ((flip.value != 0u)) {
|
||||
loadIndex = loadIndex.yx;
|
||||
}
|
||||
tint_symbol_3.arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_4.sample(tint_symbol_5, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb;
|
||||
tint_symbol_2.arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_3.sample(tint_symbol_4, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb;
|
||||
}
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
@ -46,9 +47,9 @@ kernel void tint_symbol(texture2d<float, access::sample> tint_symbol_4 [[texture
|
||||
float3 acc = float3(0.0f, 0.0f, 0.0f);
|
||||
for(uint f = 0u; (f < params.filterDim); f = (f + 1u)) {
|
||||
uint i = ((center + f) - filterOffset);
|
||||
acc = (acc + ((1.0f / float(params.filterDim)) * tint_symbol_3.arr[r].arr[i]));
|
||||
acc = (acc + ((1.0f / float(params.filterDim)) * tint_symbol_2.arr[r].arr[i]));
|
||||
}
|
||||
tint_symbol_6.write(float4(acc, 1.0f), uint2(writeIndex));
|
||||
tint_symbol_5.write(float4(acc, 1.0f), uint2(writeIndex));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 239
|
||||
; Bound: 254
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability ImageQuery
|
||||
@ -23,6 +23,7 @@
|
||||
OpName %tint_symbol_1 "tint_symbol_1"
|
||||
OpName %tint_symbol_2 "tint_symbol_2"
|
||||
OpName %main "main"
|
||||
OpName %idx "idx"
|
||||
OpName %r "r"
|
||||
OpName %c "c"
|
||||
OpName %loadIndex "loadIndex"
|
||||
@ -87,288 +88,310 @@
|
||||
%tint_symbol_2 = OpVariable %_ptr_Input_uint Input
|
||||
%void = OpTypeVoid
|
||||
%31 = OpTypeFunction %void
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%38 = OpConstantNull %uint
|
||||
%uint_1024 = OpConstant %uint 1024
|
||||
%bool = OpTypeBool
|
||||
%41 = OpConstantNull %_arr__arr_v3float_uint_256_uint_4
|
||||
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
|
||||
%56 = OpConstantNull %v3float
|
||||
%uint_64 = OpConstant %uint 64
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%int = OpTypeInt 32 1
|
||||
%v2int = OpTypeVector %int 2
|
||||
%int_0 = OpConstant %int 0
|
||||
%v2uint = OpTypeVector %uint 2
|
||||
%66 = OpConstantComposite %v2uint %uint_4 %uint_1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%74 = OpConstantNull %uint
|
||||
%85 = OpConstantComposite %v2uint %uint_4 %uint_1
|
||||
%_ptr_Function_v2int = OpTypePointer Function %v2int
|
||||
%102 = OpConstantNull %v2int
|
||||
%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
|
||||
%119 = OpConstantNull %v2int
|
||||
%v4float = OpTypeVector %float 4
|
||||
%122 = OpTypeSampledImage %10
|
||||
%138 = OpTypeSampledImage %10
|
||||
%v2float = OpTypeVector %float 2
|
||||
%float_0_25 = OpConstant %float 0.25
|
||||
%128 = OpConstantComposite %v2float %float_0_25 %float_0_25
|
||||
%144 = OpConstantComposite %v2float %float_0_25 %float_0_25
|
||||
%float_0 = OpConstant %float 0
|
||||
%v2bool = OpTypeVector %bool 2
|
||||
%193 = OpConstantComposite %v3float %float_0 %float_0 %float_0
|
||||
%209 = OpConstantComposite %v3float %float_0 %float_0 %float_0
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%196 = OpConstantNull %v3float
|
||||
%float_1 = OpConstant %float 1
|
||||
%main = OpFunction %void None %31
|
||||
%34 = OpLabel
|
||||
%r = OpVariable %_ptr_Function_uint Function %74
|
||||
%c = OpVariable %_ptr_Function_uint Function %74
|
||||
%loadIndex = OpVariable %_ptr_Function_v2int Function %102
|
||||
%r_0 = OpVariable %_ptr_Function_uint Function %74
|
||||
%c_0 = OpVariable %_ptr_Function_uint Function %74
|
||||
%writeIndex = OpVariable %_ptr_Function_v2int Function %102
|
||||
%acc = OpVariable %_ptr_Function_v3float Function %196
|
||||
%f = OpVariable %_ptr_Function_uint Function %74
|
||||
%i = OpVariable %_ptr_Function_uint Function %74
|
||||
%idx = OpVariable %_ptr_Function_uint Function %38
|
||||
%r = OpVariable %_ptr_Function_uint Function %38
|
||||
%c = OpVariable %_ptr_Function_uint Function %38
|
||||
%loadIndex = OpVariable %_ptr_Function_v2int Function %119
|
||||
%r_0 = OpVariable %_ptr_Function_uint Function %38
|
||||
%c_0 = OpVariable %_ptr_Function_uint Function %38
|
||||
%writeIndex = OpVariable %_ptr_Function_v2int Function %119
|
||||
%acc = OpVariable %_ptr_Function_v3float Function %56
|
||||
%f = OpVariable %_ptr_Function_uint Function %38
|
||||
%i = OpVariable %_ptr_Function_uint Function %38
|
||||
%35 = OpLoad %uint %tint_symbol_2
|
||||
%37 = OpIEqual %bool %35 %uint_0
|
||||
OpSelectionMerge %39 None
|
||||
OpBranchConditional %37 %40 %39
|
||||
%40 = OpLabel
|
||||
OpStore %tile %41
|
||||
OpStore %idx %35
|
||||
OpBranch %39
|
||||
%39 = OpLabel
|
||||
OpLoopMerge %40 %41 None
|
||||
OpBranch %42
|
||||
%42 = OpLabel
|
||||
%44 = OpLoad %uint %idx
|
||||
%46 = OpULessThan %bool %44 %uint_1024
|
||||
%43 = OpLogicalNot %bool %46
|
||||
OpSelectionMerge %48 None
|
||||
OpBranchConditional %43 %49 %48
|
||||
%49 = OpLabel
|
||||
OpBranch %40
|
||||
%48 = OpLabel
|
||||
%50 = OpLoad %uint %idx
|
||||
%51 = OpUDiv %uint %50 %uint_256
|
||||
%52 = OpLoad %uint %idx
|
||||
%53 = OpUMod %uint %52 %uint_256
|
||||
%55 = OpAccessChain %_ptr_Workgroup_v3float %tile %51 %53
|
||||
OpStore %55 %56
|
||||
OpBranch %41
|
||||
%41 = OpLabel
|
||||
%57 = OpLoad %uint %idx
|
||||
%59 = OpIAdd %uint %57 %uint_64
|
||||
OpStore %idx %59
|
||||
OpBranch %39
|
||||
%40 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%46 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
|
||||
%47 = OpLoad %uint %46
|
||||
%49 = OpISub %uint %47 %uint_1
|
||||
%50 = OpUDiv %uint %49 %uint_2
|
||||
%54 = OpLoad %10 %inputTex
|
||||
%51 = OpImageQuerySizeLod %v2int %54 %int_0
|
||||
%58 = OpLoad %v3uint %tint_symbol
|
||||
%59 = OpVectorShuffle %v2uint %58 %58 0 1
|
||||
%60 = OpAccessChain %_ptr_Uniform_uint %params %uint_1
|
||||
%61 = OpLoad %uint %60
|
||||
%62 = OpCompositeConstruct %v2uint %61 %uint_4
|
||||
%63 = OpIMul %v2uint %59 %62
|
||||
%64 = OpLoad %v3uint %tint_symbol_1
|
||||
%65 = OpVectorShuffle %v2uint %64 %64 0 1
|
||||
%67 = OpIMul %v2uint %65 %66
|
||||
%68 = OpIAdd %v2uint %63 %67
|
||||
%56 = OpBitcast %v2int %68
|
||||
%69 = OpBitcast %int %50
|
||||
%70 = OpCompositeConstruct %v2int %69 %int_0
|
||||
%71 = OpISub %v2int %56 %70
|
||||
%65 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
|
||||
%66 = OpLoad %uint %65
|
||||
%68 = OpISub %uint %66 %uint_1
|
||||
%69 = OpUDiv %uint %68 %uint_2
|
||||
%73 = OpLoad %10 %inputTex
|
||||
%70 = OpImageQuerySizeLod %v2int %73 %int_0
|
||||
%77 = OpLoad %v3uint %tint_symbol
|
||||
%78 = OpVectorShuffle %v2uint %77 %77 0 1
|
||||
%79 = OpAccessChain %_ptr_Uniform_uint %params %uint_1
|
||||
%80 = OpLoad %uint %79
|
||||
%81 = OpCompositeConstruct %v2uint %80 %uint_4
|
||||
%82 = OpIMul %v2uint %78 %81
|
||||
%83 = OpLoad %v3uint %tint_symbol_1
|
||||
%84 = OpVectorShuffle %v2uint %83 %83 0 1
|
||||
%86 = OpIMul %v2uint %84 %85
|
||||
%87 = OpIAdd %v2uint %82 %86
|
||||
%75 = OpBitcast %v2int %87
|
||||
%88 = OpBitcast %int %69
|
||||
%89 = OpCompositeConstruct %v2int %88 %int_0
|
||||
%90 = OpISub %v2int %75 %89
|
||||
OpStore %r %uint_0
|
||||
OpBranch %75
|
||||
%75 = OpLabel
|
||||
OpLoopMerge %76 %77 None
|
||||
OpBranch %78
|
||||
%78 = OpLabel
|
||||
%80 = OpLoad %uint %r
|
||||
%81 = OpULessThan %bool %80 %uint_4
|
||||
%79 = OpLogicalNot %bool %81
|
||||
OpSelectionMerge %82 None
|
||||
OpBranchConditional %79 %83 %82
|
||||
%83 = OpLabel
|
||||
OpBranch %76
|
||||
%82 = OpLabel
|
||||
OpStore %c %uint_0
|
||||
OpBranch %85
|
||||
%85 = OpLabel
|
||||
OpLoopMerge %86 %87 None
|
||||
OpBranch %88
|
||||
%88 = OpLabel
|
||||
%90 = OpLoad %uint %c
|
||||
%91 = OpULessThan %bool %90 %uint_4
|
||||
%89 = OpLogicalNot %bool %91
|
||||
OpSelectionMerge %92 None
|
||||
OpBranchConditional %89 %93 %92
|
||||
%93 = OpLabel
|
||||
OpBranch %86
|
||||
OpBranch %92
|
||||
%92 = OpLabel
|
||||
%95 = OpLoad %uint %c
|
||||
%94 = OpBitcast %int %95
|
||||
OpLoopMerge %93 %94 None
|
||||
OpBranch %95
|
||||
%95 = OpLabel
|
||||
%97 = OpLoad %uint %r
|
||||
%96 = OpBitcast %int %97
|
||||
%98 = OpCompositeConstruct %v2int %94 %96
|
||||
%99 = OpIAdd %v2int %71 %98
|
||||
OpStore %loadIndex %99
|
||||
%103 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
|
||||
%104 = OpLoad %uint %103
|
||||
%105 = OpINotEqual %bool %104 %uint_0
|
||||
OpSelectionMerge %106 None
|
||||
OpBranchConditional %105 %107 %106
|
||||
%107 = OpLabel
|
||||
%108 = OpLoad %v2int %loadIndex
|
||||
%109 = OpVectorShuffle %v2int %108 %108 1 0
|
||||
OpStore %loadIndex %109
|
||||
OpBranch %106
|
||||
%106 = OpLabel
|
||||
%110 = OpLoad %uint %r
|
||||
%111 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
|
||||
%112 = OpLoad %uint %111
|
||||
%113 = OpIMul %uint %uint_4 %112
|
||||
%114 = OpLoad %uint %c
|
||||
%115 = OpIAdd %uint %113 %114
|
||||
%117 = OpAccessChain %_ptr_Workgroup_v3float %tile %110 %115
|
||||
%120 = OpLoad %3 %samp
|
||||
%121 = OpLoad %10 %inputTex
|
||||
%123 = OpSampledImage %122 %121 %120
|
||||
%126 = OpLoad %v2int %loadIndex
|
||||
%124 = OpConvertSToF %v2float %126
|
||||
%129 = OpFAdd %v2float %124 %128
|
||||
%130 = OpConvertSToF %v2float %51
|
||||
%131 = OpFDiv %v2float %129 %130
|
||||
%118 = OpImageSampleExplicitLod %v4float %123 %131 Lod %float_0
|
||||
%133 = OpVectorShuffle %v3float %118 %118 0 1 2
|
||||
OpStore %117 %133
|
||||
OpBranch %87
|
||||
%87 = OpLabel
|
||||
%134 = OpLoad %uint %c
|
||||
%135 = OpIAdd %uint %134 %uint_1
|
||||
OpStore %c %135
|
||||
OpBranch %85
|
||||
%86 = OpLabel
|
||||
OpBranch %77
|
||||
%77 = OpLabel
|
||||
%136 = OpLoad %uint %r
|
||||
%137 = OpIAdd %uint %136 %uint_1
|
||||
OpStore %r %137
|
||||
OpBranch %75
|
||||
%76 = OpLabel
|
||||
%98 = OpULessThan %bool %97 %uint_4
|
||||
%96 = OpLogicalNot %bool %98
|
||||
OpSelectionMerge %99 None
|
||||
OpBranchConditional %96 %100 %99
|
||||
%100 = OpLabel
|
||||
OpBranch %93
|
||||
%99 = OpLabel
|
||||
OpStore %c %uint_0
|
||||
OpBranch %102
|
||||
%102 = OpLabel
|
||||
OpLoopMerge %103 %104 None
|
||||
OpBranch %105
|
||||
%105 = OpLabel
|
||||
%107 = OpLoad %uint %c
|
||||
%108 = OpULessThan %bool %107 %uint_4
|
||||
%106 = OpLogicalNot %bool %108
|
||||
OpSelectionMerge %109 None
|
||||
OpBranchConditional %106 %110 %109
|
||||
%110 = OpLabel
|
||||
OpBranch %103
|
||||
%109 = OpLabel
|
||||
%112 = OpLoad %uint %c
|
||||
%111 = OpBitcast %int %112
|
||||
%114 = OpLoad %uint %r
|
||||
%113 = OpBitcast %int %114
|
||||
%115 = OpCompositeConstruct %v2int %111 %113
|
||||
%116 = OpIAdd %v2int %90 %115
|
||||
OpStore %loadIndex %116
|
||||
%120 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
|
||||
%121 = OpLoad %uint %120
|
||||
%122 = OpINotEqual %bool %121 %uint_0
|
||||
OpSelectionMerge %123 None
|
||||
OpBranchConditional %122 %124 %123
|
||||
%124 = OpLabel
|
||||
%125 = OpLoad %v2int %loadIndex
|
||||
%126 = OpVectorShuffle %v2int %125 %125 1 0
|
||||
OpStore %loadIndex %126
|
||||
OpBranch %123
|
||||
%123 = OpLabel
|
||||
%127 = OpLoad %uint %r
|
||||
%128 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
|
||||
%129 = OpLoad %uint %128
|
||||
%130 = OpIMul %uint %uint_4 %129
|
||||
%131 = OpLoad %uint %c
|
||||
%132 = OpIAdd %uint %130 %131
|
||||
%133 = OpAccessChain %_ptr_Workgroup_v3float %tile %127 %132
|
||||
%136 = OpLoad %3 %samp
|
||||
%137 = OpLoad %10 %inputTex
|
||||
%139 = OpSampledImage %138 %137 %136
|
||||
%142 = OpLoad %v2int %loadIndex
|
||||
%140 = OpConvertSToF %v2float %142
|
||||
%145 = OpFAdd %v2float %140 %144
|
||||
%146 = OpConvertSToF %v2float %70
|
||||
%147 = OpFDiv %v2float %145 %146
|
||||
%134 = OpImageSampleExplicitLod %v4float %139 %147 Lod %float_0
|
||||
%149 = OpVectorShuffle %v3float %134 %134 0 1 2
|
||||
OpStore %133 %149
|
||||
OpBranch %104
|
||||
%104 = OpLabel
|
||||
%150 = OpLoad %uint %c
|
||||
%151 = OpIAdd %uint %150 %uint_1
|
||||
OpStore %c %151
|
||||
OpBranch %102
|
||||
%103 = OpLabel
|
||||
OpBranch %94
|
||||
%94 = OpLabel
|
||||
%152 = OpLoad %uint %r
|
||||
%153 = OpIAdd %uint %152 %uint_1
|
||||
OpStore %r %153
|
||||
OpBranch %92
|
||||
%93 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
OpStore %r_0 %uint_0
|
||||
OpBranch %140
|
||||
%140 = OpLabel
|
||||
OpLoopMerge %141 %142 None
|
||||
OpBranch %143
|
||||
%143 = OpLabel
|
||||
%145 = OpLoad %uint %r_0
|
||||
%146 = OpULessThan %bool %145 %uint_4
|
||||
%144 = OpLogicalNot %bool %146
|
||||
OpSelectionMerge %147 None
|
||||
OpBranchConditional %144 %148 %147
|
||||
%148 = OpLabel
|
||||
OpBranch %141
|
||||
%147 = OpLabel
|
||||
OpBranch %156
|
||||
%156 = OpLabel
|
||||
OpLoopMerge %157 %158 None
|
||||
OpBranch %159
|
||||
%159 = OpLabel
|
||||
%161 = OpLoad %uint %r_0
|
||||
%162 = OpULessThan %bool %161 %uint_4
|
||||
%160 = OpLogicalNot %bool %162
|
||||
OpSelectionMerge %163 None
|
||||
OpBranchConditional %160 %164 %163
|
||||
%164 = OpLabel
|
||||
OpBranch %157
|
||||
%163 = OpLabel
|
||||
OpStore %c_0 %uint_0
|
||||
OpBranch %150
|
||||
%150 = OpLabel
|
||||
OpLoopMerge %151 %152 None
|
||||
OpBranch %153
|
||||
%153 = OpLabel
|
||||
%155 = OpLoad %uint %c_0
|
||||
%156 = OpULessThan %bool %155 %uint_4
|
||||
%154 = OpLogicalNot %bool %156
|
||||
OpSelectionMerge %157 None
|
||||
OpBranchConditional %154 %158 %157
|
||||
%158 = OpLabel
|
||||
OpBranch %151
|
||||
%157 = OpLabel
|
||||
%160 = OpLoad %uint %c_0
|
||||
%159 = OpBitcast %int %160
|
||||
%162 = OpLoad %uint %r_0
|
||||
%161 = OpBitcast %int %162
|
||||
%163 = OpCompositeConstruct %v2int %159 %161
|
||||
%164 = OpIAdd %v2int %71 %163
|
||||
OpStore %writeIndex %164
|
||||
%166 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
|
||||
%167 = OpLoad %uint %166
|
||||
%168 = OpINotEqual %bool %167 %uint_0
|
||||
OpSelectionMerge %169 None
|
||||
OpBranchConditional %168 %170 %169
|
||||
%170 = OpLabel
|
||||
%171 = OpLoad %v2int %writeIndex
|
||||
%172 = OpVectorShuffle %v2int %171 %171 1 0
|
||||
OpStore %writeIndex %172
|
||||
OpBranch %166
|
||||
%166 = OpLabel
|
||||
OpLoopMerge %167 %168 None
|
||||
OpBranch %169
|
||||
%169 = OpLabel
|
||||
%173 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
|
||||
%174 = OpLoad %uint %173
|
||||
%175 = OpIMul %uint %uint_4 %174
|
||||
%171 = OpLoad %uint %c_0
|
||||
%172 = OpULessThan %bool %171 %uint_4
|
||||
%170 = OpLogicalNot %bool %172
|
||||
OpSelectionMerge %173 None
|
||||
OpBranchConditional %170 %174 %173
|
||||
%174 = OpLabel
|
||||
OpBranch %167
|
||||
%173 = OpLabel
|
||||
%176 = OpLoad %uint %c_0
|
||||
%177 = OpIAdd %uint %175 %176
|
||||
%178 = OpUGreaterThanEqual %bool %177 %50
|
||||
OpSelectionMerge %179 None
|
||||
OpBranchConditional %178 %180 %179
|
||||
%180 = OpLabel
|
||||
%181 = OpISub %uint %uint_256 %50
|
||||
%182 = OpULessThan %bool %177 %181
|
||||
OpBranch %179
|
||||
%179 = OpLabel
|
||||
%183 = OpPhi %bool %178 %169 %182 %180
|
||||
OpSelectionMerge %184 None
|
||||
OpBranchConditional %183 %185 %184
|
||||
%185 = OpLabel
|
||||
%175 = OpBitcast %int %176
|
||||
%178 = OpLoad %uint %r_0
|
||||
%177 = OpBitcast %int %178
|
||||
%179 = OpCompositeConstruct %v2int %175 %177
|
||||
%180 = OpIAdd %v2int %90 %179
|
||||
OpStore %writeIndex %180
|
||||
%182 = OpAccessChain %_ptr_Uniform_uint %flip %uint_0
|
||||
%183 = OpLoad %uint %182
|
||||
%184 = OpINotEqual %bool %183 %uint_0
|
||||
OpSelectionMerge %185 None
|
||||
OpBranchConditional %184 %186 %185
|
||||
%186 = OpLabel
|
||||
%187 = OpLoad %v2int %writeIndex
|
||||
%188 = OpSLessThan %v2bool %187 %51
|
||||
%186 = OpAll %bool %188
|
||||
OpBranch %184
|
||||
%184 = OpLabel
|
||||
%190 = OpPhi %bool %183 %179 %186 %185
|
||||
OpSelectionMerge %191 None
|
||||
OpBranchConditional %190 %192 %191
|
||||
%192 = OpLabel
|
||||
OpStore %acc %193
|
||||
OpStore %f %uint_0
|
||||
OpBranch %198
|
||||
%198 = OpLabel
|
||||
OpLoopMerge %199 %200 None
|
||||
OpBranch %201
|
||||
%188 = OpVectorShuffle %v2int %187 %187 1 0
|
||||
OpStore %writeIndex %188
|
||||
OpBranch %185
|
||||
%185 = OpLabel
|
||||
%189 = OpAccessChain %_ptr_Input_uint %tint_symbol_1 %uint_0
|
||||
%190 = OpLoad %uint %189
|
||||
%191 = OpIMul %uint %uint_4 %190
|
||||
%192 = OpLoad %uint %c_0
|
||||
%193 = OpIAdd %uint %191 %192
|
||||
%194 = OpUGreaterThanEqual %bool %193 %69
|
||||
OpSelectionMerge %195 None
|
||||
OpBranchConditional %194 %196 %195
|
||||
%196 = OpLabel
|
||||
%197 = OpISub %uint %uint_256 %69
|
||||
%198 = OpULessThan %bool %193 %197
|
||||
OpBranch %195
|
||||
%195 = OpLabel
|
||||
%199 = OpPhi %bool %194 %185 %198 %196
|
||||
OpSelectionMerge %200 None
|
||||
OpBranchConditional %199 %201 %200
|
||||
%201 = OpLabel
|
||||
%203 = OpLoad %uint %f
|
||||
%204 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
|
||||
%205 = OpLoad %uint %204
|
||||
%206 = OpULessThan %bool %203 %205
|
||||
%202 = OpLogicalNot %bool %206
|
||||
OpSelectionMerge %207 None
|
||||
OpBranchConditional %202 %208 %207
|
||||
%208 = OpLabel
|
||||
OpBranch %199
|
||||
%207 = OpLabel
|
||||
%209 = OpLoad %uint %f
|
||||
%210 = OpIAdd %uint %177 %209
|
||||
%211 = OpISub %uint %210 %50
|
||||
OpStore %i %211
|
||||
%213 = OpLoad %v3float %acc
|
||||
%216 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
|
||||
%217 = OpLoad %uint %216
|
||||
%215 = OpConvertUToF %float %217
|
||||
%218 = OpFDiv %float %float_1 %215
|
||||
%219 = OpLoad %uint %r_0
|
||||
%220 = OpLoad %uint %i
|
||||
%221 = OpAccessChain %_ptr_Workgroup_v3float %tile %219 %220
|
||||
%222 = OpLoad %v3float %221
|
||||
%223 = OpVectorTimesScalar %v3float %222 %218
|
||||
%224 = OpFAdd %v3float %213 %223
|
||||
OpStore %acc %224
|
||||
%203 = OpLoad %v2int %writeIndex
|
||||
%204 = OpSLessThan %v2bool %203 %70
|
||||
%202 = OpAll %bool %204
|
||||
OpBranch %200
|
||||
%200 = OpLabel
|
||||
%225 = OpLoad %uint %f
|
||||
%226 = OpIAdd %uint %225 %uint_1
|
||||
OpStore %f %226
|
||||
OpBranch %198
|
||||
%199 = OpLabel
|
||||
%228 = OpLoad %14 %outputTex
|
||||
%229 = OpLoad %v2int %writeIndex
|
||||
%230 = OpLoad %v3float %acc
|
||||
%231 = OpCompositeExtract %float %230 0
|
||||
%232 = OpCompositeExtract %float %230 1
|
||||
%233 = OpCompositeExtract %float %230 2
|
||||
%234 = OpCompositeConstruct %v4float %231 %232 %233 %float_1
|
||||
OpImageWrite %228 %229 %234
|
||||
OpBranch %191
|
||||
%191 = OpLabel
|
||||
OpBranch %152
|
||||
%152 = OpLabel
|
||||
%235 = OpLoad %uint %c_0
|
||||
%236 = OpIAdd %uint %235 %uint_1
|
||||
OpStore %c_0 %236
|
||||
OpBranch %150
|
||||
%151 = OpLabel
|
||||
OpBranch %142
|
||||
%142 = OpLabel
|
||||
%237 = OpLoad %uint %r_0
|
||||
%238 = OpIAdd %uint %237 %uint_1
|
||||
OpStore %r_0 %238
|
||||
OpBranch %140
|
||||
%141 = OpLabel
|
||||
%206 = OpPhi %bool %199 %195 %202 %201
|
||||
OpSelectionMerge %207 None
|
||||
OpBranchConditional %206 %208 %207
|
||||
%208 = OpLabel
|
||||
OpStore %acc %209
|
||||
OpStore %f %uint_0
|
||||
OpBranch %213
|
||||
%213 = OpLabel
|
||||
OpLoopMerge %214 %215 None
|
||||
OpBranch %216
|
||||
%216 = OpLabel
|
||||
%218 = OpLoad %uint %f
|
||||
%219 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
|
||||
%220 = OpLoad %uint %219
|
||||
%221 = OpULessThan %bool %218 %220
|
||||
%217 = OpLogicalNot %bool %221
|
||||
OpSelectionMerge %222 None
|
||||
OpBranchConditional %217 %223 %222
|
||||
%223 = OpLabel
|
||||
OpBranch %214
|
||||
%222 = OpLabel
|
||||
%224 = OpLoad %uint %f
|
||||
%225 = OpIAdd %uint %193 %224
|
||||
%226 = OpISub %uint %225 %69
|
||||
OpStore %i %226
|
||||
%228 = OpLoad %v3float %acc
|
||||
%231 = OpAccessChain %_ptr_Uniform_uint %params %uint_0
|
||||
%232 = OpLoad %uint %231
|
||||
%230 = OpConvertUToF %float %232
|
||||
%233 = OpFDiv %float %float_1 %230
|
||||
%234 = OpLoad %uint %r_0
|
||||
%235 = OpLoad %uint %i
|
||||
%236 = OpAccessChain %_ptr_Workgroup_v3float %tile %234 %235
|
||||
%237 = OpLoad %v3float %236
|
||||
%238 = OpVectorTimesScalar %v3float %237 %233
|
||||
%239 = OpFAdd %v3float %228 %238
|
||||
OpStore %acc %239
|
||||
OpBranch %215
|
||||
%215 = OpLabel
|
||||
%240 = OpLoad %uint %f
|
||||
%241 = OpIAdd %uint %240 %uint_1
|
||||
OpStore %f %241
|
||||
OpBranch %213
|
||||
%214 = OpLabel
|
||||
%243 = OpLoad %14 %outputTex
|
||||
%244 = OpLoad %v2int %writeIndex
|
||||
%245 = OpLoad %v3float %acc
|
||||
%246 = OpCompositeExtract %float %245 0
|
||||
%247 = OpCompositeExtract %float %245 1
|
||||
%248 = OpCompositeExtract %float %245 2
|
||||
%249 = OpCompositeConstruct %v4float %246 %247 %248 %float_1
|
||||
OpImageWrite %243 %244 %249
|
||||
OpBranch %207
|
||||
%207 = OpLabel
|
||||
OpBranch %168
|
||||
%168 = OpLabel
|
||||
%250 = OpLoad %uint %c_0
|
||||
%251 = OpIAdd %uint %250 %uint_1
|
||||
OpStore %c_0 %251
|
||||
OpBranch %166
|
||||
%167 = OpLabel
|
||||
OpBranch %158
|
||||
%158 = OpLabel
|
||||
%252 = OpLoad %uint %r_0
|
||||
%253 = OpIAdd %uint %252 %uint_1
|
||||
OpStore %r_0 %253
|
||||
OpBranch %156
|
||||
%157 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -350,21 +350,16 @@ void main(tint_symbol_1 tint_symbol) {
|
||||
const uint3 gl_LocalInvocationID_param = tint_symbol.gl_LocalInvocationID_param;
|
||||
const uint3 gl_GlobalInvocationID_param = tint_symbol.gl_GlobalInvocationID_param;
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
for(int i = 0; (i < 64); i = (i + 1)) {
|
||||
{
|
||||
for(int i_1 = 0; (i_1 < 64); i_1 = (i_1 + 1)) {
|
||||
mm_Asub[i][i_1] = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
for(int i_2 = 0; (i_2 < 64); i_2 = (i_2 + 1)) {
|
||||
const float tint_symbol_6[1] = (float[1])0;
|
||||
mm_Bsub[i_2] = tint_symbol_6;
|
||||
}
|
||||
{
|
||||
const uint i_1 = local_invocation_index;
|
||||
const uint i_2 = (local_invocation_index % 1u);
|
||||
mm_Bsub[i_1][i_2] = 0.0f;
|
||||
}
|
||||
{
|
||||
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
|
||||
const uint i = (idx / 64u);
|
||||
const uint i_1 = (idx % 64u);
|
||||
mm_Asub[i][i_1] = 0.0f;
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -53,7 +53,7 @@ bool coordsInBounds_vi2_vi2_(thread int2* const coord, thread int2* const shape)
|
||||
return x_88;
|
||||
}
|
||||
|
||||
float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread int* const row, thread int* const col, thread int* const tint_symbol_5, thread int* const tint_symbol_6, thread int* const tint_symbol_7) {
|
||||
float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread int* const row, thread int* const col, thread int* const tint_symbol_3, thread int* const tint_symbol_4, thread int* const tint_symbol_5) {
|
||||
int batchASize = 0;
|
||||
int2 param_10 = 0;
|
||||
int2 param_11 = 0;
|
||||
@ -63,16 +63,16 @@ float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread
|
||||
batchASize = (x_417 * x_419);
|
||||
int const x_421 = *(row);
|
||||
int const x_422 = *(col);
|
||||
int const x_424 = *(tint_symbol_5);
|
||||
int const x_425 = *(tint_symbol_6);
|
||||
int const x_424 = *(tint_symbol_3);
|
||||
int const x_425 = *(tint_symbol_4);
|
||||
param_10 = int2(x_421, x_422);
|
||||
param_11 = int2(x_424, x_425);
|
||||
bool const x_429 = coordsInBounds_vi2_vi2_(&(param_10), &(param_11));
|
||||
if (x_429) {
|
||||
int const x_438 = *(tint_symbol_7);
|
||||
int const x_438 = *(tint_symbol_5);
|
||||
int const x_439 = batchASize;
|
||||
int const x_441 = *(row);
|
||||
int const x_442 = *(tint_symbol_6);
|
||||
int const x_442 = *(tint_symbol_4);
|
||||
int const x_445 = *(col);
|
||||
float const x_448 = x_165.A[(((x_438 * x_439) + (x_441 * x_442)) + x_445)];
|
||||
x_430 = x_448;
|
||||
@ -83,7 +83,7 @@ float mm_readA_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, thread
|
||||
return x_450;
|
||||
}
|
||||
|
||||
float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread int* const row_1, thread int* const col_1, thread int* const tint_symbol_8, thread int* const tint_symbol_9, thread int* const tint_symbol_10) {
|
||||
float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread int* const row_1, thread int* const col_1, thread int* const tint_symbol_6, thread int* const tint_symbol_7, thread int* const tint_symbol_8) {
|
||||
int batchBSize = 0;
|
||||
int2 param_12 = 0;
|
||||
int2 param_13 = 0;
|
||||
@ -93,16 +93,16 @@ float mm_readB_i1_i1_(constant Uniforms& x_48, const device ssbB& x_185, thread
|
||||
batchBSize = (x_455 * x_457);
|
||||
int const x_459 = *(row_1);
|
||||
int const x_460 = *(col_1);
|
||||
int const x_462 = *(tint_symbol_8);
|
||||
int const x_463 = *(tint_symbol_9);
|
||||
int const x_462 = *(tint_symbol_6);
|
||||
int const x_463 = *(tint_symbol_7);
|
||||
param_12 = int2(x_459, x_460);
|
||||
param_13 = int2(x_462, x_463);
|
||||
bool const x_467 = coordsInBounds_vi2_vi2_(&(param_12), &(param_13));
|
||||
if (x_467) {
|
||||
int const x_475 = *(tint_symbol_10);
|
||||
int const x_475 = *(tint_symbol_8);
|
||||
int const x_476 = batchBSize;
|
||||
int const x_478 = *(row_1);
|
||||
int const x_479 = *(tint_symbol_9);
|
||||
int const x_479 = *(tint_symbol_7);
|
||||
int const x_482 = *(col_1);
|
||||
float const x_485 = x_185.B[(((x_475 * x_476) + (x_478 * x_479)) + x_482)];
|
||||
x_468 = x_485;
|
||||
@ -146,17 +146,17 @@ void setOutput_i1_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread
|
||||
return;
|
||||
}
|
||||
|
||||
void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int* const row_2, thread int* const col_2, thread float* const value_2, thread int* const tint_symbol_11) {
|
||||
void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int* const row_2, thread int* const col_2, thread float* const value_2, thread int* const tint_symbol_9) {
|
||||
int3 outCoord = 0;
|
||||
int param_14 = 0;
|
||||
int param_15 = 0;
|
||||
int param_16 = 0;
|
||||
float param_17 = 0.0f;
|
||||
int const x_491 = *(tint_symbol_11);
|
||||
int const x_491 = *(tint_symbol_9);
|
||||
int const x_492 = *(row_2);
|
||||
int const x_493 = *(col_2);
|
||||
outCoord = int3(x_491, x_492, x_493);
|
||||
int const x_496 = *(tint_symbol_11);
|
||||
int const x_496 = *(tint_symbol_9);
|
||||
param_14 = x_496;
|
||||
int const x_498 = *(row_2);
|
||||
param_15 = x_498;
|
||||
@ -168,7 +168,7 @@ void mm_write_i1_i1_f1_(constant Uniforms& x_48, device ssbOut& x_54, thread int
|
||||
return;
|
||||
}
|
||||
|
||||
void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const dimAOuter, thread int* const dimInner, thread int* const dimBOuter, thread uint3* const tint_symbol_12, thread uint3* const tint_symbol_13, thread int* const tint_symbol_14, thread int* const tint_symbol_15, thread int* const tint_symbol_16, threadgroup tint_array_wrapper* const tint_symbol_17, thread int* const tint_symbol_18, threadgroup tint_array_wrapper_2* const tint_symbol_19) {
|
||||
void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const dimAOuter, thread int* const dimInner, thread int* const dimBOuter, thread uint3* const tint_symbol_10, thread uint3* const tint_symbol_11, thread int* const tint_symbol_12, thread int* const tint_symbol_13, thread int* const tint_symbol_14, threadgroup tint_array_wrapper* const tint_symbol_15, thread int* const tint_symbol_16, threadgroup tint_array_wrapper_2* const tint_symbol_17) {
|
||||
int tileRow = 0;
|
||||
int tileCol = 0;
|
||||
int globalRow = 0;
|
||||
@ -203,13 +203,13 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
int param_7 = 0;
|
||||
int param_8 = 0;
|
||||
float param_9 = 0.0f;
|
||||
uint const x_132 = (*(tint_symbol_12)).y;
|
||||
uint const x_132 = (*(tint_symbol_10)).y;
|
||||
tileRow = (as_type<int>(x_132) * 1);
|
||||
uint const x_137 = (*(tint_symbol_12)).x;
|
||||
uint const x_137 = (*(tint_symbol_10)).x;
|
||||
tileCol = (as_type<int>(x_137) * 1);
|
||||
uint const x_143 = (*(tint_symbol_13)).y;
|
||||
uint const x_143 = (*(tint_symbol_11)).y;
|
||||
globalRow = (as_type<int>(x_143) * 1);
|
||||
uint const x_148 = (*(tint_symbol_13)).x;
|
||||
uint const x_148 = (*(tint_symbol_11)).x;
|
||||
globalCol = (as_type<int>(x_148) * 1);
|
||||
int const x_152 = *(dimInner);
|
||||
numTiles = (((x_152 - 1) / 64) + 1);
|
||||
@ -240,9 +240,9 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
innerRow = (x_183 + 1);
|
||||
}
|
||||
}
|
||||
uint const x_187 = (*(tint_symbol_12)).x;
|
||||
uint const x_187 = (*(tint_symbol_10)).x;
|
||||
tileColA = (as_type<int>(x_187) * 64);
|
||||
uint const x_192 = (*(tint_symbol_12)).y;
|
||||
uint const x_192 = (*(tint_symbol_10)).y;
|
||||
tileRowB = (as_type<int>(x_192) * 1);
|
||||
t = 0;
|
||||
while (true) {
|
||||
@ -280,8 +280,8 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
int const x_240 = inputCol;
|
||||
param_3 = (x_235 + x_236);
|
||||
param_4 = ((x_238 * 64) + x_240);
|
||||
float const x_244 = mm_readA_i1_i1_(x_48, x_165, &(param_3), &(param_4), tint_symbol_14, tint_symbol_15, tint_symbol_16);
|
||||
(*(tint_symbol_17)).arr[x_233].arr[x_234] = x_244;
|
||||
float const x_244 = mm_readA_i1_i1_(x_48, x_165, &(param_3), &(param_4), tint_symbol_12, tint_symbol_13, tint_symbol_14);
|
||||
(*(tint_symbol_15)).arr[x_233].arr[x_234] = x_244;
|
||||
{
|
||||
int const x_247 = innerCol_1;
|
||||
innerCol_1 = (x_247 + 1);
|
||||
@ -320,8 +320,8 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
int const x_285 = innerCol_2;
|
||||
param_5 = ((x_280 * 64) + x_282);
|
||||
param_6 = (x_284 + x_285);
|
||||
float const x_289 = mm_readB_i1_i1_(x_48, x_185, &(param_5), &(param_6), tint_symbol_15, tint_symbol_18, tint_symbol_16);
|
||||
(*(tint_symbol_19)).arr[x_278].arr[x_279] = x_289;
|
||||
float const x_289 = mm_readB_i1_i1_(x_48, x_185, &(param_5), &(param_6), tint_symbol_13, tint_symbol_16, tint_symbol_14);
|
||||
(*(tint_symbol_17)).arr[x_278].arr[x_279] = x_289;
|
||||
{
|
||||
int const x_291 = innerCol_2;
|
||||
innerCol_2 = (x_291 + 1);
|
||||
@ -351,7 +351,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
int const x_315 = k;
|
||||
int const x_316 = tileCol;
|
||||
int const x_317 = inner;
|
||||
float const x_320 = (*(tint_symbol_19)).arr[x_315].arr[(x_316 + x_317)];
|
||||
float const x_320 = (*(tint_symbol_17)).arr[x_315].arr[(x_316 + x_317)];
|
||||
BCached.arr[x_314] = x_320;
|
||||
{
|
||||
int const x_322 = inner;
|
||||
@ -368,7 +368,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
int const x_333 = tileRow;
|
||||
int const x_334 = innerRow_3;
|
||||
int const x_336 = k;
|
||||
float const x_338 = (*(tint_symbol_17)).arr[(x_333 + x_334)].arr[x_336];
|
||||
float const x_338 = (*(tint_symbol_15)).arr[(x_333 + x_334)].arr[x_336];
|
||||
ACached = x_338;
|
||||
innerCol_3 = 0;
|
||||
while (true) {
|
||||
@ -445,7 +445,7 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
param_8 = (x_400 + x_401);
|
||||
float const x_409 = acc.arr[x_403].arr[x_404];
|
||||
param_9 = x_409;
|
||||
mm_write_i1_i1_f1_(x_48, x_54, &(param_7), &(param_8), &(param_9), tint_symbol_16);
|
||||
mm_write_i1_i1_f1_(x_48, x_54, &(param_7), &(param_8), &(param_9), tint_symbol_14);
|
||||
}
|
||||
{
|
||||
int const x_411 = innerCol_4;
|
||||
@ -460,47 +460,51 @@ void mm_matMul_i1_i1_i1_(constant Uniforms& x_48, const device ssbA& x_165, cons
|
||||
return;
|
||||
}
|
||||
|
||||
void main_1(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const tint_symbol_20, thread int* const tint_symbol_21, thread int* const tint_symbol_22, thread uint3* const tint_symbol_23, thread int* const tint_symbol_24, thread uint3* const tint_symbol_25, threadgroup tint_array_wrapper* const tint_symbol_26, threadgroup tint_array_wrapper_2* const tint_symbol_27) {
|
||||
void main_1(constant Uniforms& x_48, const device ssbA& x_165, const device ssbB& x_185, device ssbOut& x_54, thread int* const tint_symbol_18, thread int* const tint_symbol_19, thread int* const tint_symbol_20, thread uint3* const tint_symbol_21, thread int* const tint_symbol_22, thread uint3* const tint_symbol_23, threadgroup tint_array_wrapper* const tint_symbol_24, threadgroup tint_array_wrapper_2* const tint_symbol_25) {
|
||||
int param_18 = 0;
|
||||
int param_19 = 0;
|
||||
int param_20 = 0;
|
||||
int const x_67 = x_48.aShape.y;
|
||||
*(tint_symbol_20) = x_67;
|
||||
*(tint_symbol_18) = x_67;
|
||||
int const x_71 = x_48.aShape.z;
|
||||
*(tint_symbol_21) = x_71;
|
||||
*(tint_symbol_19) = x_71;
|
||||
int const x_75 = x_48.bShape.z;
|
||||
*(tint_symbol_22) = x_75;
|
||||
uint const x_505 = (*(tint_symbol_23)).z;
|
||||
*(tint_symbol_24) = as_type<int>(x_505);
|
||||
int const x_508 = *(tint_symbol_20);
|
||||
*(tint_symbol_20) = x_75;
|
||||
uint const x_505 = (*(tint_symbol_21)).z;
|
||||
*(tint_symbol_22) = as_type<int>(x_505);
|
||||
int const x_508 = *(tint_symbol_18);
|
||||
param_18 = x_508;
|
||||
int const x_510 = *(tint_symbol_21);
|
||||
int const x_510 = *(tint_symbol_19);
|
||||
param_19 = x_510;
|
||||
int const x_512 = *(tint_symbol_22);
|
||||
int const x_512 = *(tint_symbol_20);
|
||||
param_20 = x_512;
|
||||
mm_matMul_i1_i1_i1_(x_48, x_165, x_185, x_54, &(param_18), &(param_19), &(param_20), tint_symbol_25, tint_symbol_23, tint_symbol_20, tint_symbol_21, tint_symbol_24, tint_symbol_26, tint_symbol_22, tint_symbol_27);
|
||||
mm_matMul_i1_i1_i1_(x_48, x_165, x_185, x_54, &(param_18), &(param_19), &(param_20), tint_symbol_23, tint_symbol_21, tint_symbol_18, tint_symbol_19, tint_symbol_22, tint_symbol_24, tint_symbol_20, tint_symbol_25);
|
||||
return;
|
||||
}
|
||||
|
||||
kernel void tint_symbol_1(uint3 gl_LocalInvocationID_param [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID_param [[thread_position_in_grid]], uint local_invocation_index [[thread_index_in_threadgroup]], constant Uniforms& x_48 [[buffer(3)]], const device ssbA& x_165 [[buffer(1)]], const device ssbB& x_185 [[buffer(2)]], device ssbOut& x_54 [[buffer(0)]]) {
|
||||
threadgroup tint_array_wrapper tint_symbol_28;
|
||||
threadgroup tint_array_wrapper_2 tint_symbol_29;
|
||||
thread uint3 tint_symbol_30 = 0u;
|
||||
thread uint3 tint_symbol_31 = 0u;
|
||||
threadgroup tint_array_wrapper_2 tint_symbol_26;
|
||||
threadgroup tint_array_wrapper tint_symbol_27;
|
||||
thread uint3 tint_symbol_28 = 0u;
|
||||
thread uint3 tint_symbol_29 = 0u;
|
||||
thread int tint_symbol_30 = 0;
|
||||
thread int tint_symbol_31 = 0;
|
||||
thread int tint_symbol_32 = 0;
|
||||
thread int tint_symbol_33 = 0;
|
||||
thread int tint_symbol_34 = 0;
|
||||
thread int tint_symbol_35 = 0;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
tint_array_wrapper const tint_symbol_3 = {.arr={}};
|
||||
tint_symbol_28 = tint_symbol_3;
|
||||
tint_array_wrapper_2 const tint_symbol_4 = {.arr={}};
|
||||
tint_symbol_29 = tint_symbol_4;
|
||||
{
|
||||
uint const i_1 = local_invocation_index;
|
||||
uint const i_2 = (local_invocation_index % 1u);
|
||||
tint_symbol_26.arr[i_1].arr[i_2] = float();
|
||||
}
|
||||
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
|
||||
uint const i = (idx / 64u);
|
||||
uint const i_1 = (idx % 64u);
|
||||
tint_symbol_27.arr[i].arr[i_1] = float();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
tint_symbol_30 = gl_LocalInvocationID_param;
|
||||
tint_symbol_31 = gl_GlobalInvocationID_param;
|
||||
main_1(x_48, x_165, x_185, x_54, &(tint_symbol_32), &(tint_symbol_33), &(tint_symbol_34), &(tint_symbol_31), &(tint_symbol_35), &(tint_symbol_30), &(tint_symbol_28), &(tint_symbol_29));
|
||||
tint_symbol_28 = gl_LocalInvocationID_param;
|
||||
tint_symbol_29 = gl_GlobalInvocationID_param;
|
||||
main_1(x_48, x_165, x_185, x_54, &(tint_symbol_30), &(tint_symbol_31), &(tint_symbol_32), &(tint_symbol_29), &(tint_symbol_33), &(tint_symbol_28), &(tint_symbol_27), &(tint_symbol_26));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 609
|
||||
; Bound: 628
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
@ -121,6 +121,7 @@
|
||||
OpName %param_19 "param_19"
|
||||
OpName %param_20 "param_20"
|
||||
OpName %main "main"
|
||||
OpName %idx "idx"
|
||||
OpDecorate %Uniforms Block
|
||||
OpMemberDecorate %Uniforms 0 Offset 0
|
||||
OpMemberDecorate %Uniforms 1 Offset 16
|
||||
@ -236,8 +237,9 @@
|
||||
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%575 = OpTypeFunction %void
|
||||
%603 = OpConstantNull %_arr__arr_float_uint_64_uint_64
|
||||
%604 = OpConstantNull %_arr__arr_float_uint_1_uint_64
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%606 = OpConstantNull %uint
|
||||
%uint_4096 = OpConstant %uint 4096
|
||||
%coordsInBounds_vi2_vi2_ = OpFunction %bool None %45
|
||||
%coord = OpFunctionParameter %_ptr_Function_v2int
|
||||
%shape = OpFunctionParameter %_ptr_Function_v2int
|
||||
@ -941,20 +943,45 @@
|
||||
OpFunctionEnd
|
||||
%main = OpFunction %void None %575
|
||||
%598 = OpLabel
|
||||
%idx = OpVariable %_ptr_Function_uint Function %606
|
||||
%599 = OpLoad %uint %tint_symbol_2
|
||||
%600 = OpIEqual %bool %599 %uint_0
|
||||
OpSelectionMerge %601 None
|
||||
OpBranchConditional %600 %602 %601
|
||||
%602 = OpLabel
|
||||
OpStore %mm_Asub %603
|
||||
OpStore %mm_Bsub %604
|
||||
OpBranch %601
|
||||
%601 = OpLabel
|
||||
%600 = OpLoad %uint %tint_symbol_2
|
||||
%601 = OpUMod %uint %600 %uint_1
|
||||
%602 = OpAccessChain %_ptr_Workgroup_float %mm_Bsub %599 %601
|
||||
OpStore %602 %85
|
||||
%603 = OpLoad %uint %tint_symbol_2
|
||||
OpStore %idx %603
|
||||
OpBranch %607
|
||||
%607 = OpLabel
|
||||
OpLoopMerge %608 %609 None
|
||||
OpBranch %610
|
||||
%610 = OpLabel
|
||||
%612 = OpLoad %uint %idx
|
||||
%614 = OpULessThan %bool %612 %uint_4096
|
||||
%611 = OpLogicalNot %bool %614
|
||||
OpSelectionMerge %615 None
|
||||
OpBranchConditional %611 %616 %615
|
||||
%616 = OpLabel
|
||||
OpBranch %608
|
||||
%615 = OpLabel
|
||||
%617 = OpLoad %uint %idx
|
||||
%618 = OpUDiv %uint %617 %uint_64
|
||||
%619 = OpLoad %uint %idx
|
||||
%620 = OpUMod %uint %619 %uint_64
|
||||
%621 = OpAccessChain %_ptr_Workgroup_float %mm_Asub %618 %620
|
||||
OpStore %621 %85
|
||||
OpBranch %609
|
||||
%609 = OpLabel
|
||||
%622 = OpLoad %uint %idx
|
||||
%623 = OpIAdd %uint %622 %uint_64
|
||||
OpStore %idx %623
|
||||
OpBranch %607
|
||||
%608 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%606 = OpLoad %v3uint %tint_symbol
|
||||
OpStore %gl_LocalInvocationID %606
|
||||
%607 = OpLoad %v3uint %tint_symbol_1
|
||||
OpStore %gl_GlobalInvocationID %607
|
||||
%608 = OpFunctionCall %void %main_1
|
||||
%625 = OpLoad %v3uint %tint_symbol
|
||||
OpStore %gl_LocalInvocationID %625
|
||||
%626 = OpLoad %v3uint %tint_symbol_1
|
||||
OpStore %gl_GlobalInvocationID %626
|
||||
%627 = OpFunctionCall %void %main_1
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicAdd_794055(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicAdd_794055 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicAdd_794055
|
||||
%25 = OpFunctionCall %void %atomicAdd_794055
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicAdd_d5db1d(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicAdd_d5db1d = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicAdd_d5db1d
|
||||
%24 = OpFunctionCall %void %atomicAdd_d5db1d
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicAnd_34edd3(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicAnd_34edd3 = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicAnd_34edd3
|
||||
%24 = OpFunctionCall %void %atomicAnd_34edd3
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicAnd_45a819(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicAnd_45a819 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicAnd_45a819
|
||||
%25 = OpFunctionCall %void %atomicAnd_45a819
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -15,7 +15,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ void atomicCompareExchangeWeak_89ea3b(threadgroup atomic_int* const tint_symbol_
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 37
|
||||
; Bound: 33
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -29,7 +29,7 @@
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Function_v2int = OpTypePointer Function %v2int
|
||||
%24 = OpConstantNull %v2int
|
||||
%33 = OpConstantNull %int
|
||||
%29 = OpConstantNull %int
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicCompareExchangeWeak_89ea3b = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -43,15 +43,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%26 = OpLabel
|
||||
%27 = OpLoad %uint %tint_symbol
|
||||
%28 = OpIEqual %bool %27 %uint_0
|
||||
OpSelectionMerge %29 None
|
||||
OpBranchConditional %28 %30 %29
|
||||
%30 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %33
|
||||
OpBranch %29
|
||||
%29 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %29
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%36 = OpFunctionCall %void %atomicCompareExchangeWeak_89ea3b
|
||||
%32 = OpFunctionCall %void %atomicCompareExchangeWeak_89ea3b
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -15,7 +15,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ void atomicCompareExchangeWeak_b2ab2c(threadgroup atomic_uint* const tint_symbol
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 35
|
||||
; Bound: 31
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -27,7 +27,7 @@
|
||||
%bool = OpTypeBool
|
||||
%_ptr_Function_v2uint = OpTypePointer Function %v2uint
|
||||
%22 = OpConstantNull %v2uint
|
||||
%31 = OpConstantNull %uint
|
||||
%27 = OpConstantNull %uint
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicCompareExchangeWeak_b2ab2c = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -41,15 +41,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%24 = OpLabel
|
||||
%25 = OpLoad %uint %tint_symbol
|
||||
%26 = OpIEqual %bool %25 %uint_0
|
||||
OpSelectionMerge %27 None
|
||||
OpBranchConditional %26 %28 %27
|
||||
%28 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %31
|
||||
OpBranch %27
|
||||
%27 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %27
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%34 = OpFunctionCall %void %atomicCompareExchangeWeak_b2ab2c
|
||||
%30 = OpFunctionCall %void %atomicCompareExchangeWeak_b2ab2c
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicExchange_0a5dca(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicExchange_0a5dca = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicExchange_0a5dca
|
||||
%24 = OpFunctionCall %void %atomicExchange_0a5dca
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicExchange_e114ba(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicExchange_e114ba = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicExchange_e114ba
|
||||
%25 = OpFunctionCall %void %atomicExchange_e114ba
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicLoad_361bf1(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 29
|
||||
; Bound: 24
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -24,7 +24,6 @@
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%16 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicLoad_361bf1 = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -35,15 +34,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%18 = OpLabel
|
||||
%19 = OpLoad %uint %tint_symbol
|
||||
%20 = OpIEqual %bool %19 %uint_0
|
||||
OpSelectionMerge %22 None
|
||||
OpBranchConditional %20 %23 %22
|
||||
%23 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %16
|
||||
OpBranch %22
|
||||
%22 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%28 = OpFunctionCall %void %atomicLoad_361bf1
|
||||
%23 = OpFunctionCall %void %atomicLoad_361bf1
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicLoad_afcc03(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%17 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicLoad_afcc03 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicLoad_afcc03
|
||||
%24 = OpFunctionCall %void %atomicLoad_afcc03
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicMax_a89cc3(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicMax_a89cc3 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicMax_a89cc3
|
||||
%25 = OpFunctionCall %void %atomicMax_a89cc3
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicMax_beccfc(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicMax_beccfc = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicMax_beccfc
|
||||
%24 = OpFunctionCall %void %atomicMax_beccfc
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicMin_278235(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicMin_278235 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicMin_278235
|
||||
%25 = OpFunctionCall %void %atomicMin_278235
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicMin_69d383(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicMin_69d383 = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicMin_69d383
|
||||
%24 = OpFunctionCall %void %atomicMin_69d383
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicOr_5e3d61(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicOr_5e3d61 = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicOr_5e3d61
|
||||
%24 = OpFunctionCall %void %atomicOr_5e3d61
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicOr_d09248(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicOr_d09248 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicOr_d09248
|
||||
%25 = OpFunctionCall %void %atomicOr_d09248
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -12,7 +12,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicStore_726882(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 28
|
||||
; Bound: 23
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -22,8 +22,7 @@
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%bool = OpTypeBool
|
||||
%24 = OpConstantNull %uint
|
||||
%19 = OpConstantNull %uint
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicStore_726882 = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -32,15 +31,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%16 = OpLabel
|
||||
%17 = OpLoad %uint %tint_symbol
|
||||
%18 = OpIEqual %bool %17 %uint_0
|
||||
OpSelectionMerge %20 None
|
||||
OpBranchConditional %18 %21 %20
|
||||
%21 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %24
|
||||
OpBranch %20
|
||||
%20 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %19
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%27 = OpFunctionCall %void %atomicStore_726882
|
||||
%22 = OpFunctionCall %void %atomicStore_726882
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -12,7 +12,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicStore_8bea94(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 29
|
||||
; Bound: 24
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -23,8 +23,7 @@
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%int_1 = OpConstant %int 1
|
||||
%bool = OpTypeBool
|
||||
%25 = OpConstantNull %int
|
||||
%20 = OpConstantNull %int
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicStore_8bea94 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -33,15 +32,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%17 = OpLabel
|
||||
%18 = OpLoad %uint %tint_symbol
|
||||
%19 = OpIEqual %bool %18 %uint_0
|
||||
OpSelectionMerge %21 None
|
||||
OpBranchConditional %19 %22 %21
|
||||
%22 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %25
|
||||
OpBranch %21
|
||||
%21 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %20
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%28 = OpFunctionCall %void %atomicStore_8bea94
|
||||
%23 = OpFunctionCall %void %atomicStore_8bea94
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
int atomic_result_1 = 0;
|
||||
InterlockedExchange(arg_0, 0, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicXor_75dc95(threadgroup atomic_int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), int(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 31
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -26,7 +26,6 @@
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%18 = OpConstantNull %int
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicXor_75dc95 = OpFunction %void None %7
|
||||
%10 = OpLabel
|
||||
@ -37,15 +36,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %18
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%30 = OpFunctionCall %void %atomicXor_75dc95
|
||||
%25 = OpFunctionCall %void %atomicXor_75dc95
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -13,7 +13,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
uint atomic_result_1 = 0u;
|
||||
InterlockedExchange(arg_0, 0u, atomic_result_1);
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ void atomicXor_c8e6be(threadgroup atomic_uint* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup atomic_uint tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
atomic_store_explicit(&(tint_symbol_2), uint(), memory_order_relaxed);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -1,11 +1,11 @@
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_0 "arg_0"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -25,7 +25,6 @@
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%17 = OpConstantNull %uint
|
||||
%bool = OpTypeBool
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%atomicXor_c8e6be = OpFunction %void None %6
|
||||
%9 = OpLabel
|
||||
@ -36,15 +35,8 @@
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %6
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%21 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %21 %24 %23
|
||||
%24 = OpLabel
|
||||
OpAtomicStore %arg_0 %uint_2 %uint_0 %17
|
||||
OpBranch %23
|
||||
%23 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %atomicXor_c8e6be
|
||||
%24 = OpFunctionCall %void %atomicXor_c8e6be
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -22,7 +22,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = 0;
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void frexp_0da285(threadgroup int* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup int tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = int();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -5,12 +5,12 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 32
|
||||
; Bound: 26
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%13 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_1 "arg_1"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -30,9 +30,7 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
|
||||
%float_1 = OpConstant %float 1
|
||||
%_ptr_Function_float = OpTypePointer Function %float
|
||||
%18 = OpConstantNull %float
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%bool = OpTypeBool
|
||||
%27 = OpConstantNull %int
|
||||
%21 = OpConstantNull %int
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%frexp_0da285 = OpFunction %void None %7
|
||||
@ -44,15 +42,8 @@ intrinsics/gen/frexp/0da285.wgsl:29:18 warning: use of deprecated intrinsic
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %7
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %uint %tint_symbol
|
||||
%23 = OpIEqual %bool %21 %uint_0
|
||||
OpSelectionMerge %25 None
|
||||
OpBranchConditional %23 %26 %25
|
||||
%26 = OpLabel
|
||||
OpStore %arg_1 %27
|
||||
OpBranch %25
|
||||
%25 = OpLabel
|
||||
OpStore %arg_1 %21
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%31 = OpFunctionCall %void %frexp_0da285
|
||||
%25 = OpFunctionCall %void %frexp_0da285
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -22,7 +22,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = int3(0, 0, 0);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void frexp_40fc9b(threadgroup int3* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup int3 tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = int3();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -5,12 +5,12 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 33
|
||||
; Bound: 27
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%15 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_1 "arg_1"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -31,9 +31,7 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
%v3float = OpTypeVector %float 3
|
||||
%16 = OpConstantNull %v3float
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%bool = OpTypeBool
|
||||
%28 = OpConstantNull %v3int
|
||||
%22 = OpConstantNull %v3int
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%frexp_40fc9b = OpFunction %void None %8
|
||||
@ -45,15 +43,8 @@ intrinsics/gen/frexp/40fc9b.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %8
|
||||
%21 = OpLabel
|
||||
%22 = OpLoad %uint %tint_symbol
|
||||
%24 = OpIEqual %bool %22 %uint_0
|
||||
OpSelectionMerge %26 None
|
||||
OpBranchConditional %24 %27 %26
|
||||
%27 = OpLabel
|
||||
OpStore %arg_1 %28
|
||||
OpBranch %26
|
||||
%26 = OpLabel
|
||||
OpStore %arg_1 %22
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%32 = OpFunctionCall %void %frexp_40fc9b
|
||||
%26 = OpFunctionCall %void %frexp_40fc9b
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -22,7 +22,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = int2(0, 0);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void frexp_a3f940(threadgroup int2* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup int2 tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = int2();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -5,12 +5,12 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 33
|
||||
; Bound: 27
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%15 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_1 "arg_1"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -31,9 +31,7 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
%v2float = OpTypeVector %float 2
|
||||
%16 = OpConstantNull %v2float
|
||||
%_ptr_Function_v2float = OpTypePointer Function %v2float
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%bool = OpTypeBool
|
||||
%28 = OpConstantNull %v2int
|
||||
%22 = OpConstantNull %v2int
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%frexp_a3f940 = OpFunction %void None %8
|
||||
@ -45,15 +43,8 @@ intrinsics/gen/frexp/a3f940.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %8
|
||||
%21 = OpLabel
|
||||
%22 = OpLoad %uint %tint_symbol
|
||||
%24 = OpIEqual %bool %22 %uint_0
|
||||
OpSelectionMerge %26 None
|
||||
OpBranchConditional %24 %27 %26
|
||||
%27 = OpLabel
|
||||
OpStore %arg_1 %28
|
||||
OpBranch %26
|
||||
%26 = OpLabel
|
||||
OpStore %arg_1 %22
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%32 = OpFunctionCall %void %frexp_a3f940
|
||||
%26 = OpFunctionCall %void %frexp_a3f940
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -22,7 +22,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = int4(0, 0, 0, 0);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void frexp_b87f4e(threadgroup int4* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup int4 tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = int4();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -5,12 +5,12 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 33
|
||||
; Bound: 27
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%15 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_1 "arg_1"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -31,9 +31,7 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
%v4float = OpTypeVector %float 4
|
||||
%16 = OpConstantNull %v4float
|
||||
%_ptr_Function_v4float = OpTypePointer Function %v4float
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%bool = OpTypeBool
|
||||
%28 = OpConstantNull %v4int
|
||||
%22 = OpConstantNull %v4int
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%frexp_b87f4e = OpFunction %void None %8
|
||||
@ -45,15 +43,8 @@ intrinsics/gen/frexp/b87f4e.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %8
|
||||
%21 = OpLabel
|
||||
%22 = OpLoad %uint %tint_symbol
|
||||
%24 = OpIEqual %bool %22 %uint_0
|
||||
OpSelectionMerge %26 None
|
||||
OpBranchConditional %24 %27 %26
|
||||
%27 = OpLabel
|
||||
OpStore %arg_1 %28
|
||||
OpBranch %26
|
||||
%26 = OpLabel
|
||||
OpStore %arg_1 %22
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%32 = OpFunctionCall %void %frexp_b87f4e
|
||||
%26 = OpFunctionCall %void %frexp_b87f4e
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -15,7 +15,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void modf_1d59e5(threadgroup float4* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup float4 tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = float4();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -5,12 +5,12 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 24
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%13 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_1 "arg_1"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -29,8 +29,6 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
%8 = OpTypeFunction %void
|
||||
%14 = OpConstantNull %v4float
|
||||
%_ptr_Function_v4float = OpTypePointer Function %v4float
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%bool = OpTypeBool
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%modf_1d59e5 = OpFunction %void None %8
|
||||
@ -42,15 +40,8 @@ intrinsics/gen/modf/1d59e5.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %8
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpStore %arg_1 %14
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %modf_1d59e5
|
||||
%23 = OpFunctionCall %void %modf_1d59e5
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -15,7 +15,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = float2(0.0f, 0.0f);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void modf_a128ab(threadgroup float2* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup float2 tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = float2();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
@ -5,12 +5,12 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 30
|
||||
; Bound: 24
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%13 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %compute_main "compute_main" %tint_symbol
|
||||
OpEntryPoint GLCompute %compute_main "compute_main"
|
||||
OpExecutionMode %compute_main LocalSize 1 1 1
|
||||
OpName %arg_1 "arg_1"
|
||||
OpName %tint_symbol "tint_symbol"
|
||||
@ -29,8 +29,6 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
%8 = OpTypeFunction %void
|
||||
%14 = OpConstantNull %v2float
|
||||
%_ptr_Function_v2float = OpTypePointer Function %v2float
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%bool = OpTypeBool
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_264 = OpConstant %uint 264
|
||||
%modf_a128ab = OpFunction %void None %8
|
||||
@ -42,15 +40,8 @@ intrinsics/gen/modf/a128ab.wgsl:29:24 warning: use of deprecated intrinsic
|
||||
OpFunctionEnd
|
||||
%compute_main = OpFunction %void None %8
|
||||
%19 = OpLabel
|
||||
%20 = OpLoad %uint %tint_symbol
|
||||
%22 = OpIEqual %bool %20 %uint_0
|
||||
OpSelectionMerge %24 None
|
||||
OpBranchConditional %22 %25 %24
|
||||
%25 = OpLabel
|
||||
OpStore %arg_1 %14
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
OpControlBarrier %uint_2 %uint_2 %uint_264
|
||||
%29 = OpFunctionCall %void %modf_a128ab
|
||||
%23 = OpFunctionCall %void %modf_a128ab
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
@ -15,7 +15,7 @@ struct tint_symbol_1 {
|
||||
[numthreads(1, 1, 1)]
|
||||
void compute_main(tint_symbol_1 tint_symbol) {
|
||||
const uint local_invocation_index = tint_symbol.local_invocation_index;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
arg_1 = float3(0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -19,7 +19,7 @@ void modf_bb9088(threadgroup float3* const tint_symbol_1) {
|
||||
|
||||
kernel void compute_main(uint local_invocation_index [[thread_index_in_threadgroup]]) {
|
||||
threadgroup float3 tint_symbol_2;
|
||||
if ((local_invocation_index == 0u)) {
|
||||
{
|
||||
tint_symbol_2 = float3();
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user