writer/hlsl: Emit helper functions for storage class atomic intrinsics

By generating a helper function for these, we can keep the atomic expression pre-statement-free. This can help prevent for-loops from being transformed into while loops, which can upset FXC.

We can't do the same for workgroup storage atomics, as the InterlockedXXX() methods have the workgroup-storage expression as the first argument, and I'm not aware of any way to make a user-declared parameter be `groupshared`.

Change-Id: I8669127a58dc9cae95ce316523029064b5c9b5fa
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/57462
Commit-Queue: James Price <jrprice@google.com>
Auto-Submit: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
This commit is contained in:
Ben Clayton
2021-07-09 16:50:14 +00:00
committed by Tint LUCI CQ
parent e28591101c
commit e027e81bf2
23 changed files with 372 additions and 237 deletions

View File

@@ -171,6 +171,10 @@ bool GeneratorImpl::Generate() {
}
}
if (!helpers_.lines.empty()) {
current_buffer_->Insert(helpers_, 0, 0);
}
return true;
}
@@ -845,7 +849,7 @@ bool GeneratorImpl::EmitStorageBufferAccess(
case Op::kAtomicXor:
case Op::kAtomicExchange:
case Op::kAtomicCompareExchangeWeak:
return EmitStorageAtomicCall(out, expr, intrinsic->op);
return EmitStorageAtomicCall(out, expr, intrinsic);
}
TINT_UNREACHABLE(Writer, diagnostics_)
@@ -857,188 +861,217 @@ bool GeneratorImpl::EmitStorageBufferAccess(
bool GeneratorImpl::EmitStorageAtomicCall(
std::ostream& out,
ast::CallExpression* expr,
transform::DecomposeMemoryAccess::Intrinsic::Op op) {
const transform::DecomposeMemoryAccess::Intrinsic* intrinsic) {
using Op = transform::DecomposeMemoryAccess::Intrinsic::Op;
std::string result = UniqueIdentifier("atomic_result");
auto* result_ty = TypeOf(expr);
if (!result_ty->Is<sem::Void>()) {
auto pre = line();
if (!EmitTypeAndName(pre, TypeOf(expr), ast::StorageClass::kNone,
ast::Access::kUndefined, result)) {
return false;
}
pre << " = ";
if (!EmitZeroValue(pre, result_ty)) {
return false;
}
pre << ";";
}
auto* buffer = expr->params()[0];
auto* offset = expr->params()[1];
auto& buf = helpers_;
auto call_buffer_method = [&](const char* name) {
// First two arguments to the DecomposeMemoryAccess::Intrinsic are the
// buffer and offset. The buffer is then moved to the LHS of the '.', and the
// offset becomes the first argument. The rest of the method's arguments are
// the same.
auto pre = line();
if (!EmitExpression(pre, buffer)) {
return false;
}
pre << "." << name;
{
ScopedParen sp(pre);
if (!EmitExpression(pre, offset)) {
return false;
}
for (size_t i = 2; i < expr->params().size(); i++) {
auto* arg = expr->params()[i];
pre << ", ";
if (!EmitExpression(pre, arg)) {
return false;
// generate_helper() generates a helper function that translates the
// DecomposeMemoryAccess::Intrinsic call into the corresponding HLSL
// atomic intrinsic function.
auto generate_helper = [&]() -> std::string {
auto rmw = [&](const char* wgsl, const char* hlsl) -> std::string {
auto name = UniqueIdentifier(wgsl);
{
auto fn = line(&buf);
if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, name)) {
return "";
}
fn << "(RWByteAddressBuffer buffer, uint offset, ";
if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "value")) {
return "";
}
fn << ") {";
}
pre << ", " << result;
}
pre << ";";
buf.IncrementIndent();
TINT_DEFER({
buf.DecrementIndent();
line(&buf) << "}";
line(&buf);
});
out << result;
return true;
{
auto l = line(&buf);
if (!EmitTypeAndName(l, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "original_value")) {
return "";
}
l << " = 0;";
}
line(&buf) << "buffer." << hlsl << "(offset, value, original_value);";
line(&buf) << "return original_value;";
return name;
};
switch (intrinsic->op) {
case Op::kAtomicAdd:
return rmw("atomicAdd", "InterlockedAdd");
case Op::kAtomicMax:
return rmw("atomicMax", "InterlockedMax");
case Op::kAtomicMin:
return rmw("atomicMin", "InterlockedMin");
case Op::kAtomicAnd:
return rmw("atomicAnd", "InterlockedAnd");
case Op::kAtomicOr:
return rmw("atomicOr", "InterlockedOr");
case Op::kAtomicXor:
return rmw("atomicXor", "InterlockedXor");
case Op::kAtomicExchange:
return rmw("atomicExchange", "InterlockedExchange");
case Op::kAtomicLoad: {
// HLSL does not have an InterlockedLoad, so we emulate it with
// InterlockedOr using 0 as the OR value
auto name = UniqueIdentifier("atomicLoad");
{
auto fn = line(&buf);
if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, name)) {
return "";
}
fn << "(RWByteAddressBuffer buffer, uint offset) {";
}
buf.IncrementIndent();
TINT_DEFER({
buf.DecrementIndent();
line(&buf) << "}";
line(&buf);
});
{
auto l = line(&buf);
if (!EmitTypeAndName(l, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "value")) {
return "";
}
l << " = 0;";
}
line(&buf) << "buffer.InterlockedOr(offset, 0, value);";
line(&buf) << "return value;";
return name;
}
case Op::kAtomicStore: {
// HLSL does not have an InterlockedStore, so we emulate it with
// InterlockedExchange and discard the returned value
auto* value_ty = TypeOf(expr->params()[2]);
auto name = UniqueIdentifier("atomicStore");
{
auto fn = line(&buf);
fn << "void " << name << "(RWByteAddressBuffer buffer, uint offset, ";
if (!EmitTypeAndName(fn, value_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "value")) {
return "";
}
fn << ") {";
}
buf.IncrementIndent();
TINT_DEFER({
buf.DecrementIndent();
line(&buf) << "}";
line(&buf);
});
{
auto l = line(&buf);
if (!EmitTypeAndName(l, value_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "ignored")) {
return "";
}
l << ";";
}
line(&buf) << "buffer.InterlockedExchange(offset, value, ignored);";
return name;
}
case Op::kAtomicCompareExchangeWeak: {
auto* value_ty = TypeOf(expr->params()[2]);
auto name = UniqueIdentifier("atomicCompareExchangeWeak");
{
auto fn = line(&buf);
if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, name)) {
return "";
}
fn << "(RWByteAddressBuffer buffer, uint offset, ";
if (!EmitTypeAndName(fn, value_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "compare")) {
return "";
}
fn << ", ";
if (!EmitTypeAndName(fn, value_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "value")) {
return "";
}
fn << ") {";
}
buf.IncrementIndent();
TINT_DEFER({
buf.DecrementIndent();
line(&buf) << "}";
line(&buf);
});
{ // T result = {0, 0};
auto l = line(&buf);
if (!EmitTypeAndName(l, result_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, "result")) {
return "";
}
l << " = {0, 0};";
}
line(&buf) << "buffer.InterlockedCompareExchange(offset, compare, "
"value, result.x);";
line(&buf) << "result.y = result.x == compare;";
line(&buf) << "return result;";
return name;
}
default:
break;
}
TINT_UNREACHABLE(Writer, diagnostics_)
<< "unsupported atomic DecomposeMemoryAccess::Intrinsic::Op: "
<< static_cast<int>(intrinsic->op);
return "";
};
switch (op) {
case Op::kAtomicLoad: {
// HLSL does not have an InterlockedLoad, so we emulate it with
// InterlockedOr using 0 as the OR value
auto pre = line();
if (!EmitExpression(pre, buffer)) {
return false;
}
pre << ".InterlockedOr";
{
ScopedParen sp(pre);
if (!EmitExpression(pre, offset)) {
return false;
}
pre << ", 0, " << result;
}
pre << ";";
out << result;
return true;
}
case Op::kAtomicStore: {
// HLSL does not have an InterlockedStore, so we emulate it with
// InterlockedExchange and discard the returned value
auto pre = line();
auto* value = expr->params()[2];
auto* value_ty = TypeOf(value);
if (!EmitTypeAndName(pre, value_ty, ast::StorageClass::kNone,
ast::Access::kUndefined, result)) {
return false;
}
pre << " = ";
if (!EmitZeroValue(pre, value_ty)) {
return false;
}
pre << ";";
if (!EmitExpression(out, buffer)) {
return false;
}
out << ".InterlockedExchange";
{
ScopedParen sp(out);
if (!EmitExpression(out, offset)) {
return false;
}
out << ", ";
if (!EmitExpression(out, value)) {
return false;
}
out << ", " << result;
}
return true;
}
case Op::kAtomicCompareExchangeWeak: {
auto* compare_value = expr->params()[2];
auto* value = expr->params()[3];
std::string compare = UniqueIdentifier("atomic_compare_value");
{ // T atomic_compare_value = compare_value;
auto pre = line();
if (!EmitTypeAndName(pre, TypeOf(compare_value),
ast::StorageClass::kNone, ast::Access::kUndefined,
compare)) {
return false;
}
pre << " = ";
if (!EmitExpression(pre, compare_value)) {
return false;
}
pre << ";";
}
{ // buffer.InterlockedCompareExchange(offset, compare, value, result.x);
auto pre = line();
if (!EmitExpression(pre, buffer)) {
return false;
}
pre << ".InterlockedCompareExchange";
{
ScopedParen sp(pre);
if (!EmitExpression(pre, offset)) {
return false;
}
pre << ", " << compare << ", ";
if (!EmitExpression(pre, value)) {
return false;
}
pre << ", " << result << ".x";
}
pre << ";";
}
{ // result.y = result.x == compare;
line() << result << ".y = " << result << ".x == " << compare << ";";
}
out << result;
return true;
}
case Op::kAtomicAdd:
return call_buffer_method("InterlockedAdd");
case Op::kAtomicMax:
return call_buffer_method("InterlockedMax");
case Op::kAtomicMin:
return call_buffer_method("InterlockedMin");
case Op::kAtomicAnd:
return call_buffer_method("InterlockedAnd");
case Op::kAtomicOr:
return call_buffer_method("InterlockedOr");
case Op::kAtomicXor:
return call_buffer_method("InterlockedXor");
case Op::kAtomicExchange:
return call_buffer_method("InterlockedExchange");
default:
break;
auto func = utils::GetOrCreate(dma_intrinsics_,
DMAIntrinsic{intrinsic->op, intrinsic->type},
generate_helper);
if (func.empty()) {
return false;
}
TINT_UNREACHABLE(Writer, diagnostics_)
<< "unsupported atomic DecomposeMemoryAccess::Intrinsic::Op: "
<< static_cast<int>(op);
return false;
out << func;
{
ScopedParen sp(out);
bool first = true;
for (auto* arg : expr->params()) {
if (!first) {
out << ", ";
}
first = false;
if (!EmitExpression(out, arg)) {
return false;
}
}
}
return true;
}
bool GeneratorImpl::EmitWorkgroupAtomicCall(std::ostream& out,

View File

@@ -34,6 +34,7 @@
#include "src/program_builder.h"
#include "src/scope_stack.h"
#include "src/transform/decompose_memory_access.h"
#include "src/utils/hash.h"
#include "src/writer/text_generator.h"
namespace tint {
@@ -126,12 +127,12 @@ class GeneratorImpl : public TextGenerator {
/// Handles generating an atomic intrinsic call for a storage buffer variable
/// @param out the output of the expression stream
/// @param expr the call expression
/// @param op the atomic op
/// @param intrinsic the atomic intrinsic
/// @returns true if the call expression is emitted
bool EmitStorageAtomicCall(
std::ostream& out,
ast::CallExpression* expr,
transform::DecomposeMemoryAccess::Intrinsic::Op op);
const transform::DecomposeMemoryAccess::Intrinsic* intrinsic);
/// Handles generating an atomic intrinsic call for a workgroup variable
/// @param out the output of the expression stream
/// @param expr the call expression
@@ -389,9 +390,28 @@ class GeneratorImpl : public TextGenerator {
std::string var_name;
};
struct DMAIntrinsic {
transform::DecomposeMemoryAccess::Intrinsic::Op op;
transform::DecomposeMemoryAccess::Intrinsic::DataType type;
bool operator==(const DMAIntrinsic& rhs) const {
return op == rhs.op && type == rhs.type;
}
/// Hasher is a std::hash function for DMAIntrinsic
struct Hasher {
/// @param i the DMAIntrinsic to hash
/// @returns the hash of `i`
inline std::size_t operator()(const DMAIntrinsic& i) const {
return utils::Hash(i.op, i.type);
}
};
};
std::string get_buffer_name(ast::Expression* expr);
TextBuffer helpers_; // Helper functions emitted at the top of the output
std::function<bool()> emit_continuing_;
std::unordered_map<DMAIntrinsic, std::string, DMAIntrinsic::Hasher>
dma_intrinsics_;
std::unordered_map<const sem::Struct*, std::string> structure_builders_;
std::unordered_map<const ast::AssignmentStatement*, const sem::Vector*>
vector_assignments_in_loops_;