Tint: Implement f16 in uniform and storage address space

This CL implements f16 in uniform and storage address space, allowing
using f16 types in uniform and storage buffers on all backends. Tint
unit tests and Dawn E2E tests are added to validate the f16 types work
as expected.

Bug: tint:1473, tint:1502
Change-Id: I15e3de1033d3727f2ea33f4657f682c5f13c2153
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106320
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
Zhaoming Jiang
2022-11-24 05:25:35 +00:00
committed by Dawn LUCI CQ
parent ff2b5e441c
commit ab9b5f3aa5
73 changed files with 15168 additions and 5830 deletions

View File

@@ -153,6 +153,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kF32;
return true;
}
if (ty->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kF16;
return true;
}
if (auto* vec = ty->As<sem::Vector>()) {
switch (vec->Width()) {
case 2:
@@ -168,6 +172,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F32;
return true;
}
if (vec->type()->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F16;
return true;
}
break;
case 3:
if (vec->type()->Is<sem::I32>()) {
@@ -182,6 +190,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F32;
return true;
}
if (vec->type()->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F16;
return true;
}
break;
case 4:
if (vec->type()->Is<sem::I32>()) {
@@ -196,6 +208,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F32;
return true;
}
if (vec->type()->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F16;
return true;
}
break;
}
return false;
@@ -776,6 +792,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kI32:
ss << "i32";
break;
case DataType::kF16:
ss << "f16";
break;
case DataType::kVec2U32:
ss << "vec2_u32";
break;
@@ -785,6 +804,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kVec2I32:
ss << "vec2_i32";
break;
case DataType::kVec2F16:
ss << "vec2_f16";
break;
case DataType::kVec3U32:
ss << "vec3_u32";
break;
@@ -794,6 +816,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kVec3I32:
ss << "vec3_i32";
break;
case DataType::kVec3F16:
ss << "vec3_f16";
break;
case DataType::kVec4U32:
ss << "vec4_u32";
break;
@@ -803,6 +828,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kVec4I32:
ss << "vec4_i32";
break;
case DataType::kVec4F16:
ss << "vec4_f16";
break;
}
return ss.str();
}

View File

@@ -60,15 +60,19 @@ class DecomposeMemoryAccess final : public Castable<DecomposeMemoryAccess, Trans
kU32,
kF32,
kI32,
kF16,
kVec2U32,
kVec2F32,
kVec2I32,
kVec2F16,
kVec3U32,
kVec3F32,
kVec3I32,
kVec3F16,
kVec4U32,
kVec4F32,
kVec4I32,
kVec4F16,
};
/// Constructor

File diff suppressed because it is too large Load Diff

View File

@@ -265,8 +265,8 @@ struct Std140::State {
};
/// @returns true if the given matrix needs decomposing to column vectors for std140 layout.
/// TODO(crbug.com/tint/1502): This may need adjusting for `f16` matrices.
static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() == 8; }
/// Std140 layout require matrix stride to be 16, otherwise decomposing is needed.
static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() != 16; }
/// ForkTypes walks the user-declared types in dependency order, forking structures that are
/// used as uniform buffers which (transitively) use matrices that need std140 decomposition to
@@ -474,7 +474,7 @@ struct Std140::State {
// natural size for the matrix. This extra padding needs to be
// applied to the last column vector.
attributes.Push(
b.MemberSize(AInt(size - mat->ColumnType()->Size() * (num_columns - 1))));
b.MemberSize(AInt(size - mat->ColumnType()->Align() * (num_columns - 1))));
}
// Build the member
@@ -645,7 +645,8 @@ struct Std140::State {
return "mat" + std::to_string(mat->columns()) + "x" + std::to_string(mat->rows()) +
"_" + ConvertSuffix(mat->type());
},
[&](const sem::F32*) { return "f32"; },
[&](const sem::F32*) { return "f32"; }, //
[&](const sem::F16*) { return "f16"; },
[&](Default) {
TINT_ICE(Transform, b.Diagnostics())
<< "unhandled type for conversion name: " << src->FriendlyName(ty);

View File

@@ -20,11 +20,12 @@
namespace tint::transform {
/// Std140 is a transform that forks types used in the uniform address space that contain
/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors. Types that transitively use these
/// forked types are also forked. `var<uniform>` variables will use these forked types, and
/// expressions loading from these variables will do appropriate conversions to the regular WGSL
/// types. As `matNx2<f32>` matrices are the only type that violate std140-layout, this
/// transformation is sufficient to have any WGSL structure be std140-layout conformant.
/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors, and `matNxM<f16>` matrices into
/// `N`x`vecM<f16>` column vectors. Types that transitively use these forked types are also forked.
/// `var<uniform>` variables will use these forked types, and expressions loading from these
/// variables will do appropriate conversions to the regular WGSL types. As `matNx2<f32>` and
/// `matNxM<f16>` matrices are the only type that violate std140-layout, this transformation is
/// sufficient to have any WGSL structure be std140-layout conformant.
///
/// @note This transform requires the PromoteSideEffectsToDecl transform to have been run first.
class Std140 final : public Castable<Std140, Transform> {

View File

@@ -2838,6 +2838,15 @@ INSTANTIATE_TEST_SUITE_P(,
{4, 2, MatrixType::f32},
{4, 3, MatrixType::f32},
{4, 4, MatrixType::f32},
{2, 2, MatrixType::f16},
{2, 3, MatrixType::f16},
{2, 4, MatrixType::f16},
{3, 2, MatrixType::f16},
{3, 3, MatrixType::f16},
{3, 4, MatrixType::f16},
{4, 2, MatrixType::f16},
{4, 3, MatrixType::f16},
{4, 4, MatrixType::f16},
}));
using Std140Test_MatrixArray = TransformTestWithParam<MatrixCase>;
@@ -4866,6 +4875,15 @@ INSTANTIATE_TEST_SUITE_P(,
{4, 2, MatrixType::f32},
{4, 3, MatrixType::f32},
{4, 4, MatrixType::f32},
{2, 2, MatrixType::f16},
{2, 3, MatrixType::f16},
{2, 4, MatrixType::f16},
{3, 2, MatrixType::f16},
{3, 3, MatrixType::f16},
{3, 4, MatrixType::f16},
{4, 2, MatrixType::f16},
{4, 3, MatrixType::f16},
{4, 4, MatrixType::f16},
}));
} // namespace

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff