Tint: Implement f16 in uniform and storage address space

This CL implements f16 in uniform and storage address space, allowing
using f16 types in uniform and storage buffers on all backends. Tint
unit tests and Dawn E2E tests are added to validate the f16 types work
as expected.

Bug: tint:1473, tint:1502
Change-Id: I15e3de1033d3727f2ea33f4657f682c5f13c2153
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106320
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
Zhaoming Jiang
2022-11-24 05:25:35 +00:00
committed by Dawn LUCI CQ
parent ff2b5e441c
commit ab9b5f3aa5
73 changed files with 15168 additions and 5830 deletions

View File

@@ -153,6 +153,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kF32;
return true;
}
if (ty->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kF16;
return true;
}
if (auto* vec = ty->As<sem::Vector>()) {
switch (vec->Width()) {
case 2:
@@ -168,6 +172,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F32;
return true;
}
if (vec->type()->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F16;
return true;
}
break;
case 3:
if (vec->type()->Is<sem::I32>()) {
@@ -182,6 +190,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F32;
return true;
}
if (vec->type()->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F16;
return true;
}
break;
case 4:
if (vec->type()->Is<sem::I32>()) {
@@ -196,6 +208,10 @@ bool IntrinsicDataTypeFor(const sem::Type* ty, DecomposeMemoryAccess::Intrinsic:
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F32;
return true;
}
if (vec->type()->Is<sem::F16>()) {
out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F16;
return true;
}
break;
}
return false;
@@ -776,6 +792,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kI32:
ss << "i32";
break;
case DataType::kF16:
ss << "f16";
break;
case DataType::kVec2U32:
ss << "vec2_u32";
break;
@@ -785,6 +804,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kVec2I32:
ss << "vec2_i32";
break;
case DataType::kVec2F16:
ss << "vec2_f16";
break;
case DataType::kVec3U32:
ss << "vec3_u32";
break;
@@ -794,6 +816,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kVec3I32:
ss << "vec3_i32";
break;
case DataType::kVec3F16:
ss << "vec3_f16";
break;
case DataType::kVec4U32:
ss << "vec4_u32";
break;
@@ -803,6 +828,9 @@ std::string DecomposeMemoryAccess::Intrinsic::InternalName() const {
case DataType::kVec4I32:
ss << "vec4_i32";
break;
case DataType::kVec4F16:
ss << "vec4_f16";
break;
}
return ss.str();
}

View File

@@ -60,15 +60,19 @@ class DecomposeMemoryAccess final : public Castable<DecomposeMemoryAccess, Trans
kU32,
kF32,
kI32,
kF16,
kVec2U32,
kVec2F32,
kVec2I32,
kVec2F16,
kVec3U32,
kVec3F32,
kVec3I32,
kVec3F16,
kVec4U32,
kVec4F32,
kVec4I32,
kVec4F16,
};
/// Constructor

File diff suppressed because it is too large Load Diff

View File

@@ -265,8 +265,8 @@ struct Std140::State {
};
/// @returns true if the given matrix needs decomposing to column vectors for std140 layout.
/// TODO(crbug.com/tint/1502): This may need adjusting for `f16` matrices.
static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() == 8; }
/// Std140 layout require matrix stride to be 16, otherwise decomposing is needed.
static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() != 16; }
/// ForkTypes walks the user-declared types in dependency order, forking structures that are
/// used as uniform buffers which (transitively) use matrices that need std140 decomposition to
@@ -474,7 +474,7 @@ struct Std140::State {
// natural size for the matrix. This extra padding needs to be
// applied to the last column vector.
attributes.Push(
b.MemberSize(AInt(size - mat->ColumnType()->Size() * (num_columns - 1))));
b.MemberSize(AInt(size - mat->ColumnType()->Align() * (num_columns - 1))));
}
// Build the member
@@ -645,7 +645,8 @@ struct Std140::State {
return "mat" + std::to_string(mat->columns()) + "x" + std::to_string(mat->rows()) +
"_" + ConvertSuffix(mat->type());
},
[&](const sem::F32*) { return "f32"; },
[&](const sem::F32*) { return "f32"; }, //
[&](const sem::F16*) { return "f16"; },
[&](Default) {
TINT_ICE(Transform, b.Diagnostics())
<< "unhandled type for conversion name: " << src->FriendlyName(ty);

View File

@@ -20,11 +20,12 @@
namespace tint::transform {
/// Std140 is a transform that forks types used in the uniform address space that contain
/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors. Types that transitively use these
/// forked types are also forked. `var<uniform>` variables will use these forked types, and
/// expressions loading from these variables will do appropriate conversions to the regular WGSL
/// types. As `matNx2<f32>` matrices are the only type that violate std140-layout, this
/// transformation is sufficient to have any WGSL structure be std140-layout conformant.
/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors, and `matNxM<f16>` matrices into
/// `N`x`vecM<f16>` column vectors. Types that transitively use these forked types are also forked.
/// `var<uniform>` variables will use these forked types, and expressions loading from these
/// variables will do appropriate conversions to the regular WGSL types. As `matNx2<f32>` and
/// `matNxM<f16>` matrices are the only type that violate std140-layout, this transformation is
/// sufficient to have any WGSL structure be std140-layout conformant.
///
/// @note This transform requires the PromoteSideEffectsToDecl transform to have been run first.
class Std140 final : public Castable<Std140, Transform> {

View File

@@ -2838,6 +2838,15 @@ INSTANTIATE_TEST_SUITE_P(,
{4, 2, MatrixType::f32},
{4, 3, MatrixType::f32},
{4, 4, MatrixType::f32},
{2, 2, MatrixType::f16},
{2, 3, MatrixType::f16},
{2, 4, MatrixType::f16},
{3, 2, MatrixType::f16},
{3, 3, MatrixType::f16},
{3, 4, MatrixType::f16},
{4, 2, MatrixType::f16},
{4, 3, MatrixType::f16},
{4, 4, MatrixType::f16},
}));
using Std140Test_MatrixArray = TransformTestWithParam<MatrixCase>;
@@ -4866,6 +4875,15 @@ INSTANTIATE_TEST_SUITE_P(,
{4, 2, MatrixType::f32},
{4, 3, MatrixType::f32},
{4, 4, MatrixType::f32},
{2, 2, MatrixType::f16},
{2, 3, MatrixType::f16},
{2, 4, MatrixType::f16},
{3, 2, MatrixType::f16},
{3, 3, MatrixType::f16},
{3, 4, MatrixType::f16},
{4, 2, MatrixType::f16},
{4, 3, MatrixType::f16},
{4, 4, MatrixType::f16},
}));
} // namespace

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff