tint: Implement const-eval of quantizeToF16

Bug: tint:1581
Change-Id: I5cf9806bde7875282d3b67731dbb88666523f598
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/108142
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Kokoro: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
2025-12-10 05:57:51 +00:00 · 2022-11-02 17:29:48 +00:00
parent 749abeaafb
commit c81f9dce07
26 changed files with 193 additions and 240 deletions
--- a/src/tint/intrinsics.def
+++ b/src/tint/intrinsics.def
@@ -514,8 +514,8 @@ fn pack4x8snorm(vec4<f32>) -> u32
 fn pack4x8unorm(vec4<f32>) -> u32
 fn pow<T: f32_f16>(T, T) -> T
 fn pow<N: num, T: f32_f16>(vec<N, T>, vec<N, T>) -> vec<N, T>
-fn quantizeToF16(f32) -> f32
-fn quantizeToF16<N: num>(vec<N, f32>) -> vec<N, f32>
+@const fn quantizeToF16(f32) -> f32
+@const fn quantizeToF16<N: num>(vec<N, f32>) -> vec<N, f32>
 fn radians<T: f32_f16>(T) -> T
 fn radians<N: num, T: f32_f16>(vec<N, T>) -> vec<N, T>
 fn reflect<N: num, T: f32_f16>(vec<N, T>, vec<N, T>) -> vec<N, T>
--- a/src/tint/resolver/const_eval.cc
+++ b/src/tint/resolver/const_eval.cc
@@ -1869,6 +1869,27 @@ ConstEval::Result ConstEval::step(const sem::Type* ty,
    return TransformElements(builder, ty, transform, args[0], args[1]);
 }

+ConstEval::Result ConstEval::quantizeToF16(const sem::Type* ty,
+                                           utils::VectorRef<const sem::Constant*> args,
+                                           const Source&) {
+    auto transform = [&](const sem::Constant* c) {
+        auto conv = CheckedConvert<f32>(f16(c->As<f32>()));
+        if (!conv) {
+            // https://www.w3.org/TR/WGSL/#quantizeToF16-builtin
+            // If e is outside the finite range of binary16, then the result is any value of type
+            // f32. Use the nearest finite f16 limit, built as an f32 like the in-range result.
+            switch (conv.Failure()) {
+                case ConversionFailure::kExceedsNegativeLimit:
+                    return CreateElement(builder, c->Type(), f32(f16::kLowestValue));
+                case ConversionFailure::kExceedsPositiveLimit:
+                    return CreateElement(builder, c->Type(), f32(f16::kHighestValue));
+            }
+        }
+        return CreateElement(builder, c->Type(), conv.Get());
+    };
+    return TransformElements(builder, ty, transform, args[0]);
+}
+
 ConstEval::Result ConstEval::Convert(const sem::Type* target_ty,
                                     const sem::Constant* value,
                                     const Source& source) {
--- a/src/tint/resolver/const_eval.h
+++ b/src/tint/resolver/const_eval.h
@@ -530,6 +530,15 @@ class ConstEval {
                utils::VectorRef<const sem::Constant*> args,
                const Source& source);

+    /// quantizeToF16 builtin: quantizes each f32 element to a value representable as f16
+    /// @param ty the expression type
+    /// @param args the input arguments (a single f32 scalar or vector of f32)
+    /// @param source the source location (not used by this builtin's evaluation)
+    /// @return the result value, or null if the value cannot be calculated
+    Result quantizeToF16(const sem::Type* ty,
+                         utils::VectorRef<const sem::Constant*> args,
+                         const Source& source);
+
  private:
    /// Adds the given error message to the diagnostics
    void AddError(const std::string& msg, const Source& source) const;
--- a/src/tint/resolver/const_eval_builtin_test.cc
+++ b/src/tint/resolver/const_eval_builtin_test.cc
@@ -842,5 +842,62 @@ INSTANTIATE_TEST_SUITE_P(  //
                                              StepCases<f32>(),
                                              StepCases<f16>()))));

+std::vector<Case> QuantizeToF16Cases() {
+    return {
+        C({0_f}, 0_f),    //
+        C({-0_f}, -0_f),  //
+        C({1_f}, 1_f),    //
+        C({-1_f}, -1_f),  //
+
+        //   0.00006106496 quantized to 0.000061035156 = 0x1p-14
+        C({0.00006106496_f}, 0.000061035156_f),    //
+        C({-0.00006106496_f}, -0.000061035156_f),  //
+
+        //   1.0004883 quantized to 1.0 = 0x1p0
+        C({1.0004883_f}, 1.0_f),    //
+        C({-1.0004883_f}, -1.0_f),  //
+
+        //   8196.0 quantized to 8192.0 = 0x1p13
+        C({8196_f}, 8192_f),    //
+        C({-8196_f}, -8192_f),  //
+
+        // Values in the subnormal f16 range that are expected to pass through unchanged
+        C({0x0.034p-14_f}, 0x0.034p-14_f),    //
+        C({-0x0.034p-14_f}, -0x0.034p-14_f),  //
+        C({0x0.068p-14_f}, 0x0.068p-14_f),    //
+        C({-0x0.068p-14_f}, -0x0.068p-14_f),  //
+
+        //   0x0.06b7p-14 quantized to 0x0.068p-14
+        C({0x0.06b7p-14_f}, 0x0.068p-14_f),    //
+        C({-0x0.06b7p-14_f}, -0x0.068p-14_f),  //
+
+        // Values outside the finite f16 range are expected to clamp to +/-65504
+        C({65504.003_f}, 65504_f),     //
+        C({-65504.003_f}, -65504_f),   //
+        C({0x1.234p56_f}, 65504_f),    //
+        C({-0x4.321p65_f}, -65504_f),  //
+
+        // Vector tests: each element is expected to be quantized independently
+        C({Vec(0_f, -0_f)}, Vec(0_f, -0_f)),  //
+        C({Vec(1_f, -1_f)}, Vec(1_f, -1_f)),  //
+
+        C({Vec(0.00006106496_f, -0.00006106496_f, 1.0004883_f, -1.0004883_f)},
+          Vec(0.000061035156_f, -0.000061035156_f, 1.0_f, -1.0_f)),
+
+        C({Vec(8196_f, 8192_f, 0x0.034p-14_f)}, Vec(8192_f, 8192_f, 0x0.034p-14_f)),
+
+        C({Vec(0x0.034p-14_f, -0x0.034p-14_f, 0x0.068p-14_f, -0x0.068p-14_f)},
+          Vec(0x0.034p-14_f, -0x0.034p-14_f, 0x0.068p-14_f, -0x0.068p-14_f)),
+
+        C({Vec(65504.003_f, 0x1.234p56_f)}, Vec(65504_f, 65504_f)),
+        C({Vec(-0x1.234p56_f, -65504.003_f)}, Vec(-65504_f, -65504_f)),
+    };
+}
+INSTANTIATE_TEST_SUITE_P(  //
+    QuantizeToF16,
+    ResolverConstEvalBuiltinTest,
+    testing::Combine(testing::Values(sem::BuiltinType::kQuantizeToF16),
+                     testing::ValuesIn(QuantizeToF16Cases())));
+
 }  // namespace
 }  // namespace tint::resolver
--- a/src/tint/resolver/intrinsic_table.inl
+++ b/src/tint/resolver/intrinsic_table.inl
@@ -12302,7 +12302,7 @@ constexpr OverloadInfo kOverloads[] = {
    /* parameters */ &kParameters[880],
    /* return matcher indices */ &kMatcherIndices[62],
    /* flags */ OverloadFlags(OverloadFlag::kIsBuiltin, OverloadFlag::kSupportsVertexPipeline, OverloadFlag::kSupportsFragmentPipeline, OverloadFlag::kSupportsComputePipeline),
-    /* const eval */ nullptr,
+    /* const eval */ &ConstEval::quantizeToF16,
  },
  {
    /* [331] */
@@ -12314,7 +12314,7 @@ constexpr OverloadInfo kOverloads[] = {
    /* parameters */ &kParameters[879],
    /* return matcher indices */ &kMatcherIndices[60],
    /* flags */ OverloadFlags(OverloadFlag::kIsBuiltin, OverloadFlag::kSupportsVertexPipeline, OverloadFlag::kSupportsFragmentPipeline, OverloadFlag::kSupportsComputePipeline),
-    /* const eval */ nullptr,
+    /* const eval */ &ConstEval::quantizeToF16,
  },
  {
    /* [332] */