From 0fb4e2c608f3b8235270d7286d703af152626e33 Mon Sep 17 00:00:00 2001 From: Zhaoming Jiang Date: Fri, 10 Jun 2022 18:18:35 +0000 Subject: [PATCH] tint: f16 literal in WGSL lexer and check subnormal f32/f16 hex literal This patch 1. Add F16 literal support in WGSL lexer and parser for both decimal and hex form. Also fix the f16::Quantize method to deal with subnormal cases correctly. 2. Fix exactly-representable check for hex f32 literal to deal with subnormal cases. 3. Implement and fix related unitests for f16 and f32. Bug: tint:1473, tint:1502 Change-Id: Ia4a7c9144ef9323fb23b2200a64e1ca8afb6c334 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/93100 Reviewed-by: David Neto Commit-Queue: Zhaoming Jiang Commit-Queue: David Neto --- src/tint/number.cc | 162 +++++++++- src/tint/number.h | 11 +- src/tint/number_test.cc | 63 +++- src/tint/reader/wgsl/lexer.cc | 114 ++++++- src/tint/reader/wgsl/lexer_test.cc | 48 ++- src/tint/reader/wgsl/parser_impl.cc | 4 + .../wgsl/parser_impl_const_literal_test.cc | 277 ++++++++++++++++-- src/tint/reader/wgsl/token.cc | 4 + src/tint/reader/wgsl/token.h | 2 + src/tint/resolver/resolver.cc | 16 +- src/tint/resolver/resolver_test.cc | 17 ++ 11 files changed, 664 insertions(+), 54 deletions(-) diff --git a/src/tint/number.cc b/src/tint/number.cc index 3ead5b041c..17b005b8e2 100644 --- a/src/tint/number.cc +++ b/src/tint/number.cc @@ -15,9 +15,12 @@ #include "src/tint/number.h" #include +#include #include #include +#include "src/tint/debug.h" + namespace tint { std::ostream& operator<<(std::ostream& out, ConversionFailure failure) { @@ -38,18 +41,165 @@ f16::type f16::Quantize(f16::type value) { return -std::numeric_limits::infinity(); } // Below value must be within the finite range of a f16. + // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes. + static_assert(std::is_same()); + const uint32_t sign_mask = 0x80000000u; // Mask for the sign bit + const uint32_t exponent_mask = 0x7f800000u; // Mask for 8 exponent bits + uint32_t u32; memcpy(&u32, &value, 4); - if ((u32 & 0x7fffffffu) == 0) { // ~sign + + if ((u32 & ~sign_mask) == 0) { return value; // +/- zero } - if ((u32 & 0x7f800000) == 0x7f800000) { // exponent all 1's + if ((u32 & exponent_mask) == exponent_mask) { // exponent all 1's return value; // inf or nan } - // f32 bits : 1 sign, 8 exponent, 23 mantissa - // f16 bits : 1 sign, 5 exponent, 10 mantissa - // Mask the value to preserve the sign, exponent and most-significant 10 mantissa bits. - u32 = u32 & 0xffffe000u; + + // We are now going to quantize a f32 number into subnormal f16 and store the result value back + // into a f32 variable. Notice that all subnormal f16 values are just normal f32 values. Below + // will show that we can do this quantization by just masking out 13 or more lowest mantissa + // bits of the original f32 number. + // + // Note: + // f32 has 1 sign bit, 8 exponent bits for biased exponent (i.e. unbiased exponent + 127), and + // 23 mantissa bits. Binary form: s_eeeeeeee_mmmmmmmmmmmmmmmmmmmmmmm + // f16 has 1 sign bit, 5 exponent bits for biased exponent (i.e. unbiased exponent + 15), and + // 10 mantissa bits. Binary form: s_eeeee_mmmmmmmmmm + // The largest finite f16 number has a biased exponent of 11110 in binary, or 30 decimal, and so + // a unbiased exponent of 30 - 15 = 15. + // The smallest finite f16 number has a biased exponent of 00001 in binary, or 1 decimal, and so + // a unbiased exponent of 1 - 15 = -14. + // + // We may follow the argument below: + // 1. All normal or subnormal f16 values, range from 0x1.p-24 to 0x1.ffcp15, are exactly + // representable by normal f32 number. + // 1.1. We can denote the set of all f32 number that are exact representation of finite f16 + // values by `R`. + // 1.2. We can do the quantization by mapping a normal f32 value v (in the f16 finite range) + // to a certain f32 number v' in the set R, which is the largest (by the meaning of absolute + // value) one among all values in R that are no larger than v. + // 2. We can decide whether a given normal f32 number v is in the set R, by looking at its + // mantissa bits and biased exponent `e`. Recall that biased exponent e is unbiased exponent + + // 127, and in the range of 1 to 254 for normal f32 number. + // 2.1. If e >= 143, i.e. abs(v) >= 2^16 > f16::kHighest = 0x1.ffcp15, v is larger than any + // finite f16 value and can not be in set R. + // 2.2. If 142 >= e >= 113, or f16::kHighest >= abs(v) >= f16::kSmallest = 2^-14, v falls in + // the range of normal f16 values. In this case, v is in the set R iff the lowest 13 mantissa + // bits are all 0. (See below for proof) + // 2.2.1. If we let v' be v with lowest 13 mantissa bits masked to 0, v' will be in set R + // and the largest one in set R that no larger than v. Such v' is the quantized value of v. + // 2.3. If 112 >= e >= 103, i.e. 2^-14 > abs(v) >= f16::kSmallestSubnormal = 2^-24, v falls in + // the range of subnormal f16 values. In this case, v is in the set R iff the lowest 126-e + // mantissa bits are all 0. Notice that 126-e is in range 14 to 23, inclusive. (See below for + // proof) + // 2.3.1. If we let v' be v with lowest 126-e mantissa bits masked to 0, v' will be in set R + // and the largest on in set R that no larger than v. Such v' is the quantized value of v. + // 2.4. If 2^-24 > abs(v) > 0, i.e. 103 > e, v is smaller than any finite f16 value and not + // equal to 0.0, thus can not be in set R. + // 2.5. If abs(v) = 0, v is in set R and is just +-0.0. + // + // Proof for 2.2: + // Any normal f16 number, in binary form, s_eeeee_mmmmmmmmmm, has value + // (s==0?1:-1)*(1+uint(mmmmm_mmmmm)*(2^-10))*2^(uint(eeeee)-15) + // in which unit(bbbbb) means interprete binary pattern "bbbbb" as unsigned binary number, + // and we have 1 <= uint(eeeee) <= 30. + // This value is equal to a normal f32 number with binary + // s_EEEEEEEE_mmmmmmmmmm0000000000000 + // where uint(EEEEEEEE) = uint(eeeee) + 112, so that unbiased exponent keep unchanged + // uint(EEEEEEEE) - 127 = uint(eeeee) - 15 + // and its value is + // (s==0?1:-1)*(1+uint(mmmmm_mmmmm_00000_00000_000)*(2^-23))*2^(uint(EEEEEEEE)-127) + // == (s==0?1:-1)*(1+uint(mmmmm_mmmmm)*(2^-10))*2^(uint(eeeee)-15) + // Notice that uint(EEEEEEEE) is in range [113, 142], showing that it is a normal f32 number. + // So we proof that any normal f16 number can be exactly representd by a normal f32 number + // with biased exponent in range [113,142] and the lowest 13 mantissa bits 0. + // On the other hand, since mantissa bits mmmmmmmmmm are arbitrary, the value of any f32 + // that has a biased exponent in range [113, 142] and lowest 13 mantissa bits zero is equal + // to a normal f16 value. Hence we proof 2.2. + // + // Proof for 2.3: + // Any subnormal f16 number has a binary form of s_00000_mmmmmmmmmm, and its value is + // (s==0?1:-1)*uint(mmmmmmmmmm)*(2^-10)*(2^-14) = (s==0?1:-1)*uint(mmmmmmmmmm)*(2^-24). + // We discuss on bit pattern of mantissa bits mmmmmmmmmm. + // Case 1: mantissa bits has no leading zero bit, s_00000_1mmmmmmmmm + // In this case the value is + // (s==0?1:-1)*uint(1mmmm_mmmmm)*(2^-10)*(2^-14) + // == (s==0?1:-1)*(uint(1_mmmmm_mmmm)*(2^-9))*(2^-15) + // == (s==0?1:-1)*(1+uint(mmmmm_mmmm)*(2^-9))*(2^-15) + // == (s==0?1:-1)*(1+uint(mmmmm_mmmm0_00000_00000_000)*(2^-23))*(2^-15) + // which is equal to the value of normal f32 number + // s_EEEEEEEE_mmmmm_mmmm0_00000_00000_000 + // where uint(EEEEEEEE) = -15 + 127 = 112. Hence we proof that any subnormal f16 number + // with no leading zero mantissa bit can be exactly represented by a f32 number with + // biased exponent 112 and the lowest 14 mantissa bits zero, and the value of any f32 + // number with biased exponent 112 and the lowest 14 mantissa bits zero are equal to a + // subnormal f16 number with no leading zero mantissa bit. + // Case 2: mantissa bits has 1 leading zero bit, s_00000_01mmmmmmmm + // In this case the value is + // (s==0?1:-1)*uint(01mmm_mmmmm)*(2^-10)*(2^-14) + // == (s==0?1:-1)*(uint(01_mmmmm_mmm)*(2^-8))*(2^-16) + // == (s==0?1:-1)*(1+uint(mmmmm_mmm)*(2^-8))*(2^-16) + // == (s==0?1:-1)*(1+uint(mmmmm_mmm00_00000_00000_000)*(2^-23))*(2^-16) + // which is equal to the value of normal f32 number + // s_EEEEEEEE_mmmmm_mmm00_00000_00000_000 + // where uint(EEEEEEEE) = -16 + 127 = 111. Hence we proof that any subnormal f16 number + // with 1 leading zero mantissa bit can be exactly represented by a f32 number with + // biased exponent 111 and the lowest 15 mantissa bits zero, and the value of any f32 + // number with biased exponent 111 and the lowest 15 mantissa bits zero are equal to a + // subnormal f16 number with 1 leading zero mantissa bit. + // Case 3 to case 8: ...... + // Case 9: mantissa bits has 8 leading zero bit, s_00000_000000001m + // In this case the value is + // (s==0?1:-1)*uint(00000_0001m)*(2^-10)*(2^-14) + // == (s==0?1:-1)*(uint(000000001_m)*(2^-1))*(2^-23) + // == (s==0?1:-1)*(1+uint(m)*(2^-1))*(2^-23) + // == (s==0?1:-1)*(1+uint(m0000_00000_00000_00000_000)*(2^-23))*(2^-23) + // which is equal to the value of normal f32 number + // s_EEEEEEEE_m0000_00000_00000_00000_000 + // where uint(EEEEEEEE) = -23 + 127 = 104. Hence we proof that any subnormal f16 number + // with 8 leading zero mantissa bit can be exactly represented by a f32 number with + // biased exponent 104 and the lowest 22 mantissa bits zero, and the value of any f32 + // number with biased exponent 104 and the lowest 22 mantissa bits zero are equal to a + // subnormal f16 number with 8 leading zero mantissa bit. + // Case 10: mantissa bits has 9 leading zero bit, s_00000_0000000001 + // In this case the value is just +-2^-24 = +-0x1.0p-24, + // the f32 number has biased exponent 103 and all 23 mantissa bits zero. + // Case 11: mantissa bits has 10 leading zero bit, s_00000_0000000000, just 0.0 + // Concluding all these case, we proof that any subnormal f16 number with N leading zero + // mantissa bit can be exactly represented by a f32 number with biased exponent 112-N and the + // lowest 14+N mantissa bits zero, and the value of any f32 number with biased exponent 112-N (= + // e) and the lowest 14+N (= 126-e) mantissa bits zero are equal to a subnormal f16 number with + // N leading zero mantissa bit. N is in range [0, 9], so the f32 number's biased exponent e is + // in range [103, 112], or unbiased exponent in [-24, -15]. + + float abs_value = std::fabs(value); + if (abs_value >= kSmallest) { + // Value falls in the normal f16 range, quantize it to a normal f16 value by masking out + // lowest 13 mantissa bits. + u32 = u32 & ~((1u << 13) - 1); + } else if (abs_value >= kSmallestSubnormal) { + // Value should be quantized to a subnormal f16 value. + + // Get the biased exponent `e` of f32 value, e.g. value 127 representing exponent 2^0. + uint32_t biased_exponent_original = (u32 & exponent_mask) >> 23; + // Since we ensure that kSmallest = 0x1f-14 > abs(value) >= kSmallestSubnormal = 0x1f-24, + // value will have a unbiased exponent in range -24 to -15 (inclusive), and the + // corresponding biased exponent in f32 is in range 103 to 112 (inclusive). + TINT_ASSERT(Semantic, + (103 <= biased_exponent_original) && (biased_exponent_original <= 112)); + + // As we have proved, masking out the lowest 126-e mantissa bits of input value will result + // in a valid subnormal f16 value, which is exactly the required quantization result. + uint32_t discard_bits = 126 - biased_exponent_original; // In range 14 to 23 (inclusive) + TINT_ASSERT(Semantic, (14 <= discard_bits) && (discard_bits <= 23)); + uint32_t discard_mask = (1u << discard_bits) - 1; + u32 = u32 & ~discard_mask; + } else { + // value is too small that it can't even be represented as subnormal f16 number. Quantize + // to zero. + return value > 0 ? 0.0 : -0.0; + } memcpy(&value, &u32, 4); return value; } diff --git a/src/tint/number.h b/src/tint/number.h index b4c5ca4177..7a0b13b6d6 100644 --- a/src/tint/number.h +++ b/src/tint/number.h @@ -86,6 +86,10 @@ struct Number { static constexpr type kSmallest = std::is_integral_v ? 0 : std::numeric_limits::min(); + /// Smallest positive subnormal value of this type, 0 for integral type. + static constexpr type kSmallestSubnormal = + std::is_integral_v ? 0 : std::numeric_limits::denorm_min(); + /// Constructor. The value is zero-initialized. Number() = default; @@ -201,7 +205,12 @@ struct Number { static constexpr type kLowest = -65504.0f; /// Smallest positive normal value of this type. - static constexpr type kSmallest = 0.00006103515625f; // 2⁻¹⁴ + /// binary16 0_00001_0000000000, value is 2⁻¹⁴. + static constexpr type kSmallest = 0x1p-14f; + + /// Smallest positive subnormal value of this type. + /// binary16 0_00000_0000000001, value is 2⁻¹⁴ * 2⁻¹⁰ = 2⁻²⁴. + static constexpr type kSmallestSubnormal = 0x1p-24f; /// Constructor. The value is zero-initialized. Number() = default; diff --git a/src/tint/number_test.cc b/src/tint/number_test.cc index 34b4d39624..52ba4aea3e 100644 --- a/src/tint/number_test.cc +++ b/src/tint/number_test.cc @@ -52,7 +52,7 @@ constexpr double kHighestF16NextULP = 0x1.ffc0000000001p+15; // Smallest positive normal float16 value. constexpr double kSmallestF16 = 0x1p-14; -// Highest subnormal value for a float32. +// Highest subnormal value for a float16. constexpr double kHighestF16Subnormal = 0x0.ffcp-14; constexpr double kLowestF32 = -kHighestF32; @@ -141,6 +141,67 @@ TEST(NumberTest, QuantizeF16) { EXPECT_EQ(f16(inf), inf); EXPECT_EQ(f16(-inf), -inf); EXPECT_TRUE(std::isnan(f16(nan))); + + // Test for subnormal quantization. + // The ULP is based on float rather than double or f16, since F16::Quantize take float as input. + constexpr float lowestPositiveNormalF16 = 0x1p-14; + constexpr float lowestPositiveNormalF16PlusULP = 0x1.000002p-14; + constexpr float lowestPositiveNormalF16MinusULP = 0x1.fffffep-15; + constexpr float highestPositiveSubnormalF16 = 0x0.ffcp-14; + constexpr float highestPositiveSubnormalF16PlusULP = 0x1.ff8002p-15; + constexpr float highestPositiveSubnormalF16MinusULP = 0x1.ff7ffep-15; + constexpr float lowestPositiveSubnormalF16 = 0x1.p-24; + constexpr float lowestPositiveSubnormalF16PlusULP = 0x1.000002p-24; + constexpr float lowestPositiveSubnormalF16MinusULP = 0x1.fffffep-25; + + constexpr float highestNegativeNormalF16 = -lowestPositiveNormalF16; + constexpr float highestNegativeNormalF16PlusULP = -lowestPositiveNormalF16MinusULP; + constexpr float highestNegativeNormalF16MinusULP = -lowestPositiveNormalF16PlusULP; + constexpr float lowestNegativeSubnormalF16 = -highestPositiveSubnormalF16; + constexpr float lowestNegativeSubnormalF16PlusULP = -highestPositiveSubnormalF16MinusULP; + constexpr float lowestNegativeSubnormalF16MinusULP = -highestPositiveSubnormalF16PlusULP; + constexpr float highestNegativeSubnormalF16 = -lowestPositiveSubnormalF16; + constexpr float highestNegativeSubnormalF16PlusULP = -lowestPositiveSubnormalF16MinusULP; + constexpr float highestNegativeSubnormalF16MinusULP = -lowestPositiveSubnormalF16PlusULP; + + // Value larger than or equal to lowest positive normal f16 will be quantized to normal f16. + EXPECT_EQ(f16(lowestPositiveNormalF16PlusULP), lowestPositiveNormalF16); + EXPECT_EQ(f16(lowestPositiveNormalF16), lowestPositiveNormalF16); + // Positive value smaller than lowest positive normal f16 but not smaller than lowest positive + // subnormal f16 will be quantized to subnormal f16 or zero. + EXPECT_EQ(f16(lowestPositiveNormalF16MinusULP), highestPositiveSubnormalF16); + EXPECT_EQ(f16(highestPositiveSubnormalF16PlusULP), highestPositiveSubnormalF16); + EXPECT_EQ(f16(highestPositiveSubnormalF16), highestPositiveSubnormalF16); + EXPECT_EQ(f16(highestPositiveSubnormalF16MinusULP), 0x0.ff8p-14); + EXPECT_EQ(f16(lowestPositiveSubnormalF16PlusULP), lowestPositiveSubnormalF16); + EXPECT_EQ(f16(lowestPositiveSubnormalF16), lowestPositiveSubnormalF16); + // Positive value smaller than lowest positive subnormal f16 will be quantized to zero. + EXPECT_EQ(f16(lowestPositiveSubnormalF16MinusULP), 0.0); + // Test the mantissa discarding, the least significant mantissa bit is 0x1p-24 = 0x0.004p-14. + EXPECT_EQ(f16(0x0.064p-14), 0x0.064p-14); + EXPECT_EQ(f16(0x0.067fecp-14), 0x0.064p-14); + EXPECT_EQ(f16(0x0.063ffep-14), 0x0.060p-14); + EXPECT_EQ(f16(0x0.008p-14), 0x0.008p-14); + EXPECT_EQ(f16(0x0.00bffep-14), 0x0.008p-14); + EXPECT_EQ(f16(0x0.007ffep-14), 0x0.004p-14); + + // Vice versa for negative cases. + EXPECT_EQ(f16(highestNegativeNormalF16MinusULP), highestNegativeNormalF16); + EXPECT_EQ(f16(highestNegativeNormalF16), highestNegativeNormalF16); + EXPECT_EQ(f16(highestNegativeNormalF16PlusULP), lowestNegativeSubnormalF16); + EXPECT_EQ(f16(lowestNegativeSubnormalF16MinusULP), lowestNegativeSubnormalF16); + EXPECT_EQ(f16(lowestNegativeSubnormalF16), lowestNegativeSubnormalF16); + EXPECT_EQ(f16(lowestNegativeSubnormalF16PlusULP), -0x0.ff8p-14); + EXPECT_EQ(f16(highestNegativeSubnormalF16MinusULP), highestNegativeSubnormalF16); + EXPECT_EQ(f16(highestNegativeSubnormalF16), highestNegativeSubnormalF16); + EXPECT_EQ(f16(highestNegativeSubnormalF16PlusULP), 0.0); + // Test the mantissa discarding. + EXPECT_EQ(f16(-0x0.064p-14), -0x0.064p-14); + EXPECT_EQ(f16(-0x0.067fecp-14), -0x0.064p-14); + EXPECT_EQ(f16(-0x0.063ffep-14), -0x0.060p-14); + EXPECT_EQ(f16(-0x0.008p-14), -0x0.008p-14); + EXPECT_EQ(f16(-0x0.00bffep-14), -0x0.008p-14); + EXPECT_EQ(f16(-0x0.007ffep-14), -0x0.004p-14); } using BinaryCheckedCase = std::tuple, AInt, AInt>; diff --git a/src/tint/reader/wgsl/lexer.cc b/src/tint/reader/wgsl/lexer.cc index 58e3c85cc5..c1dceba50f 100644 --- a/src/tint/reader/wgsl/lexer.cc +++ b/src/tint/reader/wgsl/lexer.cc @@ -343,12 +343,16 @@ Token Lexer::try_float() { } bool has_f_suffix = false; + bool has_h_suffix = false; if (end < length() && matches(end, "f")) { end++; has_f_suffix = true; + } else if (end < length() && matches(end, "h")) { + end++; + has_h_suffix = true; } - if (!has_point && !has_exponent && !has_f_suffix) { + if (!has_point && !has_exponent && !has_f_suffix && !has_h_suffix) { // If it only has digits then it's an integer. return {}; } @@ -369,6 +373,14 @@ Token Lexer::try_float() { } } + if (has_h_suffix) { + if (auto f = CheckedConvert(AFloat(value))) { + return {Token::Type::kFloatLiteral_H, source, static_cast(f.Get())}; + } else { + return {Token::Type::kError, source, "value cannot be represented as 'f16'"}; + } + } + if (value == HUGE_VAL || -value == HUGE_VAL) { return {Token::Type::kError, source, "value cannot be represented as 'abstract-float'"}; } else { @@ -547,6 +559,7 @@ Token Lexer::try_hex_float() { int64_t exponent_sign = 1; // If the 'p' part is present, the rest of the exponent must exist. bool has_f_suffix = false; + bool has_h_suffix = false; if (has_exponent) { // Parse the rest of the exponent. // (+|-)? @@ -574,12 +587,15 @@ Token Lexer::try_hex_float() { end++; } - // Parse optional 'f' suffix. For a hex float, it can only exist + // Parse optional 'f' or 'h' suffix. For a hex float, it can only exist // when the exponent is present. Otherwise it will look like // one of the mantissa digits. if (end < length() && matches(end, "f")) { has_f_suffix = true; end++; + } else if (end < length() && matches(end, "h")) { + has_h_suffix = true; + end++; } if (!has_exponent_digits) { @@ -648,7 +664,7 @@ Token Lexer::try_hex_float() { } if (signed_exponent >= kExponentMax || (signed_exponent == kExponentMax && mantissa != 0)) { - std::string type = has_f_suffix ? "f32" : "abstract-float"; + std::string type = has_f_suffix ? "f32" : (has_h_suffix ? "f16" : "abstract-float"); return {Token::Type::kError, source, "value cannot be represented as '" + type + "'"}; } @@ -667,14 +683,98 @@ Token Lexer::try_hex_float() { result_f64 > static_cast(f32::kHighest)) { return {Token::Type::kError, source, "value cannot be represented as 'f32'"}; } - // Check the value can be exactly represented (low 29 mantissa bits must be 0) - if (result_u64 & 0x1fffffff) { + // Check the value can be exactly represented, i.e. only high 23 mantissa bits are valid for + // normal f32 values, and less for subnormal f32 values. The rest low mantissa bits must be + // 0. + int valid_mantissa_bits = 0; + double abs_result_f64 = std::fabs(result_f64); + if (abs_result_f64 >= static_cast(f32::kSmallest)) { + // The result shall be a normal f32 value. + valid_mantissa_bits = 23; + } else if (abs_result_f64 >= static_cast(f32::kSmallestSubnormal)) { + // The result shall be a subnormal f32 value, represented as double. + // The smallest positive normal f32 is f32::kSmallest = 2^-126 = 0x1.0p-126, and the + // smallest positive subnormal f32 is f32::kSmallestSubnormal = 2^-149. Thus, the + // value v in range 2^-126 > v >= 2^-149 must be represented as a subnormal f32 + // number, but is still normal double (f64) number, and has a exponent in range -127 + // to -149, inclusive. + // A value v, if 2^-126 > v >= 2^-127, its binary32 representation will have binary form + // s_00000000_1xxxxxxxxxxxxxxxxxxxxxx, having mantissa of 1 leading 1 bit and 22 + // arbitrary bits. Since this value is represented as normal double number, the + // leading 1 bit is omitted, only the highest 22 mantissia bits can be arbitrary, and + // the rest lowest 40 mantissa bits of f64 number must be zero. + // 2^-127 > v >= 2^-128, binary32 s_00000000_01xxxxxxxxxxxxxxxxxxxxx, having mantissa of + // 1 leading 0 bit, 1 leading 1 bit, and 21 arbitrary bits. The f64 representation + // omits the leading 0 and 1 bits, and only the highest 21 mantissia bits can be + // arbitrary. + // 2^-128 > v >= 2^-129, binary32 s_00000000_001xxxxxxxxxxxxxxxxxxxx, 20 arbitrary bits. + // ... + // 2^-147 > v >= 2^-148, binary32 s_00000000_0000000000000000000001x, 1 arbitrary bit. + // 2^-148 > v >= 2^-149, binary32 s_00000000_00000000000000000000001, 0 arbitrary bit. + int unbiased_exponent = signed_exponent - kExponentBias; + TINT_ASSERT(Reader, (unbiased_exponent <= -127) && (unbiased_exponent >= -149)); + valid_mantissa_bits = unbiased_exponent + 149; // 0 for -149, and 22 for -127 + } else if (abs_result_f64 != 0.0) { + // The result is smaller than the smallest subnormal f32 value, but not equal to zero. + // Such value will never be exactly represented by f32. return {Token::Type::kError, source, "value cannot be exactly represented as 'f32'"}; } + // Check the low 52-valid_mantissa_bits mantissa bits must be 0. + TINT_ASSERT(Reader, (0 <= valid_mantissa_bits) && (valid_mantissa_bits <= 23)); + if (result_u64 & ((uint64_t(1) << (52 - valid_mantissa_bits)) - 1)) { + return {Token::Type::kError, source, "value cannot be exactly represented as 'f32'"}; + } + return {Token::Type::kFloatLiteral_F, source, result_f64}; + } else if (has_h_suffix) { + // Check value fits in f16 + if (result_f64 < static_cast(f16::kLowest) || + result_f64 > static_cast(f16::kHighest)) { + return {Token::Type::kError, source, "value cannot be represented as 'f16'"}; + } + // Check the value can be exactly represented, i.e. only high 10 mantissa bits are valid for + // normal f16 values, and less for subnormal f16 values. The rest low mantissa bits must be + // 0. + int valid_mantissa_bits = 0; + double abs_result_f64 = std::fabs(result_f64); + if (abs_result_f64 >= static_cast(f16::kSmallest)) { + // The result shall be a normal f16 value. + valid_mantissa_bits = 10; + } else if (abs_result_f64 >= static_cast(f16::kSmallestSubnormal)) { + // The result shall be a subnormal f16 value, represented as double. + // The smallest positive normal f16 is f16::kSmallest = 2^-14 = 0x1.0p-14, and the + // smallest positive subnormal f16 is f16::kSmallestSubnormal = 2^-24. Thus, the value + // v in range 2^-14 > v >= 2^-24 must be represented as a subnormal f16 number, but + // is still normal double (f64) number, and has a exponent in range -15 to -24, + // inclusive. + // A value v, if 2^-14 > v >= 2^-15, its binary16 representation will have binary form + // s_00000_1xxxxxxxxx, having mantissa of 1 leading 1 bit and 9 arbitrary bits. Since + // this value is represented as normal double number, the leading 1 bit is omitted, + // only the highest 9 mantissia bits can be arbitrary, and the rest lowest 43 mantissa + // bits of f64 number must be zero. + // 2^-15 > v >= 2^-16, binary16 s_00000_01xxxxxxxx, having mantissa of 1 leading 0 bit, + // 1 leading 1 bit, and 8 arbitrary bits. The f64 representation omits the leading 0 + // and 1 bits, and only the highest 8 mantissia bits can be arbitrary. + // 2^-16 > v >= 2^-17, binary16 s_00000_001xxxxxxx, 7 arbitrary bits. + // ... + // 2^-22 > v >= 2^-23, binary16 s_00000_000000001x, 1 arbitrary bits. + // 2^-23 > v >= 2^-24, binary16 s_00000_0000000001, 0 arbitrary bits. + int unbiased_exponent = signed_exponent - kExponentBias; + TINT_ASSERT(Reader, (unbiased_exponent <= -15) && (unbiased_exponent >= -24)); + valid_mantissa_bits = unbiased_exponent + 24; // 0 for -24, and 9 for -15 + } else if (abs_result_f64 != 0.0) { + // The result is smaller than the smallest subnormal f16 value, but not equal to zero. + // Such value will never be exactly represented by f16. + return {Token::Type::kError, source, "value cannot be exactly represented as 'f16'"}; + } + // Check the low 52-valid_mantissa_bits mantissa bits must be 0. + TINT_ASSERT(Reader, (0 <= valid_mantissa_bits) && (valid_mantissa_bits <= 10)); + if (result_u64 & ((uint64_t(1) << (52 - valid_mantissa_bits)) - 1)) { + return {Token::Type::kError, source, "value cannot be exactly represented as 'f16'"}; + } + return {Token::Type::kFloatLiteral_H, source, result_f64}; } - return {has_f_suffix ? Token::Type::kFloatLiteral_F : Token::Type::kFloatLiteral, source, - result_f64}; + return {Token::Type::kFloatLiteral, source, result_f64}; } Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32_t base) { diff --git a/src/tint/reader/wgsl/lexer_test.cc b/src/tint/reader/wgsl/lexer_test.cc index 16ae46d430..d8decba4ff 100644 --- a/src/tint/reader/wgsl/lexer_test.cc +++ b/src/tint/reader/wgsl/lexer_test.cc @@ -19,6 +19,7 @@ #include #include "gtest/gtest.h" +#include "src/tint/number.h" namespace tint::reader::wgsl { namespace { @@ -320,6 +321,8 @@ TEST_P(FloatTest, Parse) { auto t = l.next(); if (std::string(params.input).back() == 'f') { EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral_F)); + } else if (std::string(params.input).back() == 'h') { + EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral_H)); } else { EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral)); } @@ -340,6 +343,11 @@ INSTANTIATE_TEST_SUITE_P(LexerTest, FloatData{"1f", 1.0}, FloatData{"-0f", 0.0}, FloatData{"-1f", -1.0}, + // No decimal, with 'h' suffix + FloatData{"0h", 0.0}, + FloatData{"1h", 1.0}, + FloatData{"-0h", 0.0}, + FloatData{"-1h", -1.0}, // Zero, with decimal. FloatData{"0.0", 0.0}, @@ -354,7 +362,14 @@ INSTANTIATE_TEST_SUITE_P(LexerTest, FloatData{".0f", 0.0}, FloatData{"-0.0f", 0.0}, FloatData{"-0.f", 0.0}, - FloatData{"-.0", 0.0}, + FloatData{"-.0f", 0.0}, + // Zero, with decimal and 'h' suffix + FloatData{"0.0h", 0.0}, + FloatData{"0.h", 0.0}, + FloatData{".0h", 0.0}, + FloatData{"-0.0h", 0.0}, + FloatData{"-0.h", 0.0}, + FloatData{"-.0h", 0.0}, // Non-zero with decimal FloatData{"5.7", 5.7}, @@ -370,6 +385,13 @@ INSTANTIATE_TEST_SUITE_P(LexerTest, FloatData{"-5.7f", static_cast(-5.7f)}, FloatData{"-5.f", static_cast(-5.f)}, FloatData{"-.7f", static_cast(-.7f)}, + // Non-zero with decimal and 'h' suffix + FloatData{"5.7h", static_cast(f16::Quantize(5.7f))}, + FloatData{"5.h", static_cast(f16::Quantize(5.f))}, + FloatData{".7h", static_cast(f16::Quantize(.7f))}, + FloatData{"-5.7h", static_cast(f16::Quantize(-5.7f))}, + FloatData{"-5.h", static_cast(f16::Quantize(-5.f))}, + FloatData{"-.7h", static_cast(f16::Quantize(-.7f))}, // No decimal, with exponent FloatData{"1e5", 1e5}, @@ -381,6 +403,11 @@ INSTANTIATE_TEST_SUITE_P(LexerTest, FloatData{"1E5f", static_cast(1e5f)}, FloatData{"1e-5f", static_cast(1e-5f)}, FloatData{"1E-5f", static_cast(1e-5f)}, + // No decimal, with exponent and 'h' suffix + FloatData{"6e4h", static_cast(f16::Quantize(6e4f))}, + FloatData{"6E4h", static_cast(f16::Quantize(6e4f))}, + FloatData{"1e-5h", static_cast(f16::Quantize(1e-5f))}, + FloatData{"1E-5h", static_cast(f16::Quantize(1e-5f))}, // With decimal and exponents FloatData{"0.2e+12", 0.2e12}, FloatData{"1.2e-5", 1.2e-5}, @@ -393,9 +420,16 @@ INSTANTIATE_TEST_SUITE_P(LexerTest, FloatData{"2.57e23f", static_cast(2.57e23f)}, FloatData{"2.5e+0f", static_cast(2.5f)}, FloatData{"2.5e-0f", static_cast(2.5f)}, + // With decimal and exponents and 'h' suffix + FloatData{"0.2e+5h", static_cast(f16::Quantize(0.2e5f))}, + FloatData{"1.2e-5h", static_cast(f16::Quantize(1.2e-5f))}, + FloatData{"6.55e4h", static_cast(f16::Quantize(6.55e4f))}, + FloatData{"2.5e+0h", static_cast(f16::Quantize(2.5f))}, + FloatData{"2.5e-0h", static_cast(f16::Quantize(2.5f))}, // Quantization - FloatData{"3.141592653589793", 3.141592653589793}, // no quantization - FloatData{"3.141592653589793f", 3.1415927410125732} // f32 quantized + FloatData{"3.141592653589793", 3.141592653589793}, // no quantization + FloatData{"3.141592653589793f", 3.1415927410125732}, // f32 quantized + FloatData{"3.141592653589793h", 3.140625} // f16 quantized )); using FloatTest_Invalid = testing::TestWithParam; @@ -404,7 +438,8 @@ TEST_P(FloatTest_Invalid, Handles) { Lexer l(&file); auto t = l.next(); - EXPECT_FALSE(t.Is(Token::Type::kFloatLiteral)); + EXPECT_FALSE(t.Is(Token::Type::kFloatLiteral) || t.Is(Token::Type::kFloatLiteral_F) || + t.Is(Token::Type::kFloatLiteral_H)); } INSTANTIATE_TEST_SUITE_P(LexerTest, FloatTest_Invalid, @@ -423,9 +458,8 @@ INSTANTIATE_TEST_SUITE_P(LexerTest, // Overflow "2.5e+256f", "-2.5e+127f", - // Magnitude smaller than smallest positive f32. - "2.5e-300f", - "-2.5e-300f", + "6.5520e+4h", + "-6.5e+12h", // Decimal exponent must immediately // follow the 'e'. "2.5e 12", diff --git a/src/tint/reader/wgsl/parser_impl.cc b/src/tint/reader/wgsl/parser_impl.cc index 7fd1a810e3..ddd9e96f05 100644 --- a/src/tint/reader/wgsl/parser_impl.cc +++ b/src/tint/reader/wgsl/parser_impl.cc @@ -3018,6 +3018,10 @@ Maybe ParserImpl::const_literal() { return create(t.source(), t.to_f64(), ast::FloatLiteralExpression::Suffix::kF); } + if (match(Token::Type::kFloatLiteral_H)) { + return create(t.source(), t.to_f64(), + ast::FloatLiteralExpression::Suffix::kH); + } if (match(Token::Type::kTrue)) { return create(t.source(), true); } diff --git a/src/tint/reader/wgsl/parser_impl_const_literal_test.cc b/src/tint/reader/wgsl/parser_impl_const_literal_test.cc index b07ec1b66a..16861e407e 100644 --- a/src/tint/reader/wgsl/parser_impl_const_literal_test.cc +++ b/src/tint/reader/wgsl/parser_impl_const_literal_test.cc @@ -151,13 +151,18 @@ TEST_P(ParserImplFloatLiteralTest, Parse) { ASSERT_NE(c.value, nullptr); auto* literal = c->As(); ASSERT_NE(literal, nullptr); - EXPECT_DOUBLE_EQ(literal->value, params.expected) + // Use EXPECT_EQ instead of EXPECT_DOUBLE_EQ here, because EXPECT_DOUBLE_EQ use AlmostEquals(), + // which allows an error up to 4 ULPs. + EXPECT_EQ(literal->value, params.expected) << "\n" << "got: " << std::hexfloat << literal->value << "\n" << "expected: " << std::hexfloat << params.expected; if (params.input.back() == 'f') { EXPECT_EQ(c->As()->suffix, ast::FloatLiteralExpression::Suffix::kF); + } else if (params.input.back() == 'h') { + EXPECT_EQ(c->As()->suffix, + ast::FloatLiteralExpression::Suffix::kH); } else { EXPECT_EQ(c->As()->suffix, ast::FloatLiteralExpression::Suffix::kNone); @@ -181,6 +186,7 @@ INSTANTIATE_TEST_SUITE_P(ParserImplFloatLiteralTest_Float, {"234.e12", 234.e12}, {"234.e12f", static_cast(234.e12f)}, + {"234.e2h", static_cast(f16::Quantize(234.e2))}, // Tiny cases {"1e-5000", 0.0}, @@ -189,6 +195,12 @@ INSTANTIATE_TEST_SUITE_P(ParserImplFloatLiteralTest_Float, {"-1e-5000f", 0.0}, {"1e-50f", 0.0}, {"-1e-50f", 0.0}, + {"1e-5000h", 0.0}, + {"-1e-5000h", 0.0}, + {"1e-50h", 0.0}, + {"-1e-50h", 0.0}, + {"1e-8h", 0.0}, // The smallest positive subnormal f16 is 5.96e-8 + {"-1e-8h", 0.0}, // Nearly overflow {"1.e308", 1.e308}, @@ -209,6 +221,16 @@ INSTANTIATE_TEST_SUITE_P(ParserImplFloatLiteralTest_Float, {"-3.5e37f", static_cast(-3.5e37f)}, {"3.403e37f", static_cast(3.403e37f)}, {"-3.403e37f", static_cast(-3.403e37f)}, + + // Nearly overflow + {"6e4h", 6e4}, + {"-6e4h", -6e4}, + {"8.0e3h", 8.0e3}, + {"-8.0e3h", -8.0e3}, + {"3.5e3h", 3.5e3}, + {"-3.5e3h", -3.5e3}, + {"3.403e3h", 3.402e3}, // Quantized + {"-3.403e3h", -3.402e3}, // Quantized })); const double NegInf = MakeDouble(1, 0x7FF, 0); @@ -229,6 +251,10 @@ FloatLiteralTestCaseList HexFloatCases() { {"-0x1p-1", -0x1p-1}, {"-0x1p-2", -0x1p-2}, {"-0x1.8p-1", -0x1.8p-1}, + {"0x0.4p+1", 0x0.4p+1}, + {"0x0.02p+3", 0x0.02p+3}, + {"0x4.4p+1", 0x4.4p+1}, + {"0x8c.02p+3", 0x8c.02p+3}, // Large numbers {"0x1p+9", 0x1p+9}, @@ -257,6 +283,11 @@ FloatLiteralTestCaseList HexFloatCases() { {"-0x1p-124f", -0x1p-124}, {"-0x1p-125f", -0x1p-125}, + {"0x1p-12h", 0x1p-12}, + {"0x1p-13h", 0x1p-13}, + {"-0x1p-12h", -0x1p-12}, + {"-0x1p-13h", -0x1p-13}, + // Lowest non-denorm {"0x1p-1022", 0x1p-1022}, {"-0x1p-1022", -0x1p-1022}, @@ -264,9 +295,14 @@ FloatLiteralTestCaseList HexFloatCases() { {"0x1p-126f", 0x1p-126}, {"-0x1p-126f", -0x1p-126}, + {"0x1p-14h", 0x1p-14}, + {"-0x1p-14h", -0x1p-14}, + // Denormalized values {"0x1p-1023", 0x1p-1023}, + {"0x0.8p-1022", 0x0.8p-1022}, {"0x1p-1024", 0x1p-1024}, + {"0x0.2p-1021", 0x0.2p-1021}, {"0x1p-1025", 0x1p-1025}, {"0x1p-1026", 0x1p-1026}, {"-0x1p-1023", -0x1p-1023}, @@ -277,7 +313,9 @@ FloatLiteralTestCaseList HexFloatCases() { {"0x1.8p-1024", 0x1.8p-1024}, {"0x1p-127f", 0x1p-127}, + {"0x0.8p-126f", 0x0.8p-126}, {"0x1p-128f", 0x1p-128}, + {"0x0.2p-125f", 0x0.2p-125}, {"0x1p-129f", 0x1p-129}, {"0x1p-130f", 0x1p-130}, {"-0x1p-127f", -0x1p-127}, @@ -287,13 +325,28 @@ FloatLiteralTestCaseList HexFloatCases() { {"0x1.8p-127f", 0x1.8p-127}, {"0x1.8p-128f", 0x1.8p-128}, + {"0x1p-15h", 0x1p-15}, + {"0x0.8p-14h", 0x0.8p-14}, + {"0x1p-16h", 0x1p-16}, + {"0x0.2p-13h", 0x0.2p-13}, + {"0x1p-17h", 0x1p-17}, + {"0x1p-18h", 0x1p-18}, + {"-0x1p-15h", -0x1p-15}, + {"-0x1p-16h", -0x1p-16}, + {"-0x1p-17h", -0x1p-17}, + {"-0x1p-18h", -0x1p-18}, + {"0x1.8p-15h", 0x1.8p-15}, + {"0x1.8p-16h", 0x1.8p-16}, + // F64 extremities - {"0x1p-1074", 0x1p-1074}, // +SmallestDenormal - {"0x1p-1073", 0x1p-1073}, // +BiggerDenormal - {"0x1.ffffffffffffp-1027", 0x1.ffffffffffffp-1027}, // +LargestDenormal - {"-0x1p-1074", -0x1p-1074}, // -SmallestDenormal - {"-0x1p-1073", -0x1p-1073}, // -BiggerDenormal - {"-0x1.ffffffffffffp-1027", -0x1.ffffffffffffp-1027}, // -LargestDenormal + {"0x1p-1074", 0x1p-1074}, // +SmallestDenormal + {"0x1p-1073", 0x1p-1073}, // +BiggerDenormal + {"0x1.ffffffffffffep-1023", 0x1.ffffffffffffep-1023}, // +LargestDenormal + {"0x0.fffffffffffffp-1022", 0x0.fffffffffffffp-1022}, // +LargestDenormal + {"-0x1p-1074", -0x1p-1074}, // -SmallestDenormal + {"-0x1p-1073", -0x1p-1073}, // -BiggerDenormal + {"-0x1.ffffffffffffep-1023", -0x1.ffffffffffffep-1023}, // -LargestDenormal + {"-0x0.fffffffffffffp-1022", -0x0.fffffffffffffp-1022}, // -LargestDenormal {"0x0.cafebeeff000dp-1022", 0x0.cafebeeff000dp-1022}, // +Subnormal {"-0x0.cafebeeff000dp-1022", -0x0.cafebeeff000dp-1022}, // -Subnormal @@ -301,21 +354,29 @@ FloatLiteralTestCaseList HexFloatCases() { {"-0x1.2bfaf8p-1052", -0x1.2bfaf8p-1052}, // +Subnormal {"0x1.55554p-1055", 0x1.55554p-1055}, // +Subnormal {"-0x1.55554p-1055", -0x1.55554p-1055}, // -Subnormal + {"0x1.fffffffffffp-1027", 0x1.fffffffffffp-1027}, // +Subnormal, = 0x0.0fffffffffff8p-1022 + {"-0x1.fffffffffffp-1027", -0x1.fffffffffffp-1027}, // -Subnormal // F32 extremities - {"0x1p-149", 0x1p-149}, // +SmallestDenormal - {"0x1p-148", 0x1p-148}, // +BiggerDenormal - {"0x1.fffffcp-127", 0x1.fffffcp-127}, // +LargestDenormal - {"-0x1p-149", -0x1p-149}, // -SmallestDenormal - {"-0x1p-148", -0x1p-148}, // -BiggerDenormal - {"-0x1.fffffcp-127", -0x1.fffffcp-127}, // -LargestDenormal + {"0x1p-149f", 0x1p-149}, // +SmallestDenormal + {"0x1p-148f", 0x1p-148}, // +BiggerDenormal + {"0x1.fffffcp-127f", 0x1.fffffcp-127}, // +LargestDenormal + {"0x0.fffffep-126f", 0x0.fffffep-126}, // +LargestDenormal + {"0x1.0p-126f", 0x1.0p-126}, // +SmallestNormal + {"0x8.0p-129f", 0x8.0p-129}, // +SmallestNormal + {"-0x1p-149f", -0x1p-149}, // -SmallestDenormal + {"-0x1p-148f", -0x1p-148}, // -BiggerDenormal + {"-0x1.fffffcp-127f", -0x1.fffffcp-127}, // -LargestDenormal + {"-0x0.fffffep-126f", -0x0.fffffep-126}, // -LargestDenormal + {"-0x1.0p-126f", -0x1.0p-126}, // -SmallestNormal + {"-0x8.0p-129f", -0x8.0p-129}, // -SmallestNormal - {"0x0.cafebp-129", 0x0.cafebp-129}, // +Subnormal - {"-0x0.cafebp-129", -0x0.cafebp-129}, // -Subnormal - {"0x1.2bfaf8p-127", 0x1.2bfaf8p-127}, // +Subnormal - {"-0x1.2bfaf8p-127", -0x1.2bfaf8p-127}, // -Subnormal - {"0x1.55554p-130", 0x1.55554p-130}, // +Subnormal - {"-0x1.55554p-130", -0x1.55554p-130}, // -Subnormal + {"0x0.cafebp-129f", 0x0.cafebp-129}, // +Subnormal + {"-0x0.cafebp-129f", -0x0.cafebp-129}, // -Subnormal + {"0x1.2bfaf8p-127f", 0x1.2bfaf8p-127}, // +Subnormal + {"-0x1.2bfaf8p-127f", -0x1.2bfaf8p-127}, // -Subnormal + {"0x1.55554p-130f", 0x1.55554p-130}, // +Subnormal + {"-0x1.55554p-130f", -0x1.55554p-130}, // -Subnormal // F32 exactly representable {"0x1.000002p+0f", 0x1.000002p+0}, @@ -324,10 +385,47 @@ FloatLiteralTestCaseList HexFloatCases() { {"0x8.00003p+0f", 0x8.00003p+0}, {"0x2.123p+0f", 0x2.123p+0}, {"0x2.cafefp+0f", 0x2.cafefp+0}, + {"0x0.0000fep-126f", 0x0.0000fep-126}, // Subnormal + {"-0x0.0000fep-126f", -0x0.0000fep-126}, // Subnormal + {"0x3.f8p-144f", 0x3.f8p-144}, // Subnormal + {"-0x3.f8p-144f", -0x3.f8p-144}, // Subnormal + + // F16 extremities + {"0x1p-24h", 0x1p-24}, // +SmallestDenormal + {"0x1p-23h", 0x1p-23}, // +BiggerDenormal + {"0x1.ff8p-15h", 0x1.ff8p-15}, // +LargestDenormal + {"0x0.ffcp-14h", 0x0.ffcp-14}, // +LargestDenormal + {"0x1.0p-14h", 0x1.0p-14}, // +SmallestNormal + {"0x8.0p-17h", 0x8.0p-17}, // +SmallestNormal + {"-0x1p-24h", -0x1p-24}, // -SmallestDenormal + {"-0x1p-23h", -0x1p-23}, // -BiggerDenormal + {"-0x1.ff8p-15h", -0x1.ff8p-15}, // -LargestDenormal + {"-0x0.ffcp-14h", -0x0.ffcp-14}, // -LargestDenormal + {"-0x1.0p-14h", -0x1.0p-14}, // -SmallestNormal + {"-0x8.0p-17h", -0x8.0p-17}, // -SmallestNormal + + {"0x0.a8p-19h", 0x0.a8p-19}, // +Subnormal + {"-0x0.a8p-19h", -0x0.a8p-19}, // -Subnormal + {"0x1.7ap-17h", 0x1.7ap-17}, // +Subnormal + {"-0x1.7ap-17h", -0x1.7ap-17}, // -Subnormal + {"0x1.dp-20h", 0x1.dp-20}, // +Subnormal + {"-0x1.dp-20h", -0x1.dp-20}, // -Subnormal + + // F16 exactly representable + {"0x1.004p+0h", 0x1.004p+0}, + {"0x8.02p+0h", 0x8.02p+0}, + {"0x8.fep+0h", 0x8.fep+0}, + {"0x8.06p+0h", 0x8.06p+0}, + {"0x2.128p+0h", 0x2.128p+0}, + {"0x2.ca8p+0h", 0x2.ca8p+0}, + {"0x0.0fcp-14h", 0x0.0fcp-14}, // Subnormal + {"-0x0.0fcp-14h", -0x0.0fcp-14}, // Subnormal + {"0x3.f00p-20h", 0x3.f00p-20}, // Subnormal + {"-0x3.f00p-20h", -0x3.f00p-20}, // Subnormal // Underflow -> Zero - {"0x1p-1074", 0.0}, // Exponent underflows - {"-0x1p-1074", 0.0}, + {"0x1p-1075", 0.0}, // Exponent underflows + {"-0x1p-1075", 0.0}, {"0x1p-5000", 0.0}, {"-0x1p-5000", 0.0}, {"0x0.00000000000000000000001p-1022", 0.0}, // Fraction causes underflow @@ -399,6 +497,16 @@ FloatLiteralTestCaseList HexFloatCases() { {"-0x.8p2f", -2.0}, {"-0x1.8p-1f", -0.75}, {"-0x2p-2f", -0.5}, // No binary point + + // Examples with a binary exponent and a 'h' suffix. + {"0x1.p0h", 1.0}, + {"0x.8p2h", 2.0}, + {"0x1.8p-1h", 0.75}, + {"0x2p-2h", 0.5}, // No binary point + {"-0x1.p0h", -1.0}, + {"-0x.8p2h", -2.0}, + {"-0x1.8p-1h", -0.75}, + {"-0x2p-2h", -0.5}, // No binary point }; } INSTANTIATE_TEST_SUITE_P(ParserImplFloatLiteralTest_HexFloat, @@ -541,6 +649,23 @@ INSTANTIATE_TEST_SUITE_P( "-0x1.fffffep+128f", }))); +INSTANTIATE_TEST_SUITE_P( + HexNaNF16, + ParserImplInvalidLiteralTest, + testing::Combine(testing::Values("1:1: value cannot be represented as 'f16'"), + testing::ValuesIn(std::vector{ + "0x1.8p+16h", + "0x1.004p+16h", + "0x1.018p+16h", + "0x1.1ep+16h", + "0x1.ffcp+16h", + "-0x1.8p+16h", + "-0x1.004p+16h", + "-0x1.018p+16h", + "-0x1.1ep+16h", + "-0x1.ffcp+16h", + }))); + INSTANTIATE_TEST_SUITE_P( HexOverflowAFloat, ParserImplInvalidLiteralTest, @@ -577,17 +702,94 @@ INSTANTIATE_TEST_SUITE_P( "-0x32p+500f", }))); +INSTANTIATE_TEST_SUITE_P( + HexOverflowF16, + ParserImplInvalidLiteralTest, + testing::Combine(testing::Values("1:1: value cannot be represented as 'f16'"), + testing::ValuesIn(std::vector{ + "0x1p+16h", + "-0x1p+16h", + "0x1.1p+16h", + "-0x1.1p+16h", + "0x1p+17h", + "-0x1p+17h", + "0x32p+15h", + "-0x32p+15h", + "0x32p+500h", + "-0x32p+500h", + }))); + INSTANTIATE_TEST_SUITE_P( HexNotExactlyRepresentableF32, ParserImplInvalidLiteralTest, testing::Combine(testing::Values("1:1: value cannot be exactly represented as 'f32'"), testing::ValuesIn(std::vector{ - "0x1.000001p+0f", // Quantizes to 0x1.0p+0 - "0x8.0000f8p+0f", // Quantizes to 0x8.0000fp+0 - "0x8.000038p+0f", // Quantizes to 0x8.00003p+0 - "0x2.cafef00dp+0f", // Quantizes to 0x2.cafefp+0 + "0x1.000001p+0f", // Quantizes to 0x1.0p+0 + "0x1.0000008p+0f", // Quantizes to 0x1.0p+0 + "0x1.0000000000001p+0f", // Quantizes to 0x1.0p+0 + "0x8.0000f8p+0f", // Quantizes to 0x8.0000fp+0 + "0x8.000038p+0f", // Quantizes to 0x8.00003p+0 + "0x2.cafef00dp+0f", // Quantizes to 0x2.cafefp+0 + "0x0.0000ffp-126f", // Subnormal, quantizes to 0x0.0000fep-126 + "0x3.fcp-144f", // Subnormal, quantizes to 0x3.f8p-144 + "-0x0.0000ffp-126f", // Subnormal, quantizes to -0x0.0000fep-126 + "-0x3.fcp-144f", // Subnormal, quantizes to -0x3.f8p-144 + "0x0.ffffffp-126f", // Subnormal, quantizes to 0x0.fffffep-144 + "0x0.fffffe0000001p-126f", // Subnormal, quantizes to 0x0.fffffep-144 + "-0x0.ffffffp-126f", // Subnormal, quantizes to -0x0.fffffep-144 + "-0x0.fffffe0000001p-126f", // Subnormal, quantizes to -0x0.fffffep-144 + "0x1.8p-149f", // Subnormal, quantizes to 0x1.0p-149f + "0x1.4p-149f", // Subnormal, quantizes to 0x1.0p-149f + "0x1.000002p-149f", // Subnormal, quantizes to 0x1.0p-149f + "0x1.0000000000001p-149f", // Subnormal, quantizes to 0x1.0p-149f + "-0x1.8p-149f", // Subnormal, quantizes to -0x1.0p-149f + "-0x1.4p-149f", // Subnormal, quantizes to -0x1.0p-149f + "-0x1.000002p-149f", // Subnormal, quantizes to -0x1.0p-149f + "-0x1.0000000000001p-149f", // Subnormal, quantizes to -0x1.0p-149f + "0x1.0p-150f", // Smaller than the smallest subnormal, quantizes to 0.0 + "0x1.8p-150f", // Smaller than the smallest subnormal, quantizes to 0.0 + "-0x1.0p-150f", // Smaller than the smallest subnormal, quantizes to -0.0 + "-0x1.8p-150f", // Smaller than the smallest subnormal, quantizes to -0.0 }))); +INSTANTIATE_TEST_SUITE_P( + HexNotExactlyRepresentableF16, + ParserImplInvalidLiteralTest, + testing::Combine( + testing::Values("1:1: value cannot be exactly represented as 'f16'"), + testing::ValuesIn(std::vector{ + "0x1.002p+0h", // Quantizes to 0x1.0p+0, has 11 mantissa bits rather than 10 + "0x1.001p+0h", // Quantizes to 0x1.0p+0, has 12 mantissa bits rather than 10 + "0x1.0000000000001p+0h", // Quantizes to 0x1.0p+0, has 52 mantissa bits rather than 10 + "0x8.0fp+0h", // Quantizes to 0x8.0ep+0 + "0x8.31p+0h", // Quantizes to 0x8.30p+0 + "0x2.ca80dp+0h", // Quantizes to 0x2.ca8p+0 + "0x4.ba8p+0h", // Quantizes to 0x4.bap+0 + "0x4.011p+0h", // Quantizes to 0x4.01p+0 + "0x0.0fep-14h", // Subnormal, quantizes to 0x0.0fcp-14 + "0x3.f8p-20h", // Subnormal, quantizes to 0x3.f0p-20 + "-0x0.0fep-14h", // Subnormal, quantizes to -0x0.0fcp-14 + "-0x3.f8p-20h", // Subnormal, quantizes to -0x3.f0p-20 + "0x0.ffep-14h", // Subnormal, quantizes to 0x0.ffcp-14 + "0x0.ffe0000000001p-14h", // Subnormal, quantizes to 0x0.ffcp-14 + "0x0.fffffffffffffp-14h", // Subnormal, quantizes to 0x0.ffcp-14 + "-0x0.ffep-14h", // Subnormal, quantizes to -0x0.ffcp-14 + "-0x0.ffe0000000001p-14h", // Subnormal, quantizes to -0x0.ffcp-14 + "-0x0.fffffffffffffp-14h", // Subnormal, quantizes to -0x0.ffcp-14 + "0x1.8p-24h", // Subnormal, quantizes to 0x1.0p-24f + "0x1.4p-24h", // Subnormal, quantizes to 0x1.0p-24f + "0x1.004p-24h", // Subnormal, quantizes to 0x1.0p-24f + "0x1.0000000000001p-24h", // Subnormal, quantizes to 0x1.0p-24f + "-0x1.8p-24h", // Subnormal, quantizes to -0x1.0p-24f + "-0x1.4p-24h", // Subnormal, quantizes to -0x1.0p-24f + "-0x1.004p-24h", // Subnormal, quantizes to -0x1.0p-24f + "-0x1.0000000000001p-24h", // Subnormal, quantizes to -0x1.0p-24f + "0x1.0p-25h", // Smaller than the smallest subnormal, quantizes to 0.0 + "0x1.8p-25h", // Smaller than the smallest subnormal, quantizes to 0.0 + "-0x1.0p-25h", // Smaller than the smallest subnormal, quantizes to -0.0 + "-0x1.8p-25h", // Smaller than the smallest subnormal, quantizes to -0.0 + }))); + INSTANTIATE_TEST_SUITE_P( DecOverflowAFloat, ParserImplInvalidLiteralTest, @@ -622,6 +824,25 @@ INSTANTIATE_TEST_SUITE_P( "-1.2e+256f", }))); +INSTANTIATE_TEST_SUITE_P( + DecOverflowF16, + ParserImplInvalidLiteralTest, + testing::Combine(testing::Values("1:1: value cannot be represented as 'f16'"), + testing::ValuesIn(std::vector{ + "1.0e5h", + "-1.0e5h", + "7.0e4h", + "-7.0e4h", + "6.6e4h", + "-6.6e4h", + "6.56e4h", + "-6.56e4h", + "6.554e4h", + "-6.554e4h", + "1.2e+32h", + "-1.2e+32h", + }))); + TEST_F(ParserImplTest, ConstLiteral_FloatHighest) { const auto highest = std::numeric_limits::max(); const auto expected_highest = 340282346638528859811704183484516925440.0f; @@ -636,8 +857,7 @@ TEST_F(ParserImplTest, ConstLiteral_FloatHighest) { EXPECT_FALSE(p->has_error()) << p->error(); ASSERT_NE(c.value, nullptr); ASSERT_TRUE(c->Is()); - EXPECT_DOUBLE_EQ(c->As()->value, - std::numeric_limits::max()); + EXPECT_EQ(c->As()->value, std::numeric_limits::max()); EXPECT_EQ(c->As()->suffix, ast::FloatLiteralExpression::Suffix::kNone); EXPECT_EQ(c->source.range, (Source::Range{{1u, 1u}, {1u, 42u}})); @@ -660,8 +880,7 @@ TEST_F(ParserImplTest, ConstLiteral_FloatLowest) { EXPECT_FALSE(p->has_error()) << p->error(); ASSERT_NE(c.value, nullptr); ASSERT_TRUE(c->Is()); - EXPECT_DOUBLE_EQ(c->As()->value, - std::numeric_limits::lowest()); + EXPECT_EQ(c->As()->value, std::numeric_limits::lowest()); EXPECT_EQ(c->As()->suffix, ast::FloatLiteralExpression::Suffix::kNone); EXPECT_EQ(c->source.range, (Source::Range{{1u, 1u}, {1u, 43u}})); diff --git a/src/tint/reader/wgsl/token.cc b/src/tint/reader/wgsl/token.cc index 4680eee0d7..dcd72cff02 100644 --- a/src/tint/reader/wgsl/token.cc +++ b/src/tint/reader/wgsl/token.cc @@ -29,6 +29,8 @@ std::string_view Token::TypeToName(Type type) { return "abstract float literal"; case Token::Type::kFloatLiteral_F: return "'f'-suffixed float literal"; + case Token::Type::kFloatLiteral_H: + return "'h'-suffixed float literal"; case Token::Type::kIntLiteral: return "abstract integer literal"; case Token::Type::kIntLiteral_I: @@ -311,6 +313,8 @@ std::string Token::to_str() const { return std::to_string(std::get(value_)); case Type::kFloatLiteral_F: return std::to_string(std::get(value_)) + "f"; + case Type::kFloatLiteral_H: + return std::to_string(std::get(value_)) + "h"; case Type::kIntLiteral: return std::to_string(std::get(value_)); case Type::kIntLiteral_I: diff --git a/src/tint/reader/wgsl/token.h b/src/tint/reader/wgsl/token.h index 0a68f9b055..9587f360c0 100644 --- a/src/tint/reader/wgsl/token.h +++ b/src/tint/reader/wgsl/token.h @@ -42,6 +42,8 @@ class Token { kFloatLiteral, /// A float literal with an 'f' suffix kFloatLiteral_F, + /// A float literal with an 'h' suffix + kFloatLiteral_H, /// An integer literal with no suffix kIntLiteral, /// An integer literal with an 'i' suffix diff --git a/src/tint/resolver/resolver.cc b/src/tint/resolver/resolver.cc index 90ae4b0822..b1f5a95b1c 100644 --- a/src/tint/resolver/resolver.cc +++ b/src/tint/resolver/resolver.cc @@ -1658,10 +1658,15 @@ sem::Expression* Resolver::Literal(const ast::LiteralExpression* literal) { return nullptr; }, [&](const ast::FloatLiteralExpression* f) -> sem::Type* { - if (f->suffix == ast::FloatLiteralExpression::Suffix::kNone) { - return builder_->create(); + switch (f->suffix) { + case ast::FloatLiteralExpression::Suffix::kNone: + return builder_->create(); + case ast::FloatLiteralExpression::Suffix::kF: + return builder_->create(); + case ast::FloatLiteralExpression::Suffix::kH: + return builder_->create(); } - return builder_->create(); + return nullptr; }, [&](const ast::BoolLiteralExpression*) { return builder_->create(); }, [&](Default) { return nullptr; }); @@ -1672,6 +1677,11 @@ sem::Expression* Resolver::Literal(const ast::LiteralExpression* literal) { return nullptr; } + if ((ty->Is()) && (!enabled_extensions_.contains(tint::ast::Extension::kF16))) { + AddError("f16 literal used without 'f16' extension enabled", literal->source); + return nullptr; + } + auto val = EvaluateConstantValue(literal, ty); if (!val) { return nullptr; diff --git a/src/tint/resolver/resolver_test.cc b/src/tint/resolver/resolver_test.cc index 16725ba88a..38f4d49441 100644 --- a/src/tint/resolver/resolver_test.cc +++ b/src/tint/resolver/resolver_test.cc @@ -2134,5 +2134,22 @@ TEST_F(ResolverTest, MaxExpressionDepth_Fail) { std::to_string(kMaxExpressionDepth))); } +TEST_F(ResolverTest, Literal_F16WithoutExtension) { + // fn test() {_ = 1.23h;} + WrapInFunction(Ignore(Expr(f16(1.23f)))); + + EXPECT_FALSE(r()->Resolve()); + EXPECT_THAT(r()->error(), HasSubstr("error: f16 literal used without 'f16' extension enabled")); +} + +TEST_F(ResolverTest, Literal_F16WithExtension) { + // enable f16; + // fn test() {_ = 1.23h;} + Enable(ast::Extension::kF16); + WrapInFunction(Ignore(Expr(f16(1.23f)))); + + EXPECT_TRUE(r()->Resolve()); +} + } // namespace } // namespace tint::resolver