HexFloat: detect exponent overflow and report errors

Make ParserImpl::const_literal() bubble up any error reported by the tokenizer. These were being ignored. Also: * Detect and report significand too large * Detect and report missing exponent * Fix invalid mantissa overflow detection for fractional trailing zeroes * Fix zero with non-zero exponent triggering an assert, and instead, make the result zero (added tests for this). Bug: chromium:1235132 Bug: tint:77 Change-Id: I364a4c944121a2c55ff3161de1bb50126c8a5526 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60680 Reviewed-by: Ben Clayton <bclayton@google.com> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Antonio Maiorano <amaiorano@google.com>
2025-12-12 14:46:08 +00:00 · 2021-08-03 17:12:59 +00:00
parent b75e4b96a6
commit 72f9ce98d1
5 changed files with 141 additions and 51 deletions
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -17,6 +17,7 @@
 #include <cmath>
 #include <cstring>
 #include <limits>
+#include <utility>

 #include "src/debug.h"

@@ -309,8 +310,12 @@ Token Lexer::try_hex_float() {
  // `set_next_mantissa_bit_to` sets next `mantissa` bit starting from msb to
  // lsb to value 1 if `set` is true, 0 otherwise
  uint32_t mantissa_next_bit = kTotalMsb;
-  auto set_next_mantissa_bit_to = [&](bool set) -> bool {
-    if (mantissa_next_bit > kTotalMsb) {
+  auto set_next_mantissa_bit_to = [&](bool set, bool integer_part) -> bool {
+    // If adding bits for the integer part, we can overflow whether we set the
+    // bit or not. For the fractional part, we can only overflow when setting
+    // the bit.
+    const bool check_overflow = integer_part || set;
+    if (check_overflow && (mantissa_next_bit > kTotalMsb)) {
      return false;  // Overflowed mantissa
    }
    if (set) {
@@ -320,26 +325,56 @@ Token Lexer::try_hex_float() {
    return true;
  };

+  // Collect integer range (if any)
+  auto integer_range = std::make_pair(end, end);
+  while (end < len_ && is_hex(content_->data[end])) {
+    integer_range.second = ++end;
+  }
+
+  // .?
+  if (matches(end, ".")) {
+    end++;
+  }
+
+  // Collect fractional range (if any)
+  auto fractional_range = std::make_pair(end, end);
+  while (end < len_ && is_hex(content_->data[end])) {
+    fractional_range.second = ++end;
+  }
+
+  // Must have at least an integer or fractional part
+  if ((integer_range.first == integer_range.second) &&
+      (fractional_range.first == fractional_range.second)) {
+    return {};
+  }
+
+  // (p|P)
+  if (matches(end, "p") || matches(end, "P")) {
+    end++;
+  } else {
+    return {};
+  }
+
+  // At this point, we know for sure our token is a hex float value.
+
  // Parse integer part
  // [0-9a-fA-F]*
-  bool has_integer = false;
  bool has_zero_integer = true;
  bool leading_bit_seen = false;
-  while (end < len_ && is_hex(content_->data[end])) {
-    has_integer = true;
-
-    const auto nibble = hex_value(content_->data[end]);
+  for (auto i = integer_range.first; i < integer_range.second; ++i) {
+    const auto nibble = hex_value(content_->data[i]);
    if (nibble != 0) {
      has_zero_integer = false;
    }

-    for (int32_t i = 3; i >= 0; --i) {
-      auto v = 1 & (nibble >> i);
+    for (int32_t bit = 3; bit >= 0; --bit) {
+      auto v = 1 & (nibble >> bit);

      // Skip leading 0s and the first 1
      if (leading_bit_seen) {
-        if (!set_next_mantissa_bit_to(v != 0)) {
-          return {};
+        if (!set_next_mantissa_bit_to(v != 0, true)) {
+          return {Token::Type::kError, source,
+                  "mantissa is too large for hex float"};
        }
        ++exponent;
      } else {
@@ -348,24 +383,15 @@ Token Lexer::try_hex_float() {
        }
      }
    }
-
-    end++;
-  }
-
-  // .?
-  if (matches(end, ".")) {
-    end++;
  }

  // Parse fractional part
  // [0-9a-fA-F]*
-  bool has_fractional = false;
  leading_bit_seen = false;
-  while (end < len_ && is_hex(content_->data[end])) {
-    has_fractional = true;
-    auto nibble = hex_value(content_->data[end]);
-    for (int32_t i = 3; i >= 0; --i) {
-      auto v = 1 & (nibble >> i);
+  for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
+    auto nibble = hex_value(content_->data[i]);
+    for (int32_t bit = 3; bit >= 0; --bit) {
+      auto v = 1 & (nibble >> bit);

      if (v == 1) {
        leading_bit_seen = true;
@@ -377,24 +403,12 @@ Token Lexer::try_hex_float() {
      if (has_zero_integer && !leading_bit_seen) {
        --exponent;
      } else {
-        if (!set_next_mantissa_bit_to(v != 0)) {
-          return {};
+        if (!set_next_mantissa_bit_to(v != 0, false)) {
+          return {Token::Type::kError, source,
+                  "mantissa is too large for hex float"};
        }
      }
    }
-
-    end++;
-  }
-
-  if (!(has_integer || has_fractional)) {
-    return {};
-  }
-
-  // (p|P)
-  if (matches(end, "p") || matches(end, "P")) {
-    end++;
-  } else {
-    return {};
  }

  // (+|-)?
@@ -409,14 +423,20 @@ Token Lexer::try_hex_float() {
  // Parse exponent from input
  // [0-9]+
  bool has_exponent = false;
-  int32_t input_exponent = 0;
+  uint32_t input_exponent = 0;
  while (end < len_ && isdigit(content_->data[end])) {
    has_exponent = true;
+    auto prev_exponent = input_exponent;
    input_exponent = (input_exponent * 10) + dec_value(content_->data[end]);
+    if (prev_exponent > input_exponent) {
+      return {Token::Type::kError, source,
+              "exponent is too large for hex float"};
+    }
    end++;
  }
  if (!has_exponent) {
-    return {};
+    return {Token::Type::kError, source,
+            "expected an exponent value for hex float"};
  }

  pos_ = end;
@@ -430,9 +450,12 @@ Token Lexer::try_hex_float() {
  // Note: it's not enough to check mantissa == 0 as we drop initial bit from
  // integer part.
  bool is_zero = has_zero_integer && mantissa == 0;
-  TINT_ASSERT(Reader, !is_zero || (exponent == 0 && mantissa == 0));
+  TINT_ASSERT(Reader, !is_zero || mantissa == 0);

-  if (!is_zero) {
+  if (is_zero) {
+    // If value is zero, then ignore the exponent and produce a zero
+    exponent = 0;
+  } else {
    // Bias exponent if non-zero
    // After this, if exponent is <= 0, our value is a denormal
    exponent += kExponentBias;
--- a/src/reader/wgsl/parser_impl.cc
+++ b/src/reader/wgsl/parser_impl.cc
@@ -2819,6 +2819,9 @@ Maybe<ast::AssignmentStatement*> ParserImpl::assignment_stmt() {
 //   | FALSE
 Maybe<ast::Literal*> ParserImpl::const_literal() {
  auto t = peek();
+  if (t.IsError()) {
+    return add_error(t.source(), t.to_str());
+  }
  if (match(Token::Type::kTrue)) {
    return create<ast::BoolLiteral>(t.source(), true);
  }
@@ -2835,7 +2838,8 @@ Maybe<ast::Literal*> ParserImpl::const_literal() {
    auto p = peek();
    if (p.IsIdentifier() && p.to_str() == "f") {
      next();  // Consume 'f'
-      add_error(p.source(), "float literals must not be suffixed with 'f'");
+      return add_error(p.source(),
+                       "float literals must not be suffixed with 'f'");
    }
    return create<ast::FloatLiteral>(t.source(), t.to_f32());
  }
--- a/src/reader/wgsl/parser_impl_const_literal_test.cc
+++ b/src/reader/wgsl/parser_impl_const_literal_test.cc
@@ -81,7 +81,8 @@ TEST_F(ParserImplTest, ConstLiteral_InvalidFloat) {
  auto p = parser("1.2e+256");
  auto c = p->const_literal();
  EXPECT_FALSE(c.matched);
-  EXPECT_FALSE(c.errored);
+  EXPECT_TRUE(c.errored);
+  EXPECT_EQ(p->error(), "1:1: f32 (1.2e+256) too large");
  ASSERT_EQ(c.value, nullptr);
 }

@@ -228,6 +229,13 @@ FloatLiteralTestCase hexfloat_literal_test_cases[] = {
    {"0x0.01p-142", 0.f},
    {"-0x0.01p-142", -0.f},  // Fraction causes additional underflow

+    // Zero with non-zero exponent -> Zero
+    {"0x0p+0", 0.f},
+    {"0x0p+1", 0.f},
+    {"0x0p-1", 0.f},
+    {"0x0p+9999999999", 0.f},
+    {"0x0p-9999999999", 0.f},
+
    // Test parsing
    {"0x0p0", 0.f},
    {"0x0p-0", 0.f},
@@ -252,6 +260,59 @@ INSTANTIATE_TEST_SUITE_P(ParserImplFloatLiteralTest_HexFloat,
                         ParserImplFloatLiteralTest,
                         testing::ValuesIn(hexfloat_literal_test_cases));

+struct InvalidLiteralTestCase {
+  const char* input;
+  const char* error_msg;
+};
+class ParserImplInvalidLiteralTest
+    : public ParserImplTestWithParam<InvalidLiteralTestCase> {};
+TEST_P(ParserImplInvalidLiteralTest, Parse) {
+  auto params = GetParam();
+  SCOPED_TRACE(params.input);
+  auto p = parser(params.input);
+  auto c = p->const_literal();
+  EXPECT_FALSE(c.matched);
+  EXPECT_TRUE(c.errored);
+  EXPECT_EQ(p->error(), params.error_msg);
+  ASSERT_EQ(c.value, nullptr);
+}
+
+InvalidLiteralTestCase invalid_hexfloat_mantissa_too_large_cases[] = {
+    {"0x1.ffffffff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1f.fffffff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1ff.ffffff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1fff.fffff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1ffff.ffff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1fffff.fff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1ffffff.ff8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1fffffff.f8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1ffffffff.8p0", "1:1: mantissa is too large for hex float"},
+    {"0x1ffffffff8.p0", "1:1: mantissa is too large for hex float"},
+};
+INSTANTIATE_TEST_SUITE_P(
+    ParserImplInvalidLiteralTest_HexFloatMantissaTooLarge,
+    ParserImplInvalidLiteralTest,
+    testing::ValuesIn(invalid_hexfloat_mantissa_too_large_cases));
+
+InvalidLiteralTestCase invalid_hexfloat_exponent_too_large_cases[] = {
+    {"0x0p+4294967296", "1:1: exponent is too large for hex float"},
+    {"0x0p-4294967296", "1:1: exponent is too large for hex float"},
+};
+INSTANTIATE_TEST_SUITE_P(
+    ParserImplInvalidLiteralTest_HexFloatExponentTooLarge,
+    ParserImplInvalidLiteralTest,
+    testing::ValuesIn(invalid_hexfloat_exponent_too_large_cases));
+
+InvalidLiteralTestCase invalid_hexfloat_exponent_missing_cases[] = {
+    {"0x0p", "1:1: expected an exponent value for hex float"},
+    {"0x1.0p", "1:1: expected an exponent value for hex float"},
+    {"0x0.1p", "1:1: expected an exponent value for hex float"},
+};
+INSTANTIATE_TEST_SUITE_P(
+    ParserImplInvalidLiteralTest_HexFloatExponentMissing,
+    ParserImplInvalidLiteralTest,
+    testing::ValuesIn(invalid_hexfloat_exponent_missing_cases));
+
 TEST_F(ParserImplTest, ConstLiteral_FloatHighest) {
  const auto highest = std::numeric_limits<float>::max();
  const auto expected_highest = 340282346638528859811704183484516925440.0f;
--- a/src/reader/wgsl/parser_impl_error_msg_test.cc
+++ b/src/reader/wgsl/parser_impl_error_msg_test.cc
@@ -332,23 +332,23 @@ TEST_F(ParserImplErrorTest, FunctionDeclDecoWorkgroupSizeMissingRParen) {
 }

 TEST_F(ParserImplErrorTest, FunctionDeclDecoWorkgroupSizeXInvalid) {
-  EXPECT("[[workgroup_size(@)]] fn f() {}",
+  EXPECT("[[workgroup_size()]] fn f() {}",
         "test.wgsl:1:18 error: expected workgroup_size x parameter\n"
-         "[[workgroup_size(@)]] fn f() {}\n"
+         "[[workgroup_size()]] fn f() {}\n"
         "                 ^\n");
 }

 TEST_F(ParserImplErrorTest, FunctionDeclDecoWorkgroupSizeYInvalid) {
-  EXPECT("[[workgroup_size(1, @)]] fn f() {}",
+  EXPECT("[[workgroup_size(1, )]] fn f() {}",
         "test.wgsl:1:21 error: expected workgroup_size y parameter\n"
-         "[[workgroup_size(1, @)]] fn f() {}\n"
+         "[[workgroup_size(1, )]] fn f() {}\n"
         "                    ^\n");
 }

 TEST_F(ParserImplErrorTest, FunctionDeclDecoWorkgroupSizeZInvalid) {
-  EXPECT("[[workgroup_size(1, 2, @)]] fn f() {}",
+  EXPECT("[[workgroup_size(1, 2, )]] fn f() {}",
         "test.wgsl:1:24 error: expected workgroup_size z parameter\n"
-         "[[workgroup_size(1, 2, @)]] fn f() {}\n"
+         "[[workgroup_size(1, 2, )]] fn f() {}\n"
         "                       ^\n");
 }

--- a/src/reader/wgsl/token.h
+++ b/src/reader/wgsl/token.h
@@ -372,6 +372,8 @@ class Token {
  bool IsUninitialized() const { return type_ == Type::kUninitialized; }
  /// @returns true if the token is EOF
  bool IsEof() const { return type_ == Type::kEOF; }
+  /// @returns true if the token is Error
+  bool IsError() const { return type_ == Type::kError; }
  /// @returns true if the token is an identifier
  bool IsIdentifier() const { return type_ == Type::kIdentifier; }
  /// @returns true if the token is a literal