Block comments must be terminated before end of input

Lexer methods scanning for comments and whitespace can now return an error.

Fixes: tint:1309
Change-Id: Ica8e393d3410b1bda2a293db0d9b0006770770ea
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69361
Auto-Submit: David Neto <dneto@google.com>
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
2025-12-11 14:41:50 +00:00 · 2021-11-17 18:55:31 +00:00
parent be11f9f9ca
commit d018d2e5bc
4 changed files with 69 additions and 16 deletions
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -61,14 +61,12 @@ Lexer::Lexer(const std::string& file_path, const Source::FileContent* content)
 Lexer::~Lexer() = default;

 Token Lexer::next() {
-  skip_whitespace();
-  skip_comments();
-
-  if (is_eof()) {
-    return {Token::Type::kEOF, begin_source()};
+  auto t = skip_whitespace_and_comments();
+  if (!t.IsUninitialized()) {
+    return t;
  }

-  auto t = try_hex_float();
+  t = try_hex_float();
  if (!t.IsUninitialized()) {
    return t;
  }
@@ -140,7 +138,7 @@ bool Lexer::matches(size_t pos, const std::string& substr) {
  return content_->data.substr(pos, substr.size()) == substr;
 }

-void Lexer::skip_whitespace() {
+Token Lexer::skip_whitespace_and_comments() {
  for (;;) {
    auto pos = pos_;
    while (!is_eof() && is_whitespace(content_->data[pos_])) {
@@ -155,27 +153,41 @@ void Lexer::skip_whitespace() {
      location_.column++;
    }

-    skip_comments();
+    auto t = skip_comment();
+    if (!t.IsUninitialized()) {
+      return t;
+    }

    // If the cursor didn't advance we didn't remove any whitespace
    // so we're done.
    if (pos == pos_)
      break;
  }
+  if (is_eof()) {
+    return {Token::Type::kEOF, begin_source()};
  }

-void Lexer::skip_comments() {
+  return {};
+}
+
+Token Lexer::skip_comment() {
  if (matches(pos_, "//")) {
-    // Line comment: ignore everything until the end of line.
+    // Line comment: ignore everything until the end of line
+    // or end of input.
    while (!is_eof() && !matches(pos_, "\n")) {
      pos_++;
      location_.column++;
    }
-    return;
+    return {};
  }

  if (matches(pos_, "/*")) {
    // Block comment: ignore everything until the closing '*/' token.
+
+    // Record source location of the initial '/*'
+    auto source = begin_source();
+    source.range.end.column += 1;
+
    pos_ += 2;
    location_.column += 2;

@@ -202,8 +214,12 @@ void Lexer::skip_comments() {
        location_.column++;
      }
    }
+    if (depth > 0) {
+      return {Token::Type::kError, source, "unterminated block comment"};
    }
  }
+  return {};
+}

 Token Lexer::try_float() {
  auto start = pos_;
--- a/src/reader/wgsl/lexer.h
+++ b/src/reader/wgsl/lexer.h
@@ -32,13 +32,19 @@ class Lexer {
  Lexer(const std::string& file_path, const Source::FileContent* content);
  ~Lexer();

-  /// Returns the next token in the input stream
+  /// Returns the next token in the input stream.
  /// @return Token
  Token next();

 private:
-  void skip_whitespace();
-  void skip_comments();
+  /// Advances past whitespace and comments, if present
+  /// at the current position.
+  /// @returns uninitialized token on success, else an EOF or error token
+  Token skip_whitespace_and_comments();
+  /// Advances past a comment at the current position,
+  /// if one exists.
+  /// @returns uninitialized token on success, or error
+  Token skip_comment();

  Token build_token_from_int_if_possible(Source source,
                                         size_t start,
@@ -55,6 +61,7 @@ class Lexer {
  Source begin_source() const;
  void end_source(Source&) const;

+  /// @returns true if the end of the input has been reached.
  bool is_eof() const;
  /// @param ch a character
  /// @returns true if 'ch' is an alphabetic character
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc
@@ -110,6 +110,24 @@ text // nested line comments are ignored /* more text
  EXPECT_TRUE(t.IsEof());
 }

+TEST_F(LexerTest, Skips_Comments_Block_Unterminated) {
+  // Unterminated block comment => error at its opener. The /* is split across
+  // two literals: clang's readability check otherwise can't find a comment end.
+  Source::FileContent content(R"(
+  /)"
+                              R"(*
+abcd)");
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  ASSERT_TRUE(t.Is(Token::Type::kError));
+  EXPECT_EQ(t.to_str(), "unterminated block comment");
+  EXPECT_EQ(t.source().range.begin.line, 2u);  // input starts with a newline
+  EXPECT_EQ(t.source().range.begin.column, 3u);  // the '/' of the opener
+  EXPECT_EQ(t.source().range.end.line, 2u);
+  EXPECT_EQ(t.source().range.end.column, 4u);  // the '*' of the opener
+}
+
 struct FloatData {
  const char* input;
  float result;
--- a/src/reader/wgsl/parser_impl_test.cc
+++ b/src/reader/wgsl/parser_impl_test.cc
@@ -85,7 +85,7 @@ fn main() {
  EXPECT_EQ(p->error(), "5:1: exponent is too large for hex float");
 }

-TEST_F(ParserImplTest, Comments) {
+TEST_F(ParserImplTest, Comments_TerminatedBlockComment) {
  auto p = parser(R"(
 /**
 * Here is my shader.
@@ -99,12 +99,24 @@ no
 parameters
 */) -> [[location(0)]] vec4<f32> {
  return/*block_comments_delimit_tokens*/vec4<f32>(.4, .2, .3, 1);
-}/* unterminated block comments are OK at EOF...)");
+}/* block comments are OK at EOF...*/)");

  ASSERT_TRUE(p->Parse()) << p->error();
  ASSERT_EQ(1u, p->program().AST().Functions().size());
 }

+TEST_F(ParserImplTest, Comments_UnterminatedBlockComment) {
+  auto p = parser(R"(
+[[stage(fragment)]]
+fn main() -> [[location(0)]] vec4<f32> {
+  return vec4<f32>(.4, .2, .3, 1);
+} /* unterminated block comments are invalid ...)");
+
+  ASSERT_FALSE(p->Parse());  // the block comment opened at 5:3 never closes
+  ASSERT_TRUE(p->has_error());
+  EXPECT_EQ(p->error(), "5:3: unterminated block comment") << p->error();
+}
+
 }  // namespace
 }  // namespace wgsl
 }  // namespace reader