Terminate line comments with \v, \f, and \r

The WGSL spec says that line comments are terminated by any blankspace other than a space or a horizontal tab.

Also rename is_whitespace to is_blankspace and tighten up the definition to only include the characters listed in the WGSL spec.

Change-Id: I4fee0175980ab70e9baf107a6e79ab5c2e4f906d
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/83920
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: James Price <jrprice@google.com>
2025-07-07 05:36:04 +00:00 · 2022-03-16 21:45:44 +00:00 · 2022-03-16 21:45:44 +00:00 · 453d5ae84e
commit 453d5ae84e
parent 2a761f736a
4 changed files with 54 additions and 15 deletions
--- a/src/tint/reader/wgsl/lexer.cc
+++ b/src/tint/reader/wgsl/lexer.cc
@@ -28,8 +28,10 @@ namespace reader {
 namespace wgsl {
 namespace {

-bool is_whitespace(char c) {
-  return std::isspace(static_cast<unsigned char>(c));
+bool is_blankspace(char c) {
+  // See https://www.w3.org/TR/WGSL/#blankspace.
+  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
+         c == '\r';
 }

 uint32_t dec_value(char c) {
@@ -62,7 +64,7 @@ Lexer::Lexer(const Source::File* file)
 Lexer::~Lexer() = default;

 Token Lexer::next() {
-  if (auto t = skip_whitespace_and_comments(); !t.IsUninitialized()) {
+  if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
    return t;
  }

@@ -128,10 +130,10 @@ bool Lexer::matches(size_t pos, std::string_view substr) {
  return file_->content.data_view.substr(pos, substr.size()) == substr;
 }

-Token Lexer::skip_whitespace_and_comments() {
+Token Lexer::skip_blankspace_and_comments() {
  for (;;) {
    auto pos = pos_;
-    while (!is_eof() && is_whitespace(file_->content.data[pos_])) {
+    while (!is_eof() && is_blankspace(file_->content.data[pos_])) {
      if (matches(pos_, "\n")) {
        pos_++;
        location_.line++;
@@ -148,7 +150,7 @@ Token Lexer::skip_whitespace_and_comments() {
      return t;
    }

-    // If the cursor didn't advance we didn't remove any whitespace
+    // If the cursor didn't advance we didn't remove any blankspace
    // so we're done.
    if (pos == pos_)
      break;
@@ -162,9 +164,10 @@ Token Lexer::skip_whitespace_and_comments() {

 Token Lexer::skip_comment() {
  if (matches(pos_, "//")) {
-    // Line comment: ignore everything until the end of line
-    // or end of input.
-    while (!is_eof() && !matches(pos_, "\n")) {
+    // Line comment: ignore everything until the end of input or a blankspace
+    // character other than space or horizontal tab.
+    while (!is_eof() && !(is_blankspace(file_->content.data[pos_]) &&
+                          !matches(pos_, " ") && !matches(pos_, "\t"))) {
      if (is_null()) {
        return {Token::Type::kError, begin_source(), "null character found"};
      }
--- a/src/tint/reader/wgsl/lexer.h
+++ b/src/tint/reader/wgsl/lexer.h
@@ -36,10 +36,9 @@ class Lexer {
  Token next();

 private:
-  /// Advances past whitespace and comments, if present
-  /// at the current position.
+  /// Advances past blankspace and comments, if present at the current position.
  /// @returns error token, EOF, or uninitialized
-  Token skip_whitespace_and_comments();
+  Token skip_blankspace_and_comments();
  /// Advances past a comment at the current position, if one exists.
  /// Returns an error if there was an unterminated block comment,
  /// or a null character was present.
--- a/src/tint/reader/wgsl/lexer_test.cc
+++ b/src/tint/reader/wgsl/lexer_test.cc
@@ -32,7 +32,7 @@ TEST_F(LexerTest, Empty) {
  EXPECT_TRUE(t.IsEof());
 }

-TEST_F(LexerTest, Skips_Whitespace) {
+TEST_F(LexerTest, Skips_Blankspace) {
  Source::File file("", "\t\r\n\t    ident\t\n\t  \r ");
  Lexer l(&file);

@@ -75,6 +75,43 @@ ident1 //ends with comment
  EXPECT_TRUE(t.IsEof());
 }

+using LineCommentTerminatorTest = testing::TestWithParam<char>;
+TEST_P(LineCommentTerminatorTest, Terminators) {
+  // Test that line comments are ended by blankspace characters other than space
+  // and horizontal tab.
+  char c = GetParam();
+  std::string src = "let// This is a comment";
+  src += c;
+  src += "ident";
+  Source::File file("", src);
+  Lexer l(&file);
+
+  auto t = l.next();
+  EXPECT_TRUE(t.Is(Token::Type::kLet));
+  EXPECT_EQ(t.source().range.begin.line, 1u);
+  EXPECT_EQ(t.source().range.begin.column, 1u);
+  EXPECT_EQ(t.source().range.end.line, 1u);
+  EXPECT_EQ(t.source().range.end.column, 4u);
+
+  if (c != ' ' && c != '\t') {
+    size_t line = c == '\n' ? 2u : 1u;
+    size_t col = c == '\n' ? 1u : 25u;
+    t = l.next();
+    EXPECT_TRUE(t.IsIdentifier());
+    EXPECT_EQ(t.source().range.begin.line, line);
+    EXPECT_EQ(t.source().range.begin.column, col);
+    EXPECT_EQ(t.source().range.end.line, line);
+    EXPECT_EQ(t.source().range.end.column, col + 5);
+    EXPECT_EQ(t.to_str(), "ident");
+  }
+
+  t = l.next();
+  EXPECT_TRUE(t.IsEof());
+}
+INSTANTIATE_TEST_SUITE_P(LexerTest,
+                         LineCommentTerminatorTest,
+                         testing::Values(' ', '\t', '\n', '\v', '\f', '\r'));
+
 TEST_F(LexerTest, Skips_Comments_Block) {
  Source::File file("", R"(/* comment
 text */ident)");
@@ -128,7 +165,7 @@ abcd)");
  EXPECT_EQ(t.source().range.end.column, 4u);
 }

-TEST_F(LexerTest, Null_InWhitespace_IsError) {
+TEST_F(LexerTest, Null_InBlankspace_IsError) {
  Source::File file("", std::string{' ', 0, ' '});
  Lexer l(&file);

--- a/src/tint/writer/text_generator.h
+++ b/src/tint/writer/text_generator.h
@@ -32,7 +32,7 @@ class TextGenerator {
 public:
  /// Line holds a single line of text
  struct Line {
-    /// The indentation of the line in whitespaces
+    /// The indentation of the line in blankspace
    uint32_t indent = 0;
    /// The content of the line, without a trailing newline character
    std::string content;