diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc index c22b5a2c17..00c0f8d79d 100644 --- a/src/reader/wgsl/lexer.cc +++ b/src/reader/wgsl/lexer.cc @@ -96,7 +96,8 @@ Token Lexer::next() { return t; } - return {Token::Type::kError, begin_source(), "invalid character found"}; + return {Token::Type::kError, begin_source(), + (is_null() ? "null character found" : "invalid character found")}; } Source Lexer::begin_source() const { @@ -116,6 +117,10 @@ bool Lexer::is_eof() const { return pos_ >= len_; } +bool Lexer::is_null() const { + return (pos_ < len_) && (content_->data[pos_] == 0); +} + bool Lexer::is_alpha(char ch) const { return std::isalpha(ch); } @@ -175,6 +180,9 @@ Token Lexer::skip_comment() { // Line comment: ignore everything until the end of line // or end of input. while (!is_eof() && !matches(pos_, "\n")) { + if (is_null()) { + return {Token::Type::kError, begin_source(), "null character found"}; + } pos_++; location_.column++; } @@ -208,6 +216,8 @@ Token Lexer::skip_comment() { pos_++; location_.line++; location_.column = 1; + } else if (is_null()) { + return {Token::Type::kError, begin_source(), "null character found"}; } else { // Anything else: skip and update source location. pos_++; @@ -653,10 +663,11 @@ Token Lexer::try_hex_integer() { end++; } - if (!matches(end, "0x")) { + if (matches(end, "0x")) { + end += 2; + } else { return {}; } - end += 2; auto first = end; while (!is_eof() && is_hex(content_->data[end])) { diff --git a/src/reader/wgsl/lexer.h b/src/reader/wgsl/lexer.h index d84e654613..c823f593ae 100644 --- a/src/reader/wgsl/lexer.h +++ b/src/reader/wgsl/lexer.h @@ -41,8 +41,9 @@ class Lexer { /// at the current position. /// @returns error token, EOF, or uninitialized Token skip_whitespace_and_comments(); - /// Advances past a comment at the current position, - /// if one exists. + /// Advances past a comment at the current position, if one exists. 
+ /// Returns an error if there was an unterminated block comment, + /// or a null character was present. /// @returns uninitialized token on success, or error Token skip_comment(); @@ -72,6 +73,9 @@ class Lexer { /// @returns true if the end of the input has been reached. bool is_eof() const; + /// @returns true if there is another character on the input and + /// it is null. + bool is_null() const; /// @param ch a character /// @returns true if 'ch' is an alphabetic character bool is_alpha(char ch) const; diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc index b1f1742037..24faeb397f 100644 --- a/src/reader/wgsl/lexer_test.cc +++ b/src/reader/wgsl/lexer_test.cc @@ -128,6 +128,64 @@ abcd)"); EXPECT_EQ(t.source().range.end.column, 4u); } +TEST_F(LexerTest, Null_InWhitespace_IsError) { + Source::FileContent content(std::string{' ', 0, ' '}); + Lexer l("test.wgsl", &content); + + auto t = l.next(); + EXPECT_TRUE(t.IsError()); + EXPECT_EQ(t.source().range.begin.line, 1u); + EXPECT_EQ(t.source().range.begin.column, 2u); + EXPECT_EQ(t.source().range.end.line, 1u); + EXPECT_EQ(t.source().range.end.column, 2u); + EXPECT_EQ(t.to_str(), "null character found"); +} + +TEST_F(LexerTest, Null_InLineComment_IsError) { + Source::FileContent content(std::string{'/', '/', ' ', 0, ' '}); + Lexer l("test.wgsl", &content); + + auto t = l.next(); + EXPECT_TRUE(t.IsError()); + EXPECT_EQ(t.source().range.begin.line, 1u); + EXPECT_EQ(t.source().range.begin.column, 4u); + EXPECT_EQ(t.source().range.end.line, 1u); + EXPECT_EQ(t.source().range.end.column, 4u); + EXPECT_EQ(t.to_str(), "null character found"); +} + +TEST_F(LexerTest, Null_InBlockComment_IsError) { + Source::FileContent content(std::string{'/', '*', ' ', 0, '*', '/'}); + Lexer l("test.wgsl", &content); + + auto t = l.next(); + EXPECT_TRUE(t.IsError()); + EXPECT_EQ(t.source().range.begin.line, 1u); + EXPECT_EQ(t.source().range.begin.column, 4u); + EXPECT_EQ(t.source().range.end.line, 1u); + 
EXPECT_EQ(t.source().range.end.column, 4u); + EXPECT_EQ(t.to_str(), "null character found"); +} + +TEST_F(LexerTest, Null_InIdentifier_IsError) { + // Try inserting a null in an identifier. Other valid token + // kinds will behave similarly, so use the identifier case + // as a representative. + Source::FileContent content(std::string{'a', 0, 'c'}); + Lexer l("test.wgsl", &content); + + auto t = l.next(); + EXPECT_TRUE(t.IsIdentifier()); + EXPECT_EQ(t.to_str(), "a"); + t = l.next(); + EXPECT_TRUE(t.IsError()); + EXPECT_EQ(t.source().range.begin.line, 1u); + EXPECT_EQ(t.source().range.begin.column, 2u); + EXPECT_EQ(t.source().range.end.line, 1u); + EXPECT_EQ(t.source().range.end.column, 2u); + EXPECT_EQ(t.to_str(), "null character found"); +} + struct FloatData { const char* input; float result;