Block comments must be terminated before end of input

Lexer methods scanning for comments and whitespace can now
return an error.

Fixes: tint:1309
Change-Id: Ica8e393d3410b1bda2a293db0d9b0006770770ea
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69361
Auto-Submit: David Neto <dneto@google.com>
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
This commit is contained in:
David Neto 2021-11-17 18:55:31 +00:00 committed by Tint LUCI CQ
parent be11f9f9ca
commit d018d2e5bc
4 changed files with 69 additions and 16 deletions

View File

@ -61,14 +61,12 @@ Lexer::Lexer(const std::string& file_path, const Source::FileContent* content)
Lexer::~Lexer() = default; Lexer::~Lexer() = default;
Token Lexer::next() { Token Lexer::next() {
skip_whitespace(); auto t = skip_whitespace_and_comments();
skip_comments(); if (!t.IsUninitialized()) {
return t;
if (is_eof()) {
return {Token::Type::kEOF, begin_source()};
} }
auto t = try_hex_float(); t = try_hex_float();
if (!t.IsUninitialized()) { if (!t.IsUninitialized()) {
return t; return t;
} }
@ -140,7 +138,7 @@ bool Lexer::matches(size_t pos, const std::string& substr) {
return content_->data.substr(pos, substr.size()) == substr; return content_->data.substr(pos, substr.size()) == substr;
} }
void Lexer::skip_whitespace() { Token Lexer::skip_whitespace_and_comments() {
for (;;) { for (;;) {
auto pos = pos_; auto pos = pos_;
while (!is_eof() && is_whitespace(content_->data[pos_])) { while (!is_eof() && is_whitespace(content_->data[pos_])) {
@ -155,27 +153,41 @@ void Lexer::skip_whitespace() {
location_.column++; location_.column++;
} }
skip_comments(); auto t = skip_comment();
if (!t.IsUninitialized()) {
return t;
}
// If the cursor didn't advance we didn't remove any whitespace // If the cursor didn't advance we didn't remove any whitespace
// so we're done. // so we're done.
if (pos == pos_) if (pos == pos_)
break; break;
} }
if (is_eof()) {
return {Token::Type::kEOF, begin_source()};
}
return {};
} }
void Lexer::skip_comments() { Token Lexer::skip_comment() {
if (matches(pos_, "//")) { if (matches(pos_, "//")) {
// Line comment: ignore everything until the end of line. // Line comment: ignore everything until the end of line
// or end of input.
while (!is_eof() && !matches(pos_, "\n")) { while (!is_eof() && !matches(pos_, "\n")) {
pos_++; pos_++;
location_.column++; location_.column++;
} }
return; return {};
} }
if (matches(pos_, "/*")) { if (matches(pos_, "/*")) {
// Block comment: ignore everything until the closing '*/' token. // Block comment: ignore everything until the closing '*/' token.
// Record source location of the initial '/*'
auto source = begin_source();
source.range.end.column += 1;
pos_ += 2; pos_ += 2;
location_.column += 2; location_.column += 2;
@ -202,7 +214,11 @@ void Lexer::skip_comments() {
location_.column++; location_.column++;
} }
} }
if (depth > 0) {
return {Token::Type::kError, source, "unterminated block comment"};
}
} }
return {};
} }
Token Lexer::try_float() { Token Lexer::try_float() {

View File

@ -32,13 +32,19 @@ class Lexer {
Lexer(const std::string& file_path, const Source::FileContent* content); Lexer(const std::string& file_path, const Source::FileContent* content);
~Lexer(); ~Lexer();
/// Returns the next token in the input stream /// Returns the next token in the input stream.
/// @return Token /// @return Token
Token next(); Token next();
private: private:
void skip_whitespace(); /// Advances past whitespace and comments, if present
void skip_comments(); /// at the current position.
/// @returns uninitialized token on success, or error
Token skip_whitespace_and_comments();
/// Advances past a comment at the current position,
/// if one exists.
/// @returns uninitialized token on success, or error
Token skip_comment();
Token build_token_from_int_if_possible(Source source, Token build_token_from_int_if_possible(Source source,
size_t start, size_t start,
@ -55,6 +61,7 @@ class Lexer {
Source begin_source() const; Source begin_source() const;
void end_source(Source&) const; void end_source(Source&) const;
/// @returns true if the end of the input has been reached.
bool is_eof() const; bool is_eof() const;
/// @param ch a character /// @param ch a character
/// @returns true if 'ch' is an alphabetic character /// @returns true if 'ch' is an alphabetic character

View File

@ -110,6 +110,24 @@ text // nested line comments are ignored /* more text
EXPECT_TRUE(t.IsEof()); EXPECT_TRUE(t.IsEof());
} }
// Verifies that a block comment which is still open when the input ends is
// reported as an error token rather than being silently skipped, and that the
// token's source range points at the comment opener.
TEST_F(LexerTest, Skips_Comments_Block_Unterminated) {
  // I had to break up the /* because otherwise the clang readability check
  // errored out saying it could not find the end of a multi-line comment.
  // The opener is therefore spelled as two adjacent raw string literals,
  // which the compiler concatenates into a single string.
  //
  // The two spaces in front of '/' are significant: together with the empty
  // first line of the literal they place the comment opener at line 2,
  // column 3, which is what the expectations below check.
  Source::FileContent content(R"(
  /)"
                              R"(*
abcd)");
  Lexer l("test.wgsl", &content);

  auto t = l.next();
  // ASSERT (not EXPECT): the remaining checks only make sense on an error
  // token.
  ASSERT_TRUE(t.Is(Token::Type::kError));
  EXPECT_EQ(t.to_str(), "unterminated block comment");
  // The expected range starts at the opener (line 2, column 3) and ends one
  // column later on the same line.
  EXPECT_EQ(t.source().range.begin.line, 2u);
  EXPECT_EQ(t.source().range.begin.column, 3u);
  EXPECT_EQ(t.source().range.end.line, 2u);
  EXPECT_EQ(t.source().range.end.column, 4u);
}
struct FloatData { struct FloatData {
const char* input; const char* input;
float result; float result;

View File

@ -85,7 +85,7 @@ fn main() {
EXPECT_EQ(p->error(), "5:1: exponent is too large for hex float"); EXPECT_EQ(p->error(), "5:1: exponent is too large for hex float");
} }
TEST_F(ParserImplTest, Comments) { TEST_F(ParserImplTest, Comments_TerminatedBlockComment) {
auto p = parser(R"( auto p = parser(R"(
/** /**
* Here is my shader. * Here is my shader.
@ -99,12 +99,24 @@ no
parameters parameters
*/) -> [[location(0)]] vec4<f32> { */) -> [[location(0)]] vec4<f32> {
return/*block_comments_delimit_tokens*/vec4<f32>(.4, .2, .3, 1); return/*block_comments_delimit_tokens*/vec4<f32>(.4, .2, .3, 1);
}/* unterminated block comments are OK at EOF...)"); }/* block comments are OK at EOF...*/)");
ASSERT_TRUE(p->Parse()) << p->error(); ASSERT_TRUE(p->Parse()) << p->error();
ASSERT_EQ(1u, p->program().AST().Functions().size()); ASSERT_EQ(1u, p->program().AST().Functions().size());
} }
// Checks that a block comment left open at the end of the input makes parsing
// fail with an "unterminated block comment" error, even though the shader
// text before the comment is otherwise complete.
TEST_F(ParserImplTest, Comments_UnterminatedBlockComment) {
// The raw string begins with an empty line, so the text below occupies
// lines 2 through 5 of the parsed input. The final comment is never closed.
auto p = parser(R"(
[[stage(fragment)]]
fn main() -> [[location(0)]] vec4<f32> {
return vec4<f32>(.4, .2, .3, 1);
} /* unterminated block comments are invalid ...)");
// Parsing must fail and the parser must record an error.
ASSERT_FALSE(p->Parse());
ASSERT_TRUE(p->has_error());
// The "5:3" prefix is consistent with line 5, column 3 of the input above,
// which is where the comment opener starts (after the closing brace of main
// and one space). The trailing stream insertion prints the actual message
// if the comparison fails.
EXPECT_EQ(p->error(), "5:3: unterminated block comment") << p->error();
}
} // namespace } // namespace
} // namespace wgsl } // namespace wgsl
} // namespace reader } // namespace reader