Terminate line comments with \v, \f, and \r

The WGSL spec says that line comments are terminated by any blankspace
other than a space or a horizontal tab.

Also rename is_whitespace to is_blankspace and tighten up the
definition to only include the characters listed in the WGSL spec.

Change-Id: I4fee0175980ab70e9baf107a6e79ab5c2e4f906d
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/83920
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: James Price <jrprice@google.com>
This commit is contained in:
James Price 2022-03-16 21:45:44 +00:00 committed by Tint LUCI CQ
parent 2a761f736a
commit 453d5ae84e
4 changed files with 54 additions and 15 deletions

View File

@ -28,8 +28,10 @@ namespace reader {
namespace wgsl {
namespace {
bool is_whitespace(char c) {
return std::isspace(static_cast<unsigned char>(c));
/// @param c the character to classify
/// @returns true if `c` is one of the six single-byte blankspace characters
/// checked here: space, horizontal tab, line feed, vertical tab, form feed or
/// carriage return.
bool is_blankspace(char c) {
  // See https://www.w3.org/TR/WGSL/#blankspace.
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
  }
}
uint32_t dec_value(char c) {
@ -62,7 +64,7 @@ Lexer::Lexer(const Source::File* file)
Lexer::~Lexer() = default;
Token Lexer::next() {
if (auto t = skip_whitespace_and_comments(); !t.IsUninitialized()) {
if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
return t;
}
@ -128,10 +130,10 @@ bool Lexer::matches(size_t pos, std::string_view substr) {
return file_->content.data_view.substr(pos, substr.size()) == substr;
}
Token Lexer::skip_whitespace_and_comments() {
Token Lexer::skip_blankspace_and_comments() {
for (;;) {
auto pos = pos_;
while (!is_eof() && is_whitespace(file_->content.data[pos_])) {
while (!is_eof() && is_blankspace(file_->content.data[pos_])) {
if (matches(pos_, "\n")) {
pos_++;
location_.line++;
@ -148,7 +150,7 @@ Token Lexer::skip_whitespace_and_comments() {
return t;
}
// If the cursor didn't advance we didn't remove any whitespace
// If the cursor didn't advance we didn't remove any blankspace
// so we're done.
if (pos == pos_)
break;
@ -162,9 +164,10 @@ Token Lexer::skip_whitespace_and_comments() {
Token Lexer::skip_comment() {
if (matches(pos_, "//")) {
// Line comment: ignore everything until the end of line
// or end of input.
while (!is_eof() && !matches(pos_, "\n")) {
// Line comment: ignore everything until the end of input or a blankspace
// character other than space or horizontal tab.
while (!is_eof() && !(is_blankspace(file_->content.data[pos_]) &&
!matches(pos_, " ") && !matches(pos_, "\t"))) {
if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"};
}

View File

@ -36,10 +36,9 @@ class Lexer {
Token next();
private:
/// Advances past whitespace and comments, if present
/// at the current position.
/// Advances past blankspace and comments, if present at the current position.
/// @returns error token, EOF, or uninitialized
Token skip_whitespace_and_comments();
Token skip_blankspace_and_comments();
/// Advances past a comment at the current position, if one exists.
/// Returns an error if there was an unterminated block comment,
/// or a null character was present.

View File

@ -32,7 +32,7 @@ TEST_F(LexerTest, Empty) {
EXPECT_TRUE(t.IsEof());
}
TEST_F(LexerTest, Skips_Whitespace) {
TEST_F(LexerTest, Skips_Blankspace) {
Source::File file("", "\t\r\n\t ident\t\n\t \r ");
Lexer l(&file);
@ -75,6 +75,43 @@ ident1 //ends with comment
EXPECT_TRUE(t.IsEof());
}
using LineCommentTerminatorTest = testing::TestWithParam<char>;

// Lexes "let// This is a comment<c>ident" for each parameter character <c>
// and checks whether <c> ended the line comment. Space and horizontal tab are
// expected to leave "ident" inside the comment; every other parameter is
// expected to end the comment so that "ident" is lexed as an identifier.
TEST_P(LineCommentTerminatorTest, Terminators) {
  const char terminator = GetParam();
  const bool ends_comment = !(terminator == ' ' || terminator == '\t');

  std::string src = "let// This is a comment";
  src.push_back(terminator);
  src.append("ident");

  Source::File file("", src);
  Lexer l(&file);

  // The leading `let` keyword occupies columns 1-3 of line 1.
  auto tok = l.next();
  EXPECT_TRUE(tok.Is(Token::Type::kLet));
  EXPECT_EQ(tok.source().range.begin.line, 1u);
  EXPECT_EQ(tok.source().range.begin.column, 1u);
  EXPECT_EQ(tok.source().range.end.line, 1u);
  EXPECT_EQ(tok.source().range.end.column, 4u);

  if (ends_comment) {
    // Only '\n' is expected to start a new line. For the other terminators
    // the identifier is expected on line 1, directly after the 23-character
    // prefix and the single terminator character, i.e. at column 25.
    size_t line = 1u;
    size_t col = 25u;
    if (terminator == '\n') {
      line = 2u;
      col = 1u;
    }

    tok = l.next();
    EXPECT_TRUE(tok.IsIdentifier());
    EXPECT_EQ(tok.source().range.begin.line, line);
    EXPECT_EQ(tok.source().range.begin.column, col);
    EXPECT_EQ(tok.source().range.end.line, line);
    EXPECT_EQ(tok.source().range.end.column, col + 5);
    EXPECT_EQ(tok.to_str(), "ident");
  }

  // Nothing else may follow, whether or not the comment was terminated.
  tok = l.next();
  EXPECT_TRUE(tok.IsEof());
}

INSTANTIATE_TEST_SUITE_P(LexerTest,
                         LineCommentTerminatorTest,
                         testing::Values(' ', '\t', '\n', '\v', '\f', '\r'));
TEST_F(LexerTest, Skips_Comments_Block) {
Source::File file("", R"(/* comment
text */ident)");
@ -128,7 +165,7 @@ abcd)");
EXPECT_EQ(t.source().range.end.column, 4u);
}
TEST_F(LexerTest, Null_InWhitespace_IsError) {
TEST_F(LexerTest, Null_InBlankspace_IsError) {
Source::File file("", std::string{' ', 0, ' '});
Lexer l(&file);

View File

@ -32,7 +32,7 @@ class TextGenerator {
public:
/// Line holds a single line of text
struct Line {
/// The indentation of the line in whitespaces
/// The indentation of the line in blankspace
uint32_t indent = 0;
/// The content of the line, without a trailing newline character
std::string content;