null character on input is an error

Fixes: tint:1311
Change-Id: Id80adc2c14c6d2cd5ee884e081d1d84f021e6620
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/72200
Auto-Submit: David Neto <dneto@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: David Neto <dneto@google.com>
This commit is contained in:
David Neto 2021-12-09 15:57:30 +00:00 committed by Tint LUCI CQ
parent 170a50353c
commit 01e4b54497
3 changed files with 78 additions and 5 deletions

View File

@ -96,7 +96,8 @@ Token Lexer::next() {
return t;
}
return {Token::Type::kError, begin_source(), "invalid character found"};
return {Token::Type::kError, begin_source(),
(is_null() ? "null character found" : "invalid character found")};
}
Source Lexer::begin_source() const {
@ -116,6 +117,10 @@ bool Lexer::is_eof() const {
return pos_ >= len_;
}
/// Reports whether the character at the current position is a null byte.
/// The content is only read when the current position is inside the input.
bool Lexer::is_null() const {
  if (pos_ >= len_) {
    // Nothing left to inspect, so there is no null character here.
    return false;
  }
  return content_->data[pos_] == 0;
}
/// Reports whether `ch` is an alphabetic character according to
/// std::isalpha.
/// @param ch the character to classify
/// @returns true if `ch` is alphabetic
bool Lexer::is_alpha(char ch) const {
  // std::isalpha has undefined behaviour when its argument is neither EOF nor
  // representable as unsigned char. A plain `char` holding a byte >= 0x80 is
  // negative on platforms where char is signed, so convert first.
  return std::isalpha(static_cast<unsigned char>(ch)) != 0;
}
@ -175,6 +180,9 @@ Token Lexer::skip_comment() {
// Line comment: ignore everything until the end of line
// or end of input.
while (!is_eof() && !matches(pos_, "\n")) {
if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"};
}
pos_++;
location_.column++;
}
@ -208,6 +216,8 @@ Token Lexer::skip_comment() {
pos_++;
location_.line++;
location_.column = 1;
} else if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"};
} else {
// Anything else: skip and update source location.
pos_++;
@ -653,10 +663,11 @@ Token Lexer::try_hex_integer() {
end++;
}
if (!matches(end, "0x")) {
if (matches(end, "0x")) {
end += 2;
} else {
return {};
}
end += 2;
auto first = end;
while (!is_eof() && is_hex(content_->data[end])) {

View File

@ -41,8 +41,9 @@ class Lexer {
/// at the current position.
/// @returns error token, EOF, or uninitialized
Token skip_whitespace_and_comments();
/// Advances past a comment at the current position,
/// if one exists.
/// Advances past a comment at the current position, if one exists.
/// Returns an error if there was an unterminated block comment,
/// or a null character was present.
/// @returns uninitialized token on success, or error
Token skip_comment();
@ -72,6 +73,9 @@ class Lexer {
/// @returns true if the end of the input has been reached.
bool is_eof() const;
/// @returns true if the end of the input has not been reached and the
/// character at the current position is the null character.
bool is_null() const;
/// @param ch a character
/// @returns true if 'ch' is an alphabetic character
bool is_alpha(char ch) const;

View File

@ -128,6 +128,64 @@ abcd)");
EXPECT_EQ(t.source().range.end.column, 4u);
}
// A null byte between two spaces must produce an error token whose source
// range is the position of the null byte (line 1, column 2).
TEST_F(LexerTest, Null_InWhitespace_IsError) {
  Source::FileContent content(std::string{' ', '\0', ' '});
  Lexer lexer("test.wgsl", &content);

  auto token = lexer.next();
  EXPECT_TRUE(token.IsError());

  const auto& range = token.source().range;
  EXPECT_EQ(range.begin.line, 1u);
  EXPECT_EQ(range.begin.column, 2u);
  EXPECT_EQ(range.end.line, 1u);
  EXPECT_EQ(range.end.column, 2u);

  EXPECT_EQ(token.to_str(), "null character found");
}
// A null byte inside a `//` line comment must produce an error token whose
// source range is the position of the null byte (line 1, column 4).
TEST_F(LexerTest, Null_InLineComment_IsError) {
  Source::FileContent content(std::string{'/', '/', ' ', '\0', ' '});
  Lexer lexer("test.wgsl", &content);

  auto token = lexer.next();
  EXPECT_TRUE(token.IsError());

  const auto& range = token.source().range;
  EXPECT_EQ(range.begin.line, 1u);
  EXPECT_EQ(range.begin.column, 4u);
  EXPECT_EQ(range.end.line, 1u);
  EXPECT_EQ(range.end.column, 4u);

  EXPECT_EQ(token.to_str(), "null character found");
}
// A null byte inside a `/* ... */` block comment must produce an error token
// whose source range is the position of the null byte (line 1, column 4).
TEST_F(LexerTest, Null_InBlockComment_IsError) {
  Source::FileContent content(std::string{'/', '*', ' ', '\0', '*', '/'});
  Lexer lexer("test.wgsl", &content);

  auto token = lexer.next();
  EXPECT_TRUE(token.IsError());

  const auto& range = token.source().range;
  EXPECT_EQ(range.begin.line, 1u);
  EXPECT_EQ(range.begin.column, 4u);
  EXPECT_EQ(range.end.line, 1u);
  EXPECT_EQ(range.end.column, 4u);

  EXPECT_EQ(token.to_str(), "null character found");
}
// A null byte in the middle of what would otherwise be an identifier: the
// lexer first yields the identifier before the null, then an error token at
// the null byte (line 1, column 2).
TEST_F(LexerTest, Null_InIdentifier_IsError) {
  // The identifier case stands in for the other valid token kinds, which
  // will behave similarly when a null is inserted.
  Source::FileContent content(std::string{'a', '\0', 'c'});
  Lexer lexer("test.wgsl", &content);

  auto token = lexer.next();
  EXPECT_TRUE(token.IsIdentifier());
  EXPECT_EQ(token.to_str(), "a");

  token = lexer.next();
  EXPECT_TRUE(token.IsError());

  const auto& range = token.source().range;
  EXPECT_EQ(range.begin.line, 1u);
  EXPECT_EQ(range.begin.column, 2u);
  EXPECT_EQ(range.end.line, 1u);
  EXPECT_EQ(range.end.column, 2u);

  EXPECT_EQ(token.to_str(), "null character found");
}
struct FloatData {
const char* input;
float result;