Check number of digits in integer during tokenization

While looking ahead to determine if a token is an integer, check the
number of digits to make sure that it can actually fit in the internal
representation.

This is an optimization on the existing code, to cause an early exit
and prevent pathological cases with huge integers from consuming too
much processing time, when they will never succeed.

From a functional perspective this has no effect on whether or not a
token will be accepted as an integer, so almost all of the tests do not
need an update. The one exception is a case where the lexer now
catches the invalid integer earlier in the tokenization, so the error
message is shorter.

This does not handle the equivalent problem for float literals, though
I believe that only exists for non-hex floats.

BUG=chromium:1240715

Change-Id: I27e43711d5f5eda1d54a4128ba514f810abd0313
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/62280
Auto-Submit: Ryan Harrison <rharrison@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
Ryan Harrison 2021-08-27 08:29:37 +00:00 committed by Tint LUCI CQ
parent 9021eb5594
commit 200cdd2052
2 changed files with 65 additions and 5 deletions

View File

@ -543,6 +543,7 @@ Token Lexer::build_token_from_int_if_possible(Source source,
}
Token Lexer::try_hex_integer() {
constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types
auto start = pos_;
auto end = pos_;
@ -551,13 +552,23 @@ Token Lexer::try_hex_integer() {
if (matches(end, "-")) {
end++;
}
if (!matches(end, "0x")) {
return Token();
return {};
}
end += 2;
auto first = end;
while (!is_eof() && is_hex(content_->data[end])) {
end += 1;
end++;
auto digits = end - first;
if (digits > kMaxDigits) {
return {Token::Type::kError, source,
"integer literal (" +
content_->data.substr(start, end - 1 - start) +
"...) has too many digits"};
}
}
pos_ = end;
@ -567,6 +578,7 @@ Token Lexer::try_hex_integer() {
}
Token Lexer::try_integer() {
constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types
auto start = pos_;
auto end = start;
@ -575,6 +587,7 @@ Token Lexer::try_integer() {
if (matches(end, "-")) {
end++;
}
if (end >= len_ || !is_digit(content_->data[end])) {
return {};
}
@ -582,6 +595,14 @@ Token Lexer::try_integer() {
auto first = end;
while (end < len_ && is_digit(content_->data[end])) {
end++;
auto digits = end - first;
if (digits > kMaxDigits) {
return {Token::Type::kError, source,
"integer literal (" +
content_->data.substr(start, end - 1 - start) +
"...) has too many digits"};
}
}
// If the first digit is a zero this must only be zero as leading zeros

View File

@ -251,6 +251,27 @@ TEST_F(LexerTest, IntegerTest_HexSignedTooSmall) {
EXPECT_EQ(t.to_str(), "i32 (-0x8000000F) too small");
}
TEST_F(LexerTest, IntegerTest_HexSignedTooManyDigits) {
{
Source::FileContent content("-0x100000000000000000000000");
Lexer l("test.wgsl", &content);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(),
"integer literal (-0x10000000...) has too many digits");
}
{
Source::FileContent content("0x100000000000000");
Lexer l("test.wgsl", &content);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(),
"integer literal (0x10000000...) has too many digits");
}
}
struct HexUnsignedIntData {
const char* input;
uint32_t result;
@ -287,13 +308,13 @@ INSTANTIATE_TEST_SUITE_P(
HexUnsignedIntData{"0xFFFFFFFFu",
std::numeric_limits<uint32_t>::max()}));
TEST_F(LexerTest, IntegerTest_HexUnsignedTooLarge) {
Source::FileContent content("0xffffffffffu");
TEST_F(LexerTest, IntegerTest_HexUnsignedTooManyDigits) {
Source::FileContent content("0x1000000000000000000000u");
Lexer l("test.wgsl", &content);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(), "u32 (0xffffffffff) too large");
EXPECT_EQ(t.to_str(), "integer literal (0x10000000...) has too many digits");
}
struct UnsignedIntData {
@ -325,6 +346,15 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
UnsignedIntData{"4294967295u",
4294967295u}));
TEST_F(LexerTest, IntegerTest_UnsignedTooManyDigits) {
Source::FileContent content("10000000000000000000000u");
Lexer l("test.wgsl", &content);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(), "integer literal (1000000000...) has too many digits");
}
struct SignedIntData {
const char* input;
int32_t result;
@ -357,6 +387,15 @@ INSTANTIATE_TEST_SUITE_P(
SignedIntData{"2147483647", 2147483647},
SignedIntData{"-2147483648", -2147483648LL}));
TEST_F(LexerTest, IntegerTest_SignedTooManyDigits) {
Source::FileContent content("-10000000000000000");
Lexer l("test.wgsl", &content);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(), "integer literal (-1000000000...) has too many digits");
}
using IntegerTest_Invalid = testing::TestWithParam<const char*>;
TEST_P(IntegerTest_Invalid, Parses) {
Source::FileContent content(GetParam());