Check number of digits in integer during tokenization
While looking ahead to determine if a token is an integer, check the number of digits to make sure that it can actually fit in the internal representation. This is an optimization on the existing code, to cause an early exit and prevent pathological cases with huge integers from consuming too much processing time, when they will never succeed. From a functional perspective this has no effect on whether or not a token will be accepted as an integer, so almost all of the tests do not need an update. The one exception is a case where the lexer now catches the invalid integer earlier in the tokenization, so the error message is shorter. This does not handle the equivalent problem for float literals, though I believe that only exists for non-hex floats. BUG=chromium:1240715 Change-Id: I27e43711d5f5eda1d54a4128ba514f810abd0313 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/62280 Auto-Submit: Ryan Harrison <rharrison@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
parent
9021eb5594
commit
200cdd2052
|
@ -543,6 +543,7 @@ Token Lexer::build_token_from_int_if_possible(Source source,
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_hex_integer() {
|
Token Lexer::try_hex_integer() {
|
||||||
|
constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types
|
||||||
auto start = pos_;
|
auto start = pos_;
|
||||||
auto end = pos_;
|
auto end = pos_;
|
||||||
|
|
||||||
|
@ -551,13 +552,23 @@ Token Lexer::try_hex_integer() {
|
||||||
if (matches(end, "-")) {
|
if (matches(end, "-")) {
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!matches(end, "0x")) {
|
if (!matches(end, "0x")) {
|
||||||
return Token();
|
return {};
|
||||||
}
|
}
|
||||||
end += 2;
|
end += 2;
|
||||||
|
|
||||||
|
auto first = end;
|
||||||
while (!is_eof() && is_hex(content_->data[end])) {
|
while (!is_eof() && is_hex(content_->data[end])) {
|
||||||
end += 1;
|
end++;
|
||||||
|
|
||||||
|
auto digits = end - first;
|
||||||
|
if (digits > kMaxDigits) {
|
||||||
|
return {Token::Type::kError, source,
|
||||||
|
"integer literal (" +
|
||||||
|
content_->data.substr(start, end - 1 - start) +
|
||||||
|
"...) has too many digits"};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pos_ = end;
|
pos_ = end;
|
||||||
|
@ -567,6 +578,7 @@ Token Lexer::try_hex_integer() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_integer() {
|
Token Lexer::try_integer() {
|
||||||
|
constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types
|
||||||
auto start = pos_;
|
auto start = pos_;
|
||||||
auto end = start;
|
auto end = start;
|
||||||
|
|
||||||
|
@ -575,6 +587,7 @@ Token Lexer::try_integer() {
|
||||||
if (matches(end, "-")) {
|
if (matches(end, "-")) {
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end >= len_ || !is_digit(content_->data[end])) {
|
if (end >= len_ || !is_digit(content_->data[end])) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -582,6 +595,14 @@ Token Lexer::try_integer() {
|
||||||
auto first = end;
|
auto first = end;
|
||||||
while (end < len_ && is_digit(content_->data[end])) {
|
while (end < len_ && is_digit(content_->data[end])) {
|
||||||
end++;
|
end++;
|
||||||
|
|
||||||
|
auto digits = end - first;
|
||||||
|
if (digits > kMaxDigits) {
|
||||||
|
return {Token::Type::kError, source,
|
||||||
|
"integer literal (" +
|
||||||
|
content_->data.substr(start, end - 1 - start) +
|
||||||
|
"...) has too many digits"};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the first digit is a zero this must only be zero as leading zeros
|
// If the first digit is a zero this must only be zero as leading zeros
|
||||||
|
|
|
@ -251,6 +251,27 @@ TEST_F(LexerTest, IntegerTest_HexSignedTooSmall) {
|
||||||
EXPECT_EQ(t.to_str(), "i32 (-0x8000000F) too small");
|
EXPECT_EQ(t.to_str(), "i32 (-0x8000000F) too small");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(LexerTest, IntegerTest_HexSignedTooManyDigits) {
|
||||||
|
{
|
||||||
|
Source::FileContent content("-0x100000000000000000000000");
|
||||||
|
Lexer l("test.wgsl", &content);
|
||||||
|
|
||||||
|
auto t = l.next();
|
||||||
|
ASSERT_TRUE(t.Is(Token::Type::kError));
|
||||||
|
EXPECT_EQ(t.to_str(),
|
||||||
|
"integer literal (-0x10000000...) has too many digits");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Source::FileContent content("0x100000000000000");
|
||||||
|
Lexer l("test.wgsl", &content);
|
||||||
|
|
||||||
|
auto t = l.next();
|
||||||
|
ASSERT_TRUE(t.Is(Token::Type::kError));
|
||||||
|
EXPECT_EQ(t.to_str(),
|
||||||
|
"integer literal (0x10000000...) has too many digits");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct HexUnsignedIntData {
|
struct HexUnsignedIntData {
|
||||||
const char* input;
|
const char* input;
|
||||||
uint32_t result;
|
uint32_t result;
|
||||||
|
@ -287,13 +308,13 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
HexUnsignedIntData{"0xFFFFFFFFu",
|
HexUnsignedIntData{"0xFFFFFFFFu",
|
||||||
std::numeric_limits<uint32_t>::max()}));
|
std::numeric_limits<uint32_t>::max()}));
|
||||||
|
|
||||||
TEST_F(LexerTest, IntegerTest_HexUnsignedTooLarge) {
|
TEST_F(LexerTest, IntegerTest_HexUnsignedTooManyDigits) {
|
||||||
Source::FileContent content("0xffffffffffu");
|
Source::FileContent content("0x1000000000000000000000u");
|
||||||
Lexer l("test.wgsl", &content);
|
Lexer l("test.wgsl", &content);
|
||||||
|
|
||||||
auto t = l.next();
|
auto t = l.next();
|
||||||
ASSERT_TRUE(t.Is(Token::Type::kError));
|
ASSERT_TRUE(t.Is(Token::Type::kError));
|
||||||
EXPECT_EQ(t.to_str(), "u32 (0xffffffffff) too large");
|
EXPECT_EQ(t.to_str(), "integer literal (0x10000000...) has too many digits");
|
||||||
}
|
}
|
||||||
|
|
||||||
struct UnsignedIntData {
|
struct UnsignedIntData {
|
||||||
|
@ -325,6 +346,15 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
|
||||||
UnsignedIntData{"4294967295u",
|
UnsignedIntData{"4294967295u",
|
||||||
4294967295u}));
|
4294967295u}));
|
||||||
|
|
||||||
|
TEST_F(LexerTest, IntegerTest_UnsignedTooManyDigits) {
|
||||||
|
Source::FileContent content("10000000000000000000000u");
|
||||||
|
Lexer l("test.wgsl", &content);
|
||||||
|
|
||||||
|
auto t = l.next();
|
||||||
|
ASSERT_TRUE(t.Is(Token::Type::kError));
|
||||||
|
EXPECT_EQ(t.to_str(), "integer literal (1000000000...) has too many digits");
|
||||||
|
}
|
||||||
|
|
||||||
struct SignedIntData {
|
struct SignedIntData {
|
||||||
const char* input;
|
const char* input;
|
||||||
int32_t result;
|
int32_t result;
|
||||||
|
@ -357,6 +387,15 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
SignedIntData{"2147483647", 2147483647},
|
SignedIntData{"2147483647", 2147483647},
|
||||||
SignedIntData{"-2147483648", -2147483648LL}));
|
SignedIntData{"-2147483648", -2147483648LL}));
|
||||||
|
|
||||||
|
TEST_F(LexerTest, IntegerTest_SignedTooManyDigits) {
|
||||||
|
Source::FileContent content("-10000000000000000");
|
||||||
|
Lexer l("test.wgsl", &content);
|
||||||
|
|
||||||
|
auto t = l.next();
|
||||||
|
ASSERT_TRUE(t.Is(Token::Type::kError));
|
||||||
|
EXPECT_EQ(t.to_str(), "integer literal (-1000000000...) has too many digits");
|
||||||
|
}
|
||||||
|
|
||||||
using IntegerTest_Invalid = testing::TestWithParam<const char*>;
|
using IntegerTest_Invalid = testing::TestWithParam<const char*>;
|
||||||
TEST_P(IntegerTest_Invalid, Parses) {
|
TEST_P(IntegerTest_Invalid, Parses) {
|
||||||
Source::FileContent content(GetParam());
|
Source::FileContent content(GetParam());
|
||||||
|
|
Loading…
Reference in New Issue