Optimize the lexer match method.

This CL adds a `matches` overload that compares against a single character
instead of a string. In `try_punctuation`, if the leading character doesn't
match, we skip the checks for every punctuation token which starts with that character.

Change-Id: Ifddc1ff3f3936ab3a53c37e080394ed35dc2aecf
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97065
Auto-Submit: Dan Sinclair <dsinclair@chromium.org>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
dan sinclair 2022-07-25 14:54:18 +00:00 committed by Dawn LUCI CQ
parent 08482ec7ed
commit 833ccab384
2 changed files with 158 additions and 126 deletions

View File

@ -204,6 +204,13 @@ bool Lexer::matches(size_t pos, std::string_view sub_string) {
return substr(pos, sub_string.size()) == sub_string; return substr(pos, sub_string.size()) == sub_string;
} }
/// @param pos the offset to inspect
/// @param ch the character to compare against
/// @returns true if `pos` is less than length() and the character at `pos`
/// equals `ch`; false otherwise
bool Lexer::matches(size_t pos, char ch) {
    // Short-circuit keeps the index in range: line()[pos] is only read when
    // pos is less than length().
    return pos < length() && line()[pos] == ch;
}
Token Lexer::skip_blankspace_and_comments() { Token Lexer::skip_blankspace_and_comments() {
for (;;) { for (;;) {
auto loc = location_; auto loc = location_;
@ -298,7 +305,7 @@ Token Lexer::try_float() {
auto source = begin_source(); auto source = begin_source();
bool has_mantissa_digits = false; bool has_mantissa_digits = false;
if (matches(end, "-")) { if (matches(end, '-')) {
end++; end++;
} }
while (end < length() && is_digit(at(end))) { while (end < length() && is_digit(at(end))) {
@ -307,7 +314,7 @@ Token Lexer::try_float() {
} }
bool has_point = false; bool has_point = false;
if (end < length() && matches(end, ".")) { if (end < length() && matches(end, '.')) {
has_point = true; has_point = true;
end++; end++;
} }
@ -323,9 +330,9 @@ Token Lexer::try_float() {
// Parse the exponent if one exists // Parse the exponent if one exists
bool has_exponent = false; bool has_exponent = false;
if (end < length() && (matches(end, "e") || matches(end, "E"))) { if (end < length() && (matches(end, 'e') || matches(end, 'E'))) {
end++; end++;
if (end < length() && (matches(end, "+") || matches(end, "-"))) { if (end < length() && (matches(end, '+') || matches(end, '-'))) {
end++; end++;
} }
@ -344,10 +351,10 @@ Token Lexer::try_float() {
bool has_f_suffix = false; bool has_f_suffix = false;
bool has_h_suffix = false; bool has_h_suffix = false;
if (end < length() && matches(end, "f")) { if (end < length() && matches(end, 'f')) {
end++; end++;
has_f_suffix = true; has_f_suffix = true;
} else if (end < length() && matches(end, "h")) { } else if (end < length() && matches(end, 'h')) {
end++; end++;
has_h_suffix = true; has_h_suffix = true;
} }
@ -410,12 +417,12 @@ Token Lexer::try_hex_float() {
// -? // -?
uint64_t sign_bit = 0; uint64_t sign_bit = 0;
if (matches(end, "-")) { if (matches(end, '-')) {
sign_bit = 1; sign_bit = 1;
end++; end++;
} }
// 0[xX] // 0[xX]
if (matches(end, "0x") || matches(end, "0X")) { if (matches(end, '0') && (matches(end + 1, 'x') || matches(end + 1, 'X'))) {
end += 2; end += 2;
} else { } else {
return {}; return {};
@ -461,7 +468,7 @@ Token Lexer::try_hex_float() {
// .? // .?
bool hex_point = false; bool hex_point = false;
if (matches(end, ".")) { if (matches(end, '.')) {
hex_point = true; hex_point = true;
end++; end++;
} }
@ -479,7 +486,7 @@ Token Lexer::try_hex_float() {
} }
// Is the binary exponent present? It's optional. // Is the binary exponent present? It's optional.
const bool has_exponent = (matches(end, "p") || matches(end, "P")); const bool has_exponent = (matches(end, 'p') || matches(end, 'P'));
if (has_exponent) { if (has_exponent) {
end++; end++;
} }
@ -560,9 +567,9 @@ Token Lexer::try_hex_float() {
if (has_exponent) { if (has_exponent) {
// Parse the rest of the exponent. // Parse the rest of the exponent.
// (+|-)? // (+|-)?
if (matches(end, "+")) { if (matches(end, '+')) {
end++; end++;
} else if (matches(end, "-")) { } else if (matches(end, '-')) {
exponent_sign = -1; exponent_sign = -1;
end++; end++;
} }
@ -587,10 +594,10 @@ Token Lexer::try_hex_float() {
// Parse optional 'f' or 'h' suffix. For a hex float, it can only exist // Parse optional 'f' or 'h' suffix. For a hex float, it can only exist
// when the exponent is present. Otherwise it will look like // when the exponent is present. Otherwise it will look like
// one of the mantissa digits. // one of the mantissa digits.
if (end < length() && matches(end, "f")) { if (end < length() && matches(end, 'f')) {
has_f_suffix = true; has_f_suffix = true;
end++; end++;
} else if (end < length() && matches(end, "h")) { } else if (end < length() && matches(end, 'h')) {
has_h_suffix = true; has_h_suffix = true;
end++; end++;
} }
@ -794,7 +801,7 @@ Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32
advance(static_cast<size_t>(end_ptr - start_ptr)); advance(static_cast<size_t>(end_ptr - start_ptr));
} }
if (matches(pos(), "u")) { if (matches(pos(), 'u')) {
if (!overflow && CheckedConvert<u32>(AInt(res))) { if (!overflow && CheckedConvert<u32>(AInt(res))) {
advance(1); advance(1);
end_source(source); end_source(source);
@ -803,7 +810,7 @@ Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32
return {Token::Type::kError, source, "value cannot be represented as 'u32'"}; return {Token::Type::kError, source, "value cannot be represented as 'u32'"};
} }
if (matches(pos(), "i")) { if (matches(pos(), 'i')) {
if (!overflow && CheckedConvert<i32>(AInt(res))) { if (!overflow && CheckedConvert<i32>(AInt(res))) {
advance(1); advance(1);
end_source(source); end_source(source);
@ -825,11 +832,11 @@ Token Lexer::try_hex_integer() {
auto source = begin_source(); auto source = begin_source();
if (matches(curr, "-")) { if (matches(curr, '-')) {
curr++; curr++;
} }
if (matches(curr, "0x") || matches(curr, "0X")) { if (matches(curr, '0') && (matches(curr + 1, 'x') || matches(curr + 1, 'X'))) {
curr += 2; curr += 2;
} else { } else {
return {}; return {};
@ -849,7 +856,7 @@ Token Lexer::try_integer() {
auto source = begin_source(); auto source = begin_source();
if (matches(curr, "-")) { if (matches(curr, '-')) {
curr++; curr++;
} }
@ -927,138 +934,162 @@ Token Lexer::try_punctuation() {
auto source = begin_source(); auto source = begin_source();
auto type = Token::Type::kUninitialized; auto type = Token::Type::kUninitialized;
if (matches(pos(), "@")) { if (matches(pos(), '@')) {
type = Token::Type::kAttr; type = Token::Type::kAttr;
advance(1); advance(1);
} else if (matches(pos(), "(")) { } else if (matches(pos(), '(')) {
type = Token::Type::kParenLeft; type = Token::Type::kParenLeft;
advance(1); advance(1);
} else if (matches(pos(), ")")) { } else if (matches(pos(), ')')) {
type = Token::Type::kParenRight; type = Token::Type::kParenRight;
advance(1); advance(1);
} else if (matches(pos(), "[")) { } else if (matches(pos(), '[')) {
type = Token::Type::kBracketLeft; type = Token::Type::kBracketLeft;
advance(1); advance(1);
} else if (matches(pos(), "]")) { } else if (matches(pos(), ']')) {
type = Token::Type::kBracketRight; type = Token::Type::kBracketRight;
advance(1); advance(1);
} else if (matches(pos(), "{")) { } else if (matches(pos(), '{')) {
type = Token::Type::kBraceLeft; type = Token::Type::kBraceLeft;
advance(1); advance(1);
} else if (matches(pos(), "}")) { } else if (matches(pos(), '}')) {
type = Token::Type::kBraceRight; type = Token::Type::kBraceRight;
advance(1); advance(1);
} else if (matches(pos(), "&&")) { } else if (matches(pos(), '&')) {
type = Token::Type::kAndAnd; if (matches(pos() + 1, '&')) {
advance(2); type = Token::Type::kAndAnd;
} else if (matches(pos(), "&=")) { advance(2);
type = Token::Type::kAndEqual; } else if (matches(pos() + 1, '=')) {
advance(2); type = Token::Type::kAndEqual;
} else if (matches(pos(), "&")) { advance(2);
type = Token::Type::kAnd; } else {
advance(1); type = Token::Type::kAnd;
} else if (matches(pos(), "/=")) { advance(1);
type = Token::Type::kDivisionEqual; }
advance(2); } else if (matches(pos(), '/')) {
} else if (matches(pos(), "/")) { if (matches(pos() + 1, '=')) {
type = Token::Type::kForwardSlash; type = Token::Type::kDivisionEqual;
advance(1); advance(2);
} else if (matches(pos(), "!=")) { } else {
type = Token::Type::kNotEqual; type = Token::Type::kForwardSlash;
advance(2); advance(1);
} else if (matches(pos(), "!")) { }
type = Token::Type::kBang; } else if (matches(pos(), '!')) {
advance(1); if (matches(pos() + 1, '=')) {
} else if (matches(pos(), ":")) { type = Token::Type::kNotEqual;
advance(2);
} else {
type = Token::Type::kBang;
advance(1);
}
} else if (matches(pos(), ':')) {
type = Token::Type::kColon; type = Token::Type::kColon;
advance(1); advance(1);
} else if (matches(pos(), ",")) { } else if (matches(pos(), ',')) {
type = Token::Type::kComma; type = Token::Type::kComma;
advance(1); advance(1);
} else if (matches(pos(), "==")) { } else if (matches(pos(), '=')) {
type = Token::Type::kEqualEqual; if (matches(pos() + 1, '=')) {
advance(2); type = Token::Type::kEqualEqual;
} else if (matches(pos(), "=")) { advance(2);
type = Token::Type::kEqual; } else {
advance(1); type = Token::Type::kEqual;
} else if (matches(pos(), ">=")) { advance(1);
type = Token::Type::kGreaterThanEqual; }
advance(2); } else if (matches(pos(), '>')) {
} else if (matches(pos(), ">>")) { if (matches(pos() + 1, '=')) {
type = Token::Type::kShiftRight; type = Token::Type::kGreaterThanEqual;
advance(2); advance(2);
} else if (matches(pos(), ">")) { } else if (matches(pos() + 1, '>')) {
type = Token::Type::kGreaterThan; type = Token::Type::kShiftRight;
advance(1); advance(2);
} else if (matches(pos(), "<=")) { } else {
type = Token::Type::kLessThanEqual; type = Token::Type::kGreaterThan;
advance(2); advance(1);
} else if (matches(pos(), "<<")) { }
type = Token::Type::kShiftLeft; } else if (matches(pos(), '<')) {
advance(2); if (matches(pos() + 1, '=')) {
} else if (matches(pos(), "<")) { type = Token::Type::kLessThanEqual;
type = Token::Type::kLessThan; advance(2);
advance(1); } else if (matches(pos() + 1, '<')) {
} else if (matches(pos(), "%=")) { type = Token::Type::kShiftLeft;
type = Token::Type::kModuloEqual; advance(2);
advance(2); } else {
} else if (matches(pos(), "%")) { type = Token::Type::kLessThan;
type = Token::Type::kMod; advance(1);
advance(1); }
} else if (matches(pos(), "->")) { } else if (matches(pos(), '%')) {
type = Token::Type::kArrow; if (matches(pos() + 1, '=')) {
advance(2); type = Token::Type::kModuloEqual;
} else if (matches(pos(), "--")) { advance(2);
type = Token::Type::kMinusMinus; } else {
advance(2); type = Token::Type::kMod;
} else if (matches(pos(), "-=")) { advance(1);
type = Token::Type::kMinusEqual; }
advance(2); } else if (matches(pos(), '-')) {
} else if (matches(pos(), "-")) { if (matches(pos() + 1, '>')) {
type = Token::Type::kMinus; type = Token::Type::kArrow;
advance(1); advance(2);
} else if (matches(pos(), ".")) { } else if (matches(pos() + 1, '-')) {
type = Token::Type::kMinusMinus;
advance(2);
} else if (matches(pos() + 1, '=')) {
type = Token::Type::kMinusEqual;
advance(2);
} else {
type = Token::Type::kMinus;
advance(1);
}
} else if (matches(pos(), '.')) {
type = Token::Type::kPeriod; type = Token::Type::kPeriod;
advance(1); advance(1);
} else if (matches(pos(), "++")) { } else if (matches(pos(), '+')) {
type = Token::Type::kPlusPlus; if (matches(pos() + 1, '+')) {
advance(2); type = Token::Type::kPlusPlus;
} else if (matches(pos(), "+=")) { advance(2);
type = Token::Type::kPlusEqual; } else if (matches(pos() + 1, '=')) {
advance(2); type = Token::Type::kPlusEqual;
} else if (matches(pos(), "+")) { advance(2);
type = Token::Type::kPlus; } else {
advance(1); type = Token::Type::kPlus;
} else if (matches(pos(), "||")) { advance(1);
type = Token::Type::kOrOr; }
advance(2); } else if (matches(pos(), '|')) {
} else if (matches(pos(), "|=")) { if (matches(pos() + 1, '|')) {
type = Token::Type::kOrEqual; type = Token::Type::kOrOr;
advance(2); advance(2);
} else if (matches(pos(), "|")) { } else if (matches(pos() + 1, '=')) {
type = Token::Type::kOr; type = Token::Type::kOrEqual;
advance(1); advance(2);
} else if (matches(pos(), ";")) { } else {
type = Token::Type::kOr;
advance(1);
}
} else if (matches(pos(), ';')) {
type = Token::Type::kSemicolon; type = Token::Type::kSemicolon;
advance(1); advance(1);
} else if (matches(pos(), "*=")) { } else if (matches(pos(), '*')) {
type = Token::Type::kTimesEqual; if (matches(pos() + 1, '=')) {
advance(2); type = Token::Type::kTimesEqual;
} else if (matches(pos(), "*")) { advance(2);
type = Token::Type::kStar; } else {
advance(1); type = Token::Type::kStar;
} else if (matches(pos(), "~")) { advance(1);
}
} else if (matches(pos(), '~')) {
type = Token::Type::kTilde; type = Token::Type::kTilde;
advance(1); advance(1);
} else if (matches(pos(), "_")) { } else if (matches(pos(), '_')) {
type = Token::Type::kUnderscore; type = Token::Type::kUnderscore;
advance(1); advance(1);
} else if (matches(pos(), "^=")) { } else if (matches(pos(), '^')) {
type = Token::Type::kXorEqual; if (matches(pos() + 1, '=')) {
advance(2); type = Token::Type::kXorEqual;
} else if (matches(pos(), "^")) { advance(2);
type = Token::Type::kXor; } else {
advance(1); type = Token::Type::kXor;
advance(1);
}
} }
end_source(source); end_source(source);

View File

@ -96,7 +96,8 @@ class Lexer {
bool is_hex(char ch) const; bool is_hex(char ch) const;
/// @returns true if string at `pos` matches `substr` /// @returns true if string at `pos` matches `substr`
bool matches(size_t pos, std::string_view substr); bool matches(size_t pos, std::string_view substr);
/// @returns true if char at `pos` matches `ch`
bool matches(size_t pos, char ch);
/// The source file content /// The source file content
Source::File const* const file_; Source::File const* const file_;
/// The current location within the input /// The current location within the input