mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-05-16 20:31:20 +00:00
Optimize the lexer match method.
This CL adds a `matches` overload that compares a single character instead of a string, and updates the lexer to use it. When lexing punctuation, if the first character does not match, the lexer no longer checks any of the other punctuation tokens that start with that character. Change-Id: Ifddc1ff3f3936ab3a53c37e080394ed35dc2aecf Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97065 Auto-Submit: Dan Sinclair <dsinclair@chromium.org> Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
parent
08482ec7ed
commit
833ccab384
@ -204,6 +204,13 @@ bool Lexer::matches(size_t pos, std::string_view sub_string) {
|
|||||||
return substr(pos, sub_string.size()) == sub_string;
|
return substr(pos, sub_string.size()) == sub_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Lexer::matches(size_t pos, char ch) {
|
||||||
|
if (pos >= length()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return line()[pos] == ch;
|
||||||
|
}
|
||||||
|
|
||||||
Token Lexer::skip_blankspace_and_comments() {
|
Token Lexer::skip_blankspace_and_comments() {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
auto loc = location_;
|
auto loc = location_;
|
||||||
@ -298,7 +305,7 @@ Token Lexer::try_float() {
|
|||||||
auto source = begin_source();
|
auto source = begin_source();
|
||||||
bool has_mantissa_digits = false;
|
bool has_mantissa_digits = false;
|
||||||
|
|
||||||
if (matches(end, "-")) {
|
if (matches(end, '-')) {
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
while (end < length() && is_digit(at(end))) {
|
while (end < length() && is_digit(at(end))) {
|
||||||
@ -307,7 +314,7 @@ Token Lexer::try_float() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool has_point = false;
|
bool has_point = false;
|
||||||
if (end < length() && matches(end, ".")) {
|
if (end < length() && matches(end, '.')) {
|
||||||
has_point = true;
|
has_point = true;
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
@ -323,9 +330,9 @@ Token Lexer::try_float() {
|
|||||||
|
|
||||||
// Parse the exponent if one exists
|
// Parse the exponent if one exists
|
||||||
bool has_exponent = false;
|
bool has_exponent = false;
|
||||||
if (end < length() && (matches(end, "e") || matches(end, "E"))) {
|
if (end < length() && (matches(end, 'e') || matches(end, 'E'))) {
|
||||||
end++;
|
end++;
|
||||||
if (end < length() && (matches(end, "+") || matches(end, "-"))) {
|
if (end < length() && (matches(end, '+') || matches(end, '-'))) {
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -344,10 +351,10 @@ Token Lexer::try_float() {
|
|||||||
|
|
||||||
bool has_f_suffix = false;
|
bool has_f_suffix = false;
|
||||||
bool has_h_suffix = false;
|
bool has_h_suffix = false;
|
||||||
if (end < length() && matches(end, "f")) {
|
if (end < length() && matches(end, 'f')) {
|
||||||
end++;
|
end++;
|
||||||
has_f_suffix = true;
|
has_f_suffix = true;
|
||||||
} else if (end < length() && matches(end, "h")) {
|
} else if (end < length() && matches(end, 'h')) {
|
||||||
end++;
|
end++;
|
||||||
has_h_suffix = true;
|
has_h_suffix = true;
|
||||||
}
|
}
|
||||||
@ -410,12 +417,12 @@ Token Lexer::try_hex_float() {
|
|||||||
|
|
||||||
// -?
|
// -?
|
||||||
uint64_t sign_bit = 0;
|
uint64_t sign_bit = 0;
|
||||||
if (matches(end, "-")) {
|
if (matches(end, '-')) {
|
||||||
sign_bit = 1;
|
sign_bit = 1;
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
// 0[xX]
|
// 0[xX]
|
||||||
if (matches(end, "0x") || matches(end, "0X")) {
|
if (matches(end, '0') && (matches(end + 1, 'x') || matches(end + 1, 'X'))) {
|
||||||
end += 2;
|
end += 2;
|
||||||
} else {
|
} else {
|
||||||
return {};
|
return {};
|
||||||
@ -461,7 +468,7 @@ Token Lexer::try_hex_float() {
|
|||||||
|
|
||||||
// .?
|
// .?
|
||||||
bool hex_point = false;
|
bool hex_point = false;
|
||||||
if (matches(end, ".")) {
|
if (matches(end, '.')) {
|
||||||
hex_point = true;
|
hex_point = true;
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
@ -479,7 +486,7 @@ Token Lexer::try_hex_float() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Is the binary exponent present? It's optional.
|
// Is the binary exponent present? It's optional.
|
||||||
const bool has_exponent = (matches(end, "p") || matches(end, "P"));
|
const bool has_exponent = (matches(end, 'p') || matches(end, 'P'));
|
||||||
if (has_exponent) {
|
if (has_exponent) {
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
@ -560,9 +567,9 @@ Token Lexer::try_hex_float() {
|
|||||||
if (has_exponent) {
|
if (has_exponent) {
|
||||||
// Parse the rest of the exponent.
|
// Parse the rest of the exponent.
|
||||||
// (+|-)?
|
// (+|-)?
|
||||||
if (matches(end, "+")) {
|
if (matches(end, '+')) {
|
||||||
end++;
|
end++;
|
||||||
} else if (matches(end, "-")) {
|
} else if (matches(end, '-')) {
|
||||||
exponent_sign = -1;
|
exponent_sign = -1;
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
@ -587,10 +594,10 @@ Token Lexer::try_hex_float() {
|
|||||||
// Parse optional 'f' or 'h' suffix. For a hex float, it can only exist
|
// Parse optional 'f' or 'h' suffix. For a hex float, it can only exist
|
||||||
// when the exponent is present. Otherwise it will look like
|
// when the exponent is present. Otherwise it will look like
|
||||||
// one of the mantissa digits.
|
// one of the mantissa digits.
|
||||||
if (end < length() && matches(end, "f")) {
|
if (end < length() && matches(end, 'f')) {
|
||||||
has_f_suffix = true;
|
has_f_suffix = true;
|
||||||
end++;
|
end++;
|
||||||
} else if (end < length() && matches(end, "h")) {
|
} else if (end < length() && matches(end, 'h')) {
|
||||||
has_h_suffix = true;
|
has_h_suffix = true;
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
@ -794,7 +801,7 @@ Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32
|
|||||||
advance(static_cast<size_t>(end_ptr - start_ptr));
|
advance(static_cast<size_t>(end_ptr - start_ptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matches(pos(), "u")) {
|
if (matches(pos(), 'u')) {
|
||||||
if (!overflow && CheckedConvert<u32>(AInt(res))) {
|
if (!overflow && CheckedConvert<u32>(AInt(res))) {
|
||||||
advance(1);
|
advance(1);
|
||||||
end_source(source);
|
end_source(source);
|
||||||
@ -803,7 +810,7 @@ Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32
|
|||||||
return {Token::Type::kError, source, "value cannot be represented as 'u32'"};
|
return {Token::Type::kError, source, "value cannot be represented as 'u32'"};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matches(pos(), "i")) {
|
if (matches(pos(), 'i')) {
|
||||||
if (!overflow && CheckedConvert<i32>(AInt(res))) {
|
if (!overflow && CheckedConvert<i32>(AInt(res))) {
|
||||||
advance(1);
|
advance(1);
|
||||||
end_source(source);
|
end_source(source);
|
||||||
@ -825,11 +832,11 @@ Token Lexer::try_hex_integer() {
|
|||||||
|
|
||||||
auto source = begin_source();
|
auto source = begin_source();
|
||||||
|
|
||||||
if (matches(curr, "-")) {
|
if (matches(curr, '-')) {
|
||||||
curr++;
|
curr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matches(curr, "0x") || matches(curr, "0X")) {
|
if (matches(curr, '0') && (matches(curr + 1, 'x') || matches(curr + 1, 'X'))) {
|
||||||
curr += 2;
|
curr += 2;
|
||||||
} else {
|
} else {
|
||||||
return {};
|
return {};
|
||||||
@ -849,7 +856,7 @@ Token Lexer::try_integer() {
|
|||||||
|
|
||||||
auto source = begin_source();
|
auto source = begin_source();
|
||||||
|
|
||||||
if (matches(curr, "-")) {
|
if (matches(curr, '-')) {
|
||||||
curr++;
|
curr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -927,138 +934,162 @@ Token Lexer::try_punctuation() {
|
|||||||
auto source = begin_source();
|
auto source = begin_source();
|
||||||
auto type = Token::Type::kUninitialized;
|
auto type = Token::Type::kUninitialized;
|
||||||
|
|
||||||
if (matches(pos(), "@")) {
|
if (matches(pos(), '@')) {
|
||||||
type = Token::Type::kAttr;
|
type = Token::Type::kAttr;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "(")) {
|
} else if (matches(pos(), '(')) {
|
||||||
type = Token::Type::kParenLeft;
|
type = Token::Type::kParenLeft;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), ")")) {
|
} else if (matches(pos(), ')')) {
|
||||||
type = Token::Type::kParenRight;
|
type = Token::Type::kParenRight;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "[")) {
|
} else if (matches(pos(), '[')) {
|
||||||
type = Token::Type::kBracketLeft;
|
type = Token::Type::kBracketLeft;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "]")) {
|
} else if (matches(pos(), ']')) {
|
||||||
type = Token::Type::kBracketRight;
|
type = Token::Type::kBracketRight;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "{")) {
|
} else if (matches(pos(), '{')) {
|
||||||
type = Token::Type::kBraceLeft;
|
type = Token::Type::kBraceLeft;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "}")) {
|
} else if (matches(pos(), '}')) {
|
||||||
type = Token::Type::kBraceRight;
|
type = Token::Type::kBraceRight;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "&&")) {
|
} else if (matches(pos(), '&')) {
|
||||||
type = Token::Type::kAndAnd;
|
if (matches(pos() + 1, '&')) {
|
||||||
advance(2);
|
type = Token::Type::kAndAnd;
|
||||||
} else if (matches(pos(), "&=")) {
|
advance(2);
|
||||||
type = Token::Type::kAndEqual;
|
} else if (matches(pos() + 1, '=')) {
|
||||||
advance(2);
|
type = Token::Type::kAndEqual;
|
||||||
} else if (matches(pos(), "&")) {
|
advance(2);
|
||||||
type = Token::Type::kAnd;
|
} else {
|
||||||
advance(1);
|
type = Token::Type::kAnd;
|
||||||
} else if (matches(pos(), "/=")) {
|
advance(1);
|
||||||
type = Token::Type::kDivisionEqual;
|
}
|
||||||
advance(2);
|
} else if (matches(pos(), '/')) {
|
||||||
} else if (matches(pos(), "/")) {
|
if (matches(pos() + 1, '=')) {
|
||||||
type = Token::Type::kForwardSlash;
|
type = Token::Type::kDivisionEqual;
|
||||||
advance(1);
|
advance(2);
|
||||||
} else if (matches(pos(), "!=")) {
|
} else {
|
||||||
type = Token::Type::kNotEqual;
|
type = Token::Type::kForwardSlash;
|
||||||
advance(2);
|
advance(1);
|
||||||
} else if (matches(pos(), "!")) {
|
}
|
||||||
type = Token::Type::kBang;
|
} else if (matches(pos(), '!')) {
|
||||||
advance(1);
|
if (matches(pos() + 1, '=')) {
|
||||||
} else if (matches(pos(), ":")) {
|
type = Token::Type::kNotEqual;
|
||||||
|
advance(2);
|
||||||
|
} else {
|
||||||
|
type = Token::Type::kBang;
|
||||||
|
advance(1);
|
||||||
|
}
|
||||||
|
} else if (matches(pos(), ':')) {
|
||||||
type = Token::Type::kColon;
|
type = Token::Type::kColon;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), ",")) {
|
} else if (matches(pos(), ',')) {
|
||||||
type = Token::Type::kComma;
|
type = Token::Type::kComma;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "==")) {
|
} else if (matches(pos(), '=')) {
|
||||||
type = Token::Type::kEqualEqual;
|
if (matches(pos() + 1, '=')) {
|
||||||
advance(2);
|
type = Token::Type::kEqualEqual;
|
||||||
} else if (matches(pos(), "=")) {
|
advance(2);
|
||||||
type = Token::Type::kEqual;
|
} else {
|
||||||
advance(1);
|
type = Token::Type::kEqual;
|
||||||
} else if (matches(pos(), ">=")) {
|
advance(1);
|
||||||
type = Token::Type::kGreaterThanEqual;
|
}
|
||||||
advance(2);
|
} else if (matches(pos(), '>')) {
|
||||||
} else if (matches(pos(), ">>")) {
|
if (matches(pos() + 1, '=')) {
|
||||||
type = Token::Type::kShiftRight;
|
type = Token::Type::kGreaterThanEqual;
|
||||||
advance(2);
|
advance(2);
|
||||||
} else if (matches(pos(), ">")) {
|
} else if (matches(pos() + 1, '>')) {
|
||||||
type = Token::Type::kGreaterThan;
|
type = Token::Type::kShiftRight;
|
||||||
advance(1);
|
advance(2);
|
||||||
} else if (matches(pos(), "<=")) {
|
} else {
|
||||||
type = Token::Type::kLessThanEqual;
|
type = Token::Type::kGreaterThan;
|
||||||
advance(2);
|
advance(1);
|
||||||
} else if (matches(pos(), "<<")) {
|
}
|
||||||
type = Token::Type::kShiftLeft;
|
} else if (matches(pos(), '<')) {
|
||||||
advance(2);
|
if (matches(pos() + 1, '=')) {
|
||||||
} else if (matches(pos(), "<")) {
|
type = Token::Type::kLessThanEqual;
|
||||||
type = Token::Type::kLessThan;
|
advance(2);
|
||||||
advance(1);
|
} else if (matches(pos() + 1, '<')) {
|
||||||
} else if (matches(pos(), "%=")) {
|
type = Token::Type::kShiftLeft;
|
||||||
type = Token::Type::kModuloEqual;
|
advance(2);
|
||||||
advance(2);
|
} else {
|
||||||
} else if (matches(pos(), "%")) {
|
type = Token::Type::kLessThan;
|
||||||
type = Token::Type::kMod;
|
advance(1);
|
||||||
advance(1);
|
}
|
||||||
} else if (matches(pos(), "->")) {
|
} else if (matches(pos(), '%')) {
|
||||||
type = Token::Type::kArrow;
|
if (matches(pos() + 1, '=')) {
|
||||||
advance(2);
|
type = Token::Type::kModuloEqual;
|
||||||
} else if (matches(pos(), "--")) {
|
advance(2);
|
||||||
type = Token::Type::kMinusMinus;
|
} else {
|
||||||
advance(2);
|
type = Token::Type::kMod;
|
||||||
} else if (matches(pos(), "-=")) {
|
advance(1);
|
||||||
type = Token::Type::kMinusEqual;
|
}
|
||||||
advance(2);
|
} else if (matches(pos(), '-')) {
|
||||||
} else if (matches(pos(), "-")) {
|
if (matches(pos() + 1, '>')) {
|
||||||
type = Token::Type::kMinus;
|
type = Token::Type::kArrow;
|
||||||
advance(1);
|
advance(2);
|
||||||
} else if (matches(pos(), ".")) {
|
} else if (matches(pos() + 1, '-')) {
|
||||||
|
type = Token::Type::kMinusMinus;
|
||||||
|
advance(2);
|
||||||
|
} else if (matches(pos() + 1, '=')) {
|
||||||
|
type = Token::Type::kMinusEqual;
|
||||||
|
advance(2);
|
||||||
|
} else {
|
||||||
|
type = Token::Type::kMinus;
|
||||||
|
advance(1);
|
||||||
|
}
|
||||||
|
} else if (matches(pos(), '.')) {
|
||||||
type = Token::Type::kPeriod;
|
type = Token::Type::kPeriod;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "++")) {
|
} else if (matches(pos(), '+')) {
|
||||||
type = Token::Type::kPlusPlus;
|
if (matches(pos() + 1, '+')) {
|
||||||
advance(2);
|
type = Token::Type::kPlusPlus;
|
||||||
} else if (matches(pos(), "+=")) {
|
advance(2);
|
||||||
type = Token::Type::kPlusEqual;
|
} else if (matches(pos() + 1, '=')) {
|
||||||
advance(2);
|
type = Token::Type::kPlusEqual;
|
||||||
} else if (matches(pos(), "+")) {
|
advance(2);
|
||||||
type = Token::Type::kPlus;
|
} else {
|
||||||
advance(1);
|
type = Token::Type::kPlus;
|
||||||
} else if (matches(pos(), "||")) {
|
advance(1);
|
||||||
type = Token::Type::kOrOr;
|
}
|
||||||
advance(2);
|
} else if (matches(pos(), '|')) {
|
||||||
} else if (matches(pos(), "|=")) {
|
if (matches(pos() + 1, '|')) {
|
||||||
type = Token::Type::kOrEqual;
|
type = Token::Type::kOrOr;
|
||||||
advance(2);
|
advance(2);
|
||||||
} else if (matches(pos(), "|")) {
|
} else if (matches(pos() + 1, '=')) {
|
||||||
type = Token::Type::kOr;
|
type = Token::Type::kOrEqual;
|
||||||
advance(1);
|
advance(2);
|
||||||
} else if (matches(pos(), ";")) {
|
} else {
|
||||||
|
type = Token::Type::kOr;
|
||||||
|
advance(1);
|
||||||
|
}
|
||||||
|
} else if (matches(pos(), ';')) {
|
||||||
type = Token::Type::kSemicolon;
|
type = Token::Type::kSemicolon;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "*=")) {
|
} else if (matches(pos(), '*')) {
|
||||||
type = Token::Type::kTimesEqual;
|
if (matches(pos() + 1, '=')) {
|
||||||
advance(2);
|
type = Token::Type::kTimesEqual;
|
||||||
} else if (matches(pos(), "*")) {
|
advance(2);
|
||||||
type = Token::Type::kStar;
|
} else {
|
||||||
advance(1);
|
type = Token::Type::kStar;
|
||||||
} else if (matches(pos(), "~")) {
|
advance(1);
|
||||||
|
}
|
||||||
|
} else if (matches(pos(), '~')) {
|
||||||
type = Token::Type::kTilde;
|
type = Token::Type::kTilde;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "_")) {
|
} else if (matches(pos(), '_')) {
|
||||||
type = Token::Type::kUnderscore;
|
type = Token::Type::kUnderscore;
|
||||||
advance(1);
|
advance(1);
|
||||||
} else if (matches(pos(), "^=")) {
|
} else if (matches(pos(), '^')) {
|
||||||
type = Token::Type::kXorEqual;
|
if (matches(pos() + 1, '=')) {
|
||||||
advance(2);
|
type = Token::Type::kXorEqual;
|
||||||
} else if (matches(pos(), "^")) {
|
advance(2);
|
||||||
type = Token::Type::kXor;
|
} else {
|
||||||
advance(1);
|
type = Token::Type::kXor;
|
||||||
|
advance(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
end_source(source);
|
end_source(source);
|
||||||
|
@ -96,7 +96,8 @@ class Lexer {
|
|||||||
bool is_hex(char ch) const;
|
bool is_hex(char ch) const;
|
||||||
/// @returns true if string at `pos` matches `substr`
|
/// @returns true if string at `pos` matches `substr`
|
||||||
bool matches(size_t pos, std::string_view substr);
|
bool matches(size_t pos, std::string_view substr);
|
||||||
|
/// @returns true if char at `pos` matches `ch`
|
||||||
|
bool matches(size_t pos, char ch);
|
||||||
/// The source file content
|
/// The source file content
|
||||||
Source::File const* const file_;
|
Source::File const* const file_;
|
||||||
/// The current location within the input
|
/// The current location within the input
|
||||||
|
Loading…
x
Reference in New Issue
Block a user