Optimize the lexer match method.

This CL adds a `matches(size_t, char)` overload and updates the lexer's `matches` call sites to compare against single characters instead of strings. In `try_punctuation`, the first character is now tested once: if it doesn't match, none of the punctuation tokens starting with that character are checked. Change-Id: Ifddc1ff3f3936ab3a53c37e080394ed35dc2aecf Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97065 Auto-Submit: Dan Sinclair <dsinclair@chromium.org> Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
2025-12-18 17:35:30 +00:00 · 2022-07-25 14:54:18 +00:00
parent 08482ec7ed
commit 833ccab384
2 changed files with 158 additions and 126 deletions
--- a/src/tint/reader/wgsl/lexer.cc
+++ b/src/tint/reader/wgsl/lexer.cc
@@ -204,6 +204,13 @@ bool Lexer::matches(size_t pos, std::string_view sub_string) {
    return substr(pos, sub_string.size()) == sub_string;
 }

+// True iff `pos` is below length() and the character there equals `ch`.
+// An out-of-range `pos` never matches; line() is only indexed when in range.
+bool Lexer::matches(size_t pos, char ch) {
+    const bool in_range = pos < length();
+    return in_range && line()[pos] == ch;
+}
+
 Token Lexer::skip_blankspace_and_comments() {
    for (;;) {
        auto loc = location_;
@@ -298,7 +305,7 @@ Token Lexer::try_float() {
    auto source = begin_source();
    bool has_mantissa_digits = false;

-    if (matches(end, "-")) {
+    if (matches(end, '-')) {
        end++;
    }
    while (end < length() && is_digit(at(end))) {
@@ -307,7 +314,7 @@ Token Lexer::try_float() {
    }

    bool has_point = false;
-    if (end < length() && matches(end, ".")) {
+    if (end < length() && matches(end, '.')) {
        has_point = true;
        end++;
    }
@@ -323,9 +330,9 @@ Token Lexer::try_float() {

    // Parse the exponent if one exists
    bool has_exponent = false;
-    if (end < length() && (matches(end, "e") || matches(end, "E"))) {
+    if (end < length() && (matches(end, 'e') || matches(end, 'E'))) {
        end++;
-        if (end < length() && (matches(end, "+") || matches(end, "-"))) {
+        if (end < length() && (matches(end, '+') || matches(end, '-'))) {
            end++;
        }

@@ -344,10 +351,10 @@ Token Lexer::try_float() {

    bool has_f_suffix = false;
    bool has_h_suffix = false;
-    if (end < length() && matches(end, "f")) {
+    if (end < length() && matches(end, 'f')) {
        end++;
        has_f_suffix = true;
-    } else if (end < length() && matches(end, "h")) {
+    } else if (end < length() && matches(end, 'h')) {
        end++;
        has_h_suffix = true;
    }
@@ -410,12 +417,12 @@ Token Lexer::try_hex_float() {

    // -?
    uint64_t sign_bit = 0;
-    if (matches(end, "-")) {
+    if (matches(end, '-')) {
        sign_bit = 1;
        end++;
    }
    // 0[xX]
-    if (matches(end, "0x") || matches(end, "0X")) {
+    if (matches(end, '0') && (matches(end + 1, 'x') || matches(end + 1, 'X'))) {
        end += 2;
    } else {
        return {};
@@ -461,7 +468,7 @@ Token Lexer::try_hex_float() {

    // .?
    bool hex_point = false;
-    if (matches(end, ".")) {
+    if (matches(end, '.')) {
        hex_point = true;
        end++;
    }
@@ -479,7 +486,7 @@ Token Lexer::try_hex_float() {
    }

    // Is the binary exponent present?  It's optional.
-    const bool has_exponent = (matches(end, "p") || matches(end, "P"));
+    const bool has_exponent = (matches(end, 'p') || matches(end, 'P'));
    if (has_exponent) {
        end++;
    }
@@ -560,9 +567,9 @@ Token Lexer::try_hex_float() {
    if (has_exponent) {
        // Parse the rest of the exponent.
        // (+|-)?
-        if (matches(end, "+")) {
+        if (matches(end, '+')) {
            end++;
-        } else if (matches(end, "-")) {
+        } else if (matches(end, '-')) {
            exponent_sign = -1;
            end++;
        }
@@ -587,10 +594,10 @@ Token Lexer::try_hex_float() {
        // Parse optional 'f' or 'h' suffix.  For a hex float, it can only exist
        // when the exponent is present. Otherwise it will look like
        // one of the mantissa digits.
-        if (end < length() && matches(end, "f")) {
+        if (end < length() && matches(end, 'f')) {
            has_f_suffix = true;
            end++;
-        } else if (end < length() && matches(end, "h")) {
+        } else if (end < length() && matches(end, 'h')) {
            has_h_suffix = true;
            end++;
        }
@@ -794,7 +801,7 @@ Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32
        advance(static_cast<size_t>(end_ptr - start_ptr));
    }

-    if (matches(pos(), "u")) {
+    if (matches(pos(), 'u')) {
        if (!overflow && CheckedConvert<u32>(AInt(res))) {
            advance(1);
            end_source(source);
@@ -803,7 +810,7 @@ Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32
        return {Token::Type::kError, source, "value cannot be represented as 'u32'"};
    }

-    if (matches(pos(), "i")) {
+    if (matches(pos(), 'i')) {
        if (!overflow && CheckedConvert<i32>(AInt(res))) {
            advance(1);
            end_source(source);
@@ -825,11 +832,11 @@ Token Lexer::try_hex_integer() {

    auto source = begin_source();

-    if (matches(curr, "-")) {
+    if (matches(curr, '-')) {
        curr++;
    }

-    if (matches(curr, "0x") || matches(curr, "0X")) {
+    if (matches(curr, '0') && (matches(curr + 1, 'x') || matches(curr + 1, 'X'))) {
        curr += 2;
    } else {
        return {};
@@ -849,7 +856,7 @@ Token Lexer::try_integer() {

    auto source = begin_source();

-    if (matches(curr, "-")) {
+    if (matches(curr, '-')) {
        curr++;
    }

@@ -927,138 +934,162 @@ Token Lexer::try_punctuation() {
    auto source = begin_source();
    auto type = Token::Type::kUninitialized;

-    if (matches(pos(), "@")) {
+    if (matches(pos(), '@')) {
        type = Token::Type::kAttr;
        advance(1);
-    } else if (matches(pos(), "(")) {
+    } else if (matches(pos(), '(')) {
        type = Token::Type::kParenLeft;
        advance(1);
-    } else if (matches(pos(), ")")) {
+    } else if (matches(pos(), ')')) {
        type = Token::Type::kParenRight;
        advance(1);
-    } else if (matches(pos(), "[")) {
+    } else if (matches(pos(), '[')) {
        type = Token::Type::kBracketLeft;
        advance(1);
-    } else if (matches(pos(), "]")) {
+    } else if (matches(pos(), ']')) {
        type = Token::Type::kBracketRight;
        advance(1);
-    } else if (matches(pos(), "{")) {
+    } else if (matches(pos(), '{')) {
        type = Token::Type::kBraceLeft;
        advance(1);
-    } else if (matches(pos(), "}")) {
+    } else if (matches(pos(), '}')) {
        type = Token::Type::kBraceRight;
        advance(1);
-    } else if (matches(pos(), "&&")) {
-        type = Token::Type::kAndAnd;
-        advance(2);
-    } else if (matches(pos(), "&=")) {
-        type = Token::Type::kAndEqual;
-        advance(2);
-    } else if (matches(pos(), "&")) {
-        type = Token::Type::kAnd;
-        advance(1);
-    } else if (matches(pos(), "/=")) {
-        type = Token::Type::kDivisionEqual;
-        advance(2);
-    } else if (matches(pos(), "/")) {
-        type = Token::Type::kForwardSlash;
-        advance(1);
-    } else if (matches(pos(), "!=")) {
-        type = Token::Type::kNotEqual;
-        advance(2);
-    } else if (matches(pos(), "!")) {
-        type = Token::Type::kBang;
-        advance(1);
-    } else if (matches(pos(), ":")) {
+    } else if (matches(pos(), '&')) {
+        if (matches(pos() + 1, '&')) {
+            type = Token::Type::kAndAnd;
+            advance(2);
+        } else if (matches(pos() + 1, '=')) {
+            type = Token::Type::kAndEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kAnd;
+            advance(1);
+        }
+    } else if (matches(pos(), '/')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kDivisionEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kForwardSlash;
+            advance(1);
+        }
+    } else if (matches(pos(), '!')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kNotEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kBang;
+            advance(1);
+        }
+    } else if (matches(pos(), ':')) {
        type = Token::Type::kColon;
        advance(1);
-    } else if (matches(pos(), ",")) {
+    } else if (matches(pos(), ',')) {
        type = Token::Type::kComma;
        advance(1);
-    } else if (matches(pos(), "==")) {
-        type = Token::Type::kEqualEqual;
-        advance(2);
-    } else if (matches(pos(), "=")) {
-        type = Token::Type::kEqual;
-        advance(1);
-    } else if (matches(pos(), ">=")) {
-        type = Token::Type::kGreaterThanEqual;
-        advance(2);
-    } else if (matches(pos(), ">>")) {
-        type = Token::Type::kShiftRight;
-        advance(2);
-    } else if (matches(pos(), ">")) {
-        type = Token::Type::kGreaterThan;
-        advance(1);
-    } else if (matches(pos(), "<=")) {
-        type = Token::Type::kLessThanEqual;
-        advance(2);
-    } else if (matches(pos(), "<<")) {
-        type = Token::Type::kShiftLeft;
-        advance(2);
-    } else if (matches(pos(), "<")) {
-        type = Token::Type::kLessThan;
-        advance(1);
-    } else if (matches(pos(), "%=")) {
-        type = Token::Type::kModuloEqual;
-        advance(2);
-    } else if (matches(pos(), "%")) {
-        type = Token::Type::kMod;
-        advance(1);
-    } else if (matches(pos(), "->")) {
-        type = Token::Type::kArrow;
-        advance(2);
-    } else if (matches(pos(), "--")) {
-        type = Token::Type::kMinusMinus;
-        advance(2);
-    } else if (matches(pos(), "-=")) {
-        type = Token::Type::kMinusEqual;
-        advance(2);
-    } else if (matches(pos(), "-")) {
-        type = Token::Type::kMinus;
-        advance(1);
-    } else if (matches(pos(), ".")) {
+    } else if (matches(pos(), '=')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kEqualEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kEqual;
+            advance(1);
+        }
+    } else if (matches(pos(), '>')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kGreaterThanEqual;
+            advance(2);
+        } else if (matches(pos() + 1, '>')) {
+            type = Token::Type::kShiftRight;
+            advance(2);
+        } else {
+            type = Token::Type::kGreaterThan;
+            advance(1);
+        }
+    } else if (matches(pos(), '<')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kLessThanEqual;
+            advance(2);
+        } else if (matches(pos() + 1, '<')) {
+            type = Token::Type::kShiftLeft;
+            advance(2);
+        } else {
+            type = Token::Type::kLessThan;
+            advance(1);
+        }
+    } else if (matches(pos(), '%')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kModuloEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kMod;
+            advance(1);
+        }
+    } else if (matches(pos(), '-')) {
+        if (matches(pos() + 1, '>')) {
+            type = Token::Type::kArrow;
+            advance(2);
+        } else if (matches(pos() + 1, '-')) {
+            type = Token::Type::kMinusMinus;
+            advance(2);
+        } else if (matches(pos() + 1, '=')) {
+            type = Token::Type::kMinusEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kMinus;
+            advance(1);
+        }
+    } else if (matches(pos(), '.')) {
        type = Token::Type::kPeriod;
        advance(1);
-    } else if (matches(pos(), "++")) {
-        type = Token::Type::kPlusPlus;
-        advance(2);
-    } else if (matches(pos(), "+=")) {
-        type = Token::Type::kPlusEqual;
-        advance(2);
-    } else if (matches(pos(), "+")) {
-        type = Token::Type::kPlus;
-        advance(1);
-    } else if (matches(pos(), "||")) {
-        type = Token::Type::kOrOr;
-        advance(2);
-    } else if (matches(pos(), "|=")) {
-        type = Token::Type::kOrEqual;
-        advance(2);
-    } else if (matches(pos(), "|")) {
-        type = Token::Type::kOr;
-        advance(1);
-    } else if (matches(pos(), ";")) {
+    } else if (matches(pos(), '+')) {
+        if (matches(pos() + 1, '+')) {
+            type = Token::Type::kPlusPlus;
+            advance(2);
+        } else if (matches(pos() + 1, '=')) {
+            type = Token::Type::kPlusEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kPlus;
+            advance(1);
+        }
+    } else if (matches(pos(), '|')) {
+        if (matches(pos() + 1, '|')) {
+            type = Token::Type::kOrOr;
+            advance(2);
+        } else if (matches(pos() + 1, '=')) {
+            type = Token::Type::kOrEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kOr;
+            advance(1);
+        }
+    } else if (matches(pos(), ';')) {
        type = Token::Type::kSemicolon;
        advance(1);
-    } else if (matches(pos(), "*=")) {
-        type = Token::Type::kTimesEqual;
-        advance(2);
-    } else if (matches(pos(), "*")) {
-        type = Token::Type::kStar;
-        advance(1);
-    } else if (matches(pos(), "~")) {
+    } else if (matches(pos(), '*')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kTimesEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kStar;
+            advance(1);
+        }
+    } else if (matches(pos(), '~')) {
        type = Token::Type::kTilde;
        advance(1);
-    } else if (matches(pos(), "_")) {
+    } else if (matches(pos(), '_')) {
        type = Token::Type::kUnderscore;
        advance(1);
-    } else if (matches(pos(), "^=")) {
-        type = Token::Type::kXorEqual;
-        advance(2);
-    } else if (matches(pos(), "^")) {
-        type = Token::Type::kXor;
-        advance(1);
+    } else if (matches(pos(), '^')) {
+        if (matches(pos() + 1, '=')) {
+            type = Token::Type::kXorEqual;
+            advance(2);
+        } else {
+            type = Token::Type::kXor;
+            advance(1);
+        }
    }

    end_source(source);
--- a/src/tint/reader/wgsl/lexer.h
+++ b/src/tint/reader/wgsl/lexer.h
@@ -96,7 +96,8 @@ class Lexer {
    bool is_hex(char ch) const;
    /// @returns true if string at `pos` matches `substr`
    bool matches(size_t pos, std::string_view substr);
-
+    /// @returns true if the char at `pos` equals `ch`, false if `pos >= length()`
+    bool matches(size_t pos, char ch);
    /// The source file content
    Source::File const* const file_;
    /// The current location within the input