// Copyright 2020 The Tint Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "src/tint/reader/wgsl/lexer.h" #include #include #include #include #include // NOLINT(build/include_order) #include #include #include #include "src/tint/debug.h" #include "src/tint/text/unicode.h" namespace tint::reader::wgsl { namespace { // Unicode parsing code assumes that the size of a single std::string element is // 1 byte. static_assert(sizeof(decltype(tint::Source::FileContent::data[0])) == sizeof(uint8_t), "tint::reader::wgsl requires the size of a std::string element " "to be a single byte"); bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t* blankspace_size) { // See https://www.w3.org/TR/WGSL/#blankspace auto* utf8 = reinterpret_cast(&str[i]); auto [cp, n] = text::utf8::Decode(utf8, str.size() - i); if (n == 0) { return false; } static const auto kSpace = text::CodePoint(0x0020); // space static const auto kHTab = text::CodePoint(0x0009); // horizontal tab static const auto kL2R = text::CodePoint(0x200E); // left-to-right mark static const auto kR2L = text::CodePoint(0x200F); // right-to-left mark if (cp == kSpace || cp == kHTab || cp == kL2R || cp == kR2L) { *is_blankspace = true; *blankspace_size = n; return true; } *is_blankspace = false; return true; } uint32_t dec_value(char c) { if (c >= '0' && c <= '9') { return static_cast(c - '0'); } return 0; } uint32_t hex_value(char c) { if (c >= '0' && c <= '9') { return static_cast(c - '0'); } if (c >= 'a' && c <= 'f') { return 0xA + static_cast(c - 'a'); } if (c >= 'A' && c <= 'F') { return 0xA + static_cast(c - 'A'); } return 0; } /// LimitCheck is the enumerator result of check_limits(). enum class LimitCheck { /// The value was within the limits of the data type. kWithinLimits, /// The value was too small to fit within the data type. kTooSmall, /// The value was too large to fit within the data type. kTooLarge, }; /// Checks whether the value fits within the integer type `T` template LimitCheck check_limits(int64_t value) { static_assert(std::is_integral_v, "T must be an integer"); if (value < static_cast(std::numeric_limits::lowest())) { return LimitCheck::kTooSmall; } if (value > static_cast(std::numeric_limits::max())) { return LimitCheck::kTooLarge; } return LimitCheck::kWithinLimits; } /// Checks whether the value fits within the floating point type `T` template LimitCheck check_limits(double value) { static_assert(std::is_floating_point_v, "T must be a floating point"); if (value < static_cast(std::numeric_limits::lowest())) { return LimitCheck::kTooSmall; } if (value > static_cast(std::numeric_limits::max())) { return LimitCheck::kTooLarge; } return LimitCheck::kWithinLimits; } } // namespace Lexer::Lexer(const Source::File* file) : file_(file), location_{1, 1} {} Lexer::~Lexer() = default; const std::string_view Lexer::line() const { if (file_->content.lines.size() == 0) { static const char* empty_string = ""; return empty_string; } return file_->content.lines[location_.line - 1]; } size_t Lexer::pos() const { return location_.column - 1; } size_t Lexer::length() const { return line().size(); } const char& Lexer::at(size_t pos) const { auto l = line(); // Unlike for std::string, if pos == l.size(), indexing `l[pos]` is UB for // std::string_view. if (pos >= l.size()) { static const char zero = 0; return zero; } return l[pos]; } std::string_view Lexer::substr(size_t offset, size_t count) { return line().substr(offset, count); } void Lexer::advance(size_t offset) { location_.column += offset; } void Lexer::set_pos(size_t pos) { location_.column = pos + 1; } void Lexer::advance_line() { location_.line++; location_.column = 1; } bool Lexer::is_eof() const { return location_.line >= file_->content.lines.size() && pos() >= length(); } bool Lexer::is_eol() const { return pos() >= length(); } Token Lexer::next() { if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) { return t; } if (auto t = try_hex_float(); !t.IsUninitialized()) { return t; } if (auto t = try_hex_integer(); !t.IsUninitialized()) { return t; } if (auto t = try_float(); !t.IsUninitialized()) { return t; } if (auto t = try_integer(); !t.IsUninitialized()) { return t; } if (auto t = try_ident(); !t.IsUninitialized()) { return t; } if (auto t = try_punctuation(); !t.IsUninitialized()) { return t; } return {Token::Type::kError, begin_source(), (is_null() ? "null character found" : "invalid character found")}; } Source Lexer::begin_source() const { Source src{}; src.file = file_; src.range.begin = location_; src.range.end = location_; return src; } void Lexer::end_source(Source& src) const { src.range.end = location_; } bool Lexer::is_null() const { return (pos() < length()) && (at(pos()) == 0); } bool Lexer::is_digit(char ch) const { return std::isdigit(static_cast(ch)); } bool Lexer::is_hex(char ch) const { return std::isxdigit(static_cast(ch)); } bool Lexer::matches(size_t pos, std::string_view sub_string) { if (pos >= length()) { return false; } return substr(pos, sub_string.size()) == sub_string; } Token Lexer::skip_blankspace_and_comments() { for (;;) { auto loc = location_; while (!is_eof()) { if (is_eol()) { advance_line(); continue; } bool is_blankspace; size_t blankspace_size; if (!read_blankspace(line(), pos(), &is_blankspace, &blankspace_size)) { return {Token::Type::kError, begin_source(), "invalid UTF-8"}; } if (!is_blankspace) { break; } advance(blankspace_size); } auto t = skip_comment(); if (!t.IsUninitialized()) { return t; } // If the cursor didn't advance we didn't remove any blankspace // so we're done. if (loc == location_) { break; } } if (is_eof()) { return {Token::Type::kEOF, begin_source()}; } return {}; } Token Lexer::skip_comment() { if (matches(pos(), "//")) { // Line comment: ignore everything until the end of line. while (!is_eol()) { if (is_null()) { return {Token::Type::kError, begin_source(), "null character found"}; } advance(); } return {}; } if (matches(pos(), "/*")) { // Block comment: ignore everything until the closing '*/' token. // Record source location of the initial '/*' auto source = begin_source(); source.range.end.column += 1; advance(2); int depth = 1; while (!is_eof() && depth > 0) { if (matches(pos(), "/*")) { // Start of block comment: increase nesting depth. advance(2); depth++; } else if (matches(pos(), "*/")) { // End of block comment: decrease nesting depth. advance(2); depth--; } else if (is_eol()) { // Newline: skip and update source location. advance_line(); } else if (is_null()) { return {Token::Type::kError, begin_source(), "null character found"}; } else { // Anything else: skip and update source location. advance(); } } if (depth > 0) { return {Token::Type::kError, source, "unterminated block comment"}; } } return {}; } Token Lexer::try_float() { auto start = pos(); auto end = pos(); auto source = begin_source(); bool has_mantissa_digits = false; if (matches(end, "-")) { end++; } while (end < length() && is_digit(at(end))) { has_mantissa_digits = true; end++; } bool has_point = false; if (end < length() && matches(end, ".")) { has_point = true; end++; } while (end < length() && is_digit(at(end))) { has_mantissa_digits = true; end++; } if (!has_mantissa_digits) { return {}; } // Parse the exponent if one exists bool has_exponent = false; if (end < length() && (matches(end, "e") || matches(end, "E"))) { end++; if (end < length() && (matches(end, "+") || matches(end, "-"))) { end++; } while (end < length() && isdigit(at(end))) { has_exponent = true; end++; } // If an 'e' or 'E' was present, then the number part must also be present. if (!has_exponent) { const auto str = std::string{substr(start, end - start)}; return {Token::Type::kError, source, "incomplete exponent for floating point literal: " + str}; } } bool has_f_suffix = false; if (end < length() && matches(end, "f")) { end++; has_f_suffix = true; } if (!has_point && !has_exponent && !has_f_suffix) { // If it only has digits then it's an integer. return {}; } // Save the error string, for use by diagnostics. const auto str = std::string{substr(start, end - start)}; advance(end - start); end_source(source); double value = strtod(&at(start), nullptr); const double magnitude = std::abs(value); if (has_f_suffix) { // This errors out if a non-zero magnitude is too small to represent in a // float. It can't be represented faithfully in an f32. if (0.0 < magnitude && magnitude < static_cast(std::numeric_limits::min())) { return {Token::Type::kError, source, "magnitude too small to be represented as f32"}; } switch (check_limits(value)) { case LimitCheck::kTooSmall: return {Token::Type::kError, source, "value too small for f32"}; case LimitCheck::kTooLarge: return {Token::Type::kError, source, "value too large for f32"}; default: return {Token::Type::kFloatLiteral_F, source, value}; } } // TODO(crbug.com/tint/1504): Properly support abstract float: // Change `AbstractFloatType` to `double`, update errors to say 'abstract int'. using AbstractFloatType = float; if (0.0 < magnitude && magnitude < static_cast(std::numeric_limits::min())) { return {Token::Type::kError, source, "magnitude too small to be represented as f32"}; } switch (check_limits(value)) { case LimitCheck::kTooSmall: return {Token::Type::kError, source, "value too small for f32"}; case LimitCheck::kTooLarge: return {Token::Type::kError, source, "value too large for f32"}; default: return {Token::Type::kFloatLiteral, source, value}; } } Token Lexer::try_hex_float() { constexpr uint32_t kTotalBits = 32; constexpr uint32_t kTotalMsb = kTotalBits - 1; constexpr uint32_t kMantissaBits = 23; constexpr uint32_t kMantissaMsb = kMantissaBits - 1; constexpr uint32_t kMantissaShiftRight = kTotalBits - kMantissaBits; constexpr int32_t kExponentBias = 127; constexpr int32_t kExponentMax = 255; constexpr uint32_t kExponentBits = 8; constexpr uint32_t kExponentMask = (1 << kExponentBits) - 1; constexpr uint32_t kExponentLeftShift = kMantissaBits; constexpr uint32_t kSignBit = 31; auto start = pos(); auto end = pos(); auto source = begin_source(); // clang-format off // -?0[xX]([0-9a-fA-F]*.?[0-9a-fA-F]+ | [0-9a-fA-F]+.[0-9a-fA-F]*)(p|P)(+|-)?[0-9]+ // NOLINT // clang-format on // -? int32_t sign_bit = 0; if (matches(end, "-")) { sign_bit = 1; end++; } // 0[xX] if (matches(end, "0x") || matches(end, "0X")) { end += 2; } else { return {}; } uint32_t mantissa = 0; uint32_t exponent = 0; // TODO(dneto): Values in the normal range for the format do not explicitly // store the most significant bit. The algorithm here works hard to eliminate // that bit in the representation during parsing, and then it backtracks // when it sees it may have to explicitly represent it, and backtracks again // when it sees the number is sub-normal (i.e. the exponent underflows). // I suspect the logic can be clarified by storing it during parsing, and // then removing it later only when needed. // `set_next_mantissa_bit_to` sets next `mantissa` bit starting from msb to // lsb to value 1 if `set` is true, 0 otherwise. Returns true on success, i.e. // when the bit can be accommodated in the available space. uint32_t mantissa_next_bit = kTotalMsb; auto set_next_mantissa_bit_to = [&](bool set, bool integer_part) -> bool { // If adding bits for the integer part, we can overflow whether we set the // bit or not. For the fractional part, we can only overflow when setting // the bit. const bool check_overflow = integer_part || set; // Note: mantissa_next_bit actually decrements, so comparing it as // larger than a positive number relies on wraparound. if (check_overflow && (mantissa_next_bit > kTotalMsb)) { return false; // Overflowed mantissa } if (set) { mantissa |= (1 << mantissa_next_bit); } --mantissa_next_bit; return true; }; // Collect integer range (if any) auto integer_range = std::make_pair(end, end); while (end < length() && is_hex(at(end))) { integer_range.second = ++end; } // .? bool hex_point = false; if (matches(end, ".")) { hex_point = true; end++; } // Collect fractional range (if any) auto fractional_range = std::make_pair(end, end); while (end < length() && is_hex(at(end))) { fractional_range.second = ++end; } // Must have at least an integer or fractional part if ((integer_range.first == integer_range.second) && (fractional_range.first == fractional_range.second)) { return {}; } // Is the binary exponent present? It's optional. const bool has_exponent = (matches(end, "p") || matches(end, "P")); if (has_exponent) { end++; } if (!has_exponent && !hex_point) { // It's not a hex float. At best it's a hex integer. return {}; } // At this point, we know for sure our token is a hex float value, // or an invalid token. // Parse integer part // [0-9a-fA-F]* bool has_zero_integer = true; // The magnitude is zero if and only if seen_prior_one_bits is false. bool seen_prior_one_bits = false; for (auto i = integer_range.first; i < integer_range.second; ++i) { const auto nibble = hex_value(at(i)); if (nibble != 0) { has_zero_integer = false; } for (int32_t bit = 3; bit >= 0; --bit) { auto v = 1 & (nibble >> bit); // Skip leading 0s and the first 1 if (seen_prior_one_bits) { if (!set_next_mantissa_bit_to(v != 0, true)) { return {Token::Type::kError, source, "mantissa is too large for hex float"}; } ++exponent; } else { if (v == 1) { seen_prior_one_bits = true; } } } } // Parse fractional part // [0-9a-fA-F]* for (auto i = fractional_range.first; i < fractional_range.second; ++i) { auto nibble = hex_value(at(i)); for (int32_t bit = 3; bit >= 0; --bit) { auto v = 1 & (nibble >> bit); if (v == 1) { seen_prior_one_bits = true; } // If integer part is 0, we only start writing bits to the // mantissa once we have a non-zero fractional bit. While the fractional // values are 0, we adjust the exponent to avoid overflowing `mantissa`. if (!seen_prior_one_bits) { --exponent; } else { if (!set_next_mantissa_bit_to(v != 0, false)) { return {Token::Type::kError, source, "mantissa is too large for hex float"}; } } } } // Determine if the value of the mantissa is zero. // Note: it's not enough to check mantissa == 0 as we drop the initial bit, // whether it's in the integer part or the fractional part. const bool is_zero = !seen_prior_one_bits; TINT_ASSERT(Reader, !is_zero || mantissa == 0); // Parse the optional exponent. // ((p|P)(\+|-)?[0-9]+)? uint32_t input_exponent = 0; // Defaults to 0 if not present int32_t exponent_sign = 1; // If the 'p' part is present, the rest of the exponent must exist. bool has_f_suffix = false; if (has_exponent) { // Parse the rest of the exponent. // (+|-)? if (matches(end, "+")) { end++; } else if (matches(end, "-")) { exponent_sign = -1; end++; } // Parse exponent from input // [0-9]+ // Allow overflow (in uint32_t) when the floating point value magnitude is // zero. bool has_exponent_digits = false; while (end < length() && isdigit(at(end))) { has_exponent_digits = true; auto prev_exponent = input_exponent; input_exponent = (input_exponent * 10) + dec_value(at(end)); // Check if we've overflowed input_exponent. This only matters when // the mantissa is non-zero. if (!is_zero && (prev_exponent > input_exponent)) { return {Token::Type::kError, source, "exponent is too large for hex float"}; } end++; } // Parse optional 'f' suffix. For a hex float, it can only exist // when the exponent is present. Otherwise it will look like // one of the mantissa digits. if (end < length() && matches(end, "f")) { has_f_suffix = true; end++; } if (!has_exponent_digits) { return {Token::Type::kError, source, "expected an exponent value for hex float"}; } } advance(end - start); end_source(source); if (is_zero) { // If value is zero, then ignore the exponent and produce a zero exponent = 0; } else { // Ensure input exponent is not too large; i.e. that it won't overflow when // adding the exponent bias. const uint32_t kIntMax = static_cast(std::numeric_limits::max()); const uint32_t kMaxInputExponent = kIntMax - kExponentBias; if (input_exponent > kMaxInputExponent) { return {Token::Type::kError, source, "exponent is too large for hex float"}; } // Compute exponent so far exponent += static_cast(static_cast(input_exponent) * exponent_sign); // Bias exponent if non-zero // After this, if exponent is <= 0, our value is a denormal exponent += kExponentBias; // We know the number is not zero. The MSB is 1 (by construction), and // should be eliminated because it becomes the implicit 1 that isn't // explicitly represented in the binary32 format. We'll bring it back // later if we find the exponent actually underflowed, i.e. the number // is sub-normal. if (has_zero_integer) { mantissa <<= 1; --exponent; } } // We can now safely work with exponent as a signed quantity, as there's no // chance to overflow int32_t signed_exponent = static_cast(exponent); // Shift mantissa to occupy the low 23 bits mantissa >>= kMantissaShiftRight; // If denormal, shift mantissa until our exponent is zero if (!is_zero) { // Denorm has exponent 0 and non-zero mantissa. We set the top bit here, // then shift the mantissa to make exponent zero. if (signed_exponent <= 0) { mantissa >>= 1; mantissa |= (1 << kMantissaMsb); } while (signed_exponent < 0) { mantissa >>= 1; ++signed_exponent; // If underflow, clamp to zero if (mantissa == 0) { signed_exponent = 0; } } } if (signed_exponent > kExponentMax) { // Overflow: set to infinity signed_exponent = kExponentMax; mantissa = 0; } else if (signed_exponent == kExponentMax && mantissa != 0) { // NaN: set to infinity mantissa = 0; } // Combine sign, mantissa, and exponent uint32_t result_u32 = sign_bit << kSignBit; result_u32 |= mantissa; result_u32 |= (static_cast(signed_exponent) & kExponentMask) << kExponentLeftShift; // Reinterpret as float and return float result_f32; std::memcpy(&result_f32, &result_u32, sizeof(result_f32)); double result_f64 = static_cast(result_f32); return {has_f_suffix ? Token::Type::kFloatLiteral_F : Token::Type::kFloatLiteral, source, result_f64}; } Token Lexer::build_token_from_int_if_possible(Source source, size_t start, int32_t base) { int64_t res = strtoll(&at(start), nullptr, base); if (matches(pos(), "u")) { switch (check_limits(res)) { case LimitCheck::kTooSmall: return {Token::Type::kError, source, "unsigned literal cannot be negative"}; case LimitCheck::kTooLarge: return {Token::Type::kError, source, "value too large for u32"}; default: advance(1); end_source(source); return {Token::Type::kIntLiteral_U, source, res}; } } if (matches(pos(), "i")) { switch (check_limits(res)) { case LimitCheck::kTooSmall: return {Token::Type::kError, source, "value too small for i32"}; case LimitCheck::kTooLarge: return {Token::Type::kError, source, "value too large for i32"}; default: break; } advance(1); end_source(source); return {Token::Type::kIntLiteral_I, source, res}; } // TODO(crbug.com/tint/1504): Properly support abstract int: // Change `AbstractIntType` to `int64_t`, update errors to say 'abstract int'. using AbstractIntType = int32_t; switch (check_limits(res)) { case LimitCheck::kTooSmall: return {Token::Type::kError, source, "value too small for i32"}; case LimitCheck::kTooLarge: return {Token::Type::kError, source, "value too large for i32"}; default: end_source(source); return {Token::Type::kIntLiteral, source, res}; } } Token Lexer::try_hex_integer() { constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types auto start = pos(); auto end = pos(); auto source = begin_source(); if (matches(end, "-")) { end++; } if (matches(end, "0x") || matches(end, "0X")) { end += 2; } else { return {}; } auto first = end; while (!is_eol() && is_hex(at(end))) { end++; auto digits = end - first; if (digits > kMaxDigits) { return {Token::Type::kError, source, "integer literal (" + std::string{substr(start, end - 1 - start)} + "...) has too many digits"}; } } if (first == end) { return {Token::Type::kError, source, "integer or float hex literal has no significant digits"}; } advance(end - start); return build_token_from_int_if_possible(source, start, 16); } Token Lexer::try_integer() { constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types auto start = pos(); auto end = start; auto source = begin_source(); if (matches(end, "-")) { end++; } if (end >= length() || !is_digit(at(end))) { return {}; } auto first = end; // If the first digit is a zero this must only be zero as leading zeros // are not allowed. auto next = first + 1; if (next < length()) { if (at(first) == '0' && is_digit(at(next))) { return {Token::Type::kError, source, "integer literal (" + std::string{substr(start, end - 1 - start)} + "...) has leading 0s"}; } } while (end < length() && is_digit(at(end))) { auto digits = end - first; if (digits > kMaxDigits) { return {Token::Type::kError, source, "integer literal (" + std::string{substr(start, end - 1 - start)} + "...) has too many digits"}; } end++; } advance(end - start); return build_token_from_int_if_possible(source, start, 10); } Token Lexer::try_ident() { auto source = begin_source(); auto start = pos(); // Must begin with an XID_Source unicode character, or underscore { auto* utf8 = reinterpret_cast(&at(pos())); auto [code_point, n] = text::utf8::Decode(utf8, length() - pos()); if (n == 0) { advance(); // Skip the bad byte. return {Token::Type::kError, source, "invalid UTF-8"}; } if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) { return {}; } // Consume start codepoint advance(n); } while (!is_eol()) { // Must continue with an XID_Continue unicode character auto* utf8 = reinterpret_cast(&at(pos())); auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos()); if (n == 0) { advance(); // Skip the bad byte. return {Token::Type::kError, source, "invalid UTF-8"}; } if (!code_point.IsXIDContinue()) { break; } // Consume continuing codepoint advance(n); } if (at(start) == '_') { // Check for an underscore on its own (special token), or a // double-underscore (not allowed). if ((pos() == start + 1) || (at(start + 1) == '_')) { set_pos(start); return {}; } } auto str = substr(start, pos() - start); end_source(source); auto t = check_keyword(source, str); if (!t.IsUninitialized()) { return t; } return {Token::Type::kIdentifier, source, str}; } Token Lexer::try_punctuation() { auto source = begin_source(); auto type = Token::Type::kUninitialized; if (matches(pos(), "@")) { type = Token::Type::kAttr; advance(1); } else if (matches(pos(), "(")) { type = Token::Type::kParenLeft; advance(1); } else if (matches(pos(), ")")) { type = Token::Type::kParenRight; advance(1); } else if (matches(pos(), "[")) { type = Token::Type::kBracketLeft; advance(1); } else if (matches(pos(), "]")) { type = Token::Type::kBracketRight; advance(1); } else if (matches(pos(), "{")) { type = Token::Type::kBraceLeft; advance(1); } else if (matches(pos(), "}")) { type = Token::Type::kBraceRight; advance(1); } else if (matches(pos(), "&&")) { type = Token::Type::kAndAnd; advance(2); } else if (matches(pos(), "&=")) { type = Token::Type::kAndEqual; advance(2); } else if (matches(pos(), "&")) { type = Token::Type::kAnd; advance(1); } else if (matches(pos(), "/=")) { type = Token::Type::kDivisionEqual; advance(2); } else if (matches(pos(), "/")) { type = Token::Type::kForwardSlash; advance(1); } else if (matches(pos(), "!=")) { type = Token::Type::kNotEqual; advance(2); } else if (matches(pos(), "!")) { type = Token::Type::kBang; advance(1); } else if (matches(pos(), ":")) { type = Token::Type::kColon; advance(1); } else if (matches(pos(), ",")) { type = Token::Type::kComma; advance(1); } else if (matches(pos(), "==")) { type = Token::Type::kEqualEqual; advance(2); } else if (matches(pos(), "=")) { type = Token::Type::kEqual; advance(1); } else if (matches(pos(), ">=")) { type = Token::Type::kGreaterThanEqual; advance(2); } else if (matches(pos(), ">>")) { type = Token::Type::kShiftRight; advance(2); } else if (matches(pos(), ">")) { type = Token::Type::kGreaterThan; advance(1); } else if (matches(pos(), "<=")) { type = Token::Type::kLessThanEqual; advance(2); } else if (matches(pos(), "<<")) { type = Token::Type::kShiftLeft; advance(2); } else if (matches(pos(), "<")) { type = Token::Type::kLessThan; advance(1); } else if (matches(pos(), "%=")) { type = Token::Type::kModuloEqual; advance(2); } else if (matches(pos(), "%")) { type = Token::Type::kMod; advance(1); } else if (matches(pos(), "->")) { type = Token::Type::kArrow; advance(2); } else if (matches(pos(), "--")) { type = Token::Type::kMinusMinus; advance(2); } else if (matches(pos(), "-=")) { type = Token::Type::kMinusEqual; advance(2); } else if (matches(pos(), "-")) { type = Token::Type::kMinus; advance(1); } else if (matches(pos(), ".")) { type = Token::Type::kPeriod; advance(1); } else if (matches(pos(), "++")) { type = Token::Type::kPlusPlus; advance(2); } else if (matches(pos(), "+=")) { type = Token::Type::kPlusEqual; advance(2); } else if (matches(pos(), "+")) { type = Token::Type::kPlus; advance(1); } else if (matches(pos(), "||")) { type = Token::Type::kOrOr; advance(2); } else if (matches(pos(), "|=")) { type = Token::Type::kOrEqual; advance(2); } else if (matches(pos(), "|")) { type = Token::Type::kOr; advance(1); } else if (matches(pos(), ";")) { type = Token::Type::kSemicolon; advance(1); } else if (matches(pos(), "*=")) { type = Token::Type::kTimesEqual; advance(2); } else if (matches(pos(), "*")) { type = Token::Type::kStar; advance(1); } else if (matches(pos(), "~")) { type = Token::Type::kTilde; advance(1); } else if (matches(pos(), "_")) { type = Token::Type::kUnderscore; advance(1); } else if (matches(pos(), "^=")) { type = Token::Type::kXorEqual; advance(2); } else if (matches(pos(), "^")) { type = Token::Type::kXor; advance(1); } end_source(source); return {type, source}; } Token Lexer::check_keyword(const Source& source, std::string_view str) { if (str == "array") { return {Token::Type::kArray, source, "array"}; } if (str == "atomic") { return {Token::Type::kAtomic, source, "atomic"}; } if (str == "bitcast") { return {Token::Type::kBitcast, source, "bitcast"}; } if (str == "bool") { return {Token::Type::kBool, source, "bool"}; } if (str == "break") { return {Token::Type::kBreak, source, "break"}; } if (str == "case") { return {Token::Type::kCase, source, "case"}; } if (str == "continue") { return {Token::Type::kContinue, source, "continue"}; } if (str == "continuing") { return {Token::Type::kContinuing, source, "continuing"}; } if (str == "discard") { return {Token::Type::kDiscard, source, "discard"}; } if (str == "default") { return {Token::Type::kDefault, source, "default"}; } if (str == "else") { return {Token::Type::kElse, source, "else"}; } if (str == "enable") { return {Token::Type::kEnable, source, "enable"}; } if (str == "f16") { return {Token::Type::kF16, source, "f16"}; } if (str == "f32") { return {Token::Type::kF32, source, "f32"}; } if (str == "fallthrough") { return {Token::Type::kFallthrough, source, "fallthrough"}; } if (str == "false") { return {Token::Type::kFalse, source, "false"}; } if (str == "fn") { return {Token::Type::kFn, source, "fn"}; } if (str == "for") { return {Token::Type::kFor, source, "for"}; } if (str == "function") { return {Token::Type::kFunction, source, "function"}; } if (str == "i32") { return {Token::Type::kI32, source, "i32"}; } if (str == "if") { return {Token::Type::kIf, source, "if"}; } if (str == "import") { return {Token::Type::kImport, source, "import"}; } if (str == "let") { return {Token::Type::kLet, source, "let"}; } if (str == "loop") { return {Token::Type::kLoop, source, "loop"}; } if (str == "mat2x2") { return {Token::Type::kMat2x2, source, "mat2x2"}; } if (str == "mat2x3") { return {Token::Type::kMat2x3, source, "mat2x3"}; } if (str == "mat2x4") { return {Token::Type::kMat2x4, source, "mat2x4"}; } if (str == "mat3x2") { return {Token::Type::kMat3x2, source, "mat3x2"}; } if (str == "mat3x3") { return {Token::Type::kMat3x3, source, "mat3x3"}; } if (str == "mat3x4") { return {Token::Type::kMat3x4, source, "mat3x4"}; } if (str == "mat4x2") { return {Token::Type::kMat4x2, source, "mat4x2"}; } if (str == "mat4x3") { return {Token::Type::kMat4x3, source, "mat4x3"}; } if (str == "mat4x4") { return {Token::Type::kMat4x4, source, "mat4x4"}; } if (str == "override") { return {Token::Type::kOverride, source, "override"}; } if (str == "private") { return {Token::Type::kPrivate, source, "private"}; } if (str == "ptr") { return {Token::Type::kPtr, source, "ptr"}; } if (str == "return") { return {Token::Type::kReturn, source, "return"}; } if (str == "sampler") { return {Token::Type::kSampler, source, "sampler"}; } if (str == "sampler_comparison") { return {Token::Type::kComparisonSampler, source, "sampler_comparison"}; } if (str == "storage_buffer" || str == "storage") { return {Token::Type::kStorage, source, "storage"}; } if (str == "struct") { return {Token::Type::kStruct, source, "struct"}; } if (str == "switch") { return {Token::Type::kSwitch, source, "switch"}; } if (str == "texture_1d") { return {Token::Type::kTextureSampled1d, source, "texture_1d"}; } if (str == "texture_2d") { return {Token::Type::kTextureSampled2d, source, "texture_2d"}; } if (str == "texture_2d_array") { return {Token::Type::kTextureSampled2dArray, source, "texture_2d_array"}; } if (str == "texture_3d") { return {Token::Type::kTextureSampled3d, source, "texture_3d"}; } if (str == "texture_cube") { return {Token::Type::kTextureSampledCube, source, "texture_cube"}; } if (str == "texture_cube_array") { return {Token::Type::kTextureSampledCubeArray, source, "texture_cube_array"}; } if (str == "texture_depth_2d") { return {Token::Type::kTextureDepth2d, source, "texture_depth_2d"}; } if (str == "texture_depth_2d_array") { return {Token::Type::kTextureDepth2dArray, source, "texture_depth_2d_array"}; } if (str == "texture_depth_cube") { return {Token::Type::kTextureDepthCube, source, "texture_depth_cube"}; } if (str == "texture_depth_cube_array") { return {Token::Type::kTextureDepthCubeArray, source, "texture_depth_cube_array"}; } if (str == "texture_depth_multisampled_2d") { return {Token::Type::kTextureDepthMultisampled2d, source, "texture_depth_multisampled_2d"}; } if (str == "texture_external") { return {Token::Type::kTextureExternal, source, "texture_external"}; } if (str == "texture_multisampled_2d") { return {Token::Type::kTextureMultisampled2d, source, "texture_multisampled_2d"}; } if (str == "texture_storage_1d") { return {Token::Type::kTextureStorage1d, source, "texture_storage_1d"}; } if (str == "texture_storage_2d") { return {Token::Type::kTextureStorage2d, source, "texture_storage_2d"}; } if (str == "texture_storage_2d_array") { return {Token::Type::kTextureStorage2dArray, source, "texture_storage_2d_array"}; } if (str == "texture_storage_3d") { return {Token::Type::kTextureStorage3d, source, "texture_storage_3d"}; } if (str == "true") { return {Token::Type::kTrue, source, "true"}; } if (str == "type") { return {Token::Type::kType, source, "type"}; } if (str == "u32") { return {Token::Type::kU32, source, "u32"}; } if (str == "uniform") { return {Token::Type::kUniform, source, "uniform"}; } if (str == "var") { return {Token::Type::kVar, source, "var"}; } if (str == "vec2") { return {Token::Type::kVec2, source, "vec2"}; } if (str == "vec3") { return {Token::Type::kVec3, source, "vec3"}; } if (str == "vec4") { return {Token::Type::kVec4, source, "vec4"}; } if (str == "workgroup") { return {Token::Type::kWorkgroup, source, "workgroup"}; } return {}; } } // namespace tint::reader::wgsl