Use optional in Lexer.
This Cl updates the lexer to only create tokens when needed, in the case of no match an empty optional is returned. Change-Id: Ie4ba45d72cfb263beb8d9c83e3098ff11beeecd2 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/128620 Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: Dan Sinclair <dsinclair@chromium.org>
This commit is contained in:
parent
889edb18b2
commit
378a1f51a2
|
@ -39,7 +39,9 @@ static_assert(sizeof(decltype(tint::Source::FileContent::data[0])) == sizeof(uin
|
||||||
"tint::reader::wgsl requires the size of a std::string element "
|
"tint::reader::wgsl requires the size of a std::string element "
|
||||||
"to be a single byte");
|
"to be a single byte");
|
||||||
|
|
||||||
static constexpr size_t kDefaultListSize = 512;
|
// A token is ~80bytes. The 4k here comes from looking at the number of tokens in the benchmark
|
||||||
|
// programs and being a bit bigger then those need (atan2-const-eval is the outlier here).
|
||||||
|
static constexpr size_t kDefaultListSize = 4092;
|
||||||
|
|
||||||
bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t* blankspace_size) {
|
bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t* blankspace_size) {
|
||||||
// See https://www.w3.org/TR/WGSL/#blankspace
|
// See https://www.w3.org/TR/WGSL/#blankspace
|
||||||
|
@ -95,8 +97,12 @@ Lexer::~Lexer() = default;
|
||||||
std::vector<Token> Lexer::Lex() {
|
std::vector<Token> Lexer::Lex() {
|
||||||
std::vector<Token> tokens;
|
std::vector<Token> tokens;
|
||||||
tokens.reserve(kDefaultListSize);
|
tokens.reserve(kDefaultListSize);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
tokens.emplace_back(next());
|
tokens.emplace_back(next());
|
||||||
|
if (tokens.back().IsEof() || tokens.back().IsError()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// If the token can be split, we insert a placeholder element(s) into the stream to hold the
|
// If the token can be split, we insert a placeholder element(s) into the stream to hold the
|
||||||
// split character.
|
// split character.
|
||||||
|
@ -104,11 +110,7 @@ std::vector<Token> Lexer::Lex() {
|
||||||
for (size_t i = 0; i < num_placeholders; i++) {
|
for (size_t i = 0; i < num_placeholders; i++) {
|
||||||
auto src = tokens.back().source();
|
auto src = tokens.back().source();
|
||||||
src.range.begin.column++;
|
src.range.begin.column++;
|
||||||
tokens.emplace_back(Token(Token::Type::kPlaceholder, src));
|
tokens.emplace_back(Token::Type::kPlaceholder, src);
|
||||||
}
|
|
||||||
|
|
||||||
if (tokens.back().IsEof() || tokens.back().IsError()) {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tokens;
|
return tokens;
|
||||||
|
@ -167,32 +169,32 @@ bool Lexer::is_eol() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::next() {
|
Token Lexer::next() {
|
||||||
if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
|
if (auto t = skip_blankspace_and_comments(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto t = try_hex_float(); !t.IsUninitialized()) {
|
if (auto t = try_hex_float(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto t = try_hex_integer(); !t.IsUninitialized()) {
|
if (auto t = try_hex_integer(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto t = try_float(); !t.IsUninitialized()) {
|
if (auto t = try_float(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto t = try_integer(); !t.IsUninitialized()) {
|
if (auto t = try_integer(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto t = try_ident(); !t.IsUninitialized()) {
|
if (auto t = try_ident(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto t = try_punctuation(); !t.IsUninitialized()) {
|
if (auto t = try_punctuation(); t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return std::move(t.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
return {Token::Type::kError, begin_source(),
|
return {Token::Type::kError, begin_source(),
|
||||||
|
@ -237,7 +239,7 @@ bool Lexer::matches(size_t pos, char ch) {
|
||||||
return line()[pos] == ch;
|
return line()[pos] == ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::skip_blankspace_and_comments() {
|
std::optional<Token> Lexer::skip_blankspace_and_comments() {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
auto loc = location_;
|
auto loc = location_;
|
||||||
while (!is_eof()) {
|
while (!is_eof()) {
|
||||||
|
@ -249,7 +251,7 @@ Token Lexer::skip_blankspace_and_comments() {
|
||||||
bool is_blankspace;
|
bool is_blankspace;
|
||||||
size_t blankspace_size;
|
size_t blankspace_size;
|
||||||
if (!read_blankspace(line(), pos(), &is_blankspace, &blankspace_size)) {
|
if (!read_blankspace(line(), pos(), &is_blankspace, &blankspace_size)) {
|
||||||
return {Token::Type::kError, begin_source(), "invalid UTF-8"};
|
return Token{Token::Type::kError, begin_source(), "invalid UTF-8"};
|
||||||
}
|
}
|
||||||
if (!is_blankspace) {
|
if (!is_blankspace) {
|
||||||
break;
|
break;
|
||||||
|
@ -259,7 +261,7 @@ Token Lexer::skip_blankspace_and_comments() {
|
||||||
}
|
}
|
||||||
|
|
||||||
auto t = skip_comment();
|
auto t = skip_comment();
|
||||||
if (!t.IsUninitialized()) {
|
if (t.has_value() && !t->IsUninitialized()) {
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,18 +272,18 @@ Token Lexer::skip_blankspace_and_comments() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (is_eof()) {
|
if (is_eof()) {
|
||||||
return {Token::Type::kEOF, begin_source()};
|
return Token{Token::Type::kEOF, begin_source()};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::skip_comment() {
|
std::optional<Token> Lexer::skip_comment() {
|
||||||
if (matches(pos(), "//")) {
|
if (matches(pos(), "//")) {
|
||||||
// Line comment: ignore everything until the end of line.
|
// Line comment: ignore everything until the end of line.
|
||||||
while (!is_eol()) {
|
while (!is_eol()) {
|
||||||
if (is_null()) {
|
if (is_null()) {
|
||||||
return {Token::Type::kError, begin_source(), "null character found"};
|
return Token{Token::Type::kError, begin_source(), "null character found"};
|
||||||
}
|
}
|
||||||
advance();
|
advance();
|
||||||
}
|
}
|
||||||
|
@ -311,20 +313,20 @@ Token Lexer::skip_comment() {
|
||||||
// Newline: skip and update source location.
|
// Newline: skip and update source location.
|
||||||
advance_line();
|
advance_line();
|
||||||
} else if (is_null()) {
|
} else if (is_null()) {
|
||||||
return {Token::Type::kError, begin_source(), "null character found"};
|
return Token{Token::Type::kError, begin_source(), "null character found"};
|
||||||
} else {
|
} else {
|
||||||
// Anything else: skip and update source location.
|
// Anything else: skip and update source location.
|
||||||
advance();
|
advance();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (depth > 0) {
|
if (depth > 0) {
|
||||||
return {Token::Type::kError, source, "unterminated block comment"};
|
return Token{Token::Type::kError, source, "unterminated block comment"};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_float() {
|
std::optional<Token> Lexer::try_float() {
|
||||||
auto start = pos();
|
auto start = pos();
|
||||||
auto end = pos();
|
auto end = pos();
|
||||||
|
|
||||||
|
@ -385,7 +387,7 @@ Token Lexer::try_float() {
|
||||||
// If an 'e' or 'E' was present, then the number part must also be present.
|
// If an 'e' or 'E' was present, then the number part must also be present.
|
||||||
if (!has_digits) {
|
if (!has_digits) {
|
||||||
const auto str = std::string{substr(start, end - start)};
|
const auto str = std::string{substr(start, end - start)};
|
||||||
return {Token::Type::kError, source,
|
return Token{Token::Type::kError, source,
|
||||||
"incomplete exponent for floating point literal: " + str};
|
"incomplete exponent for floating point literal: " + str};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -452,9 +454,9 @@ Token Lexer::try_float() {
|
||||||
if (!overflow && f) {
|
if (!overflow && f) {
|
||||||
advance(1);
|
advance(1);
|
||||||
end_source(source);
|
end_source(source);
|
||||||
return {Token::Type::kFloatLiteral_F, source, static_cast<double>(f.Get())};
|
return Token{Token::Type::kFloatLiteral_F, source, static_cast<double>(f.Get())};
|
||||||
}
|
}
|
||||||
return {Token::Type::kError, source, "value cannot be represented as 'f32'"};
|
return Token{Token::Type::kError, source, "value cannot be represented as 'f32'"};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_h_suffix) {
|
if (has_h_suffix) {
|
||||||
|
@ -462,23 +464,24 @@ Token Lexer::try_float() {
|
||||||
if (!overflow && f) {
|
if (!overflow && f) {
|
||||||
advance(1);
|
advance(1);
|
||||||
end_source(source);
|
end_source(source);
|
||||||
return {Token::Type::kFloatLiteral_H, source, static_cast<double>(f.Get())};
|
return Token{Token::Type::kFloatLiteral_H, source, static_cast<double>(f.Get())};
|
||||||
}
|
}
|
||||||
return {Token::Type::kError, source, "value cannot be represented as 'f16'"};
|
return Token{Token::Type::kError, source, "value cannot be represented as 'f16'"};
|
||||||
}
|
}
|
||||||
|
|
||||||
end_source(source);
|
end_source(source);
|
||||||
|
|
||||||
TINT_BEGIN_DISABLE_WARNING(FLOAT_EQUAL);
|
TINT_BEGIN_DISABLE_WARNING(FLOAT_EQUAL);
|
||||||
if (overflow || value == HUGE_VAL || -value == HUGE_VAL) {
|
if (overflow || value == HUGE_VAL || -value == HUGE_VAL) {
|
||||||
return {Token::Type::kError, source, "value cannot be represented as 'abstract-float'"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"value cannot be represented as 'abstract-float'"};
|
||||||
} else {
|
} else {
|
||||||
return {Token::Type::kFloatLiteral, source, value};
|
return Token{Token::Type::kFloatLiteral, source, value};
|
||||||
}
|
}
|
||||||
TINT_END_DISABLE_WARNING(FLOAT_EQUAL);
|
TINT_END_DISABLE_WARNING(FLOAT_EQUAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_hex_float() {
|
std::optional<Token> Lexer::try_hex_float() {
|
||||||
constexpr uint64_t kExponentBits = 11;
|
constexpr uint64_t kExponentBits = 11;
|
||||||
constexpr uint64_t kMantissaBits = 52;
|
constexpr uint64_t kMantissaBits = 52;
|
||||||
constexpr uint64_t kTotalBits = 1 + kExponentBits + kMantissaBits;
|
constexpr uint64_t kTotalBits = 1 + kExponentBits + kMantissaBits;
|
||||||
|
@ -593,7 +596,8 @@ Token Lexer::try_hex_float() {
|
||||||
// Skip leading 0s and the first 1
|
// Skip leading 0s and the first 1
|
||||||
if (seen_prior_one_bits) {
|
if (seen_prior_one_bits) {
|
||||||
if (!set_next_mantissa_bit_to(v != 0, true)) {
|
if (!set_next_mantissa_bit_to(v != 0, true)) {
|
||||||
return {Token::Type::kError, source, "mantissa is too large for hex float"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"mantissa is too large for hex float"};
|
||||||
}
|
}
|
||||||
++exponent;
|
++exponent;
|
||||||
} else {
|
} else {
|
||||||
|
@ -622,7 +626,8 @@ Token Lexer::try_hex_float() {
|
||||||
--exponent;
|
--exponent;
|
||||||
} else {
|
} else {
|
||||||
if (!set_next_mantissa_bit_to(v != 0, false)) {
|
if (!set_next_mantissa_bit_to(v != 0, false)) {
|
||||||
return {Token::Type::kError, source, "mantissa is too large for hex float"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"mantissa is too large for hex float"};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -663,7 +668,7 @@ Token Lexer::try_hex_float() {
|
||||||
// Check if we've overflowed input_exponent. This only matters when
|
// Check if we've overflowed input_exponent. This only matters when
|
||||||
// the mantissa is non-zero.
|
// the mantissa is non-zero.
|
||||||
if (!is_zero && (prev_exponent > input_exponent)) {
|
if (!is_zero && (prev_exponent > input_exponent)) {
|
||||||
return {Token::Type::kError, source, "exponent is too large for hex float"};
|
return Token{Token::Type::kError, source, "exponent is too large for hex float"};
|
||||||
}
|
}
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
|
@ -680,7 +685,7 @@ Token Lexer::try_hex_float() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!has_exponent_digits) {
|
if (!has_exponent_digits) {
|
||||||
return {Token::Type::kError, source, "expected an exponent value for hex float"};
|
return Token{Token::Type::kError, source, "expected an exponent value for hex float"};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -696,7 +701,7 @@ Token Lexer::try_hex_float() {
|
||||||
const uint64_t kIntMax = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
|
const uint64_t kIntMax = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
|
||||||
const uint64_t kMaxInputExponent = kIntMax - kExponentBias;
|
const uint64_t kMaxInputExponent = kIntMax - kExponentBias;
|
||||||
if (input_exponent > kMaxInputExponent) {
|
if (input_exponent > kMaxInputExponent) {
|
||||||
return {Token::Type::kError, source, "exponent is too large for hex float"};
|
return Token{Token::Type::kError, source, "exponent is too large for hex float"};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute exponent so far
|
// Compute exponent so far
|
||||||
|
@ -746,7 +751,7 @@ Token Lexer::try_hex_float() {
|
||||||
|
|
||||||
if (signed_exponent >= kExponentMax || (signed_exponent == kExponentMax && mantissa != 0)) {
|
if (signed_exponent >= kExponentMax || (signed_exponent == kExponentMax && mantissa != 0)) {
|
||||||
std::string type = has_f_suffix ? "f32" : (has_h_suffix ? "f16" : "abstract-float");
|
std::string type = has_f_suffix ? "f32" : (has_h_suffix ? "f16" : "abstract-float");
|
||||||
return {Token::Type::kError, source, "value cannot be represented as '" + type + "'"};
|
return Token{Token::Type::kError, source, "value cannot be represented as '" + type + "'"};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Combine sign, mantissa, and exponent
|
// Combine sign, mantissa, and exponent
|
||||||
|
@ -762,7 +767,7 @@ Token Lexer::try_hex_float() {
|
||||||
// Check value fits in f32
|
// Check value fits in f32
|
||||||
if (result_f64 < static_cast<double>(f32::kLowestValue) ||
|
if (result_f64 < static_cast<double>(f32::kLowestValue) ||
|
||||||
result_f64 > static_cast<double>(f32::kHighestValue)) {
|
result_f64 > static_cast<double>(f32::kHighestValue)) {
|
||||||
return {Token::Type::kError, source, "value cannot be represented as 'f32'"};
|
return Token{Token::Type::kError, source, "value cannot be represented as 'f32'"};
|
||||||
}
|
}
|
||||||
// Check the value can be exactly represented, i.e. only high 23 mantissa bits are valid for
|
// Check the value can be exactly represented, i.e. only high 23 mantissa bits are valid for
|
||||||
// normal f32 values, and less for subnormal f32 values. The rest low mantissa bits must be
|
// normal f32 values, and less for subnormal f32 values. The rest low mantissa bits must be
|
||||||
|
@ -803,19 +808,21 @@ Token Lexer::try_hex_float() {
|
||||||
} else if (abs_result_f64 != 0.0) {
|
} else if (abs_result_f64 != 0.0) {
|
||||||
// The result is smaller than the smallest subnormal f32 value, but not equal to zero.
|
// The result is smaller than the smallest subnormal f32 value, but not equal to zero.
|
||||||
// Such value will never be exactly represented by f32.
|
// Such value will never be exactly represented by f32.
|
||||||
return {Token::Type::kError, source, "value cannot be exactly represented as 'f32'"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"value cannot be exactly represented as 'f32'"};
|
||||||
}
|
}
|
||||||
// Check the low 52-valid_mantissa_bits mantissa bits must be 0.
|
// Check the low 52-valid_mantissa_bits mantissa bits must be 0.
|
||||||
TINT_ASSERT(Reader, (0 <= valid_mantissa_bits) && (valid_mantissa_bits <= 23));
|
TINT_ASSERT(Reader, (0 <= valid_mantissa_bits) && (valid_mantissa_bits <= 23));
|
||||||
if (result_u64 & ((uint64_t(1) << (52 - valid_mantissa_bits)) - 1)) {
|
if (result_u64 & ((uint64_t(1) << (52 - valid_mantissa_bits)) - 1)) {
|
||||||
return {Token::Type::kError, source, "value cannot be exactly represented as 'f32'"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"value cannot be exactly represented as 'f32'"};
|
||||||
}
|
}
|
||||||
return {Token::Type::kFloatLiteral_F, source, result_f64};
|
return Token{Token::Type::kFloatLiteral_F, source, result_f64};
|
||||||
} else if (has_h_suffix) {
|
} else if (has_h_suffix) {
|
||||||
// Check value fits in f16
|
// Check value fits in f16
|
||||||
if (result_f64 < static_cast<double>(f16::kLowestValue) ||
|
if (result_f64 < static_cast<double>(f16::kLowestValue) ||
|
||||||
result_f64 > static_cast<double>(f16::kHighestValue)) {
|
result_f64 > static_cast<double>(f16::kHighestValue)) {
|
||||||
return {Token::Type::kError, source, "value cannot be represented as 'f16'"};
|
return Token{Token::Type::kError, source, "value cannot be represented as 'f16'"};
|
||||||
}
|
}
|
||||||
// Check the value can be exactly represented, i.e. only high 10 mantissa bits are valid for
|
// Check the value can be exactly represented, i.e. only high 10 mantissa bits are valid for
|
||||||
// normal f16 values, and less for subnormal f16 values. The rest low mantissa bits must be
|
// normal f16 values, and less for subnormal f16 values. The rest low mantissa bits must be
|
||||||
|
@ -854,17 +861,19 @@ Token Lexer::try_hex_float() {
|
||||||
} else if (abs_result_f64 != 0.0) {
|
} else if (abs_result_f64 != 0.0) {
|
||||||
// The result is smaller than the smallest subnormal f16 value, but not equal to zero.
|
// The result is smaller than the smallest subnormal f16 value, but not equal to zero.
|
||||||
// Such value will never be exactly represented by f16.
|
// Such value will never be exactly represented by f16.
|
||||||
return {Token::Type::kError, source, "value cannot be exactly represented as 'f16'"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"value cannot be exactly represented as 'f16'"};
|
||||||
}
|
}
|
||||||
// Check the low 52-valid_mantissa_bits mantissa bits must be 0.
|
// Check the low 52-valid_mantissa_bits mantissa bits must be 0.
|
||||||
TINT_ASSERT(Reader, (0 <= valid_mantissa_bits) && (valid_mantissa_bits <= 10));
|
TINT_ASSERT(Reader, (0 <= valid_mantissa_bits) && (valid_mantissa_bits <= 10));
|
||||||
if (result_u64 & ((uint64_t(1) << (52 - valid_mantissa_bits)) - 1)) {
|
if (result_u64 & ((uint64_t(1) << (52 - valid_mantissa_bits)) - 1)) {
|
||||||
return {Token::Type::kError, source, "value cannot be exactly represented as 'f16'"};
|
return Token{Token::Type::kError, source,
|
||||||
|
"value cannot be exactly represented as 'f16'"};
|
||||||
}
|
}
|
||||||
return {Token::Type::kFloatLiteral_H, source, result_f64};
|
return Token{Token::Type::kFloatLiteral_H, source, result_f64};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {Token::Type::kFloatLiteral, source, result_f64};
|
return Token{Token::Type::kFloatLiteral, source, result_f64};
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::build_token_from_int_if_possible(Source source,
|
Token Lexer::build_token_from_int_if_possible(Source source,
|
||||||
|
@ -911,7 +920,7 @@ Token Lexer::build_token_from_int_if_possible(Source source,
|
||||||
return {Token::Type::kIntLiteral, source, value};
|
return {Token::Type::kIntLiteral, source, value};
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_hex_integer() {
|
std::optional<Token> Lexer::try_hex_integer() {
|
||||||
auto start = pos();
|
auto start = pos();
|
||||||
auto curr = start;
|
auto curr = start;
|
||||||
|
|
||||||
|
@ -924,14 +933,14 @@ Token Lexer::try_hex_integer() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_hex(at(curr))) {
|
if (!is_hex(at(curr))) {
|
||||||
return {Token::Type::kError, source,
|
return Token{Token::Type::kError, source,
|
||||||
"integer or float hex literal has no significant digits"};
|
"integer or float hex literal has no significant digits"};
|
||||||
}
|
}
|
||||||
|
|
||||||
return build_token_from_int_if_possible(source, curr, curr - start, 16);
|
return build_token_from_int_if_possible(source, curr, curr - start, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_integer() {
|
std::optional<Token> Lexer::try_integer() {
|
||||||
auto start = pos();
|
auto start = pos();
|
||||||
auto curr = start;
|
auto curr = start;
|
||||||
|
|
||||||
|
@ -945,14 +954,14 @@ Token Lexer::try_integer() {
|
||||||
// are not allowed.
|
// are not allowed.
|
||||||
if (auto next = curr + 1; next < length()) {
|
if (auto next = curr + 1; next < length()) {
|
||||||
if (at(curr) == '0' && is_digit(at(next))) {
|
if (at(curr) == '0' && is_digit(at(next))) {
|
||||||
return {Token::Type::kError, source, "integer literal cannot have leading 0s"};
|
return Token{Token::Type::kError, source, "integer literal cannot have leading 0s"};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return build_token_from_int_if_possible(source, start, 0, 10);
|
return build_token_from_int_if_possible(source, start, 0, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_ident() {
|
std::optional<Token> Lexer::try_ident() {
|
||||||
auto source = begin_source();
|
auto source = begin_source();
|
||||||
auto start = pos();
|
auto start = pos();
|
||||||
|
|
||||||
|
@ -962,7 +971,7 @@ Token Lexer::try_ident() {
|
||||||
auto [code_point, n] = utils::utf8::Decode(utf8, length() - pos());
|
auto [code_point, n] = utils::utf8::Decode(utf8, length() - pos());
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
advance(); // Skip the bad byte.
|
advance(); // Skip the bad byte.
|
||||||
return {Token::Type::kError, source, "invalid UTF-8"};
|
return Token{Token::Type::kError, source, "invalid UTF-8"};
|
||||||
}
|
}
|
||||||
if (code_point != utils::CodePoint('_') && !code_point.IsXIDStart()) {
|
if (code_point != utils::CodePoint('_') && !code_point.IsXIDStart()) {
|
||||||
return {};
|
return {};
|
||||||
|
@ -977,7 +986,7 @@ Token Lexer::try_ident() {
|
||||||
auto [code_point, n] = utils::utf8::Decode(utf8, line().size() - pos());
|
auto [code_point, n] = utils::utf8::Decode(utf8, line().size() - pos());
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
advance(); // Skip the bad byte.
|
advance(); // Skip the bad byte.
|
||||||
return {Token::Type::kError, source, "invalid UTF-8"};
|
return Token{Token::Type::kError, source, "invalid UTF-8"};
|
||||||
}
|
}
|
||||||
if (!code_point.IsXIDContinue()) {
|
if (!code_point.IsXIDContinue()) {
|
||||||
break;
|
break;
|
||||||
|
@ -999,15 +1008,14 @@ Token Lexer::try_ident() {
|
||||||
auto str = substr(start, pos() - start);
|
auto str = substr(start, pos() - start);
|
||||||
end_source(source);
|
end_source(source);
|
||||||
|
|
||||||
auto t = check_keyword(source, str);
|
if (auto t = check_keyword(source, str); t.has_value() && !t->IsUninitialized()) {
|
||||||
if (!t.IsUninitialized()) {
|
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
return {Token::Type::kIdentifier, source, str};
|
return Token{Token::Type::kIdentifier, source, str};
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::try_punctuation() {
|
std::optional<Token> Lexer::try_punctuation() {
|
||||||
auto source = begin_source();
|
auto source = begin_source();
|
||||||
auto type = Token::Type::kUninitialized;
|
auto type = Token::Type::kUninitialized;
|
||||||
|
|
||||||
|
@ -1177,97 +1185,99 @@ Token Lexer::try_punctuation() {
|
||||||
type = Token::Type::kXor;
|
type = Token::Type::kXor;
|
||||||
advance(1);
|
advance(1);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
end_source(source);
|
end_source(source);
|
||||||
|
|
||||||
return {type, source};
|
return Token{type, source};
|
||||||
}
|
}
|
||||||
|
|
||||||
Token Lexer::check_keyword(const Source& source, std::string_view str) {
|
std::optional<Token> Lexer::check_keyword(const Source& source, std::string_view str) {
|
||||||
if (str == "alias") {
|
if (str == "alias") {
|
||||||
return {Token::Type::kAlias, source, "alias"};
|
return Token{Token::Type::kAlias, source, "alias"};
|
||||||
}
|
}
|
||||||
if (str == "bitcast") {
|
if (str == "bitcast") {
|
||||||
return {Token::Type::kBitcast, source, "bitcast"};
|
return Token{Token::Type::kBitcast, source, "bitcast"};
|
||||||
}
|
}
|
||||||
if (str == "break") {
|
if (str == "break") {
|
||||||
return {Token::Type::kBreak, source, "break"};
|
return Token{Token::Type::kBreak, source, "break"};
|
||||||
}
|
}
|
||||||
if (str == "case") {
|
if (str == "case") {
|
||||||
return {Token::Type::kCase, source, "case"};
|
return Token{Token::Type::kCase, source, "case"};
|
||||||
}
|
}
|
||||||
if (str == "const") {
|
if (str == "const") {
|
||||||
return {Token::Type::kConst, source, "const"};
|
return Token{Token::Type::kConst, source, "const"};
|
||||||
}
|
}
|
||||||
if (str == "const_assert") {
|
if (str == "const_assert") {
|
||||||
return {Token::Type::kConstAssert, source, "const_assert"};
|
return Token{Token::Type::kConstAssert, source, "const_assert"};
|
||||||
}
|
}
|
||||||
if (str == "continue") {
|
if (str == "continue") {
|
||||||
return {Token::Type::kContinue, source, "continue"};
|
return Token{Token::Type::kContinue, source, "continue"};
|
||||||
}
|
}
|
||||||
if (str == "continuing") {
|
if (str == "continuing") {
|
||||||
return {Token::Type::kContinuing, source, "continuing"};
|
return Token{Token::Type::kContinuing, source, "continuing"};
|
||||||
}
|
}
|
||||||
if (str == "diagnostic") {
|
if (str == "diagnostic") {
|
||||||
return {Token::Type::kDiagnostic, source, "diagnostic"};
|
return Token{Token::Type::kDiagnostic, source, "diagnostic"};
|
||||||
}
|
}
|
||||||
if (str == "discard") {
|
if (str == "discard") {
|
||||||
return {Token::Type::kDiscard, source, "discard"};
|
return Token{Token::Type::kDiscard, source, "discard"};
|
||||||
}
|
}
|
||||||
if (str == "default") {
|
if (str == "default") {
|
||||||
return {Token::Type::kDefault, source, "default"};
|
return Token{Token::Type::kDefault, source, "default"};
|
||||||
}
|
}
|
||||||
if (str == "else") {
|
if (str == "else") {
|
||||||
return {Token::Type::kElse, source, "else"};
|
return Token{Token::Type::kElse, source, "else"};
|
||||||
}
|
}
|
||||||
if (str == "enable") {
|
if (str == "enable") {
|
||||||
return {Token::Type::kEnable, source, "enable"};
|
return Token{Token::Type::kEnable, source, "enable"};
|
||||||
}
|
}
|
||||||
if (str == "fallthrough") {
|
if (str == "fallthrough") {
|
||||||
return {Token::Type::kFallthrough, source, "fallthrough"};
|
return Token{Token::Type::kFallthrough, source, "fallthrough"};
|
||||||
}
|
}
|
||||||
if (str == "false") {
|
if (str == "false") {
|
||||||
return {Token::Type::kFalse, source, "false"};
|
return Token{Token::Type::kFalse, source, "false"};
|
||||||
}
|
}
|
||||||
if (str == "fn") {
|
if (str == "fn") {
|
||||||
return {Token::Type::kFn, source, "fn"};
|
return Token{Token::Type::kFn, source, "fn"};
|
||||||
}
|
}
|
||||||
if (str == "for") {
|
if (str == "for") {
|
||||||
return {Token::Type::kFor, source, "for"};
|
return Token{Token::Type::kFor, source, "for"};
|
||||||
}
|
}
|
||||||
if (str == "if") {
|
if (str == "if") {
|
||||||
return {Token::Type::kIf, source, "if"};
|
return Token{Token::Type::kIf, source, "if"};
|
||||||
}
|
}
|
||||||
if (str == "let") {
|
if (str == "let") {
|
||||||
return {Token::Type::kLet, source, "let"};
|
return Token{Token::Type::kLet, source, "let"};
|
||||||
}
|
}
|
||||||
if (str == "loop") {
|
if (str == "loop") {
|
||||||
return {Token::Type::kLoop, source, "loop"};
|
return Token{Token::Type::kLoop, source, "loop"};
|
||||||
}
|
}
|
||||||
if (str == "override") {
|
if (str == "override") {
|
||||||
return {Token::Type::kOverride, source, "override"};
|
return Token{Token::Type::kOverride, source, "override"};
|
||||||
}
|
}
|
||||||
if (str == "return") {
|
if (str == "return") {
|
||||||
return {Token::Type::kReturn, source, "return"};
|
return Token{Token::Type::kReturn, source, "return"};
|
||||||
}
|
}
|
||||||
if (str == "requires") {
|
if (str == "requires") {
|
||||||
return {Token::Type::kRequires, source, "requires"};
|
return Token{Token::Type::kRequires, source, "requires"};
|
||||||
}
|
}
|
||||||
if (str == "struct") {
|
if (str == "struct") {
|
||||||
return {Token::Type::kStruct, source, "struct"};
|
return Token{Token::Type::kStruct, source, "struct"};
|
||||||
}
|
}
|
||||||
if (str == "switch") {
|
if (str == "switch") {
|
||||||
return {Token::Type::kSwitch, source, "switch"};
|
return Token{Token::Type::kSwitch, source, "switch"};
|
||||||
}
|
}
|
||||||
if (str == "true") {
|
if (str == "true") {
|
||||||
return {Token::Type::kTrue, source, "true"};
|
return Token{Token::Type::kTrue, source, "true"};
|
||||||
}
|
}
|
||||||
if (str == "var") {
|
if (str == "var") {
|
||||||
return {Token::Type::kVar, source, "var"};
|
return Token{Token::Type::kVar, source, "var"};
|
||||||
}
|
}
|
||||||
if (str == "while") {
|
if (str == "while") {
|
||||||
return {Token::Type::kWhile, source, "while"};
|
return Token{Token::Type::kWhile, source, "while"};
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#ifndef SRC_TINT_READER_WGSL_LEXER_H_
|
#ifndef SRC_TINT_READER_WGSL_LEXER_H_
|
||||||
#define SRC_TINT_READER_WGSL_LEXER_H_
|
#define SRC_TINT_READER_WGSL_LEXER_H_
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -40,19 +41,19 @@ class Lexer {
|
||||||
|
|
||||||
/// Advances past blankspace and comments, if present at the current position.
|
/// Advances past blankspace and comments, if present at the current position.
|
||||||
/// @returns error token, EOF, or uninitialized
|
/// @returns error token, EOF, or uninitialized
|
||||||
Token skip_blankspace_and_comments();
|
std::optional<Token> skip_blankspace_and_comments();
|
||||||
/// Advances past a comment at the current position, if one exists.
|
/// Advances past a comment at the current position, if one exists.
|
||||||
/// Returns an error if there was an unterminated block comment,
|
/// Returns an error if there was an unterminated block comment,
|
||||||
/// or a null character was present.
|
/// or a null character was present.
|
||||||
/// @returns uninitialized token on success, or error
|
/// @returns uninitialized token on success, or error
|
||||||
Token skip_comment();
|
std::optional<Token> skip_comment();
|
||||||
|
|
||||||
Token build_token_from_int_if_possible(Source source,
|
Token build_token_from_int_if_possible(Source source,
|
||||||
size_t start,
|
size_t start,
|
||||||
size_t prefix_count,
|
size_t prefix_count,
|
||||||
int32_t base);
|
int32_t base);
|
||||||
|
|
||||||
Token check_keyword(const Source&, std::string_view);
|
std::optional<Token> check_keyword(const Source&, std::string_view);
|
||||||
|
|
||||||
/// The try_* methods have the following in common:
|
/// The try_* methods have the following in common:
|
||||||
/// - They assume there is at least one character to be consumed,
|
/// - They assume there is at least one character to be consumed,
|
||||||
|
@ -62,12 +63,12 @@ class Lexer {
|
||||||
/// - Some can return an error token.
|
/// - Some can return an error token.
|
||||||
/// - Otherwise they return an uninitialized token when they did not
|
/// - Otherwise they return an uninitialized token when they did not
|
||||||
/// match a token of the specfied kind.
|
/// match a token of the specfied kind.
|
||||||
Token try_float();
|
std::optional<Token> try_float();
|
||||||
Token try_hex_float();
|
std::optional<Token> try_hex_float();
|
||||||
Token try_hex_integer();
|
std::optional<Token> try_hex_integer();
|
||||||
Token try_ident();
|
std::optional<Token> try_ident();
|
||||||
Token try_integer();
|
std::optional<Token> try_integer();
|
||||||
Token try_punctuation();
|
std::optional<Token> try_punctuation();
|
||||||
|
|
||||||
Source begin_source() const;
|
Source begin_source() const;
|
||||||
void end_source(Source&) const;
|
void end_source(Source&) const;
|
||||||
|
|
Loading…
Reference in New Issue