tint: make Lexer use line breaks from Source::File

Before this change, we duplicated line break parsing in both
Source::File and Lexer. This change makes it so that the Lexer no longer
looks for line breaks, instead relying on Source::File for this info.
This de-duplication will also help in implementing the latest spec
changes with respect to line breaks (CRLF vs CR, etc).

Bug: tint:1505
Bug: tint:1513
Change-Id: Ifa820f75ede7e82822525282127e05d2fea047e1
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/87604
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
This commit is contained in:
Antonio Maiorano 2022-04-22 15:34:21 +00:00 committed by Dawn LUCI CQ
parent d51b47ac67
commit eba0e85c33
2 changed files with 247 additions and 253 deletions

View File

@ -28,8 +28,7 @@ namespace {
bool is_blankspace(char c) { bool is_blankspace(char c) {
// See https://www.w3.org/TR/WGSL/#blankspace. // See https://www.w3.org/TR/WGSL/#blankspace.
return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || return c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == '\r';
c == '\r';
} }
uint32_t dec_value(char c) { uint32_t dec_value(char c) {
@ -54,13 +53,62 @@ uint32_t hex_value(char c) {
} // namespace } // namespace
Lexer::Lexer(const Source::File* file) Lexer::Lexer(const Source::File* file) : file_(file), location_{1, 1} {}
: file_(file),
len_(static_cast<uint32_t>(file->content.data.size())),
location_{1, 1} {}
Lexer::~Lexer() = default; Lexer::~Lexer() = default;
const std::string_view Lexer::line() const {
if (file_->content.lines.size() == 0) {
static const char* empty_string = "";
return empty_string;
}
return file_->content.lines[location_.line - 1];
}
size_t Lexer::pos() const {
return location_.column - 1;
}
size_t Lexer::length() const {
return line().size();
}
const char& Lexer::at(size_t pos) const {
auto l = line();
// Unlike for std::string, if pos == l.size(), indexing `l[pos]` is UB for
// std::string_view.
if (pos >= l.size()) {
static const char zero = 0;
return zero;
}
return l[pos];
}
std::string_view Lexer::substr(size_t offset, size_t count) {
return line().substr(offset, count);
}
void Lexer::advance(size_t offset) {
location_.column += offset;
}
void Lexer::set_pos(size_t pos) {
location_.column = pos + 1;
}
void Lexer::advance_line() {
location_.line++;
location_.column = 1;
}
bool Lexer::is_eof() const {
return location_.line >= file_->content.lines.size() && pos() >= length();
}
bool Lexer::is_eol() const {
return pos() >= length();
}
Token Lexer::next() { Token Lexer::next() {
if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) { if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
return t; return t;
@ -106,12 +154,8 @@ void Lexer::end_source(Source& src) const {
src.range.end = location_; src.range.end = location_;
} }
bool Lexer::is_eof() const {
return pos_ >= len_;
}
bool Lexer::is_null() const { bool Lexer::is_null() const {
return (pos_ < len_) && (file_->content.data[pos_] == 0); return (pos() < length()) && (at(pos()) == 0);
} }
bool Lexer::is_digit(char ch) const { bool Lexer::is_digit(char ch) const {
@ -122,25 +166,26 @@ bool Lexer::is_hex(char ch) const {
return std::isxdigit(static_cast<unsigned char>(ch)); return std::isxdigit(static_cast<unsigned char>(ch));
} }
bool Lexer::matches(size_t pos, std::string_view substr) { bool Lexer::matches(size_t pos, std::string_view sub_string) {
if (pos >= len_) if (pos >= length())
return false; return false;
return file_->content.data_view.substr(pos, substr.size()) == substr; return substr(pos, sub_string.size()) == sub_string;
} }
Token Lexer::skip_blankspace_and_comments() { Token Lexer::skip_blankspace_and_comments() {
for (;;) { for (;;) {
auto pos = pos_; auto loc = location_;
while (!is_eof() && is_blankspace(file_->content.data[pos_])) { while (!is_eof()) {
if (matches(pos_, "\n")) { if (is_eol()) {
pos_++; advance_line();
location_.line++;
location_.column = 1;
continue; continue;
} }
pos_++; if (!is_blankspace(at(pos()))) {
location_.column++; break;
}
advance();
} }
auto t = skip_comment(); auto t = skip_comment();
@ -150,7 +195,7 @@ Token Lexer::skip_blankspace_and_comments() {
// If the cursor didn't advance we didn't remove any blankspace // If the cursor didn't advance we didn't remove any blankspace
// so we're done. // so we're done.
if (pos == pos_) if (loc == location_)
break; break;
} }
if (is_eof()) { if (is_eof()) {
@ -161,53 +206,46 @@ Token Lexer::skip_blankspace_and_comments() {
} }
Token Lexer::skip_comment() { Token Lexer::skip_comment() {
if (matches(pos_, "//")) { if (matches(pos(), "//")) {
// Line comment: ignore everything until the end of input or a blankspace // Line comment: ignore everything until the end of input or a blankspace
// character other than space or horizontal tab. // character other than space or horizontal tab.
while (!is_eof() && !(is_blankspace(file_->content.data[pos_]) && while (!is_eol() && !(is_blankspace(at(pos())) && !matches(pos(), " ") &&
!matches(pos_, " ") && !matches(pos_, "\t"))) { !matches(pos(), "\t"))) {
if (is_null()) { if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"}; return {Token::Type::kError, begin_source(), "null character found"};
} }
pos_++; advance();
location_.column++;
} }
return {}; return {};
} }
if (matches(pos_, "/*")) { if (matches(pos(), "/*")) {
// Block comment: ignore everything until the closing '*/' token. // Block comment: ignore everything until the closing '*/' token.
// Record source location of the initial '/*' // Record source location of the initial '/*'
auto source = begin_source(); auto source = begin_source();
source.range.end.column += 1; source.range.end.column += 1;
pos_ += 2; advance(2);
location_.column += 2;
int depth = 1; int depth = 1;
while (!is_eof() && depth > 0) { while (!is_eof() && depth > 0) {
if (matches(pos_, "/*")) { if (matches(pos(), "/*")) {
// Start of block comment: increase nesting depth. // Start of block comment: increase nesting depth.
pos_ += 2; advance(2);
location_.column += 2;
depth++; depth++;
} else if (matches(pos_, "*/")) { } else if (matches(pos(), "*/")) {
// End of block comment: decrease nesting depth. // End of block comment: decrease nesting depth.
pos_ += 2; advance(2);
location_.column += 2;
depth--; depth--;
} else if (matches(pos_, "\n")) { } else if (is_eol()) {
// Newline: skip and update source location. // Newline: skip and update source location.
pos_++; advance_line();
location_.line++;
location_.column = 1;
} else if (is_null()) { } else if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"}; return {Token::Type::kError, begin_source(), "null character found"};
} else { } else {
// Anything else: skip and update source location. // Anything else: skip and update source location.
pos_++; advance();
location_.column++;
} }
} }
if (depth > 0) { if (depth > 0) {
@ -218,8 +256,8 @@ Token Lexer::skip_comment() {
} }
Token Lexer::try_float() { Token Lexer::try_float() {
auto start = pos_; auto start = pos();
auto end = pos_; auto end = pos();
auto source = begin_source(); auto source = begin_source();
bool has_mantissa_digits = false; bool has_mantissa_digits = false;
@ -227,18 +265,18 @@ Token Lexer::try_float() {
if (matches(end, "-")) { if (matches(end, "-")) {
end++; end++;
} }
while (end < len_ && is_digit(file_->content.data[end])) { while (end < length() && is_digit(at(end))) {
has_mantissa_digits = true; has_mantissa_digits = true;
end++; end++;
} }
bool has_point = false; bool has_point = false;
if (end < len_ && matches(end, ".")) { if (end < length() && matches(end, ".")) {
has_point = true; has_point = true;
end++; end++;
} }
while (end < len_ && is_digit(file_->content.data[end])) { while (end < length() && is_digit(at(end))) {
has_mantissa_digits = true; has_mantissa_digits = true;
end++; end++;
} }
@ -249,27 +287,27 @@ Token Lexer::try_float() {
// Parse the exponent if one exists // Parse the exponent if one exists
bool has_exponent = false; bool has_exponent = false;
if (end < len_ && (matches(end, "e") || matches(end, "E"))) { if (end < length() && (matches(end, "e") || matches(end, "E"))) {
end++; end++;
if (end < len_ && (matches(end, "+") || matches(end, "-"))) { if (end < length() && (matches(end, "+") || matches(end, "-"))) {
end++; end++;
} }
while (end < len_ && isdigit(file_->content.data[end])) { while (end < length() && isdigit(at(end))) {
has_exponent = true; has_exponent = true;
end++; end++;
} }
// If an 'e' or 'E' was present, then the number part must also be present. // If an 'e' or 'E' was present, then the number part must also be present.
if (!has_exponent) { if (!has_exponent) {
const auto str = file_->content.data.substr(start, end - start); const auto str = std::string{substr(start, end - start)};
return {Token::Type::kError, source, return {Token::Type::kError, source,
"incomplete exponent for floating point literal: " + str}; "incomplete exponent for floating point literal: " + str};
} }
} }
bool has_f_suffix = false; bool has_f_suffix = false;
if (end < len_ && matches(end, "f")) { if (end < length() && matches(end, "f")) {
end++; end++;
has_f_suffix = true; has_f_suffix = true;
} }
@ -280,14 +318,12 @@ Token Lexer::try_float() {
} }
// Save the error string, for use by diagnostics. // Save the error string, for use by diagnostics.
const auto str = file_->content.data.substr(start, end - start); const auto str = std::string{substr(start, end - start)};
pos_ = end;
location_.column += (end - start);
advance(end - start);
end_source(source); end_source(source);
auto res = strtod(file_->content.data.c_str() + start, nullptr); auto res = strtod(&at(start), nullptr);
// This errors out if a non-zero magnitude is too small to represent in a // This errors out if a non-zero magnitude is too small to represent in a
// float. It can't be represented faithfully in an f32. // float. It can't be represented faithfully in an f32.
const auto magnitude = std::fabs(res); const auto magnitude = std::fabs(res);
@ -322,8 +358,8 @@ Token Lexer::try_hex_float() {
constexpr uint32_t kExponentLeftShift = kMantissaBits; constexpr uint32_t kExponentLeftShift = kMantissaBits;
constexpr uint32_t kSignBit = 31; constexpr uint32_t kSignBit = 31;
auto start = pos_; auto start = pos();
auto end = pos_; auto end = pos();
auto source = begin_source(); auto source = begin_source();
@ -378,7 +414,7 @@ Token Lexer::try_hex_float() {
// Collect integer range (if any) // Collect integer range (if any)
auto integer_range = std::make_pair(end, end); auto integer_range = std::make_pair(end, end);
while (end < len_ && is_hex(file_->content.data[end])) { while (end < length() && is_hex(at(end))) {
integer_range.second = ++end; integer_range.second = ++end;
} }
@ -391,7 +427,7 @@ Token Lexer::try_hex_float() {
// Collect fractional range (if any) // Collect fractional range (if any)
auto fractional_range = std::make_pair(end, end); auto fractional_range = std::make_pair(end, end);
while (end < len_ && is_hex(file_->content.data[end])) { while (end < length() && is_hex(at(end))) {
fractional_range.second = ++end; fractional_range.second = ++end;
} }
@ -421,7 +457,7 @@ Token Lexer::try_hex_float() {
// The magnitude is zero if and only if seen_prior_one_bits is false. // The magnitude is zero if and only if seen_prior_one_bits is false.
bool seen_prior_one_bits = false; bool seen_prior_one_bits = false;
for (auto i = integer_range.first; i < integer_range.second; ++i) { for (auto i = integer_range.first; i < integer_range.second; ++i) {
const auto nibble = hex_value(file_->content.data[i]); const auto nibble = hex_value(at(i));
if (nibble != 0) { if (nibble != 0) {
has_zero_integer = false; has_zero_integer = false;
} }
@ -447,7 +483,7 @@ Token Lexer::try_hex_float() {
// Parse fractional part // Parse fractional part
// [0-9a-fA-F]* // [0-9a-fA-F]*
for (auto i = fractional_range.first; i < fractional_range.second; ++i) { for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
auto nibble = hex_value(file_->content.data[i]); auto nibble = hex_value(at(i));
for (int32_t bit = 3; bit >= 0; --bit) { for (int32_t bit = 3; bit >= 0; --bit) {
auto v = 1 & (nibble >> bit); auto v = 1 & (nibble >> bit);
@ -495,11 +531,10 @@ Token Lexer::try_hex_float() {
// Allow overflow (in uint32_t) when the floating point value magnitude is // Allow overflow (in uint32_t) when the floating point value magnitude is
// zero. // zero.
bool has_exponent_digits = false; bool has_exponent_digits = false;
while (end < len_ && isdigit(file_->content.data[end])) { while (end < length() && isdigit(at(end))) {
has_exponent_digits = true; has_exponent_digits = true;
auto prev_exponent = input_exponent; auto prev_exponent = input_exponent;
input_exponent = input_exponent = (input_exponent * 10) + dec_value(at(end));
(input_exponent * 10) + dec_value(file_->content.data[end]);
// Check if we've overflowed input_exponent. This only matters when // Check if we've overflowed input_exponent. This only matters when
// the mantissa is non-zero. // the mantissa is non-zero.
if (!is_zero && (prev_exponent > input_exponent)) { if (!is_zero && (prev_exponent > input_exponent)) {
@ -512,7 +547,7 @@ Token Lexer::try_hex_float() {
// Parse optional 'f' suffix. For a hex float, it can only exist // Parse optional 'f' suffix. For a hex float, it can only exist
// when the exponent is present. Otherwise it will look like // when the exponent is present. Otherwise it will look like
// one of the mantissa digits. // one of the mantissa digits.
if (end < len_ && matches(end, "f")) { if (end < length() && matches(end, "f")) {
end++; end++;
} }
@ -522,8 +557,7 @@ Token Lexer::try_hex_float() {
} }
} }
pos_ = end; advance(end - start);
location_.column += (end - start);
end_source(source); end_source(source);
if (is_zero) { if (is_zero) {
@ -611,29 +645,26 @@ Token Lexer::build_token_from_int_if_possible(Source source,
size_t start, size_t start,
size_t end, size_t end,
int32_t base) { int32_t base) {
auto res = strtoll(file_->content.data.c_str() + start, nullptr, base); auto res = strtoll(&at(start), nullptr, base);
if (matches(pos_, "u")) { if (matches(pos(), "u")) {
if (static_cast<uint64_t>(res) > if (static_cast<uint64_t>(res) >
static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) { static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
return {Token::Type::kError, source, return {
"u32 (" + file_->content.data.substr(start, end - start) + Token::Type::kError, source,
") too large"}; "u32 (" + std::string{substr(start, end - start)} + ") too large"};
} }
pos_ += 1; advance(1);
location_.column += 1;
end_source(source); end_source(source);
return {source, static_cast<uint32_t>(res)}; return {source, static_cast<uint32_t>(res)};
} }
if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) { if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
return {Token::Type::kError, source, return {Token::Type::kError, source,
"i32 (" + file_->content.data.substr(start, end - start) + "i32 (" + std::string{substr(start, end - start)} + ") too small"};
") too small"};
} }
if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) { if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
return {Token::Type::kError, source, return {Token::Type::kError, source,
"i32 (" + file_->content.data.substr(start, end - start) + "i32 (" + std::string{substr(start, end - start)} + ") too large"};
") too large"};
} }
end_source(source); end_source(source);
return {source, static_cast<int32_t>(res)}; return {source, static_cast<int32_t>(res)};
@ -641,8 +672,8 @@ Token Lexer::build_token_from_int_if_possible(Source source,
Token Lexer::try_hex_integer() { Token Lexer::try_hex_integer() {
constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types
auto start = pos_; auto start = pos();
auto end = pos_; auto end = pos();
auto source = begin_source(); auto source = begin_source();
@ -657,14 +688,14 @@ Token Lexer::try_hex_integer() {
} }
auto first = end; auto first = end;
while (!is_eof() && is_hex(file_->content.data[end])) { while (!is_eol() && is_hex(at(end))) {
end++; end++;
auto digits = end - first; auto digits = end - first;
if (digits > kMaxDigits) { if (digits > kMaxDigits) {
return {Token::Type::kError, source, return {Token::Type::kError, source,
"integer literal (" + "integer literal (" +
file_->content.data.substr(start, end - 1 - start) + std::string{substr(start, end - 1 - start)} +
"...) has too many digits"}; "...) has too many digits"};
} }
} }
@ -673,15 +704,14 @@ Token Lexer::try_hex_integer() {
"integer or float hex literal has no significant digits"}; "integer or float hex literal has no significant digits"};
} }
pos_ = end; advance(end - start);
location_.column += (end - start);
return build_token_from_int_if_possible(source, start, end, 16); return build_token_from_int_if_possible(source, start, end, 16);
} }
Token Lexer::try_integer() { Token Lexer::try_integer() {
constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types
auto start = pos_; auto start = pos();
auto end = start; auto end = start;
auto source = begin_source(); auto source = begin_source();
@ -690,7 +720,7 @@ Token Lexer::try_integer() {
end++; end++;
} }
if (end >= len_ || !is_digit(file_->content.data[end])) { if (end >= length() || !is_digit(at(end))) {
return {}; return {};
} }
@ -698,67 +728,62 @@ Token Lexer::try_integer() {
// If the first digit is a zero this must only be zero as leading zeros // If the first digit is a zero this must only be zero as leading zeros
// are not allowed. // are not allowed.
auto next = first + 1; auto next = first + 1;
if (next < len_) { if (next < length()) {
if (file_->content.data[first] == '0' && if (at(first) == '0' && is_digit(at(next))) {
is_digit(file_->content.data[next])) {
return {Token::Type::kError, source, return {Token::Type::kError, source,
"integer literal (" + "integer literal (" +
file_->content.data.substr(start, end - 1 - start) + std::string{substr(start, end - 1 - start)} +
"...) has leading 0s"}; "...) has leading 0s"};
} }
} }
while (end < len_ && is_digit(file_->content.data[end])) { while (end < length() && is_digit(at(end))) {
auto digits = end - first; auto digits = end - first;
if (digits > kMaxDigits) { if (digits > kMaxDigits) {
return {Token::Type::kError, source, return {Token::Type::kError, source,
"integer literal (" + "integer literal (" +
file_->content.data.substr(start, end - 1 - start) + std::string{substr(start, end - 1 - start)} +
"...) has too many digits"}; "...) has too many digits"};
} }
end++; end++;
} }
pos_ = end; advance(end - start);
location_.column += (end - start);
return build_token_from_int_if_possible(source, start, end, 10); return build_token_from_int_if_possible(source, start, end, 10);
} }
Token Lexer::try_ident() { Token Lexer::try_ident() {
auto source = begin_source(); auto source = begin_source();
auto start = pos_; auto start = pos();
// This below assumes that the size of a single std::string element is 1 byte. // This below assumes that the size of a single std::string element is 1 byte.
static_assert(sizeof(file_->content.data[0]) == sizeof(uint8_t), static_assert(sizeof(at(0)) == sizeof(uint8_t),
"tint::reader::wgsl requires the size of a std::string element " "tint::reader::wgsl requires the size of a std::string element "
"to be a single byte"); "to be a single byte");
// Must begin with an XID_Start unicode character, or underscore // Must begin with an XID_Start unicode character, or underscore
{ {
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]); auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = auto [code_point, n] = text::utf8::Decode(utf8, length() - pos());
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
if (n == 0) { if (n == 0) {
pos_++; // Skip the bad byte. advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"}; return {Token::Type::kError, source, "invalid UTF-8"};
} }
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) { if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
return {}; return {};
} }
// Consume start codepoint // Consume start codepoint
pos_ += n; advance(n);
location_.column += n;
} }
while (!is_eof()) { while (!is_eol()) {
// Must continue with an XID_Continue unicode character // Must continue with an XID_Continue unicode character
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]); auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos());
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
if (n == 0) { if (n == 0) {
pos_++; // Skip the bad byte. advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"}; return {Token::Type::kError, source, "invalid UTF-8"};
} }
if (!code_point.IsXIDContinue()) { if (!code_point.IsXIDContinue()) {
@ -766,21 +791,19 @@ Token Lexer::try_ident() {
} }
// Consume continuing codepoint // Consume continuing codepoint
pos_ += n; advance(n);
location_.column += n;
} }
if (file_->content.data[start] == '_') { if (at(start) == '_') {
// Check for an underscore on its own (special token), or a // Check for an underscore on its own (special token), or a
// double-underscore (not allowed). // double-underscore (not allowed).
if ((pos_ == start + 1) || (file_->content.data[start + 1] == '_')) { if ((pos() == start + 1) || (at(start + 1) == '_')) {
location_.column -= (pos_ - start); set_pos(start);
pos_ = start;
return {}; return {};
} }
} }
auto str = file_->content.data_view.substr(start, pos_ - start); auto str = substr(start, pos() - start);
end_source(source); end_source(source);
auto t = check_keyword(source, str); auto t = check_keyword(source, str);
@ -795,182 +818,138 @@ Token Lexer::try_punctuation() {
auto source = begin_source(); auto source = begin_source();
auto type = Token::Type::kUninitialized; auto type = Token::Type::kUninitialized;
if (matches(pos_, "@")) { if (matches(pos(), "@")) {
type = Token::Type::kAttr; type = Token::Type::kAttr;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "(")) {
} else if (matches(pos_, "(")) {
type = Token::Type::kParenLeft; type = Token::Type::kParenLeft;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), ")")) {
} else if (matches(pos_, ")")) {
type = Token::Type::kParenRight; type = Token::Type::kParenRight;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "[")) {
} else if (matches(pos_, "[")) {
type = Token::Type::kBracketLeft; type = Token::Type::kBracketLeft;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "]")) {
} else if (matches(pos_, "]")) {
type = Token::Type::kBracketRight; type = Token::Type::kBracketRight;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "{")) {
} else if (matches(pos_, "{")) {
type = Token::Type::kBraceLeft; type = Token::Type::kBraceLeft;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "}")) {
} else if (matches(pos_, "}")) {
type = Token::Type::kBraceRight; type = Token::Type::kBraceRight;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "&&")) {
} else if (matches(pos_, "&&")) {
type = Token::Type::kAndAnd; type = Token::Type::kAndAnd;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "&=")) {
} else if (matches(pos_, "&=")) {
type = Token::Type::kAndEqual; type = Token::Type::kAndEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "&")) {
} else if (matches(pos_, "&")) {
type = Token::Type::kAnd; type = Token::Type::kAnd;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "/=")) {
} else if (matches(pos_, "/=")) {
type = Token::Type::kDivisionEqual; type = Token::Type::kDivisionEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "/")) {
} else if (matches(pos_, "/")) {
type = Token::Type::kForwardSlash; type = Token::Type::kForwardSlash;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "!=")) {
} else if (matches(pos_, "!=")) {
type = Token::Type::kNotEqual; type = Token::Type::kNotEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "!")) {
} else if (matches(pos_, "!")) {
type = Token::Type::kBang; type = Token::Type::kBang;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), ":")) {
} else if (matches(pos_, ":")) {
type = Token::Type::kColon; type = Token::Type::kColon;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), ",")) {
} else if (matches(pos_, ",")) {
type = Token::Type::kComma; type = Token::Type::kComma;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "==")) {
} else if (matches(pos_, "==")) {
type = Token::Type::kEqualEqual; type = Token::Type::kEqualEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "=")) {
} else if (matches(pos_, "=")) {
type = Token::Type::kEqual; type = Token::Type::kEqual;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), ">=")) {
} else if (matches(pos_, ">=")) {
type = Token::Type::kGreaterThanEqual; type = Token::Type::kGreaterThanEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), ">>")) {
} else if (matches(pos_, ">>")) {
type = Token::Type::kShiftRight; type = Token::Type::kShiftRight;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), ">")) {
} else if (matches(pos_, ">")) {
type = Token::Type::kGreaterThan; type = Token::Type::kGreaterThan;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "<=")) {
} else if (matches(pos_, "<=")) {
type = Token::Type::kLessThanEqual; type = Token::Type::kLessThanEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "<<")) {
} else if (matches(pos_, "<<")) {
type = Token::Type::kShiftLeft; type = Token::Type::kShiftLeft;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "<")) {
} else if (matches(pos_, "<")) {
type = Token::Type::kLessThan; type = Token::Type::kLessThan;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "%=")) {
} else if (matches(pos_, "%=")) {
type = Token::Type::kModuloEqual; type = Token::Type::kModuloEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "%")) {
} else if (matches(pos_, "%")) {
type = Token::Type::kMod; type = Token::Type::kMod;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "->")) {
} else if (matches(pos_, "->")) {
type = Token::Type::kArrow; type = Token::Type::kArrow;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "--")) {
} else if (matches(pos_, "--")) {
type = Token::Type::kMinusMinus; type = Token::Type::kMinusMinus;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "-=")) {
} else if (matches(pos_, "-=")) {
type = Token::Type::kMinusEqual; type = Token::Type::kMinusEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "-")) {
} else if (matches(pos_, "-")) {
type = Token::Type::kMinus; type = Token::Type::kMinus;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), ".")) {
} else if (matches(pos_, ".")) {
type = Token::Type::kPeriod; type = Token::Type::kPeriod;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "++")) {
} else if (matches(pos_, "++")) {
type = Token::Type::kPlusPlus; type = Token::Type::kPlusPlus;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "+=")) {
} else if (matches(pos_, "+=")) {
type = Token::Type::kPlusEqual; type = Token::Type::kPlusEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "+")) {
} else if (matches(pos_, "+")) {
type = Token::Type::kPlus; type = Token::Type::kPlus;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "||")) {
} else if (matches(pos_, "||")) {
type = Token::Type::kOrOr; type = Token::Type::kOrOr;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "|=")) {
} else if (matches(pos_, "|=")) {
type = Token::Type::kOrEqual; type = Token::Type::kOrEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "|")) {
} else if (matches(pos_, "|")) {
type = Token::Type::kOr; type = Token::Type::kOr;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), ";")) {
} else if (matches(pos_, ";")) {
type = Token::Type::kSemicolon; type = Token::Type::kSemicolon;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "*=")) {
} else if (matches(pos_, "*=")) {
type = Token::Type::kTimesEqual; type = Token::Type::kTimesEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "*")) {
} else if (matches(pos_, "*")) {
type = Token::Type::kStar; type = Token::Type::kStar;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "~")) {
} else if (matches(pos_, "~")) {
type = Token::Type::kTilde; type = Token::Type::kTilde;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "_")) {
} else if (matches(pos_, "_")) {
type = Token::Type::kUnderscore; type = Token::Type::kUnderscore;
pos_ += 1; advance(1);
location_.column += 1; } else if (matches(pos(), "^=")) {
} else if (matches(pos_, "^=")) {
type = Token::Type::kXorEqual; type = Token::Type::kXorEqual;
pos_ += 2; advance(2);
location_.column += 2; } else if (matches(pos(), "^")) {
} else if (matches(pos_, "^")) {
type = Token::Type::kXor; type = Token::Type::kXor;
pos_ += 1; advance(1);
location_.column += 1;
} }
end_source(source); end_source(source);

View File

@ -67,8 +67,26 @@ class Lexer {
Source begin_source() const; Source begin_source() const;
void end_source(Source&) const; void end_source(Source&) const;
/// @returns view of current line
const std::string_view line() const;
/// @returns position in current line
size_t pos() const;
/// @returns length of current line
size_t length() const;
/// @returns reference to character at `pos` within current line
const char& at(size_t pos) const;
/// @returns substring view at `offset` within current line of length `count`
std::string_view substr(size_t offset, size_t count);
/// advances current position by `offset` within current line
void advance(size_t offset = 1);
/// sets current position to `pos` within current line
void set_pos(size_t pos);
/// advances current position to next line
void advance_line();
/// @returns true if the end of the input has been reached. /// @returns true if the end of the input has been reached.
bool is_eof() const; bool is_eof() const;
/// @returns true if the end of the current line has been reached.
bool is_eol() const;
/// @returns true if there is another character on the input and /// @returns true if there is another character on the input and
/// it is null. /// it is null.
bool is_null() const; bool is_null() const;
@ -78,14 +96,11 @@ class Lexer {
/// @param ch a character /// @param ch a character
/// @returns true if 'ch' is a hexadecimal digit /// @returns true if 'ch' is a hexadecimal digit
bool is_hex(char ch) const; bool is_hex(char ch) const;
/// @returns true if string at `pos` matches `substr`
bool matches(size_t pos, std::string_view substr); bool matches(size_t pos, std::string_view substr);
/// The source file content /// The source file content
Source::File const* const file_; Source::File const* const file_;
/// The length of the input
uint32_t len_ = 0;
/// The current position in utf-8 code units (bytes) within the input
uint32_t pos_ = 0;
/// The current location within the input /// The current location within the input
Source::Location location_; Source::Location location_;
}; };