tint: make Lexer use line breaks from Source::File

Before this change, we duplicated line break parsing in both
Source::File and Lexer. This change makes it so that the Lexer no longer
looks for line breaks, instead relying on Source::File for this info.
This de-duplication will also help in implementing the latest spec
changes with respect to line breaks (CRLF vs CR, etc).

Bug: tint:1505
Bug: tint:1513
Change-Id: Ifa820f75ede7e82822525282127e05d2fea047e1
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/87604
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
This commit is contained in:
Antonio Maiorano 2022-04-22 15:34:21 +00:00 committed by Dawn LUCI CQ
parent d51b47ac67
commit eba0e85c33
2 changed files with 247 additions and 253 deletions

View File

@ -28,8 +28,7 @@ namespace {
bool is_blankspace(char c) {
// See https://www.w3.org/TR/WGSL/#blankspace.
return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
c == '\r';
return c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == '\r';
}
uint32_t dec_value(char c) {
@ -54,13 +53,62 @@ uint32_t hex_value(char c) {
} // namespace
Lexer::Lexer(const Source::File* file)
: file_(file),
len_(static_cast<uint32_t>(file->content.data.size())),
location_{1, 1} {}
Lexer::Lexer(const Source::File* file) : file_(file), location_{1, 1} {}
Lexer::~Lexer() = default;
/// Returns a view of the source line that `location_` currently refers to.
///
/// `location_.line` is 1-based, so it is converted to a 0-based index into
/// `file_->content.lines`. If the line number does not name an existing line
/// (the file has no lines, the line number is 0, or it is past the last line)
/// an empty view is returned instead of indexing out of bounds.
///
/// @returns the current line, or an empty view when there is no such line
const std::string_view Lexer::line() const {
  const auto& lines = file_->content.lines;
  // This single range check also covers the empty-file case, because any line
  // number is out of range when `lines` is empty.
  if (location_.line == 0 || location_.line > lines.size()) {
    static const char* empty_string = "";
    return empty_string;
  }
  return lines[location_.line - 1];
}
/// Converts the 1-based column of the current location into a 0-based offset
/// within the current line.
/// @returns the 0-based position within the current line
size_t Lexer::pos() const {
  const auto zero_based = location_.column - 1;
  return zero_based;
}
/// @returns the size of the view returned by line(), i.e. the length of the
/// current line
size_t Lexer::length() const {
  const std::string_view current = line();
  return current.size();
}
/// Reads the character at offset `pos` of the current line.
/// @param pos 0-based offset within the current line
/// @returns a reference to the character at `pos`, or a reference to a
/// zero-valued character when `pos` is at or beyond the end of the line
const char& Lexer::at(size_t pos) const {
  // Stand-in for the terminator: std::string_view, unlike std::string, does
  // not allow indexing at size(), so out-of-range reads are redirected here.
  static const char zero = 0;
  const std::string_view current = line();
  return (pos < current.size()) ? current[pos] : zero;
}
/// @param offset 0-based offset of the first character within the current line
/// @param count maximum number of characters to include
/// @returns the result of std::string_view::substr(offset, count) applied to
/// the current line
std::string_view Lexer::substr(size_t offset, size_t count) {
  const std::string_view current = line();
  return current.substr(offset, count);
}
/// Moves the current location forward by `offset` columns on the same line.
/// No check is made against the length of the line.
/// @param offset number of columns to advance by
void Lexer::advance(size_t offset) {
  auto& column = location_.column;
  column += offset;
}
/// Sets the current location's column so that pos() subsequently yields `pos`.
/// @param pos 0-based offset within the current line
void Lexer::set_pos(size_t pos) {
  auto& column = location_.column;
  column = pos + 1;  // columns are 1-based
}
/// Moves the current location to the first column of the following line.
void Lexer::advance_line() {
  location_.column = 1;
  ++location_.line;
}
/// @returns true when the current location is on (or beyond) the last line of
/// the file and at or past the end of that line
bool Lexer::is_eof() const {
  const bool on_or_past_last_line =
      location_.line >= file_->content.lines.size();
  if (!on_or_past_last_line) {
    // An earlier line: there is still at least one more line to read.
    return false;
  }
  return pos() >= length();
}
/// @returns true when the current position is at or past the end of the
/// current line
bool Lexer::is_eol() const {
  const size_t column_index = pos();
  const size_t line_length = length();
  return column_index >= line_length;
}
Token Lexer::next() {
if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
return t;
@ -106,12 +154,8 @@ void Lexer::end_source(Source& src) const {
src.range.end = location_;
}
bool Lexer::is_eof() const {
return pos_ >= len_;
}
bool Lexer::is_null() const {
return (pos_ < len_) && (file_->content.data[pos_] == 0);
return (pos() < length()) && (at(pos()) == 0);
}
bool Lexer::is_digit(char ch) const {
@ -122,25 +166,26 @@ bool Lexer::is_hex(char ch) const {
return std::isxdigit(static_cast<unsigned char>(ch));
}
bool Lexer::matches(size_t pos, std::string_view substr) {
if (pos >= len_)
bool Lexer::matches(size_t pos, std::string_view sub_string) {
if (pos >= length())
return false;
return file_->content.data_view.substr(pos, substr.size()) == substr;
return substr(pos, sub_string.size()) == sub_string;
}
Token Lexer::skip_blankspace_and_comments() {
for (;;) {
auto pos = pos_;
while (!is_eof() && is_blankspace(file_->content.data[pos_])) {
if (matches(pos_, "\n")) {
pos_++;
location_.line++;
location_.column = 1;
auto loc = location_;
while (!is_eof()) {
if (is_eol()) {
advance_line();
continue;
}
pos_++;
location_.column++;
if (!is_blankspace(at(pos()))) {
break;
}
advance();
}
auto t = skip_comment();
@ -150,7 +195,7 @@ Token Lexer::skip_blankspace_and_comments() {
// If the cursor didn't advance we didn't remove any blankspace
// so we're done.
if (pos == pos_)
if (loc == location_)
break;
}
if (is_eof()) {
@ -161,53 +206,46 @@ Token Lexer::skip_blankspace_and_comments() {
}
Token Lexer::skip_comment() {
if (matches(pos_, "//")) {
if (matches(pos(), "//")) {
// Line comment: ignore everything until the end of input or a blankspace
// character other than space or horizontal tab.
while (!is_eof() && !(is_blankspace(file_->content.data[pos_]) &&
!matches(pos_, " ") && !matches(pos_, "\t"))) {
while (!is_eol() && !(is_blankspace(at(pos())) && !matches(pos(), " ") &&
!matches(pos(), "\t"))) {
if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"};
}
pos_++;
location_.column++;
advance();
}
return {};
}
if (matches(pos_, "/*")) {
if (matches(pos(), "/*")) {
// Block comment: ignore everything until the closing '*/' token.
// Record source location of the initial '/*'
auto source = begin_source();
source.range.end.column += 1;
pos_ += 2;
location_.column += 2;
advance(2);
int depth = 1;
while (!is_eof() && depth > 0) {
if (matches(pos_, "/*")) {
if (matches(pos(), "/*")) {
// Start of block comment: increase nesting depth.
pos_ += 2;
location_.column += 2;
advance(2);
depth++;
} else if (matches(pos_, "*/")) {
} else if (matches(pos(), "*/")) {
// End of block comment: decrease nesting depth.
pos_ += 2;
location_.column += 2;
advance(2);
depth--;
} else if (matches(pos_, "\n")) {
} else if (is_eol()) {
// Newline: skip and update source location.
pos_++;
location_.line++;
location_.column = 1;
advance_line();
} else if (is_null()) {
return {Token::Type::kError, begin_source(), "null character found"};
} else {
// Anything else: skip and update source location.
pos_++;
location_.column++;
advance();
}
}
if (depth > 0) {
@ -218,8 +256,8 @@ Token Lexer::skip_comment() {
}
Token Lexer::try_float() {
auto start = pos_;
auto end = pos_;
auto start = pos();
auto end = pos();
auto source = begin_source();
bool has_mantissa_digits = false;
@ -227,18 +265,18 @@ Token Lexer::try_float() {
if (matches(end, "-")) {
end++;
}
while (end < len_ && is_digit(file_->content.data[end])) {
while (end < length() && is_digit(at(end))) {
has_mantissa_digits = true;
end++;
}
bool has_point = false;
if (end < len_ && matches(end, ".")) {
if (end < length() && matches(end, ".")) {
has_point = true;
end++;
}
while (end < len_ && is_digit(file_->content.data[end])) {
while (end < length() && is_digit(at(end))) {
has_mantissa_digits = true;
end++;
}
@ -249,27 +287,27 @@ Token Lexer::try_float() {
// Parse the exponent if one exists
bool has_exponent = false;
if (end < len_ && (matches(end, "e") || matches(end, "E"))) {
if (end < length() && (matches(end, "e") || matches(end, "E"))) {
end++;
if (end < len_ && (matches(end, "+") || matches(end, "-"))) {
if (end < length() && (matches(end, "+") || matches(end, "-"))) {
end++;
}
while (end < len_ && isdigit(file_->content.data[end])) {
while (end < length() && isdigit(at(end))) {
has_exponent = true;
end++;
}
// If an 'e' or 'E' was present, then the number part must also be present.
if (!has_exponent) {
const auto str = file_->content.data.substr(start, end - start);
const auto str = std::string{substr(start, end - start)};
return {Token::Type::kError, source,
"incomplete exponent for floating point literal: " + str};
}
}
bool has_f_suffix = false;
if (end < len_ && matches(end, "f")) {
if (end < length() && matches(end, "f")) {
end++;
has_f_suffix = true;
}
@ -280,14 +318,12 @@ Token Lexer::try_float() {
}
// Save the error string, for use by diagnostics.
const auto str = file_->content.data.substr(start, end - start);
pos_ = end;
location_.column += (end - start);
const auto str = std::string{substr(start, end - start)};
advance(end - start);
end_source(source);
auto res = strtod(file_->content.data.c_str() + start, nullptr);
auto res = strtod(&at(start), nullptr);
// This errors out if a non-zero magnitude is too small to represent in a
// float. It can't be represented faithfully in an f32.
const auto magnitude = std::fabs(res);
@ -322,8 +358,8 @@ Token Lexer::try_hex_float() {
constexpr uint32_t kExponentLeftShift = kMantissaBits;
constexpr uint32_t kSignBit = 31;
auto start = pos_;
auto end = pos_;
auto start = pos();
auto end = pos();
auto source = begin_source();
@ -378,7 +414,7 @@ Token Lexer::try_hex_float() {
// Collect integer range (if any)
auto integer_range = std::make_pair(end, end);
while (end < len_ && is_hex(file_->content.data[end])) {
while (end < length() && is_hex(at(end))) {
integer_range.second = ++end;
}
@ -391,7 +427,7 @@ Token Lexer::try_hex_float() {
// Collect fractional range (if any)
auto fractional_range = std::make_pair(end, end);
while (end < len_ && is_hex(file_->content.data[end])) {
while (end < length() && is_hex(at(end))) {
fractional_range.second = ++end;
}
@ -421,7 +457,7 @@ Token Lexer::try_hex_float() {
// The magnitude is zero if and only if seen_prior_one_bits is false.
bool seen_prior_one_bits = false;
for (auto i = integer_range.first; i < integer_range.second; ++i) {
const auto nibble = hex_value(file_->content.data[i]);
const auto nibble = hex_value(at(i));
if (nibble != 0) {
has_zero_integer = false;
}
@ -447,7 +483,7 @@ Token Lexer::try_hex_float() {
// Parse fractional part
// [0-9a-fA-F]*
for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
auto nibble = hex_value(file_->content.data[i]);
auto nibble = hex_value(at(i));
for (int32_t bit = 3; bit >= 0; --bit) {
auto v = 1 & (nibble >> bit);
@ -495,11 +531,10 @@ Token Lexer::try_hex_float() {
// Allow overflow (in uint32_t) when the floating point value magnitude is
// zero.
bool has_exponent_digits = false;
while (end < len_ && isdigit(file_->content.data[end])) {
while (end < length() && isdigit(at(end))) {
has_exponent_digits = true;
auto prev_exponent = input_exponent;
input_exponent =
(input_exponent * 10) + dec_value(file_->content.data[end]);
input_exponent = (input_exponent * 10) + dec_value(at(end));
// Check if we've overflowed input_exponent. This only matters when
// the mantissa is non-zero.
if (!is_zero && (prev_exponent > input_exponent)) {
@ -512,7 +547,7 @@ Token Lexer::try_hex_float() {
// Parse optional 'f' suffix. For a hex float, it can only exist
// when the exponent is present. Otherwise it will look like
// one of the mantissa digits.
if (end < len_ && matches(end, "f")) {
if (end < length() && matches(end, "f")) {
end++;
}
@ -522,8 +557,7 @@ Token Lexer::try_hex_float() {
}
}
pos_ = end;
location_.column += (end - start);
advance(end - start);
end_source(source);
if (is_zero) {
@ -611,29 +645,26 @@ Token Lexer::build_token_from_int_if_possible(Source source,
size_t start,
size_t end,
int32_t base) {
auto res = strtoll(file_->content.data.c_str() + start, nullptr, base);
if (matches(pos_, "u")) {
auto res = strtoll(&at(start), nullptr, base);
if (matches(pos(), "u")) {
if (static_cast<uint64_t>(res) >
static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
return {Token::Type::kError, source,
"u32 (" + file_->content.data.substr(start, end - start) +
") too large"};
return {
Token::Type::kError, source,
"u32 (" + std::string{substr(start, end - start)} + ") too large"};
}
pos_ += 1;
location_.column += 1;
advance(1);
end_source(source);
return {source, static_cast<uint32_t>(res)};
}
if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
return {Token::Type::kError, source,
"i32 (" + file_->content.data.substr(start, end - start) +
") too small"};
"i32 (" + std::string{substr(start, end - start)} + ") too small"};
}
if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
return {Token::Type::kError, source,
"i32 (" + file_->content.data.substr(start, end - start) +
") too large"};
"i32 (" + std::string{substr(start, end - start)} + ") too large"};
}
end_source(source);
return {source, static_cast<int32_t>(res)};
@ -641,8 +672,8 @@ Token Lexer::build_token_from_int_if_possible(Source source,
Token Lexer::try_hex_integer() {
constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types
auto start = pos_;
auto end = pos_;
auto start = pos();
auto end = pos();
auto source = begin_source();
@ -657,14 +688,14 @@ Token Lexer::try_hex_integer() {
}
auto first = end;
while (!is_eof() && is_hex(file_->content.data[end])) {
while (!is_eol() && is_hex(at(end))) {
end++;
auto digits = end - first;
if (digits > kMaxDigits) {
return {Token::Type::kError, source,
"integer literal (" +
file_->content.data.substr(start, end - 1 - start) +
std::string{substr(start, end - 1 - start)} +
"...) has too many digits"};
}
}
@ -673,15 +704,14 @@ Token Lexer::try_hex_integer() {
"integer or float hex literal has no significant digits"};
}
pos_ = end;
location_.column += (end - start);
advance(end - start);
return build_token_from_int_if_possible(source, start, end, 16);
}
Token Lexer::try_integer() {
constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types
auto start = pos_;
auto start = pos();
auto end = start;
auto source = begin_source();
@ -690,7 +720,7 @@ Token Lexer::try_integer() {
end++;
}
if (end >= len_ || !is_digit(file_->content.data[end])) {
if (end >= length() || !is_digit(at(end))) {
return {};
}
@ -698,67 +728,62 @@ Token Lexer::try_integer() {
// If the first digit is a zero this must only be zero as leading zeros
// are not allowed.
auto next = first + 1;
if (next < len_) {
if (file_->content.data[first] == '0' &&
is_digit(file_->content.data[next])) {
if (next < length()) {
if (at(first) == '0' && is_digit(at(next))) {
return {Token::Type::kError, source,
"integer literal (" +
file_->content.data.substr(start, end - 1 - start) +
std::string{substr(start, end - 1 - start)} +
"...) has leading 0s"};
}
}
while (end < len_ && is_digit(file_->content.data[end])) {
while (end < length() && is_digit(at(end))) {
auto digits = end - first;
if (digits > kMaxDigits) {
return {Token::Type::kError, source,
"integer literal (" +
file_->content.data.substr(start, end - 1 - start) +
std::string{substr(start, end - 1 - start)} +
"...) has too many digits"};
}
end++;
}
pos_ = end;
location_.column += (end - start);
advance(end - start);
return build_token_from_int_if_possible(source, start, end, 10);
}
Token Lexer::try_ident() {
auto source = begin_source();
auto start = pos_;
auto start = pos();
// This below assumes that the size of a single std::string element is 1 byte.
static_assert(sizeof(file_->content.data[0]) == sizeof(uint8_t),
static_assert(sizeof(at(0)) == sizeof(uint8_t),
"tint::reader::wgsl requires the size of a std::string element "
"to be a single byte");
// Must begin with an XID_Source unicode character, or underscore
{
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
auto [code_point, n] =
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = text::utf8::Decode(utf8, length() - pos());
if (n == 0) {
pos_++; // Skip the bad byte.
advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"};
}
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
return {};
}
// Consume start codepoint
pos_ += n;
location_.column += n;
advance(n);
}
while (!is_eof()) {
while (!is_eol()) {
// Must continue with an XID_Continue unicode character
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
auto [code_point, n] =
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos());
if (n == 0) {
pos_++; // Skip the bad byte.
advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"};
}
if (!code_point.IsXIDContinue()) {
@ -766,21 +791,19 @@ Token Lexer::try_ident() {
}
// Consume continuing codepoint
pos_ += n;
location_.column += n;
advance(n);
}
if (file_->content.data[start] == '_') {
if (at(start) == '_') {
// Check for an underscore on its own (special token), or a
// double-underscore (not allowed).
if ((pos_ == start + 1) || (file_->content.data[start + 1] == '_')) {
location_.column -= (pos_ - start);
pos_ = start;
if ((pos() == start + 1) || (at(start + 1) == '_')) {
set_pos(start);
return {};
}
}
auto str = file_->content.data_view.substr(start, pos_ - start);
auto str = substr(start, pos() - start);
end_source(source);
auto t = check_keyword(source, str);
@ -795,182 +818,138 @@ Token Lexer::try_punctuation() {
auto source = begin_source();
auto type = Token::Type::kUninitialized;
if (matches(pos_, "@")) {
if (matches(pos(), "@")) {
type = Token::Type::kAttr;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "(")) {
advance(1);
} else if (matches(pos(), "(")) {
type = Token::Type::kParenLeft;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, ")")) {
advance(1);
} else if (matches(pos(), ")")) {
type = Token::Type::kParenRight;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "[")) {
advance(1);
} else if (matches(pos(), "[")) {
type = Token::Type::kBracketLeft;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "]")) {
advance(1);
} else if (matches(pos(), "]")) {
type = Token::Type::kBracketRight;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "{")) {
advance(1);
} else if (matches(pos(), "{")) {
type = Token::Type::kBraceLeft;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "}")) {
advance(1);
} else if (matches(pos(), "}")) {
type = Token::Type::kBraceRight;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "&&")) {
advance(1);
} else if (matches(pos(), "&&")) {
type = Token::Type::kAndAnd;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "&=")) {
advance(2);
} else if (matches(pos(), "&=")) {
type = Token::Type::kAndEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "&")) {
advance(2);
} else if (matches(pos(), "&")) {
type = Token::Type::kAnd;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "/=")) {
advance(1);
} else if (matches(pos(), "/=")) {
type = Token::Type::kDivisionEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "/")) {
advance(2);
} else if (matches(pos(), "/")) {
type = Token::Type::kForwardSlash;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "!=")) {
advance(1);
} else if (matches(pos(), "!=")) {
type = Token::Type::kNotEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "!")) {
advance(2);
} else if (matches(pos(), "!")) {
type = Token::Type::kBang;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, ":")) {
advance(1);
} else if (matches(pos(), ":")) {
type = Token::Type::kColon;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, ",")) {
advance(1);
} else if (matches(pos(), ",")) {
type = Token::Type::kComma;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "==")) {
advance(1);
} else if (matches(pos(), "==")) {
type = Token::Type::kEqualEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "=")) {
advance(2);
} else if (matches(pos(), "=")) {
type = Token::Type::kEqual;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, ">=")) {
advance(1);
} else if (matches(pos(), ">=")) {
type = Token::Type::kGreaterThanEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, ">>")) {
advance(2);
} else if (matches(pos(), ">>")) {
type = Token::Type::kShiftRight;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, ">")) {
advance(2);
} else if (matches(pos(), ">")) {
type = Token::Type::kGreaterThan;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "<=")) {
advance(1);
} else if (matches(pos(), "<=")) {
type = Token::Type::kLessThanEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "<<")) {
advance(2);
} else if (matches(pos(), "<<")) {
type = Token::Type::kShiftLeft;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "<")) {
advance(2);
} else if (matches(pos(), "<")) {
type = Token::Type::kLessThan;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "%=")) {
advance(1);
} else if (matches(pos(), "%=")) {
type = Token::Type::kModuloEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "%")) {
advance(2);
} else if (matches(pos(), "%")) {
type = Token::Type::kMod;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "->")) {
advance(1);
} else if (matches(pos(), "->")) {
type = Token::Type::kArrow;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "--")) {
advance(2);
} else if (matches(pos(), "--")) {
type = Token::Type::kMinusMinus;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "-=")) {
advance(2);
} else if (matches(pos(), "-=")) {
type = Token::Type::kMinusEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "-")) {
advance(2);
} else if (matches(pos(), "-")) {
type = Token::Type::kMinus;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, ".")) {
advance(1);
} else if (matches(pos(), ".")) {
type = Token::Type::kPeriod;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "++")) {
advance(1);
} else if (matches(pos(), "++")) {
type = Token::Type::kPlusPlus;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "+=")) {
advance(2);
} else if (matches(pos(), "+=")) {
type = Token::Type::kPlusEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "+")) {
advance(2);
} else if (matches(pos(), "+")) {
type = Token::Type::kPlus;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "||")) {
advance(1);
} else if (matches(pos(), "||")) {
type = Token::Type::kOrOr;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "|=")) {
advance(2);
} else if (matches(pos(), "|=")) {
type = Token::Type::kOrEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "|")) {
advance(2);
} else if (matches(pos(), "|")) {
type = Token::Type::kOr;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, ";")) {
advance(1);
} else if (matches(pos(), ";")) {
type = Token::Type::kSemicolon;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "*=")) {
advance(1);
} else if (matches(pos(), "*=")) {
type = Token::Type::kTimesEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "*")) {
advance(2);
} else if (matches(pos(), "*")) {
type = Token::Type::kStar;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "~")) {
advance(1);
} else if (matches(pos(), "~")) {
type = Token::Type::kTilde;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "_")) {
advance(1);
} else if (matches(pos(), "_")) {
type = Token::Type::kUnderscore;
pos_ += 1;
location_.column += 1;
} else if (matches(pos_, "^=")) {
advance(1);
} else if (matches(pos(), "^=")) {
type = Token::Type::kXorEqual;
pos_ += 2;
location_.column += 2;
} else if (matches(pos_, "^")) {
advance(2);
} else if (matches(pos(), "^")) {
type = Token::Type::kXor;
pos_ += 1;
location_.column += 1;
advance(1);
}
end_source(source);

View File

@ -67,8 +67,26 @@ class Lexer {
Source begin_source() const;
void end_source(Source&) const;
/// @returns a view of the line at the current location, or an empty view if
/// the file has no lines
const std::string_view line() const;
/// @returns the 0-based position within the current line (the current
/// 1-based column minus one)
size_t pos() const;
/// @returns the length of the current line, i.e. `line().size()`
size_t length() const;
/// @param pos 0-based offset within the current line
/// @returns reference to the character at `pos` within the current line, or
/// to a zero-valued character if `pos` is at or beyond the end of the line
const char& at(size_t pos) const;
/// @param offset 0-based offset within the current line
/// @param count maximum number of characters in the returned view
/// @returns substring view at `offset` within current line of length `count`
std::string_view substr(size_t offset, size_t count);
/// Advances the current position by `offset` within the current line. The
/// column is incremented without checking it against the line length.
/// @param offset number of columns to advance by (defaults to 1)
void advance(size_t offset = 1);
/// Sets the current position to `pos` within the current line (the column
/// becomes `pos + 1`).
/// @param pos 0-based offset within the current line
void set_pos(size_t pos);
/// Advances the current position to the next line: the line number is
/// incremented and the column is reset to 1.
void advance_line();
/// @returns true if the end of the input has been reached, i.e. the current
/// line is the last line (or beyond) and the position is at or past its end.
bool is_eof() const;
/// @returns true if the end of the current line has been reached, i.e. the
/// position is at or past the length of the current line.
bool is_eol() const;
/// @returns true if there is another character on the input and
/// it is not null.
bool is_null() const;
@ -78,14 +96,11 @@ class Lexer {
/// @param ch a character
/// @returns true if 'ch' is a hexadecimal digit
bool is_hex(char ch) const;
/// @returns true if string at `pos` matches `substr`
bool matches(size_t pos, std::string_view substr);
/// The source file content
Source::File const* const file_;
/// The length of the input
uint32_t len_ = 0;
/// The current position in utf-8 code units (bytes) within the input
uint32_t pos_ = 0;
/// The current location within the input
Source::Location location_;
};