tint: make Lexer use line breaks from Source::File

Before this change, we duplicated line break parsing in both Source::File and Lexer. This change makes it so that the Lexer no longer looks for line breaks, instead relying on Source::File for this info.

This de-duplication will also help in implementing the latest spec changes with respect to line breaks (CRLF vs CR, etc).

Bug: tint:1505
Bug: tint:1513
Change-Id: Ifa820f75ede7e82822525282127e05d2fea047e1
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/87604
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
2025-10-21 01:05:52 +00:00 · 2022-04-22 15:34:21 +00:00 · 2022-04-22 15:34:21 +00:00 · eba0e85c33
commit eba0e85c33
parent d51b47ac67
2 changed files with 247 additions and 253 deletions
--- a/src/tint/reader/wgsl/lexer.cc
+++ b/src/tint/reader/wgsl/lexer.cc
@ -28,8 +28,7 @@ namespace {

 bool is_blankspace(char c) {
  // See https://www.w3.org/TR/WGSL/#blankspace.
-  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
-         c == '\r';
+  return c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == '\r';
 }

 uint32_t dec_value(char c) {
@ -54,13 +53,62 @@ uint32_t hex_value(char c) {

 }  // namespace

-Lexer::Lexer(const Source::File* file)
-    : file_(file),
-      len_(static_cast<uint32_t>(file->content.data.size())),
-      location_{1, 1} {}
+Lexer::Lexer(const Source::File* file) : file_(file), location_{1, 1} {}

 Lexer::~Lexer() = default;

+const std::string_view Lexer::line() const {
+  if (file_->content.lines.size() == 0) {
+    static const char* empty_string = "";
+    return empty_string;
+  }
+  return file_->content.lines[location_.line - 1];
+}
+
+size_t Lexer::pos() const {
+  return location_.column - 1;
+}
+
+size_t Lexer::length() const {
+  return line().size();
+}
+
+const char& Lexer::at(size_t pos) const {
+  auto l = line();
+  // Unlike for std::string, if pos == l.size(), indexing `l[pos]` is UB for
+  // std::string_view.
+  if (pos >= l.size()) {
+    static const char zero = 0;
+    return zero;
+  }
+  return l[pos];
+}
+
+std::string_view Lexer::substr(size_t offset, size_t count) {
+  return line().substr(offset, count);
+}
+
+void Lexer::advance(size_t offset) {
+  location_.column += offset;
+}
+
+void Lexer::set_pos(size_t pos) {
+  location_.column = pos + 1;
+}
+
+void Lexer::advance_line() {
+  location_.line++;
+  location_.column = 1;
+}
+
+bool Lexer::is_eof() const {
+  return location_.line >= file_->content.lines.size() && pos() >= length();
+}
+
+bool Lexer::is_eol() const {
+  return pos() >= length();
+}
+
 Token Lexer::next() {
  if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
    return t;
@ -106,12 +154,8 @@ void Lexer::end_source(Source& src) const {
  src.range.end = location_;
 }

-bool Lexer::is_eof() const {
-  return pos_ >= len_;
-}
-
 bool Lexer::is_null() const {
-  return (pos_ < len_) && (file_->content.data[pos_] == 0);
+  return (pos() < length()) && (at(pos()) == 0);
 }

 bool Lexer::is_digit(char ch) const {
@ -122,25 +166,26 @@ bool Lexer::is_hex(char ch) const {
  return std::isxdigit(static_cast<unsigned char>(ch));
 }

-bool Lexer::matches(size_t pos, std::string_view substr) {
-  if (pos >= len_)
+bool Lexer::matches(size_t pos, std::string_view sub_string) {
+  if (pos >= length())
    return false;
-  return file_->content.data_view.substr(pos, substr.size()) == substr;
+  return substr(pos, sub_string.size()) == sub_string;
 }

 Token Lexer::skip_blankspace_and_comments() {
  for (;;) {
-    auto pos = pos_;
-    while (!is_eof() && is_blankspace(file_->content.data[pos_])) {
-      if (matches(pos_, "\n")) {
-        pos_++;
-        location_.line++;
-        location_.column = 1;
+    auto loc = location_;
+    while (!is_eof()) {
+      if (is_eol()) {
+        advance_line();
        continue;
      }

-      pos_++;
-      location_.column++;
+      if (!is_blankspace(at(pos()))) {
+        break;
+      }
+
+      advance();
    }

    auto t = skip_comment();
@ -150,7 +195,7 @@ Token Lexer::skip_blankspace_and_comments() {

    // If the cursor didn't advance we didn't remove any blankspace
    // so we're done.
-    if (pos == pos_)
+    if (loc == location_)
      break;
  }
  if (is_eof()) {
@ -161,53 +206,46 @@ Token Lexer::skip_blankspace_and_comments() {
 }

 Token Lexer::skip_comment() {
-  if (matches(pos_, "//")) {
+  if (matches(pos(), "//")) {
    // Line comment: ignore everything until the end of input or a blankspace
    // character other than space or horizontal tab.
-    while (!is_eof() && !(is_blankspace(file_->content.data[pos_]) &&
-                          !matches(pos_, " ") && !matches(pos_, "\t"))) {
+    while (!is_eol() && !(is_blankspace(at(pos())) && !matches(pos(), " ") &&
+                          !matches(pos(), "\t"))) {
      if (is_null()) {
        return {Token::Type::kError, begin_source(), "null character found"};
      }
-      pos_++;
-      location_.column++;
+      advance();
    }
    return {};
  }

-  if (matches(pos_, "/*")) {
+  if (matches(pos(), "/*")) {
    // Block comment: ignore everything until the closing '*/' token.

    // Record source location of the initial '/*'
    auto source = begin_source();
    source.range.end.column += 1;

-    pos_ += 2;
-    location_.column += 2;
+    advance(2);

    int depth = 1;
    while (!is_eof() && depth > 0) {
-      if (matches(pos_, "/*")) {
+      if (matches(pos(), "/*")) {
        // Start of block comment: increase nesting depth.
-        pos_ += 2;
-        location_.column += 2;
+        advance(2);
        depth++;
-      } else if (matches(pos_, "*/")) {
+      } else if (matches(pos(), "*/")) {
        // End of block comment: decrease nesting depth.
-        pos_ += 2;
-        location_.column += 2;
+        advance(2);
        depth--;
-      } else if (matches(pos_, "\n")) {
+      } else if (is_eol()) {
        // Newline: skip and update source location.
-        pos_++;
-        location_.line++;
-        location_.column = 1;
+        advance_line();
      } else if (is_null()) {
        return {Token::Type::kError, begin_source(), "null character found"};
      } else {
        // Anything else: skip and update source location.
-        pos_++;
-        location_.column++;
+        advance();
      }
    }
    if (depth > 0) {
@ -218,8 +256,8 @@ Token Lexer::skip_comment() {
 }

 Token Lexer::try_float() {
-  auto start = pos_;
-  auto end = pos_;
+  auto start = pos();
+  auto end = pos();

  auto source = begin_source();
  bool has_mantissa_digits = false;
@ -227,18 +265,18 @@ Token Lexer::try_float() {
  if (matches(end, "-")) {
    end++;
  }
-  while (end < len_ && is_digit(file_->content.data[end])) {
+  while (end < length() && is_digit(at(end))) {
    has_mantissa_digits = true;
    end++;
  }

  bool has_point = false;
-  if (end < len_ && matches(end, ".")) {
+  if (end < length() && matches(end, ".")) {
    has_point = true;
    end++;
  }

-  while (end < len_ && is_digit(file_->content.data[end])) {
+  while (end < length() && is_digit(at(end))) {
    has_mantissa_digits = true;
    end++;
  }
@ -249,27 +287,27 @@ Token Lexer::try_float() {

  // Parse the exponent if one exists
  bool has_exponent = false;
-  if (end < len_ && (matches(end, "e") || matches(end, "E"))) {
+  if (end < length() && (matches(end, "e") || matches(end, "E"))) {
    end++;
-    if (end < len_ && (matches(end, "+") || matches(end, "-"))) {
+    if (end < length() && (matches(end, "+") || matches(end, "-"))) {
      end++;
    }

-    while (end < len_ && isdigit(file_->content.data[end])) {
+    while (end < length() && isdigit(at(end))) {
      has_exponent = true;
      end++;
    }

    // If an 'e' or 'E' was present, then the number part must also be present.
    if (!has_exponent) {
-      const auto str = file_->content.data.substr(start, end - start);
+      const auto str = std::string{substr(start, end - start)};
      return {Token::Type::kError, source,
              "incomplete exponent for floating point literal: " + str};
    }
  }

  bool has_f_suffix = false;
-  if (end < len_ && matches(end, "f")) {
+  if (end < length() && matches(end, "f")) {
    end++;
    has_f_suffix = true;
  }
@ -280,14 +318,12 @@ Token Lexer::try_float() {
  }

  // Save the error string, for use by diagnostics.
-  const auto str = file_->content.data.substr(start, end - start);
-
-  pos_ = end;
-  location_.column += (end - start);
+  const auto str = std::string{substr(start, end - start)};

+  advance(end - start);
  end_source(source);

-  auto res = strtod(file_->content.data.c_str() + start, nullptr);
+  auto res = strtod(&at(start), nullptr);
  // This errors out if a non-zero magnitude is too small to represent in a
  // float. It can't be represented faithfully in an f32.
  const auto magnitude = std::fabs(res);
@ -322,8 +358,8 @@ Token Lexer::try_hex_float() {
  constexpr uint32_t kExponentLeftShift = kMantissaBits;
  constexpr uint32_t kSignBit = 31;

-  auto start = pos_;
-  auto end = pos_;
+  auto start = pos();
+  auto end = pos();

  auto source = begin_source();

@ -378,7 +414,7 @@ Token Lexer::try_hex_float() {

  // Collect integer range (if any)
  auto integer_range = std::make_pair(end, end);
-  while (end < len_ && is_hex(file_->content.data[end])) {
+  while (end < length() && is_hex(at(end))) {
    integer_range.second = ++end;
  }

@ -391,7 +427,7 @@ Token Lexer::try_hex_float() {

  // Collect fractional range (if any)
  auto fractional_range = std::make_pair(end, end);
-  while (end < len_ && is_hex(file_->content.data[end])) {
+  while (end < length() && is_hex(at(end))) {
    fractional_range.second = ++end;
  }

@ -421,7 +457,7 @@ Token Lexer::try_hex_float() {
  // The magnitude is zero if and only if seen_prior_one_bits is false.
  bool seen_prior_one_bits = false;
  for (auto i = integer_range.first; i < integer_range.second; ++i) {
-    const auto nibble = hex_value(file_->content.data[i]);
+    const auto nibble = hex_value(at(i));
    if (nibble != 0) {
      has_zero_integer = false;
    }
@ -447,7 +483,7 @@ Token Lexer::try_hex_float() {
  // Parse fractional part
  // [0-9a-fA-F]*
  for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
-    auto nibble = hex_value(file_->content.data[i]);
+    auto nibble = hex_value(at(i));
    for (int32_t bit = 3; bit >= 0; --bit) {
      auto v = 1 & (nibble >> bit);

@ -495,11 +531,10 @@ Token Lexer::try_hex_float() {
    // Allow overflow (in uint32_t) when the floating point value magnitude is
    // zero.
    bool has_exponent_digits = false;
-    while (end < len_ && isdigit(file_->content.data[end])) {
+    while (end < length() && isdigit(at(end))) {
      has_exponent_digits = true;
      auto prev_exponent = input_exponent;
-      input_exponent =
-          (input_exponent * 10) + dec_value(file_->content.data[end]);
+      input_exponent = (input_exponent * 10) + dec_value(at(end));
      // Check if we've overflowed input_exponent. This only matters when
      // the mantissa is non-zero.
      if (!is_zero && (prev_exponent > input_exponent)) {
@ -512,7 +547,7 @@ Token Lexer::try_hex_float() {
    // Parse optional 'f' suffix.  For a hex float, it can only exist
    // when the exponent is present. Otherwise it will look like
    // one of the mantissa digits.
-    if (end < len_ && matches(end, "f")) {
+    if (end < length() && matches(end, "f")) {
      end++;
    }

@ -522,8 +557,7 @@ Token Lexer::try_hex_float() {
    }
  }

-  pos_ = end;
-  location_.column += (end - start);
+  advance(end - start);
  end_source(source);

  if (is_zero) {
@ -611,29 +645,26 @@ Token Lexer::build_token_from_int_if_possible(Source source,
                                              size_t start,
                                              size_t end,
                                              int32_t base) {
-  auto res = strtoll(file_->content.data.c_str() + start, nullptr, base);
-  if (matches(pos_, "u")) {
+  auto res = strtoll(&at(start), nullptr, base);
+  if (matches(pos(), "u")) {
    if (static_cast<uint64_t>(res) >
        static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
-      return {Token::Type::kError, source,
-              "u32 (" + file_->content.data.substr(start, end - start) +
-                  ") too large"};
+      return {
+          Token::Type::kError, source,
+          "u32 (" + std::string{substr(start, end - start)} + ") too large"};
    }
-    pos_ += 1;
-    location_.column += 1;
+    advance(1);
    end_source(source);
    return {source, static_cast<uint32_t>(res)};
  }

  if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
    return {Token::Type::kError, source,
-            "i32 (" + file_->content.data.substr(start, end - start) +
-                ") too small"};
+            "i32 (" + std::string{substr(start, end - start)} + ") too small"};
  }
  if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
    return {Token::Type::kError, source,
-            "i32 (" + file_->content.data.substr(start, end - start) +
-                ") too large"};
+            "i32 (" + std::string{substr(start, end - start)} + ") too large"};
  }
  end_source(source);
  return {source, static_cast<int32_t>(res)};
@ -641,8 +672,8 @@ Token Lexer::build_token_from_int_if_possible(Source source,

 Token Lexer::try_hex_integer() {
  constexpr size_t kMaxDigits = 8;  // Valid for both 32-bit integer types
-  auto start = pos_;
-  auto end = pos_;
+  auto start = pos();
+  auto end = pos();

  auto source = begin_source();

@ -657,14 +688,14 @@ Token Lexer::try_hex_integer() {
  }

  auto first = end;
-  while (!is_eof() && is_hex(file_->content.data[end])) {
+  while (!is_eol() && is_hex(at(end))) {
    end++;

    auto digits = end - first;
    if (digits > kMaxDigits) {
      return {Token::Type::kError, source,
              "integer literal (" +
-                  file_->content.data.substr(start, end - 1 - start) +
+                  std::string{substr(start, end - 1 - start)} +
                  "...) has too many digits"};
    }
  }
@ -673,15 +704,14 @@ Token Lexer::try_hex_integer() {
            "integer or float hex literal has no significant digits"};
  }

-  pos_ = end;
-  location_.column += (end - start);
+  advance(end - start);

  return build_token_from_int_if_possible(source, start, end, 16);
 }

 Token Lexer::try_integer() {
  constexpr size_t kMaxDigits = 10;  // Valid for both 32-bit integer types
-  auto start = pos_;
+  auto start = pos();
  auto end = start;

  auto source = begin_source();
@ -690,7 +720,7 @@ Token Lexer::try_integer() {
    end++;
  }

-  if (end >= len_ || !is_digit(file_->content.data[end])) {
+  if (end >= length() || !is_digit(at(end))) {
    return {};
  }

@ -698,67 +728,62 @@ Token Lexer::try_integer() {
  // If the first digit is a zero this must only be zero as leading zeros
  // are not allowed.
  auto next = first + 1;
-  if (next < len_) {
-    if (file_->content.data[first] == '0' &&
-        is_digit(file_->content.data[next])) {
+  if (next < length()) {
+    if (at(first) == '0' && is_digit(at(next))) {
      return {Token::Type::kError, source,
              "integer literal (" +
-                  file_->content.data.substr(start, end - 1 - start) +
+                  std::string{substr(start, end - 1 - start)} +
                  "...) has leading 0s"};
    }
  }

-  while (end < len_ && is_digit(file_->content.data[end])) {
+  while (end < length() && is_digit(at(end))) {
    auto digits = end - first;
    if (digits > kMaxDigits) {
      return {Token::Type::kError, source,
              "integer literal (" +
-                  file_->content.data.substr(start, end - 1 - start) +
+                  std::string{substr(start, end - 1 - start)} +
                  "...) has too many digits"};
    }

    end++;
  }

-  pos_ = end;
-  location_.column += (end - start);
+  advance(end - start);

  return build_token_from_int_if_possible(source, start, end, 10);
 }

 Token Lexer::try_ident() {
  auto source = begin_source();
-  auto start = pos_;
+  auto start = pos();

  // This below assumes that the size of a single std::string element is 1 byte.
-  static_assert(sizeof(file_->content.data[0]) == sizeof(uint8_t),
+  static_assert(sizeof(at(0)) == sizeof(uint8_t),
                "tint::reader::wgsl requires the size of a std::string element "
                "to be a single byte");

  // Must begin with an XID_Source unicode character, or underscore
  {
-    auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
-    auto [code_point, n] =
-        text::utf8::Decode(utf8, file_->content.data.size() - pos_);
+    auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
+    auto [code_point, n] = text::utf8::Decode(utf8, length() - pos());
    if (n == 0) {
-      pos_++;  // Skip the bad byte.
+      advance();  // Skip the bad byte.
      return {Token::Type::kError, source, "invalid UTF-8"};
    }
    if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
      return {};
    }
    // Consume start codepoint
-    pos_ += n;
-    location_.column += n;
+    advance(n);
  }

-  while (!is_eof()) {
+  while (!is_eol()) {
    // Must continue with an XID_Continue unicode character
-    auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
-    auto [code_point, n] =
-        text::utf8::Decode(utf8, file_->content.data.size() - pos_);
+    auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
+    auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos());
    if (n == 0) {
-      pos_++;  // Skip the bad byte.
+      advance();  // Skip the bad byte.
      return {Token::Type::kError, source, "invalid UTF-8"};
    }
    if (!code_point.IsXIDContinue()) {
@ -766,21 +791,19 @@ Token Lexer::try_ident() {
    }

    // Consume continuing codepoint
-    pos_ += n;
-    location_.column += n;
+    advance(n);
  }

-  if (file_->content.data[start] == '_') {
+  if (at(start) == '_') {
    // Check for an underscore on its own (special token), or a
    // double-underscore (not allowed).
-    if ((pos_ == start + 1) || (file_->content.data[start + 1] == '_')) {
-      location_.column -= (pos_ - start);
-      pos_ = start;
+    if ((pos() == start + 1) || (at(start + 1) == '_')) {
+      set_pos(start);
      return {};
    }
  }

-  auto str = file_->content.data_view.substr(start, pos_ - start);
+  auto str = substr(start, pos() - start);
  end_source(source);

  auto t = check_keyword(source, str);
@ -795,182 +818,138 @@ Token Lexer::try_punctuation() {
  auto source = begin_source();
  auto type = Token::Type::kUninitialized;

-  if (matches(pos_, "@")) {
+  if (matches(pos(), "@")) {
    type = Token::Type::kAttr;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "(")) {
+    advance(1);
+  } else if (matches(pos(), "(")) {
    type = Token::Type::kParenLeft;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, ")")) {
+    advance(1);
+  } else if (matches(pos(), ")")) {
    type = Token::Type::kParenRight;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "[")) {
+    advance(1);
+  } else if (matches(pos(), "[")) {
    type = Token::Type::kBracketLeft;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "]")) {
+    advance(1);
+  } else if (matches(pos(), "]")) {
    type = Token::Type::kBracketRight;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "{")) {
+    advance(1);
+  } else if (matches(pos(), "{")) {
    type = Token::Type::kBraceLeft;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "}")) {
+    advance(1);
+  } else if (matches(pos(), "}")) {
    type = Token::Type::kBraceRight;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "&&")) {
+    advance(1);
+  } else if (matches(pos(), "&&")) {
    type = Token::Type::kAndAnd;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "&=")) {
+    advance(2);
+  } else if (matches(pos(), "&=")) {
    type = Token::Type::kAndEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "&")) {
+    advance(2);
+  } else if (matches(pos(), "&")) {
    type = Token::Type::kAnd;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "/=")) {
+    advance(1);
+  } else if (matches(pos(), "/=")) {
    type = Token::Type::kDivisionEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "/")) {
+    advance(2);
+  } else if (matches(pos(), "/")) {
    type = Token::Type::kForwardSlash;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "!=")) {
+    advance(1);
+  } else if (matches(pos(), "!=")) {
    type = Token::Type::kNotEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "!")) {
+    advance(2);
+  } else if (matches(pos(), "!")) {
    type = Token::Type::kBang;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, ":")) {
+    advance(1);
+  } else if (matches(pos(), ":")) {
    type = Token::Type::kColon;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, ",")) {
+    advance(1);
+  } else if (matches(pos(), ",")) {
    type = Token::Type::kComma;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "==")) {
+    advance(1);
+  } else if (matches(pos(), "==")) {
    type = Token::Type::kEqualEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "=")) {
+    advance(2);
+  } else if (matches(pos(), "=")) {
    type = Token::Type::kEqual;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, ">=")) {
+    advance(1);
+  } else if (matches(pos(), ">=")) {
    type = Token::Type::kGreaterThanEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, ">>")) {
+    advance(2);
+  } else if (matches(pos(), ">>")) {
    type = Token::Type::kShiftRight;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, ">")) {
+    advance(2);
+  } else if (matches(pos(), ">")) {
    type = Token::Type::kGreaterThan;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "<=")) {
+    advance(1);
+  } else if (matches(pos(), "<=")) {
    type = Token::Type::kLessThanEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "<<")) {
+    advance(2);
+  } else if (matches(pos(), "<<")) {
    type = Token::Type::kShiftLeft;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "<")) {
+    advance(2);
+  } else if (matches(pos(), "<")) {
    type = Token::Type::kLessThan;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "%=")) {
+    advance(1);
+  } else if (matches(pos(), "%=")) {
    type = Token::Type::kModuloEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "%")) {
+    advance(2);
+  } else if (matches(pos(), "%")) {
    type = Token::Type::kMod;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "->")) {
+    advance(1);
+  } else if (matches(pos(), "->")) {
    type = Token::Type::kArrow;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "--")) {
+    advance(2);
+  } else if (matches(pos(), "--")) {
    type = Token::Type::kMinusMinus;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "-=")) {
+    advance(2);
+  } else if (matches(pos(), "-=")) {
    type = Token::Type::kMinusEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "-")) {
+    advance(2);
+  } else if (matches(pos(), "-")) {
    type = Token::Type::kMinus;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, ".")) {
+    advance(1);
+  } else if (matches(pos(), ".")) {
    type = Token::Type::kPeriod;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "++")) {
+    advance(1);
+  } else if (matches(pos(), "++")) {
    type = Token::Type::kPlusPlus;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "+=")) {
+    advance(2);
+  } else if (matches(pos(), "+=")) {
    type = Token::Type::kPlusEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "+")) {
+    advance(2);
+  } else if (matches(pos(), "+")) {
    type = Token::Type::kPlus;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "||")) {
+    advance(1);
+  } else if (matches(pos(), "||")) {
    type = Token::Type::kOrOr;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "|=")) {
+    advance(2);
+  } else if (matches(pos(), "|=")) {
    type = Token::Type::kOrEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "|")) {
+    advance(2);
+  } else if (matches(pos(), "|")) {
    type = Token::Type::kOr;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, ";")) {
+    advance(1);
+  } else if (matches(pos(), ";")) {
    type = Token::Type::kSemicolon;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "*=")) {
+    advance(1);
+  } else if (matches(pos(), "*=")) {
    type = Token::Type::kTimesEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "*")) {
+    advance(2);
+  } else if (matches(pos(), "*")) {
    type = Token::Type::kStar;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "~")) {
+    advance(1);
+  } else if (matches(pos(), "~")) {
    type = Token::Type::kTilde;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "_")) {
+    advance(1);
+  } else if (matches(pos(), "_")) {
    type = Token::Type::kUnderscore;
-    pos_ += 1;
-    location_.column += 1;
-  } else if (matches(pos_, "^=")) {
+    advance(1);
+  } else if (matches(pos(), "^=")) {
    type = Token::Type::kXorEqual;
-    pos_ += 2;
-    location_.column += 2;
-  } else if (matches(pos_, "^")) {
+    advance(2);
+  } else if (matches(pos(), "^")) {
    type = Token::Type::kXor;
-    pos_ += 1;
-    location_.column += 1;
+    advance(1);
  }

  end_source(source);
--- a/src/tint/reader/wgsl/lexer.h
+++ b/src/tint/reader/wgsl/lexer.h
@ -67,8 +67,26 @@ class Lexer {
  Source begin_source() const;
  void end_source(Source&) const;

+  /// @returns view of current line
+  const std::string_view line() const;
+  /// @returns position in current line
+  size_t pos() const;
+  /// @returns length of current line
+  size_t length() const;
+  /// @returns reference to character at `pos` within current line
+  const char& at(size_t pos) const;
+  /// @returns substring view at `offset` within current line of length `count`
+  std::string_view substr(size_t offset, size_t count);
+  /// advances current position by `offset` within current line
+  void advance(size_t offset = 1);
+  /// sets current position to `pos` within current line
+  void set_pos(size_t pos);
+  /// advances current position to next line
+  void advance_line();
  /// @returns true if the end of the input has been reached.
  bool is_eof() const;
+  /// @returns true if the end of the current line has been reached.
+  bool is_eol() const;
  /// @returns true if there is another character on the input and
  /// it is not null.
  bool is_null() const;
@ -78,14 +96,11 @@ class Lexer {
  /// @param ch a character
  /// @returns true if 'ch' is a hexadecimal digit
  bool is_hex(char ch) const;
+  /// @returns true if string at `pos` matches `substr`
  bool matches(size_t pos, std::string_view substr);

  /// The source file content
  Source::File const* const file_;
-  /// The length of the input
-  uint32_t len_ = 0;
-  /// The current position in utf-8 code units (bytes) within the input
-  uint32_t pos_ = 0;
  /// The current location within the input
  Source::Location location_;
 };