Optimize Source by removing a std::string

The `std::string file_path` carried by every Source was a significant source of heap allocations, which made creating and copying Sources expensive.
Move the path into Source::File and have each Source reference its file through a `const File*` instead.

Bug: tint:1383
Change-Id: Id9e3bdef1cf24aea5f3c83f348c05f5cf8ef4bbb
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/78321
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Kokoro: Ben Clayton <bclayton@google.com>
Ben Clayton 2022-01-27 17:36:27 +00:00 committed by Tint LUCI CQ
parent 792897422d
commit df9900c43e
13 changed files with 260 additions and 172 deletions
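
For orientation, here is a simplified before/after sketch of the data members involved (field names are taken from the source.h diff below; the surrounding types are minimal stand-ins, not the real tint declarations):

```cpp
#include <string>

// Minimal stand-ins for the real tint types, for illustration only.
struct FileContent { std::string data; };
struct File { std::string path; FileContent content; };
struct Range {};

// Before: every Source owned a std::string, so constructing or copying a
// Source heap-allocated (and copied) the file path.
struct SourceBefore {
  Range range;
  std::string file_path;
  const FileContent* file_content = nullptr;
};

// After: the path lives in Source::File; a Source is just a range plus a
// non-owning pointer, so copying it is trivially cheap.
struct SourceAfter {
  Range range;
  const File* file = nullptr;
};
```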


@ -40,6 +40,8 @@ endfunction()
add_library(tint_diagnostic_utils
debug.cc
debug.h
source.h
source.cc
diagnostic/diagnostic.cc
diagnostic/diagnostic.h
diagnostic/formatter.cc
@ -286,8 +288,6 @@ set(TINT_LIB_SRCS
sem/struct.cc
sem/type_mappings.h
sem/variable.cc
source.cc
source.h
symbol_table.cc
symbol_table.h
symbol.cc
@ -686,6 +686,7 @@ if(TINT_BUILD_TESTS)
clone_context_test.cc
debug_test.cc
demangler_test.cc
diagnostic/diagnostic_test.cc
diagnostic/formatter_test.cc
diagnostic/printer_test.cc
intrinsic_table_test.cc


@ -32,7 +32,8 @@ InternalCompilerError::InternalCompilerError(const char* file,
: file_(file), line_(line), system_(system), diagnostics_(diagnostics) {}
InternalCompilerError::~InternalCompilerError() {
Source source{Source::Range{Source::Location{line_}}, file_};
Source source{Source::Range{{line_}}, new Source::File{file_, ""}};
diagnostics_.own_file(source.file);
diagnostics_.add_ice(system_, msg_.str(), source);
if (ice_reporter) {


@ -14,6 +14,8 @@
#include "src/diagnostic/diagnostic.h"
#include <unordered_map>
#include "src/diagnostic/formatter.h"
namespace tint {
@ -21,13 +23,39 @@ namespace diag {
List::List() = default;
List::List(std::initializer_list<Diagnostic> list) : entries_(list) {}
List::List(const List&) = default;
List::List(List&&) = default;
List::List(const List& rhs) {
*this = rhs;
}
List::List(List&& rhs) = default;
List::~List() = default;
List& List::operator=(const List&) = default;
List& List::operator=(List&&) = default;
List& List::operator=(const List& rhs) {
// Create copies of any owned files, maintaining a map of rhs-file to
// new-file.
std::unordered_map<const Source::File*, const Source::File*> files;
owned_files_.reserve(rhs.owned_files_.size());
files.reserve(rhs.owned_files_.size());
for (auto& rhs_file : rhs.owned_files_) {
auto file = std::make_unique<Source::File>(*rhs_file);
files.emplace(rhs_file.get(), file.get());
owned_files_.emplace_back(std::move(file));
}
// Copy the diagnostic entries, then fix up pointers to the file copies.
entries_ = rhs.entries_;
for (auto& entry : entries_) {
if (auto it = files.find(entry.source.file); it != files.end()) {
entry.source.file = it->second;
}
}
error_count_ = rhs.error_count_;
return *this;
}
List& List::operator=(List&& rhs) = default;
std::string List::str() const {
diag::Formatter::Style style;


@ -15,6 +15,7 @@
#ifndef SRC_DIAGNOSTIC_DIAGNOSTIC_H_
#define SRC_DIAGNOSTIC_DIAGNOSTIC_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>
@ -87,6 +88,8 @@ class List {
/// Move constructor. Moves the diagnostics from `list` into this list.
/// @param list the list of diagnostics to move into this list.
List(List&& list);
/// Destructor
~List();
/// Assignment operator. Copies the diagnostics from `list` into this list.
@ -205,6 +208,13 @@ class List {
add(std::move(ice));
}
/// Adds the file to the list of files owned by this diagnostic list.
/// When this list is destructed, all the owned files will be deleted.
/// @param file the file that this List should own
void own_file(const Source::File* file) {
owned_files_.emplace_back(std::unique_ptr<const Source::File>(file));
}
/// @returns true iff the diagnostic list contains error diagnostics (or of
/// higher severity).
bool contains_errors() const { return error_count_ > 0; }
@ -222,6 +232,7 @@ class List {
private:
std::vector<Diagnostic> entries_;
std::vector<std::unique_ptr<const Source::File>> owned_files_;
size_t error_count_ = 0;
};
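
To illustrate the new ownership hook, a minimal usage sketch of `own_file`, mirroring what debug.cc above and diagnostic_test.cc below do; the `example` function and the path/content strings are placeholders:

```cpp
#include "src/diagnostic/diagnostic.h"  // sketch: assumes tint include paths
#include "src/source.h"

void example() {
  // The list takes ownership of the heap-allocated file and deletes it
  // when the list is destructed.
  auto* file = new tint::Source::File{"path", "content"};
  tint::diag::List list;
  list.own_file(file);

  // Copying the list deep-copies the owned files and re-points each copied
  // diagnostic's source at the new copy, so the copy never dangles.
  tint::diag::List copy = list;
}
```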


@ -0,0 +1,65 @@
// Copyright 2020 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/diagnostic/formatter.h"
#include "gtest/gtest.h"
#include "src/diagnostic/diagnostic.h"
namespace tint {
namespace diag {
namespace {
TEST(DiagListTest, UnownedFilesNotCopied) {
Source::File file{"path", "content"};
diag::List list_a, list_b;
{
diag::Diagnostic diag{};
diag.source = Source{Source::Range{{0, 0}}, &file};
list_a.add(std::move(diag));
}
list_b = list_a;
ASSERT_EQ(list_b.count(), list_a.count());
EXPECT_EQ(list_b.begin()->source.file, &file);
}
TEST(DiagListTest, OwnedFilesCopied) {
auto* file = new Source::File{"path", "content"};
diag::List list_a, list_b;
{
diag::Diagnostic diag{};
diag.source = Source{Source::Range{{0, 0}}, file};
list_a.add(std::move(diag));
list_a.own_file(file);
}
list_b = list_a;
ASSERT_EQ(list_b.count(), list_a.count());
EXPECT_NE(list_b.begin()->source.file, file);
ASSERT_NE(list_b.begin()->source.file, nullptr);
EXPECT_EQ(list_b.begin()->source.file->path, file->path);
EXPECT_EQ(list_b.begin()->source.file->content.data, file->content.data);
EXPECT_EQ(list_b.begin()->source.file->content.data_view,
file->content.data_view);
EXPECT_EQ(list_b.begin()->source.file->content.lines, file->content.lines);
}
} // namespace
} // namespace diag
} // namespace tint


@ -142,12 +142,12 @@ void Formatter::format(const Diagnostic& diag, State& state) const {
std::vector<TextAndColor> prefix;
prefix.reserve(6);
if (style_.print_file && !src.file_path.empty()) {
if (style_.print_file && src.file != nullptr) {
if (rng.begin.line > 0) {
prefix.emplace_back(TextAndColor{src.file_path + ":" + to_str(rng.begin),
prefix.emplace_back(TextAndColor{src.file->path + ":" + to_str(rng.begin),
Color::kDefault});
} else {
prefix.emplace_back(TextAndColor{src.file_path, Color::kDefault});
prefix.emplace_back(TextAndColor{src.file->path, Color::kDefault});
}
} else if (rng.begin.line > 0) {
prefix.emplace_back(TextAndColor{to_str(rng.begin), Color::kDefault});
@ -190,15 +190,15 @@ void Formatter::format(const Diagnostic& diag, State& state) const {
}
state << diag.message;
if (style_.print_line && src.file_content != nullptr && rng.begin.line > 0) {
if (style_.print_line && src.file && rng.begin.line > 0) {
state.newline();
state.set_style({Color::kDefault, false});
for (size_t line_num = rng.begin.line;
(line_num <= rng.end.line) &&
(line_num <= src.file_content->lines.size());
(line_num <= src.file->content.lines.size());
line_num++) {
auto& line = src.file_content->lines[line_num - 1];
auto& line = src.file->content.lines[line_num - 1];
auto line_len = line.size();
for (auto c : line) {


@ -53,10 +53,9 @@ uint32_t hex_value(char c) {
} // namespace
Lexer::Lexer(const std::string& file_path, const Source::FileContent* content)
: file_path_(file_path),
content_(content),
len_(static_cast<uint32_t>(content->data.size())),
Lexer::Lexer(const Source::File* file)
: file_(file),
len_(static_cast<uint32_t>(file->content.data.size())),
location_{1, 1} {}
Lexer::~Lexer() = default;
@ -96,8 +95,7 @@ Token Lexer::next() {
Source Lexer::begin_source() const {
Source src{};
src.file_path = file_path_;
src.file_content = content_;
src.file = file_;
src.range.begin = location_;
src.range.end = location_;
return src;
@ -112,7 +110,7 @@ bool Lexer::is_eof() const {
}
bool Lexer::is_null() const {
return (pos_ < len_) && (content_->data[pos_] == 0);
return (pos_ < len_) && (file_->content.data[pos_] == 0);
}
bool Lexer::is_alpha(char ch) const {
@ -134,13 +132,13 @@ bool Lexer::is_hex(char ch) const {
bool Lexer::matches(size_t pos, std::string_view substr) {
if (pos >= len_)
return false;
return content_->data_view.substr(pos, substr.size()) == substr;
return file_->content.data_view.substr(pos, substr.size()) == substr;
}
Token Lexer::skip_whitespace_and_comments() {
for (;;) {
auto pos = pos_;
while (!is_eof() && is_whitespace(content_->data[pos_])) {
while (!is_eof() && is_whitespace(file_->content.data[pos_])) {
if (matches(pos_, "\n")) {
pos_++;
location_.line++;
@ -235,7 +233,7 @@ Token Lexer::try_float() {
if (matches(end, "-")) {
end++;
}
while (end < len_ && is_digit(content_->data[end])) {
while (end < len_ && is_digit(file_->content.data[end])) {
has_mantissa_digits = true;
end++;
}
@ -246,7 +244,7 @@ Token Lexer::try_float() {
end++;
}
while (end < len_ && is_digit(content_->data[end])) {
while (end < len_ && is_digit(file_->content.data[end])) {
has_mantissa_digits = true;
end++;
}
@ -263,14 +261,14 @@ Token Lexer::try_float() {
end++;
}
while (end < len_ && isdigit(content_->data[end])) {
while (end < len_ && isdigit(file_->content.data[end])) {
has_exponent = true;
end++;
}
// If an 'e' or 'E' was present, then the number part must also be present.
if (!has_exponent) {
const auto str = content_->data.substr(start, end - start);
const auto str = file_->content.data.substr(start, end - start);
return {Token::Type::kError, source,
"incomplete exponent for floating point literal: " + str};
}
@ -288,14 +286,14 @@ Token Lexer::try_float() {
}
// Save the error string, for use by diagnostics.
const auto str = content_->data.substr(start, end - start);
const auto str = file_->content.data.substr(start, end - start);
pos_ = end;
location_.column += (end - start);
end_source(source);
auto res = strtod(content_->data.c_str() + start, nullptr);
auto res = strtod(file_->content.data.c_str() + start, nullptr);
// This errors out if a non-zero magnitude is too small to represent in a
// float. It can't be represented faithfully in an f32.
const auto magnitude = std::fabs(res);
@ -386,7 +384,7 @@ Token Lexer::try_hex_float() {
// Collect integer range (if any)
auto integer_range = std::make_pair(end, end);
while (end < len_ && is_hex(content_->data[end])) {
while (end < len_ && is_hex(file_->content.data[end])) {
integer_range.second = ++end;
}
@ -399,7 +397,7 @@ Token Lexer::try_hex_float() {
// Collect fractional range (if any)
auto fractional_range = std::make_pair(end, end);
while (end < len_ && is_hex(content_->data[end])) {
while (end < len_ && is_hex(file_->content.data[end])) {
fractional_range.second = ++end;
}
@ -429,7 +427,7 @@ Token Lexer::try_hex_float() {
// The magnitude is zero if and only if seen_prior_one_bits is false.
bool seen_prior_one_bits = false;
for (auto i = integer_range.first; i < integer_range.second; ++i) {
const auto nibble = hex_value(content_->data[i]);
const auto nibble = hex_value(file_->content.data[i]);
if (nibble != 0) {
has_zero_integer = false;
}
@ -455,7 +453,7 @@ Token Lexer::try_hex_float() {
// Parse fractional part
// [0-9a-fA-F]*
for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
auto nibble = hex_value(content_->data[i]);
auto nibble = hex_value(file_->content.data[i]);
for (int32_t bit = 3; bit >= 0; --bit) {
auto v = 1 & (nibble >> bit);
@ -503,10 +501,11 @@ Token Lexer::try_hex_float() {
// Allow overflow (in uint32_t) when the floating point value magnitude is
// zero.
bool has_exponent_digits = false;
while (end < len_ && isdigit(content_->data[end])) {
while (end < len_ && isdigit(file_->content.data[end])) {
has_exponent_digits = true;
auto prev_exponent = input_exponent;
input_exponent = (input_exponent * 10) + dec_value(content_->data[end]);
input_exponent =
(input_exponent * 10) + dec_value(file_->content.data[end]);
// Check if we've overflowed input_exponent. This only matters when
// the mantissa is non-zero.
if (!is_zero && (prev_exponent > input_exponent)) {
@ -618,13 +617,13 @@ Token Lexer::build_token_from_int_if_possible(Source source,
size_t start,
size_t end,
int32_t base) {
auto res = strtoll(content_->data.c_str() + start, nullptr, base);
auto res = strtoll(file_->content.data.c_str() + start, nullptr, base);
if (matches(pos_, "u")) {
if (static_cast<uint64_t>(res) >
static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
return {
Token::Type::kError, source,
"u32 (" + content_->data.substr(start, end - start) + ") too large"};
return {Token::Type::kError, source,
"u32 (" + file_->content.data.substr(start, end - start) +
") too large"};
}
pos_ += 1;
location_.column += 1;
@ -633,14 +632,14 @@ Token Lexer::build_token_from_int_if_possible(Source source,
}
if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
return {
Token::Type::kError, source,
"i32 (" + content_->data.substr(start, end - start) + ") too small"};
return {Token::Type::kError, source,
"i32 (" + file_->content.data.substr(start, end - start) +
") too small"};
}
if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
return {
Token::Type::kError, source,
"i32 (" + content_->data.substr(start, end - start) + ") too large"};
return {Token::Type::kError, source,
"i32 (" + file_->content.data.substr(start, end - start) +
") too large"};
}
end_source(source);
return {source, static_cast<int32_t>(res)};
@ -664,14 +663,14 @@ Token Lexer::try_hex_integer() {
}
auto first = end;
while (!is_eof() && is_hex(content_->data[end])) {
while (!is_eof() && is_hex(file_->content.data[end])) {
end++;
auto digits = end - first;
if (digits > kMaxDigits) {
return {Token::Type::kError, source,
"integer literal (" +
content_->data.substr(start, end - 1 - start) +
file_->content.data.substr(start, end - 1 - start) +
"...) has too many digits"};
}
}
@ -697,7 +696,7 @@ Token Lexer::try_integer() {
end++;
}
if (end >= len_ || !is_digit(content_->data[end])) {
if (end >= len_ || !is_digit(file_->content.data[end])) {
return {};
}
@ -706,20 +705,21 @@ Token Lexer::try_integer() {
// are not allowed.
auto next = first + 1;
if (next < len_) {
if (content_->data[first] == '0' && is_digit(content_->data[next])) {
if (file_->content.data[first] == '0' &&
is_digit(file_->content.data[next])) {
return {Token::Type::kError, source,
"integer literal (" +
content_->data.substr(start, end - 1 - start) +
file_->content.data.substr(start, end - 1 - start) +
"...) has leading 0s"};
}
}
while (end < len_ && is_digit(content_->data[end])) {
while (end < len_ && is_digit(file_->content.data[end])) {
auto digits = end - first;
if (digits > kMaxDigits) {
return {Token::Type::kError, source,
"integer literal (" +
content_->data.substr(start, end - 1 - start) +
file_->content.data.substr(start, end - 1 - start) +
"...) has too many digits"};
}
@ -734,29 +734,30 @@ Token Lexer::try_integer() {
Token Lexer::try_ident() {
// Must begin with an a-zA-Z_
if (!(is_alpha(content_->data[pos_]) || content_->data[pos_] == '_')) {
if (!(is_alpha(file_->content.data[pos_]) ||
file_->content.data[pos_] == '_')) {
return {};
}
auto source = begin_source();
auto s = pos_;
while (!is_eof() && is_alphanum_underscore(content_->data[pos_])) {
while (!is_eof() && is_alphanum_underscore(file_->content.data[pos_])) {
pos_++;
location_.column++;
}
if (content_->data[s] == '_') {
if (file_->content.data[s] == '_') {
// Check for an underscore on its own (special token), or a
// double-underscore (not allowed).
if ((pos_ == s + 1) || (content_->data[s + 1] == '_')) {
if ((pos_ == s + 1) || (file_->content.data[s + 1] == '_')) {
location_.column -= (pos_ - s);
pos_ = s;
return {};
}
}
auto str = content_->data_view.substr(s, pos_ - s);
auto str = file_->content.data_view.substr(s, pos_ - s);
end_source(source);
auto t = check_keyword(source, str);


@ -27,9 +27,8 @@ namespace wgsl {
class Lexer {
public:
/// Creates a new Lexer
/// @param file_path the path to the file containing the source
/// @param content the source content
Lexer(const std::string& file_path, const Source::FileContent* content);
/// @param file the source file
explicit Lexer(const Source::File* file);
~Lexer();
/// Returns the next token in the input stream.
@ -91,10 +90,8 @@ class Lexer {
bool is_alphanum_underscore(char ch) const;
bool matches(size_t pos, std::string_view substr);
/// The source file path
std::string const file_path_;
/// The source file content
Source::FileContent const* const content_;
Source::File const* const file_;
/// The length of the input
uint32_t len_ = 0;
/// The current position within the input


@ -26,15 +26,15 @@ namespace {
using LexerTest = testing::Test;
TEST_F(LexerTest, Empty) {
Source::FileContent content("");
Lexer l("test.wgsl", &content);
Source::File file("", "");
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsEof());
}
TEST_F(LexerTest, Skips_Whitespace) {
Source::FileContent content("\t\r\n\t ident\t\n\t \r ");
Lexer l("test.wgsl", &content);
Source::File file("", "\t\r\n\t ident\t\n\t \r ");
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsIdentifier());
@ -49,11 +49,11 @@ TEST_F(LexerTest, Skips_Whitespace) {
}
TEST_F(LexerTest, Skips_Comments_Line) {
Source::FileContent content(R"(//starts with comment
Source::File file("", R"(//starts with comment
ident1 //ends with comment
// blank line
ident2)");
Lexer l("test.wgsl", &content);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsIdentifier());
@ -76,9 +76,9 @@ ident1 //ends with comment
}
TEST_F(LexerTest, Skips_Comments_Block) {
Source::FileContent content(R"(/* comment
Source::File file("", R"(/* comment
text */ident)");
Lexer l("test.wgsl", &content);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsIdentifier());
@ -93,10 +93,10 @@ text */ident)");
}
TEST_F(LexerTest, Skips_Comments_Block_Nested) {
Source::FileContent content(R"(/* comment
Source::File file("", R"(/* comment
text // nested line comments are ignored /* more text
/////**/ */*/ident)");
Lexer l("test.wgsl", &content);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsIdentifier());
@ -113,11 +113,11 @@ text // nested line comments are ignored /* more text
TEST_F(LexerTest, Skips_Comments_Block_Unterminated) {
// I had to break up the /* because otherwise the clang readability check
// errored out saying it could not find the end of a multi-line comment.
Source::FileContent content(R"(
Source::File file("", R"(
/)"
R"(*
abcd)");
Lexer l("test.wgsl", &content);
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -129,8 +129,8 @@ abcd)");
}
TEST_F(LexerTest, Null_InWhitespace_IsError) {
Source::FileContent content(std::string{' ', 0, ' '});
Lexer l("test.wgsl", &content);
Source::File file("", std::string{' ', 0, ' '});
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsError());
@ -142,8 +142,8 @@ TEST_F(LexerTest, Null_InWhitespace_IsError) {
}
TEST_F(LexerTest, Null_InLineComment_IsError) {
Source::FileContent content(std::string{'/', '/', ' ', 0, ' '});
Lexer l("test.wgsl", &content);
Source::File file("", std::string{'/', '/', ' ', 0, ' '});
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsError());
@ -155,8 +155,8 @@ TEST_F(LexerTest, Null_InLineComment_IsError) {
}
TEST_F(LexerTest, Null_InBlockComment_IsError) {
Source::FileContent content(std::string{'/', '*', ' ', 0, '*', '/'});
Lexer l("test.wgsl", &content);
Source::File file("", std::string{'/', '*', ' ', 0, '*', '/'});
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsError());
@ -171,8 +171,8 @@ TEST_F(LexerTest, Null_InIdentifier_IsError) {
// Try inserting a null in an identifier. Other valid token
// kinds will behave similarly, so use the identifier case
// as a representative.
Source::FileContent content(std::string{'a', 0, 'c'});
Lexer l("test.wgsl", &content);
Source::File file("", std::string{'a', 0, 'c'});
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsIdentifier());
@ -197,8 +197,8 @@ inline std::ostream& operator<<(std::ostream& out, FloatData data) {
using FloatTest = testing::TestWithParam<FloatData>;
TEST_P(FloatTest, Parse) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral));
@ -275,8 +275,8 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
using FloatTest_Invalid = testing::TestWithParam<const char*>;
TEST_P(FloatTest_Invalid, Handles) {
Source::FileContent content(GetParam());
Lexer l("test.wgsl", &content);
Source::File file("", GetParam());
Lexer l(&file);
auto t = l.next();
EXPECT_FALSE(t.Is(Token::Type::kFloatLiteral));
@ -317,8 +317,8 @@ INSTANTIATE_TEST_SUITE_P(
using IdentifierTest = testing::TestWithParam<const char*>;
TEST_P(IdentifierTest, Parse) {
Source::FileContent content(GetParam());
Lexer l("test.wgsl", &content);
Source::File file("", GetParam());
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.IsIdentifier());
@ -343,24 +343,24 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
"alldigits_0123456789"));
TEST_F(LexerTest, IdentifierTest_SingleUnderscoreDoesNotMatch) {
Source::FileContent content("_");
Lexer l("test.wgsl", &content);
Source::File file("", "_");
Lexer l(&file);
auto t = l.next();
EXPECT_FALSE(t.IsIdentifier());
}
TEST_F(LexerTest, IdentifierTest_DoesNotStartWithDoubleUnderscore) {
Source::FileContent content("__test");
Lexer l("test.wgsl", &content);
Source::File file("", "__test");
Lexer l(&file);
auto t = l.next();
EXPECT_FALSE(t.IsIdentifier());
}
TEST_F(LexerTest, IdentifierTest_DoesNotStartWithNumber) {
Source::FileContent content("01test");
Lexer l("test.wgsl", &content);
Source::File file("", "01test");
Lexer l(&file);
auto t = l.next();
EXPECT_FALSE(t.IsIdentifier());
@ -378,8 +378,8 @@ inline std::ostream& operator<<(std::ostream& out, HexSignedIntData data) {
using IntegerTest_HexSigned = testing::TestWithParam<HexSignedIntData>;
TEST_P(IntegerTest_HexSigned, Matches) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(Token::Type::kSintLiteral));
@ -408,8 +408,8 @@ INSTANTIATE_TEST_SUITE_P(
TEST_F(LexerTest, HexPrefixOnly_IsError) {
// Could be the start of a hex integer or hex float, but is neither.
Source::FileContent content("0x");
Lexer l("test.wgsl", &content);
Source::File file("", "0x");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -419,8 +419,8 @@ TEST_F(LexerTest, HexPrefixOnly_IsError) {
TEST_F(LexerTest, HexPrefixUpperCaseOnly_IsError) {
// Could be the start of a hex integer or hex float, but is neither.
Source::FileContent content("0X");
Lexer l("test.wgsl", &content);
Source::File file("", "0X");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -430,8 +430,8 @@ TEST_F(LexerTest, HexPrefixUpperCaseOnly_IsError) {
TEST_F(LexerTest, NegativeHexPrefixOnly_IsError) {
// Could be the start of a hex integer or hex float, but is neither.
Source::FileContent content("-0x");
Lexer l("test.wgsl", &content);
Source::File file("", "-0x");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -441,8 +441,8 @@ TEST_F(LexerTest, NegativeHexPrefixOnly_IsError) {
TEST_F(LexerTest, NegativeHexPrefixUpperCaseOnly_IsError) {
// Could be the start of a hex integer or hex float, but is neither.
Source::FileContent content("-0X");
Lexer l("test.wgsl", &content);
Source::File file("", "-0X");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -451,8 +451,8 @@ TEST_F(LexerTest, NegativeHexPrefixUpperCaseOnly_IsError) {
}
TEST_F(LexerTest, IntegerTest_HexSignedTooLarge) {
Source::FileContent content("0x80000000");
Lexer l("test.wgsl", &content);
Source::File file("", "0x80000000");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -460,8 +460,8 @@ TEST_F(LexerTest, IntegerTest_HexSignedTooLarge) {
}
TEST_F(LexerTest, IntegerTest_HexSignedTooSmall) {
Source::FileContent content("-0x8000000F");
Lexer l("test.wgsl", &content);
Source::File file("", "-0x8000000F");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -470,8 +470,8 @@ TEST_F(LexerTest, IntegerTest_HexSignedTooSmall) {
TEST_F(LexerTest, IntegerTest_HexSignedTooManyDigits) {
{
Source::FileContent content("-0x100000000000000000000000");
Lexer l("test.wgsl", &content);
Source::File file("", "-0x100000000000000000000000");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -479,8 +479,8 @@ TEST_F(LexerTest, IntegerTest_HexSignedTooManyDigits) {
"integer literal (-0x10000000...) has too many digits");
}
{
Source::FileContent content("0x100000000000000");
Lexer l("test.wgsl", &content);
Source::File file("", "0x100000000000000");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -500,8 +500,8 @@ inline std::ostream& operator<<(std::ostream& out, HexUnsignedIntData data) {
using IntegerTest_HexUnsigned = testing::TestWithParam<HexUnsignedIntData>;
TEST_P(IntegerTest_HexUnsigned, Matches) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(Token::Type::kUintLiteral));
@ -526,8 +526,8 @@ INSTANTIATE_TEST_SUITE_P(
std::numeric_limits<uint32_t>::max()}));
TEST_F(LexerTest, IntegerTest_HexUnsignedTooManyDigits) {
Source::FileContent content("0x1000000000000000000000u");
Lexer l("test.wgsl", &content);
Source::File file("", "0x1000000000000000000000u");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -545,8 +545,8 @@ inline std::ostream& operator<<(std::ostream& out, UnsignedIntData data) {
using IntegerTest_Unsigned = testing::TestWithParam<UnsignedIntData>;
TEST_P(IntegerTest_Unsigned, Matches) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(Token::Type::kUintLiteral));
@ -564,8 +564,8 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
4294967295u}));
TEST_F(LexerTest, IntegerTest_UnsignedTooManyDigits) {
Source::FileContent content("10000000000000000000000u");
Lexer l("test.wgsl", &content);
Source::File file("", "10000000000000000000000u");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -583,8 +583,8 @@ inline std::ostream& operator<<(std::ostream& out, SignedIntData data) {
using IntegerTest_Signed = testing::TestWithParam<SignedIntData>;
TEST_P(IntegerTest_Signed, Matches) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(Token::Type::kSintLiteral));
@ -605,8 +605,8 @@ INSTANTIATE_TEST_SUITE_P(
SignedIntData{"-2147483648", -2147483648LL}));
TEST_F(LexerTest, IntegerTest_SignedTooManyDigits) {
Source::FileContent content("-10000000000000000");
Lexer l("test.wgsl", &content);
Source::File file("", "-10000000000000000");
Lexer l(&file);
auto t = l.next();
ASSERT_TRUE(t.Is(Token::Type::kError));
@ -615,8 +615,8 @@ TEST_F(LexerTest, IntegerTest_SignedTooManyDigits) {
using IntegerTest_Invalid = testing::TestWithParam<const char*>;
TEST_P(IntegerTest_Invalid, Parses) {
Source::FileContent content(GetParam());
Lexer l("test.wgsl", &content);
Source::File file("", GetParam());
Lexer l(&file);
auto t = l.next();
EXPECT_FALSE(t.Is(Token::Type::kSintLiteral));
@ -642,8 +642,8 @@ inline std::ostream& operator<<(std::ostream& out, TokenData data) {
using PunctuationTest = testing::TestWithParam<TokenData>;
TEST_P(PunctuationTest, Parses) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(params.type));
@ -701,8 +701,8 @@ INSTANTIATE_TEST_SUITE_P(
using KeywordTest = testing::TestWithParam<TokenData>;
TEST_P(KeywordTest, Parses) {
auto params = GetParam();
Source::FileContent content(params.input);
Lexer l("test.wgsl", &content);
Source::File file("", params.input);
Lexer l(&file);
auto t = l.next();
EXPECT_TRUE(t.Is(params.type)) << params.input;


@ -255,7 +255,7 @@ ParserImpl::VarDeclInfo::VarDeclInfo(Source source_in,
ParserImpl::VarDeclInfo::~VarDeclInfo() = default;
ParserImpl::ParserImpl(Source::File const* file)
: lexer_(std::make_unique<Lexer>(file->path, &file->content)) {}
: lexer_(std::make_unique<Lexer>(file)) {}
ParserImpl::~ParserImpl() = default;
@ -331,7 +331,7 @@ void ParserImpl::translation_unit() {
}
expect_global_decl();
if (builder_.Diagnostics().error_count() >= max_errors_) {
add_error(Source{{}, p.source().file_path},
add_error(Source{{}, p.source().file},
"stopping after " + std::to_string(max_errors_) + " errors");
break;
}


@ -68,8 +68,8 @@ Source::File::~File() = default;
std::ostream& operator<<(std::ostream& out, const Source& source) {
auto rng = source.range;
if (!source.file_path.empty()) {
out << source.file_path << ":";
if (source.file) {
out << source.file->path << ":";
}
if (rng.begin.line) {
out << rng.begin.line << ":";
@ -77,7 +77,7 @@ std::ostream& operator<<(std::ostream& out, const Source& source) {
out << rng.begin.column;
}
if (source.file_content) {
if (source.file) {
out << std::endl << std::endl;
auto repeat = [&](char c, size_t n) {
@ -87,10 +87,10 @@ std::ostream& operator<<(std::ostream& out, const Source& source) {
};
for (size_t line = rng.begin.line; line <= rng.end.line; line++) {
if (line < source.file_content->lines.size() + 1) {
auto len = source.file_content->lines[line - 1].size();
if (line < source.file->content.lines.size() + 1) {
auto len = source.file->content.lines[line - 1].size();
out << source.file_content->lines[line - 1];
out << source.file->content.lines[line - 1];
out << std::endl;


@ -58,10 +58,16 @@ class Source {
inline File(const std::string& p, const std::string& c)
: path(p), content(c) {}
/// Copy constructor
File(const File&) = default;
/// Move constructor
File(File&&) = default;
/// Destructor
~File();
/// file path (optional)
/// file path
const std::string path;
/// file content
const FileContent content;
@ -152,45 +158,25 @@ class Source {
/// Constructs the Source with the Range `rng` and File `file`
/// @param rng the source range
/// @param file the source file
inline Source(const Range& rng, File const* file)
: range(rng), file_path(file->path), file_content(&file->content) {}
/// Constructs the Source with the Range `rng`, file path `path` and content
/// `content`
/// @param rng the source range
/// @param path the source file path
/// @param content the source file content
inline Source(const Range& rng,
const std::string& path,
const FileContent* content = nullptr)
: range(rng), file_path(path), file_content(content) {}
/// @param f the source file
inline Source(const Range& rng, File const* f) : range(rng), file(f) {}
/// @returns a Source that points to the begin range of this Source.
inline Source Begin() const {
return Source(Range{range.begin}, file_path, file_content);
}
inline Source Begin() const { return Source(Range{range.begin}, file); }
/// @returns a Source that points to the end range of this Source.
inline Source End() const {
return Source(Range{range.end}, file_path, file_content);
}
inline Source End() const { return Source(Range{range.end}, file); }
/// Return a column-shifted Source
/// @param n the number of characters to shift by
/// @returns a Source with the range's columns shifted by `n`
inline Source operator+(size_t n) const {
return Source(range + n, file_path, file_content);
}
inline Source operator+(size_t n) const { return Source(range + n, file); }
/// Returns true if `this` Source is lexicographically less than `rhs`
/// @param rhs source to compare against
/// @returns true if `this` < `rhs`
inline bool operator<(const Source& rhs) {
if (file_path != rhs.file_path) {
return false;
}
if (file_content != rhs.file_content) {
if (file != rhs.file) {
return false;
}
return range.begin < rhs.range.begin;
@ -202,16 +188,13 @@ class Source {
/// @param end the end source of the range
/// @returns the combined source
inline static Source Combine(const Source& start, const Source& end) {
return Source(Source::Range(start.range.begin, end.range.end),
start.file_path, start.file_content);
return Source(Source::Range(start.range.begin, end.range.end), start.file);
}
/// range is the span of text this source refers to in #file_path
/// range is the span of text this source refers to in #file
Range range;
/// file is the optional file path this source refers to
std::string file_path;
/// file is the optional source content this source refers to
const FileContent* file_content = nullptr;
const File* file = nullptr;
};
/// Writes the Source::Location to the std::ostream.
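
For reference, a minimal sketch of how call sites now build a Source against a File after this change (matching the updated lexer and diagnostic tests above; the function name and WGSL snippet are placeholders):

```cpp
#include "src/source.h"  // sketch: assumes tint include paths

void example() {
  tint::Source::File file{"test.wgsl", "fn main() {}"};

  // The Source no longer copies the path string; it only points at the File.
  tint::Source src{tint::Source::Range{{1, 1}}, &file};

  // src.file->path == "test.wgsl"
  // src.file->content holds the text and the pre-split line table.
}
```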


@ -215,6 +215,7 @@ tint_unittests_source_set("tint_unittests_ast_src") {
tint_unittests_source_set("tint_unittests_diagnostic_src") {
sources = [
"../src/diagnostic/diagnostic_test.cc",
"../src/diagnostic/formatter_test.cc",
"../src/diagnostic/printer_test.cc",
]