dawn-cmake/src/tint/source.cc

// Copyright 2020 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/tint/source.h"

#include <algorithm>
#include <sstream>
#include <string_view>
#include <utility>

#include "src/tint/text/unicode.h"

namespace tint {
namespace {

// Decodes the UTF-8 code point that begins at byte offset `i` of `str` and
// reports whether it is a line break.
//
// Outputs:
//  * Plain line break (LF, VT, FF, NEL, LS, PS): `*is_line_break` is set to
//    true and `*line_break_size` to the byte width of that code point.
//  * CR: as above, except that when the CR is directly followed by LF the pair
//    is reported as a single break whose size covers both code points.
//  * Anything else: `*is_line_break` is set to false and `*line_break_size` is
//    left untouched.
//
// Returns false when Decode() reports a width of 0 (treated as a decode
// failure), true otherwise. If the very first decode fails neither output is
// written; if the decode of the code point following a CR fails, the outputs
// already describe the lone CR when false is returned.
bool ParseLineBreak(std::string_view str,
                    size_t i,
                    bool* is_line_break,
                    size_t* line_break_size) {
  // See https://www.w3.org/TR/WGSL/#blankspace

  const auto* bytes = reinterpret_cast<const uint8_t*>(&str[i]);
  auto [code_point, width] = text::utf8::Decode(bytes, str.size() - i);
  if (width == 0) {
    return false;
  }

  static const auto kLF = text::CodePoint(0x000A);    // line feed
  static const auto kVTab = text::CodePoint(0x000B);  // vertical tab
  static const auto kFF = text::CodePoint(0x000C);    // form feed
  static const auto kNL = text::CodePoint(0x0085);    // next line
  static const auto kCR = text::CodePoint(0x000D);    // carriage return
  static const auto kLS = text::CodePoint(0x2028);    // line separator
  static const auto kPS = text::CodePoint(0x2029);    // paragraph separator

  const bool is_cr = (code_point == kCR);
  if (!is_cr) {
    // Every break other than CR is exactly one code point wide.
    const bool is_simple_break = (code_point == kLF) || (code_point == kVTab) ||
                                 (code_point == kFF) || (code_point == kNL) ||
                                 (code_point == kPS) || (code_point == kLS);
    *is_line_break = is_simple_break;
    if (is_simple_break) {
      *line_break_size = width;
    }
    return true;
  }

  // CR is always a break on its own; it may be widened to CRLF below.
  *is_line_break = true;
  *line_break_size = width;

  const size_t following = i + width;
  if (following >= str.size()) {
    // CR is the last code point of the input.
    return true;
  }

  const auto* following_bytes =
      reinterpret_cast<const uint8_t*>(&str[following]);
  auto [following_cp, following_width] =
      text::utf8::Decode(following_bytes, str.size() - following);
  if (following_width == 0) {
    return false;
  }

  if (following_cp == kLF) {
    // CRLF counts as one break.
    *line_break_size = width + following_width;
  }
  return true;
}

// Splits `str` into views of its individual lines, using ParseLineBreak() to
// recognise line terminators. The terminators themselves are not part of the
// returned views. Text after the final terminator is returned as a last line
// only if it is non-empty, so input ending in a line break does not produce a
// trailing empty line.
std::vector<std::string_view> SplitLines(std::string_view str) {
  std::vector<std::string_view> result;

  size_t line_begin = 0;
  size_t pos = 0;
  while (pos < str.size()) {
    bool found_break = false;
    size_t break_length = 0;
    // Decode errors reported by ParseLineBreak are deliberately ignored here;
    // the Lexer is relied upon to diagnose them.
    ParseLineBreak(str, pos, &found_break, &break_length);

    if (!found_break) {
      // Not a terminator: step forward a single byte and try again.
      ++pos;
      continue;
    }

    result.push_back(str.substr(line_begin, pos - line_begin));
    pos += break_length;
    line_begin = pos;
  }

  // Remaining text that was not closed by a line break.
  if (line_begin < str.size()) {
    result.push_back(str.substr(line_begin));
  }

  return result;
}

// Rebases a list of string views from one buffer onto another.
//
// Each element of `src_list` is expected to be a sub-view of `src_view`. For
// every element the result holds a view with the same offset and length, but
// taken from `dst_view` instead.
//
// Offsets are computed from data() rather than &front(): elements of
// `src_list` may be empty (blank lines), and calling front() on an empty
// string_view is undefined behaviour (and aborts on hardened standard
// libraries).
std::vector<std::string_view> CopyRelativeStringViews(
    const std::vector<std::string_view>& src_list,
    const std::string_view& src_view,
    const std::string_view& dst_view) {
  std::vector<std::string_view> out;
  out.reserve(src_list.size());
  for (const std::string_view& src : src_list) {
    const auto offset = static_cast<size_t>(src.data() - src_view.data());
    out.push_back(dst_view.substr(offset, src.length()));
  }
  return out;
}

}  // namespace

// Builds the file content from `body`: `data` is initialized from `body`,
// `data_view` is a view over `data`, and `lines` is the result of splitting
// that view with SplitLines().
// NOTE(review): each initializer reads the previous member, so this relies on
// the members being declared in the order data, data_view, lines - confirm in
// source.h.
Source::FileContent::FileContent(const std::string& body)
    : data(body), data_view(data), lines(SplitLines(data_view)) {}

// Copy constructor. `data` is copied from `rhs`, and the views are rebuilt
// rather than copied: `data_view` is taken over this object's own `data`, and
// each entry of `rhs.lines` is translated by CopyRelativeStringViews() into
// the equivalent range of the new `data_view`, so the copy's views do not
// refer to `rhs.data`.
Source::FileContent::FileContent(const FileContent& rhs)
    : data(rhs.data),
      data_view(data),
      lines(CopyRelativeStringViews(rhs.lines, rhs.data_view, data_view)) {}

// Compiler-generated destructor, defined out of line in this translation unit.
Source::FileContent::~FileContent() = default;

// Compiler-generated destructor, defined out of line in this translation unit.
Source::File::~File() = default;

// Writes a human-readable description of `source` to `out` and returns `out`.
//
// Output format:
//  * "<path>:" when the source has a file.
//  * "<line>:" followed by the column (if non-zero) when the begin line is
//    non-zero.
//  * When both a file and a begin line are present, a blank line followed by
//    each source line of the range that exists in the file, each with a marker
//    line beneath it that underlines the covered columns with '^'.
//
// Lines and columns are treated as 1-based. All marker arithmetic saturates at
// zero so that a zero column, an end column that precedes the begin column, or
// a begin column beyond the end of the line cannot wrap around to a huge
// unsigned count and flood the stream.
std::ostream& operator<<(std::ostream& out, const Source& source) {
  auto rng = source.range;

  if (source.file) {
    out << source.file->path << ":";
  }
  if (!rng.begin.line) {
    return out;
  }

  out << rng.begin.line << ":";
  if (rng.begin.column) {
    out << rng.begin.column;
  }
  if (!source.file) {
    return out;
  }

  out << std::endl << std::endl;

  // a - b, clamped to 0 instead of wrapping around.
  auto minus = [](size_t a, size_t b) -> size_t { return a > b ? a - b : 0; };

  auto repeat = [&](char c, size_t n) {
    while (n--) {
      out << c;
    }
  };

  const auto& lines = source.file->content.lines;
  const size_t begin_col = rng.begin.column;
  const size_t end_col = rng.end.column;
  // Number of characters that precede the first marked column.
  const size_t begin_pad = minus(begin_col, 1);

  for (size_t line = rng.begin.line; line <= rng.end.line; line++) {
    if (line > lines.size()) {
      // Every following line is past the end of the file as well.
      break;
    }

    const auto& text = lines[line - 1];
    const size_t len = text.size();

    out << text;
    out << std::endl;

    const bool is_first = (line == rng.begin.line);
    const bool is_last = (line == rng.end.line);
    if (is_first && is_last) {
      // Single line: always show at least one marker.
      repeat(' ', begin_pad);
      repeat('^', std::max<size_t>(minus(end_col, begin_col), 1));
    } else if (is_first) {
      // Start of multi-line: mark from the begin column to the end of the line.
      repeat(' ', begin_pad);
      repeat('^', minus(len, begin_pad));
    } else if (is_last) {
      // End of multi-line: mark from the start of the line up to the end
      // column.
      repeat('^', minus(end_col, 1));
    } else {
      // Middle of multi-line: mark the whole line.
      repeat('^', len);
    }

    out << std::endl;
  }
  return out;
}

}  // namespace tint