Move text/unicode into utils.

This CL consolidates the Unicode code into utils: the sources move from src/tint/text/ to src/tint/utils/, and the tint::text namespace is folded into tint::utils. These are utility libraries, so the extra namespace doesn't add much.

Change-Id: Id0de612b6be036392a3cb018bfe66733f2f1ebcb
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127403
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
2025-06-05 06:03:34 +00:00 · 2023-04-20 10:06:25 +00:00 · 2023-04-20 10:06:25 +00:00 · 517278ac08
commit 517278ac08
parent 9e9c456075
14 changed files with 53 additions and 77 deletions
--- a/include/tint/tint.h
+++ b/include/tint/tint.h
@ -24,7 +24,6 @@
 #include "src/tint/diagnostic/printer.h"
 #include "src/tint/inspector/inspector.h"
 #include "src/tint/reader/reader.h"
-#include "src/tint/text/unicode.h"
 #include "src/tint/transform/first_index_offset.h"
 #include "src/tint/transform/manager.h"
 #include "src/tint/transform/renamer.h"
@ -32,6 +31,7 @@
 #include "src/tint/transform/substitute_override.h"
 #include "src/tint/transform/vertex_pulling.h"
 #include "src/tint/type/manager.h"
+#include "src/tint/utils/unicode.h"
 #include "src/tint/writer/array_length_from_uniform_options.h"
 #include "src/tint/writer/binding_point.h"
 #include "src/tint/writer/binding_remapper_options.h"
--- a/src/dawn/native/CompilationMessages.cpp
+++ b/src/dawn/native/CompilationMessages.cpp
@ -37,14 +37,14 @@ WGPUCompilationMessageType tintSeverityToMessageType(tint::diag::Severity severi
 }  // anonymous namespace

 ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) {
-    if (tint::text::utf8::IsASCII(utf8String)) {
+    if (tint::utils::utf8::IsASCII(utf8String)) {
        return utf8String.size();
    }

    uint64_t numberOfUTF16CodeUnits = 0;
    std::string_view remaining = utf8String;
    while (!remaining.empty()) {
-        auto [codePoint, utf8CharacterByteLength] = tint::text::utf8::Decode(remaining);
+        auto [codePoint, utf8CharacterByteLength] = tint::utils::utf8::Decode(remaining);
        // Directly return as something wrong has happened during the UTF-8 decoding.
        if (utf8CharacterByteLength == 0) {
            return DAWN_INTERNAL_ERROR("Fail to decode the unicode string");
@ -87,7 +87,7 @@ void OwnedCompilationMessages::AddMessageForTesting(std::string message,
    ASSERT(mCompilationInfo.messages == nullptr);

    // Message can only contain ascii characters.
-    ASSERT(tint::text::utf8::IsASCII(message));
+    ASSERT(tint::utils::utf8::IsASCII(message));

    mMessageStrings.push_back(message);
    mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum,
--- a/src/dawn/tests/unittests/UnicodeTests.cpp
+++ b/src/dawn/tests/unittests/UnicodeTests.cpp
@ -23,7 +23,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
        uint64_t lengthInUTF16;
    };

-    // Referenced from src/tint/text/unicode_test.cc
+    // Referenced from src/tint/utils/unicode_test.cc
    constexpr std::array<TestCase, 12> kTestCases = {{
        {"", 0},
        {"abc", 3},
@ -48,7 +48,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
 }

 TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) {
-    // Referenced from src/tint/text/unicode_test.cc
+    // Referenced from src/tint/utils/unicode_test.cc
    constexpr std::array<const char*, 12> kTestCases = {{
        "\xed\xa0\x80",  // CodePoint == 0xD7FF + 1
        "\xed\xbf\xbf",  // CodePoint == 0xE000 - 1
--- a/src/tint/BUILD.gn
+++ b/src/tint/BUILD.gn
@ -243,6 +243,8 @@ libtint_source_set("libtint_base_src") {
    "utils/string_stream.cc",
    "utils/string_stream.h",
    "utils/traits.h",
+    "utils/unicode.cc",
+    "utils/unicode.h",
    "utils/unique_allocator.h",
    "utils/unique_vector.h",
    "utils/vector.h",
@ -255,8 +257,6 @@ libtint_source_set("libtint_base_src") {
  } else {
    sources += [ "diagnostic/printer_other.cc" ]
  }
-
-  deps = [ ":libtint_text_src" ]
 }

 libtint_source_set("libtint_clone_context_hdrs") {
@ -303,7 +303,6 @@ libtint_source_set("libtint_program_src") {
    ":libtint_builtins_src",
    ":libtint_constant_src",
    ":libtint_sem_src",
-    ":libtint_text_src",
    ":libtint_type_src",
  ]
 }
@ -333,13 +332,6 @@ libtint_source_set("libtint_inspector_src") {
  ]
 }

-libtint_source_set("libtint_text_src") {
-  sources = [
-    "text/unicode.cc",
-    "text/unicode.h",
-  ]
-}
-
 libtint_source_set("libtint_transform_src") {
  sources = [
    "transform/add_block_attribute.cc",
@ -451,7 +443,6 @@ libtint_source_set("libtint_transform_src") {
    ":libtint_builtins_src",
    ":libtint_program_src",
    ":libtint_sem_src",
-    ":libtint_text_src",
    ":libtint_type_src",
  ]
 }
@ -974,7 +965,6 @@ libtint_source_set("libtint_wgsl_reader_src") {
    ":libtint_builtins_src",
    ":libtint_program_src",
    ":libtint_reader_src",
-    ":libtint_text_src",
    ":libtint_type_src",
  ]
 }
@ -1089,7 +1079,6 @@ source_set("libtint") {
    ":libtint_inspector_src",
    ":libtint_program_src",
    ":libtint_sem_src",
-    ":libtint_text_src",
    ":libtint_transform_src",
    ":libtint_type_src",
    ":libtint_writer_src",
@ -1511,11 +1500,6 @@ if (tint_build_unittests) {
    ]
  }

-  tint_unittests_source_set("tint_unittests_text_src") {
-    sources = [ "text/unicode_test.cc" ]
-    deps = [ ":libtint_text_src" ]
-  }
-
  tint_unittests_source_set("tint_unittests_transform_src") {
    sources = [
      "transform/add_block_attribute_test.cc",
@ -1609,6 +1593,7 @@ if (tint_build_unittests) {
      "utils/string_test.cc",
      "utils/traits_test.cc",
      "utils/transform_test.cc",
+      "utils/unicode_test.cc",
      "utils/unique_allocator_test.cc",
      "utils/unique_vector_test.cc",
      "utils/vector_test.cc",
@ -2036,7 +2021,6 @@ if (tint_build_unittests) {
      ":tint_unittests_inspector_src",
      ":tint_unittests_resolver_src",
      ":tint_unittests_sem_src",
-      ":tint_unittests_text_src",
      ":tint_unittests_transform_src",
      ":tint_unittests_type_src",
      ":tint_unittests_utils_src",
--- a/src/tint/CMakeLists.txt
+++ b/src/tint/CMakeLists.txt
@ -66,10 +66,10 @@ add_library(tint_diagnostic_utils
  diagnostic/formatter.h
  diagnostic/printer.cc
  diagnostic/printer.h
-  text/unicode.cc
-  text/unicode.h
  utils/debugger.cc
  utils/debugger.h
+  utils/unicode.cc
+  utils/unicode.h
 )
 tint_default_compile_options(tint_diagnostic_utils)

@ -967,7 +967,6 @@ if(TINT_BUILD_TESTS)
    symbol_table_test.cc
    symbol_test.cc
    test_main.cc
-    text/unicode_test.cc
    transform/transform_test.cc
    type/array_test.cc
    type/atomic_test.cc
@ -1014,6 +1013,7 @@ if(TINT_BUILD_TESTS)
    utils/string_test.cc
    utils/traits_test.cc
    utils/transform_test.cc
+    utils/unicode_test.cc
    utils/unique_allocator_test.cc
    utils/unique_vector_test.cc
    utils/vector_test.cc
--- a/src/tint/reader/wgsl/lexer.cc
+++ b/src/tint/reader/wgsl/lexer.cc
@ -28,7 +28,7 @@
 #include "absl/strings/charconv.h"
 #include "src/tint/debug.h"
 #include "src/tint/number.h"
-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 namespace tint::reader::wgsl {
 namespace {
@ -45,16 +45,16 @@ bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t
    // See https://www.w3.org/TR/WGSL/#blankspace

    auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
-    auto [cp, n] = text::utf8::Decode(utf8, str.size() - i);
+    auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);

    if (n == 0) {
        return false;
    }

-    static const auto kSpace = text::CodePoint(0x0020);  // space
-    static const auto kHTab = text::CodePoint(0x0009);   // horizontal tab
-    static const auto kL2R = text::CodePoint(0x200E);    // left-to-right mark
-    static const auto kR2L = text::CodePoint(0x200F);    // right-to-left mark
+    static const auto kSpace = utils::CodePoint(0x0020);  // space
+    static const auto kHTab = utils::CodePoint(0x0009);   // horizontal tab
+    static const auto kL2R = utils::CodePoint(0x200E);    // left-to-right mark
+    static const auto kR2L = utils::CodePoint(0x200F);    // right-to-left mark

    if (cp == kSpace || cp == kHTab || cp == kL2R || cp == kR2L) {
        *is_blankspace = true;
@ -959,12 +959,12 @@ Token Lexer::try_ident() {
    // Must begin with an XID_Source unicode character, or underscore
    {
        auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
-        auto [code_point, n] = text::utf8::Decode(utf8, length() - pos());
+        auto [code_point, n] = utils::utf8::Decode(utf8, length() - pos());
        if (n == 0) {
            advance();  // Skip the bad byte.
            return {Token::Type::kError, source, "invalid UTF-8"};
        }
-        if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
+        if (code_point != utils::CodePoint('_') && !code_point.IsXIDStart()) {
            return {};
        }
        // Consume start codepoint
@ -974,7 +974,7 @@ Token Lexer::try_ident() {
    while (!is_eol()) {
        // Must continue with an XID_Continue unicode character
        auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
-        auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos());
+        auto [code_point, n] = utils::utf8::Decode(utf8, line().size() - pos());
        if (n == 0) {
            advance();  // Skip the bad byte.
            return {Token::Type::kError, source, "invalid UTF-8"};
--- a/src/tint/source.cc
+++ b/src/tint/source.cc
@ -18,7 +18,7 @@
 #include <string_view>
 #include <utility>

-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 namespace tint {
 namespace {
@ -27,19 +27,19 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
    // See https://www.w3.org/TR/WGSL/#blankspace

    auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
-    auto [cp, n] = text::utf8::Decode(utf8, str.size() - i);
+    auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);

    if (n == 0) {
        return false;
    }

-    static const auto kLF = text::CodePoint(0x000A);    // line feed
-    static const auto kVTab = text::CodePoint(0x000B);  // vertical tab
-    static const auto kFF = text::CodePoint(0x000C);    // form feed
-    static const auto kNL = text::CodePoint(0x0085);    // next line
-    static const auto kCR = text::CodePoint(0x000D);    // carriage return
-    static const auto kLS = text::CodePoint(0x2028);    // line separator
-    static const auto kPS = text::CodePoint(0x2029);    // parargraph separator
+    static const auto kLF = utils::CodePoint(0x000A);    // line feed
+    static const auto kVTab = utils::CodePoint(0x000B);  // vertical tab
+    static const auto kFF = utils::CodePoint(0x000C);    // form feed
+    static const auto kNL = utils::CodePoint(0x0085);    // next line
+    static const auto kCR = utils::CodePoint(0x000D);    // carriage return
+    static const auto kLS = utils::CodePoint(0x2028);    // line separator
+    static const auto kPS = utils::CodePoint(0x2029);    // parargraph separator

    if (cp == kLF || cp == kVTab || cp == kFF || cp == kNL || cp == kPS || cp == kLS) {
        *is_line_break = true;
@ -54,7 +54,7 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*

        if (auto next_i = i + n; next_i < str.size()) {
            auto* next_utf8 = reinterpret_cast<const uint8_t*>(&str[next_i]);
-            auto [next_cp, next_n] = text::utf8::Decode(next_utf8, str.size() - next_i);
+            auto [next_cp, next_n] = utils::utf8::Decode(next_utf8, str.size() - next_i);

            if (next_n == 0) {
                return false;
--- a/src/tint/transform/renamer.cc
+++ b/src/tint/transform/renamer.cc
@ -25,7 +25,7 @@
 #include "src/tint/sem/value_constructor.h"
 #include "src/tint/sem/value_conversion.h"
 #include "src/tint/switch.h"
-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
 TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
@ -1333,7 +1333,7 @@ Transform::ApplyResult Renamer::Apply(const Program* src,
            return true;
        }
        auto name = symbol.Name();
-        if (!text::utf8::IsASCII(name)) {
+        if (!utils::utf8::IsASCII(name)) {
            // name is non-ascii. All of the backend keywords are ascii, so rename if we're not
            // preserving unicode symbols.
            return !preserve_unicode;
--- a/src/tint/transform/truncate_interstage_variables.cc
+++ b/src/tint/transform/truncate_interstage_variables.cc
@ -24,7 +24,7 @@
 #include "src/tint/sem/member_accessor_expression.h"
 #include "src/tint/sem/statement.h"
 #include "src/tint/sem/variable.h"
-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables);
 TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables::Config);
--- a/src/tint/utils/string_stream.cc
+++ b/src/tint/utils/string_stream.cc
@ -24,10 +24,6 @@ StringStream::StringStream() {

 StringStream::~StringStream() = default;

-}  // namespace tint::utils
-
-namespace tint::text {
-
 utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point) {
    if (code_point < 0x7f) {
        // See https://en.cppreference.com/w/cpp/language/escape
@ -52,4 +48,4 @@ utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point)
    return out << "'U+" << std::hex << code_point.value << "'";
 }

-}  // namespace tint::text
+}  // namespace tint::utils
--- a/src/tint/utils/string_stream.h
+++ b/src/tint/utils/string_stream.h
@ -23,7 +23,7 @@
 #include <string>
 #include <utility>

-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 namespace tint::utils {

@ -183,16 +183,12 @@ class StringStream {
    std::stringstream sstream_;
 };

-}  // namespace tint::utils
-
-namespace tint::text {
-
 /// Writes the CodePoint to the stream.
 /// @param out the stream to write to
 /// @param codepoint the CodePoint to write
 /// @returns out so calls can be chained
 utils::StringStream& operator<<(utils::StringStream& out, CodePoint codepoint);

-}  // namespace tint::text
+}  // namespace tint::utils

 #endif  // SRC_TINT_UTILS_STRING_STREAM_H_
--- a/src/tint/utils/unicode.cc
+++ b/src/tint/utils/unicode.cc
@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 #include <algorithm>

-namespace tint::text {
+namespace tint::utils {
 namespace {

 struct CodePointRange {
@ -418,4 +418,4 @@ bool IsASCII(std::string_view str) {

 }  // namespace utf8

-}  // namespace tint::text
+}  // namespace tint::utils
--- a/src/tint/utils/unicode.h
+++ b/src/tint/utils/unicode.h
@ -12,15 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifndef SRC_TINT_TEXT_UNICODE_H_
-#define SRC_TINT_TEXT_UNICODE_H_
+#ifndef SRC_TINT_UTILS_UNICODE_H_
+#define SRC_TINT_UTILS_UNICODE_H_

 #include <cstddef>
 #include <cstdint>
 #include <string_view>
 #include <utility>

-namespace tint::text {
+namespace tint::utils {

 /// CodePoint is a unicode code point.
 struct CodePoint {
@ -75,6 +75,6 @@ bool IsASCII(std::string_view);

 }  // namespace utf8

-}  // namespace tint::text
+}  // namespace tint::utils

-#endif  // SRC_TINT_TEXT_UNICODE_H_
+#endif  // SRC_TINT_UTILS_UNICODE_H_
--- a/src/tint/utils/unicode_test.cc
+++ b/src/tint/utils/unicode_test.cc
@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "src/tint/text/unicode.h"
+#include "src/tint/utils/unicode.h"

 #include <string>
 #include <vector>
@ -22,7 +22,7 @@
 /// Helper for constructing a CodePoint
 #define C(x) CodePoint(x)

-namespace tint::text {
+namespace tint::utils {

 ////////////////////////////////////////////////////////////////////////////////
 // CodePoint character set tests
@ -335,16 +335,16 @@ INSTANTIATE_TEST_SUITE_P(Hindi,
                                 {C(0x0928), 3},  // न
                                 {C(0x092e), 3},  // म
                                 {C(0x0938), 3},  // स
-                                 {C(0x094d), 3},  // ्
+                                 {C(0x094d), 3},  // ् //
                                 {C(0x0924), 3},  // त
-                                 {C(0x0947), 3},  // े
+                                 {C(0x0947), 3},  // े //
                                 {C(' '), 1},
                                 {C(0x0926), 3},  // द
-                                 {C(0x0941), 3},  // ु
+                                 {C(0x0941), 3},  // ु //
                                 {C(0x0928), 3},  // न
-                                 {C(0x093f), 3},  // ि
+                                 {C(0x093f), 3},  // ि //
                                 {C(0x092f), 3},  // य
-                                 {C(0x093e), 3},  // ा
+                                 {C(0x093e), 3},  // ा //
                             },
                         }}));

@ -487,4 +487,4 @@ INSTANTIATE_TEST_SUITE_P(Invalid,

 }  // namespace

-}  // namespace tint::text
+}  // namespace tint::utils