Move text/unicode into utils.

This CL moves the Unicode code from src/tint/text into src/tint/utils, and
the tint::text namespace into tint::utils. These are utility libraries, so
the separate namespace doesn't add much.

Change-Id: Id0de612b6be036392a3cb018bfe66733f2f1ebcb
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127403
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
dan sinclair 2023-04-20 10:06:25 +00:00 committed by Dawn LUCI CQ
parent 9e9c456075
commit 517278ac08
14 changed files with 53 additions and 77 deletions

View File

@ -24,7 +24,6 @@
#include "src/tint/diagnostic/printer.h" #include "src/tint/diagnostic/printer.h"
#include "src/tint/inspector/inspector.h" #include "src/tint/inspector/inspector.h"
#include "src/tint/reader/reader.h" #include "src/tint/reader/reader.h"
#include "src/tint/text/unicode.h"
#include "src/tint/transform/first_index_offset.h" #include "src/tint/transform/first_index_offset.h"
#include "src/tint/transform/manager.h" #include "src/tint/transform/manager.h"
#include "src/tint/transform/renamer.h" #include "src/tint/transform/renamer.h"
@ -32,6 +31,7 @@
#include "src/tint/transform/substitute_override.h" #include "src/tint/transform/substitute_override.h"
#include "src/tint/transform/vertex_pulling.h" #include "src/tint/transform/vertex_pulling.h"
#include "src/tint/type/manager.h" #include "src/tint/type/manager.h"
#include "src/tint/utils/unicode.h"
#include "src/tint/writer/array_length_from_uniform_options.h" #include "src/tint/writer/array_length_from_uniform_options.h"
#include "src/tint/writer/binding_point.h" #include "src/tint/writer/binding_point.h"
#include "src/tint/writer/binding_remapper_options.h" #include "src/tint/writer/binding_remapper_options.h"

View File

@ -37,14 +37,14 @@ WGPUCompilationMessageType tintSeverityToMessageType(tint::diag::Severity severi
} // anonymous namespace } // anonymous namespace
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) { ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) {
if (tint::text::utf8::IsASCII(utf8String)) { if (tint::utils::utf8::IsASCII(utf8String)) {
return utf8String.size(); return utf8String.size();
} }
uint64_t numberOfUTF16CodeUnits = 0; uint64_t numberOfUTF16CodeUnits = 0;
std::string_view remaining = utf8String; std::string_view remaining = utf8String;
while (!remaining.empty()) { while (!remaining.empty()) {
auto [codePoint, utf8CharacterByteLength] = tint::text::utf8::Decode(remaining); auto [codePoint, utf8CharacterByteLength] = tint::utils::utf8::Decode(remaining);
// Return immediately: a byte length of 0 means the UTF-8 decoding failed. // Return immediately: a byte length of 0 means the UTF-8 decoding failed.
if (utf8CharacterByteLength == 0) { if (utf8CharacterByteLength == 0) {
return DAWN_INTERNAL_ERROR("Fail to decode the unicode string"); return DAWN_INTERNAL_ERROR("Fail to decode the unicode string");
@ -87,7 +87,7 @@ void OwnedCompilationMessages::AddMessageForTesting(std::string message,
ASSERT(mCompilationInfo.messages == nullptr); ASSERT(mCompilationInfo.messages == nullptr);
// Message can only contain ascii characters. // Message can only contain ascii characters.
ASSERT(tint::text::utf8::IsASCII(message)); ASSERT(tint::utils::utf8::IsASCII(message));
mMessageStrings.push_back(message); mMessageStrings.push_back(message);
mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum, mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum,

View File

@ -23,7 +23,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
uint64_t lengthInUTF16; uint64_t lengthInUTF16;
}; };
// Referenced from src/tint/text/unicode_test.cc // Referenced from src/tint/utils/unicode_test.cc
constexpr std::array<TestCase, 12> kTestCases = {{ constexpr std::array<TestCase, 12> kTestCases = {{
{"", 0}, {"", 0},
{"abc", 3}, {"abc", 3},
@ -48,7 +48,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
} }
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) { TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) {
// Referenced from src/tint/text/unicode_test.cc // Referenced from src/tint/utils/unicode_test.cc
constexpr std::array<const char*, 12> kTestCases = {{ constexpr std::array<const char*, 12> kTestCases = {{
"\xed\xa0\x80", // CodePoint == 0xD7FF + 1 "\xed\xa0\x80", // CodePoint == 0xD7FF + 1
"\xed\xbf\xbf", // CodePoint == 0xE000 - 1 "\xed\xbf\xbf", // CodePoint == 0xE000 - 1

View File

@ -243,6 +243,8 @@ libtint_source_set("libtint_base_src") {
"utils/string_stream.cc", "utils/string_stream.cc",
"utils/string_stream.h", "utils/string_stream.h",
"utils/traits.h", "utils/traits.h",
"utils/unicode.cc",
"utils/unicode.h",
"utils/unique_allocator.h", "utils/unique_allocator.h",
"utils/unique_vector.h", "utils/unique_vector.h",
"utils/vector.h", "utils/vector.h",
@ -255,8 +257,6 @@ libtint_source_set("libtint_base_src") {
} else { } else {
sources += [ "diagnostic/printer_other.cc" ] sources += [ "diagnostic/printer_other.cc" ]
} }
deps = [ ":libtint_text_src" ]
} }
libtint_source_set("libtint_clone_context_hdrs") { libtint_source_set("libtint_clone_context_hdrs") {
@ -303,7 +303,6 @@ libtint_source_set("libtint_program_src") {
":libtint_builtins_src", ":libtint_builtins_src",
":libtint_constant_src", ":libtint_constant_src",
":libtint_sem_src", ":libtint_sem_src",
":libtint_text_src",
":libtint_type_src", ":libtint_type_src",
] ]
} }
@ -333,13 +332,6 @@ libtint_source_set("libtint_inspector_src") {
] ]
} }
libtint_source_set("libtint_text_src") {
sources = [
"text/unicode.cc",
"text/unicode.h",
]
}
libtint_source_set("libtint_transform_src") { libtint_source_set("libtint_transform_src") {
sources = [ sources = [
"transform/add_block_attribute.cc", "transform/add_block_attribute.cc",
@ -451,7 +443,6 @@ libtint_source_set("libtint_transform_src") {
":libtint_builtins_src", ":libtint_builtins_src",
":libtint_program_src", ":libtint_program_src",
":libtint_sem_src", ":libtint_sem_src",
":libtint_text_src",
":libtint_type_src", ":libtint_type_src",
] ]
} }
@ -974,7 +965,6 @@ libtint_source_set("libtint_wgsl_reader_src") {
":libtint_builtins_src", ":libtint_builtins_src",
":libtint_program_src", ":libtint_program_src",
":libtint_reader_src", ":libtint_reader_src",
":libtint_text_src",
":libtint_type_src", ":libtint_type_src",
] ]
} }
@ -1089,7 +1079,6 @@ source_set("libtint") {
":libtint_inspector_src", ":libtint_inspector_src",
":libtint_program_src", ":libtint_program_src",
":libtint_sem_src", ":libtint_sem_src",
":libtint_text_src",
":libtint_transform_src", ":libtint_transform_src",
":libtint_type_src", ":libtint_type_src",
":libtint_writer_src", ":libtint_writer_src",
@ -1511,11 +1500,6 @@ if (tint_build_unittests) {
] ]
} }
tint_unittests_source_set("tint_unittests_text_src") {
sources = [ "text/unicode_test.cc" ]
deps = [ ":libtint_text_src" ]
}
tint_unittests_source_set("tint_unittests_transform_src") { tint_unittests_source_set("tint_unittests_transform_src") {
sources = [ sources = [
"transform/add_block_attribute_test.cc", "transform/add_block_attribute_test.cc",
@ -1609,6 +1593,7 @@ if (tint_build_unittests) {
"utils/string_test.cc", "utils/string_test.cc",
"utils/traits_test.cc", "utils/traits_test.cc",
"utils/transform_test.cc", "utils/transform_test.cc",
"utils/unicode_test.cc",
"utils/unique_allocator_test.cc", "utils/unique_allocator_test.cc",
"utils/unique_vector_test.cc", "utils/unique_vector_test.cc",
"utils/vector_test.cc", "utils/vector_test.cc",
@ -2036,7 +2021,6 @@ if (tint_build_unittests) {
":tint_unittests_inspector_src", ":tint_unittests_inspector_src",
":tint_unittests_resolver_src", ":tint_unittests_resolver_src",
":tint_unittests_sem_src", ":tint_unittests_sem_src",
":tint_unittests_text_src",
":tint_unittests_transform_src", ":tint_unittests_transform_src",
":tint_unittests_type_src", ":tint_unittests_type_src",
":tint_unittests_utils_src", ":tint_unittests_utils_src",

View File

@ -66,10 +66,10 @@ add_library(tint_diagnostic_utils
diagnostic/formatter.h diagnostic/formatter.h
diagnostic/printer.cc diagnostic/printer.cc
diagnostic/printer.h diagnostic/printer.h
text/unicode.cc
text/unicode.h
utils/debugger.cc utils/debugger.cc
utils/debugger.h utils/debugger.h
utils/unicode.cc
utils/unicode.h
) )
tint_default_compile_options(tint_diagnostic_utils) tint_default_compile_options(tint_diagnostic_utils)
@ -967,7 +967,6 @@ if(TINT_BUILD_TESTS)
symbol_table_test.cc symbol_table_test.cc
symbol_test.cc symbol_test.cc
test_main.cc test_main.cc
text/unicode_test.cc
transform/transform_test.cc transform/transform_test.cc
type/array_test.cc type/array_test.cc
type/atomic_test.cc type/atomic_test.cc
@ -1014,6 +1013,7 @@ if(TINT_BUILD_TESTS)
utils/string_test.cc utils/string_test.cc
utils/traits_test.cc utils/traits_test.cc
utils/transform_test.cc utils/transform_test.cc
utils/unicode_test.cc
utils/unique_allocator_test.cc utils/unique_allocator_test.cc
utils/unique_vector_test.cc utils/unique_vector_test.cc
utils/vector_test.cc utils/vector_test.cc

View File

@ -28,7 +28,7 @@
#include "absl/strings/charconv.h" #include "absl/strings/charconv.h"
#include "src/tint/debug.h" #include "src/tint/debug.h"
#include "src/tint/number.h" #include "src/tint/number.h"
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
namespace tint::reader::wgsl { namespace tint::reader::wgsl {
namespace { namespace {
@ -45,16 +45,16 @@ bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t
// See https://www.w3.org/TR/WGSL/#blankspace // See https://www.w3.org/TR/WGSL/#blankspace
auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]); auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
auto [cp, n] = text::utf8::Decode(utf8, str.size() - i); auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);
if (n == 0) { if (n == 0) {
return false; return false;
} }
static const auto kSpace = text::CodePoint(0x0020); // space static const auto kSpace = utils::CodePoint(0x0020); // space
static const auto kHTab = text::CodePoint(0x0009); // horizontal tab static const auto kHTab = utils::CodePoint(0x0009); // horizontal tab
static const auto kL2R = text::CodePoint(0x200E); // left-to-right mark static const auto kL2R = utils::CodePoint(0x200E); // left-to-right mark
static const auto kR2L = text::CodePoint(0x200F); // right-to-left mark static const auto kR2L = utils::CodePoint(0x200F); // right-to-left mark
if (cp == kSpace || cp == kHTab || cp == kL2R || cp == kR2L) { if (cp == kSpace || cp == kHTab || cp == kL2R || cp == kR2L) {
*is_blankspace = true; *is_blankspace = true;
@ -959,12 +959,12 @@ Token Lexer::try_ident() {
// Must begin with an XID_Start unicode character, or underscore // Must begin with an XID_Start unicode character, or underscore
{ {
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos())); auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = text::utf8::Decode(utf8, length() - pos()); auto [code_point, n] = utils::utf8::Decode(utf8, length() - pos());
if (n == 0) { if (n == 0) {
advance(); // Skip the bad byte. advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"}; return {Token::Type::kError, source, "invalid UTF-8"};
} }
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) { if (code_point != utils::CodePoint('_') && !code_point.IsXIDStart()) {
return {}; return {};
} }
// Consume start codepoint // Consume start codepoint
@ -974,7 +974,7 @@ Token Lexer::try_ident() {
while (!is_eol()) { while (!is_eol()) {
// Must continue with an XID_Continue unicode character // Must continue with an XID_Continue unicode character
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos())); auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos()); auto [code_point, n] = utils::utf8::Decode(utf8, line().size() - pos());
if (n == 0) { if (n == 0) {
advance(); // Skip the bad byte. advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"}; return {Token::Type::kError, source, "invalid UTF-8"};

View File

@ -18,7 +18,7 @@
#include <string_view> #include <string_view>
#include <utility> #include <utility>
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
namespace tint { namespace tint {
namespace { namespace {
@ -27,19 +27,19 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
// See https://www.w3.org/TR/WGSL/#blankspace // See https://www.w3.org/TR/WGSL/#blankspace
auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]); auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
auto [cp, n] = text::utf8::Decode(utf8, str.size() - i); auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);
if (n == 0) { if (n == 0) {
return false; return false;
} }
static const auto kLF = text::CodePoint(0x000A); // line feed static const auto kLF = utils::CodePoint(0x000A); // line feed
static const auto kVTab = text::CodePoint(0x000B); // vertical tab static const auto kVTab = utils::CodePoint(0x000B); // vertical tab
static const auto kFF = text::CodePoint(0x000C); // form feed static const auto kFF = utils::CodePoint(0x000C); // form feed
static const auto kNL = text::CodePoint(0x0085); // next line static const auto kNL = utils::CodePoint(0x0085); // next line
static const auto kCR = text::CodePoint(0x000D); // carriage return static const auto kCR = utils::CodePoint(0x000D); // carriage return
static const auto kLS = text::CodePoint(0x2028); // line separator static const auto kLS = utils::CodePoint(0x2028); // line separator
static const auto kPS = text::CodePoint(0x2029); // paragraph separator static const auto kPS = utils::CodePoint(0x2029); // paragraph separator
if (cp == kLF || cp == kVTab || cp == kFF || cp == kNL || cp == kPS || cp == kLS) { if (cp == kLF || cp == kVTab || cp == kFF || cp == kNL || cp == kPS || cp == kLS) {
*is_line_break = true; *is_line_break = true;
@ -54,7 +54,7 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
if (auto next_i = i + n; next_i < str.size()) { if (auto next_i = i + n; next_i < str.size()) {
auto* next_utf8 = reinterpret_cast<const uint8_t*>(&str[next_i]); auto* next_utf8 = reinterpret_cast<const uint8_t*>(&str[next_i]);
auto [next_cp, next_n] = text::utf8::Decode(next_utf8, str.size() - next_i); auto [next_cp, next_n] = utils::utf8::Decode(next_utf8, str.size() - next_i);
if (next_n == 0) { if (next_n == 0) {
return false; return false;

View File

@ -25,7 +25,7 @@
#include "src/tint/sem/value_constructor.h" #include "src/tint/sem/value_constructor.h"
#include "src/tint/sem/value_conversion.h" #include "src/tint/sem/value_conversion.h"
#include "src/tint/switch.h" #include "src/tint/switch.h"
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer); TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data); TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
@ -1333,7 +1333,7 @@ Transform::ApplyResult Renamer::Apply(const Program* src,
return true; return true;
} }
auto name = symbol.Name(); auto name = symbol.Name();
if (!text::utf8::IsASCII(name)) { if (!utils::utf8::IsASCII(name)) {
// name is non-ascii. All of the backend keywords are ascii, so rename if we're not // name is non-ascii. All of the backend keywords are ascii, so rename if we're not
// preserving unicode symbols. // preserving unicode symbols.
return !preserve_unicode; return !preserve_unicode;

View File

@ -24,7 +24,7 @@
#include "src/tint/sem/member_accessor_expression.h" #include "src/tint/sem/member_accessor_expression.h"
#include "src/tint/sem/statement.h" #include "src/tint/sem/statement.h"
#include "src/tint/sem/variable.h" #include "src/tint/sem/variable.h"
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables); TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables);
TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables::Config); TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables::Config);

View File

@ -24,10 +24,6 @@ StringStream::StringStream() {
StringStream::~StringStream() = default; StringStream::~StringStream() = default;
} // namespace tint::utils
namespace tint::text {
utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point) { utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point) {
if (code_point < 0x7f) { if (code_point < 0x7f) {
// See https://en.cppreference.com/w/cpp/language/escape // See https://en.cppreference.com/w/cpp/language/escape
@ -52,4 +48,4 @@ utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point)
return out << "'U+" << std::hex << code_point.value << "'"; return out << "'U+" << std::hex << code_point.value << "'";
} }
} // namespace tint::text } // namespace tint::utils

View File

@ -23,7 +23,7 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
namespace tint::utils { namespace tint::utils {
@ -183,16 +183,12 @@ class StringStream {
std::stringstream sstream_; std::stringstream sstream_;
}; };
} // namespace tint::utils
namespace tint::text {
/// Writes the CodePoint to the stream. /// Writes the CodePoint to the stream.
/// @param out the stream to write to /// @param out the stream to write to
/// @param codepoint the CodePoint to write /// @param codepoint the CodePoint to write
/// @returns out so calls can be chained /// @returns out so calls can be chained
utils::StringStream& operator<<(utils::StringStream& out, CodePoint codepoint); utils::StringStream& operator<<(utils::StringStream& out, CodePoint codepoint);
} // namespace tint::text } // namespace tint::utils
#endif // SRC_TINT_UTILS_STRING_STREAM_H_ #endif // SRC_TINT_UTILS_STRING_STREAM_H_

View File

@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
#include <algorithm> #include <algorithm>
namespace tint::text { namespace tint::utils {
namespace { namespace {
struct CodePointRange { struct CodePointRange {
@ -418,4 +418,4 @@ bool IsASCII(std::string_view str) {
} // namespace utf8 } // namespace utf8
} // namespace tint::text } // namespace tint::utils

View File

@ -12,15 +12,15 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef SRC_TINT_TEXT_UNICODE_H_ #ifndef SRC_TINT_UTILS_UNICODE_H_
#define SRC_TINT_TEXT_UNICODE_H_ #define SRC_TINT_UTILS_UNICODE_H_
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <string_view> #include <string_view>
#include <utility> #include <utility>
namespace tint::text { namespace tint::utils {
/// CodePoint is a unicode code point. /// CodePoint is a unicode code point.
struct CodePoint { struct CodePoint {
@ -75,6 +75,6 @@ bool IsASCII(std::string_view);
} // namespace utf8 } // namespace utf8
} // namespace tint::text } // namespace tint::utils
#endif // SRC_TINT_TEXT_UNICODE_H_ #endif // SRC_TINT_UTILS_UNICODE_H_

View File

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "src/tint/text/unicode.h" #include "src/tint/utils/unicode.h"
#include <string> #include <string>
#include <vector> #include <vector>
@ -22,7 +22,7 @@
/// Helper for constructing a CodePoint /// Helper for constructing a CodePoint
#define C(x) CodePoint(x) #define C(x) CodePoint(x)
namespace tint::text { namespace tint::utils {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// CodePoint character set tests // CodePoint character set tests
@ -335,16 +335,16 @@ INSTANTIATE_TEST_SUITE_P(Hindi,
{C(0x0928), 3}, // न {C(0x0928), 3}, // न
{C(0x092e), 3}, // म {C(0x092e), 3}, // म
{C(0x0938), 3}, // स {C(0x0938), 3}, // स
{C(0x094d), 3}, // ् {C(0x094d), 3}, // ् //
{C(0x0924), 3}, // त {C(0x0924), 3}, // त
{C(0x0947), 3}, // े {C(0x0947), 3}, // े //
{C(' '), 1}, {C(' '), 1},
{C(0x0926), 3}, // द {C(0x0926), 3}, // द
{C(0x0941), 3}, // ु {C(0x0941), 3}, // ु //
{C(0x0928), 3}, // न {C(0x0928), 3}, // न
{C(0x093f), 3}, // ि {C(0x093f), 3}, // ि //
{C(0x092f), 3}, // य {C(0x092f), 3}, // य
{C(0x093e), 3}, // ा {C(0x093e), 3}, // ा //
}, },
}})); }}));
@ -487,4 +487,4 @@ INSTANTIATE_TEST_SUITE_P(Invalid,
} // namespace } // namespace
} // namespace tint::text } // namespace tint::utils