Move text/unicode into utils.

This CL moves the Unicode code from `text/` into `utils/` and renames the
`tint::text` namespace to `tint::utils`. These are utility libraries, so the
extra namespace doesn't add much.

Change-Id: Id0de612b6be036392a3cb018bfe66733f2f1ebcb
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127403
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
dan sinclair 2023-04-20 10:06:25 +00:00 committed by Dawn LUCI CQ
parent 9e9c456075
commit 517278ac08
14 changed files with 53 additions and 77 deletions

View File

@ -24,7 +24,6 @@
#include "src/tint/diagnostic/printer.h"
#include "src/tint/inspector/inspector.h"
#include "src/tint/reader/reader.h"
#include "src/tint/text/unicode.h"
#include "src/tint/transform/first_index_offset.h"
#include "src/tint/transform/manager.h"
#include "src/tint/transform/renamer.h"
@ -32,6 +31,7 @@
#include "src/tint/transform/substitute_override.h"
#include "src/tint/transform/vertex_pulling.h"
#include "src/tint/type/manager.h"
#include "src/tint/utils/unicode.h"
#include "src/tint/writer/array_length_from_uniform_options.h"
#include "src/tint/writer/binding_point.h"
#include "src/tint/writer/binding_remapper_options.h"

View File

@ -37,14 +37,14 @@ WGPUCompilationMessageType tintSeverityToMessageType(tint::diag::Severity severi
} // anonymous namespace
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) {
if (tint::text::utf8::IsASCII(utf8String)) {
if (tint::utils::utf8::IsASCII(utf8String)) {
return utf8String.size();
}
uint64_t numberOfUTF16CodeUnits = 0;
std::string_view remaining = utf8String;
while (!remaining.empty()) {
auto [codePoint, utf8CharacterByteLength] = tint::text::utf8::Decode(remaining);
auto [codePoint, utf8CharacterByteLength] = tint::utils::utf8::Decode(remaining);
// Return an error immediately: a byte length of 0 means the UTF-8 decoding failed.
if (utf8CharacterByteLength == 0) {
return DAWN_INTERNAL_ERROR("Fail to decode the unicode string");
@ -87,7 +87,7 @@ void OwnedCompilationMessages::AddMessageForTesting(std::string message,
ASSERT(mCompilationInfo.messages == nullptr);
// Message can only contain ascii characters.
ASSERT(tint::text::utf8::IsASCII(message));
ASSERT(tint::utils::utf8::IsASCII(message));
mMessageStrings.push_back(message);
mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum,

View File

@ -23,7 +23,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
uint64_t lengthInUTF16;
};
// Referenced from src/tint/text/unicode_test.cc
// Referenced from src/tint/utils/unicode_test.cc
constexpr std::array<TestCase, 12> kTestCases = {{
{"", 0},
{"abc", 3},
@ -48,7 +48,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
}
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) {
// Referenced from src/tint/text/unicode_test.cc
// Referenced from src/tint/utils/unicode_test.cc
constexpr std::array<const char*, 12> kTestCases = {{
"\xed\xa0\x80", // CodePoint == 0xD7FF + 1
"\xed\xbf\xbf", // CodePoint == 0xE000 - 1

View File

@ -243,6 +243,8 @@ libtint_source_set("libtint_base_src") {
"utils/string_stream.cc",
"utils/string_stream.h",
"utils/traits.h",
"utils/unicode.cc",
"utils/unicode.h",
"utils/unique_allocator.h",
"utils/unique_vector.h",
"utils/vector.h",
@ -255,8 +257,6 @@ libtint_source_set("libtint_base_src") {
} else {
sources += [ "diagnostic/printer_other.cc" ]
}
deps = [ ":libtint_text_src" ]
}
libtint_source_set("libtint_clone_context_hdrs") {
@ -303,7 +303,6 @@ libtint_source_set("libtint_program_src") {
":libtint_builtins_src",
":libtint_constant_src",
":libtint_sem_src",
":libtint_text_src",
":libtint_type_src",
]
}
@ -333,13 +332,6 @@ libtint_source_set("libtint_inspector_src") {
]
}
libtint_source_set("libtint_text_src") {
sources = [
"text/unicode.cc",
"text/unicode.h",
]
}
libtint_source_set("libtint_transform_src") {
sources = [
"transform/add_block_attribute.cc",
@ -451,7 +443,6 @@ libtint_source_set("libtint_transform_src") {
":libtint_builtins_src",
":libtint_program_src",
":libtint_sem_src",
":libtint_text_src",
":libtint_type_src",
]
}
@ -974,7 +965,6 @@ libtint_source_set("libtint_wgsl_reader_src") {
":libtint_builtins_src",
":libtint_program_src",
":libtint_reader_src",
":libtint_text_src",
":libtint_type_src",
]
}
@ -1089,7 +1079,6 @@ source_set("libtint") {
":libtint_inspector_src",
":libtint_program_src",
":libtint_sem_src",
":libtint_text_src",
":libtint_transform_src",
":libtint_type_src",
":libtint_writer_src",
@ -1511,11 +1500,6 @@ if (tint_build_unittests) {
]
}
tint_unittests_source_set("tint_unittests_text_src") {
sources = [ "text/unicode_test.cc" ]
deps = [ ":libtint_text_src" ]
}
tint_unittests_source_set("tint_unittests_transform_src") {
sources = [
"transform/add_block_attribute_test.cc",
@ -1609,6 +1593,7 @@ if (tint_build_unittests) {
"utils/string_test.cc",
"utils/traits_test.cc",
"utils/transform_test.cc",
"utils/unicode_test.cc",
"utils/unique_allocator_test.cc",
"utils/unique_vector_test.cc",
"utils/vector_test.cc",
@ -2036,7 +2021,6 @@ if (tint_build_unittests) {
":tint_unittests_inspector_src",
":tint_unittests_resolver_src",
":tint_unittests_sem_src",
":tint_unittests_text_src",
":tint_unittests_transform_src",
":tint_unittests_type_src",
":tint_unittests_utils_src",

View File

@ -66,10 +66,10 @@ add_library(tint_diagnostic_utils
diagnostic/formatter.h
diagnostic/printer.cc
diagnostic/printer.h
text/unicode.cc
text/unicode.h
utils/debugger.cc
utils/debugger.h
utils/unicode.cc
utils/unicode.h
)
tint_default_compile_options(tint_diagnostic_utils)
@ -967,7 +967,6 @@ if(TINT_BUILD_TESTS)
symbol_table_test.cc
symbol_test.cc
test_main.cc
text/unicode_test.cc
transform/transform_test.cc
type/array_test.cc
type/atomic_test.cc
@ -1014,6 +1013,7 @@ if(TINT_BUILD_TESTS)
utils/string_test.cc
utils/traits_test.cc
utils/transform_test.cc
utils/unicode_test.cc
utils/unique_allocator_test.cc
utils/unique_vector_test.cc
utils/vector_test.cc

View File

@ -28,7 +28,7 @@
#include "absl/strings/charconv.h"
#include "src/tint/debug.h"
#include "src/tint/number.h"
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
namespace tint::reader::wgsl {
namespace {
@ -45,16 +45,16 @@ bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t
// See https://www.w3.org/TR/WGSL/#blankspace
auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
auto [cp, n] = text::utf8::Decode(utf8, str.size() - i);
auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);
if (n == 0) {
return false;
}
static const auto kSpace = text::CodePoint(0x0020); // space
static const auto kHTab = text::CodePoint(0x0009); // horizontal tab
static const auto kL2R = text::CodePoint(0x200E); // left-to-right mark
static const auto kR2L = text::CodePoint(0x200F); // right-to-left mark
static const auto kSpace = utils::CodePoint(0x0020); // space
static const auto kHTab = utils::CodePoint(0x0009); // horizontal tab
static const auto kL2R = utils::CodePoint(0x200E); // left-to-right mark
static const auto kR2L = utils::CodePoint(0x200F); // right-to-left mark
if (cp == kSpace || cp == kHTab || cp == kL2R || cp == kR2L) {
*is_blankspace = true;
@ -959,12 +959,12 @@ Token Lexer::try_ident() {
// Must begin with an XID_Start unicode character, or underscore
{
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = text::utf8::Decode(utf8, length() - pos());
auto [code_point, n] = utils::utf8::Decode(utf8, length() - pos());
if (n == 0) {
advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"};
}
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
if (code_point != utils::CodePoint('_') && !code_point.IsXIDStart()) {
return {};
}
// Consume start codepoint
@ -974,7 +974,7 @@ Token Lexer::try_ident() {
while (!is_eol()) {
// Must continue with an XID_Continue unicode character
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos());
auto [code_point, n] = utils::utf8::Decode(utf8, line().size() - pos());
if (n == 0) {
advance(); // Skip the bad byte.
return {Token::Type::kError, source, "invalid UTF-8"};

View File

@ -18,7 +18,7 @@
#include <string_view>
#include <utility>
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
namespace tint {
namespace {
@ -27,19 +27,19 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
// See https://www.w3.org/TR/WGSL/#blankspace
auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
auto [cp, n] = text::utf8::Decode(utf8, str.size() - i);
auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);
if (n == 0) {
return false;
}
static const auto kLF = text::CodePoint(0x000A); // line feed
static const auto kVTab = text::CodePoint(0x000B); // vertical tab
static const auto kFF = text::CodePoint(0x000C); // form feed
static const auto kNL = text::CodePoint(0x0085); // next line
static const auto kCR = text::CodePoint(0x000D); // carriage return
static const auto kLS = text::CodePoint(0x2028); // line separator
static const auto kPS = text::CodePoint(0x2029); // paragraph separator
static const auto kLF = utils::CodePoint(0x000A); // line feed
static const auto kVTab = utils::CodePoint(0x000B); // vertical tab
static const auto kFF = utils::CodePoint(0x000C); // form feed
static const auto kNL = utils::CodePoint(0x0085); // next line
static const auto kCR = utils::CodePoint(0x000D); // carriage return
static const auto kLS = utils::CodePoint(0x2028); // line separator
static const auto kPS = utils::CodePoint(0x2029); // paragraph separator
if (cp == kLF || cp == kVTab || cp == kFF || cp == kNL || cp == kPS || cp == kLS) {
*is_line_break = true;
@ -54,7 +54,7 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
if (auto next_i = i + n; next_i < str.size()) {
auto* next_utf8 = reinterpret_cast<const uint8_t*>(&str[next_i]);
auto [next_cp, next_n] = text::utf8::Decode(next_utf8, str.size() - next_i);
auto [next_cp, next_n] = utils::utf8::Decode(next_utf8, str.size() - next_i);
if (next_n == 0) {
return false;

View File

@ -25,7 +25,7 @@
#include "src/tint/sem/value_constructor.h"
#include "src/tint/sem/value_conversion.h"
#include "src/tint/switch.h"
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
@ -1333,7 +1333,7 @@ Transform::ApplyResult Renamer::Apply(const Program* src,
return true;
}
auto name = symbol.Name();
if (!text::utf8::IsASCII(name)) {
if (!utils::utf8::IsASCII(name)) {
// name is non-ascii. All of the backend keywords are ascii, so rename if we're not
// preserving unicode symbols.
return !preserve_unicode;

View File

@ -24,7 +24,7 @@
#include "src/tint/sem/member_accessor_expression.h"
#include "src/tint/sem/statement.h"
#include "src/tint/sem/variable.h"
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables);
TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables::Config);

View File

@ -24,10 +24,6 @@ StringStream::StringStream() {
StringStream::~StringStream() = default;
} // namespace tint::utils
namespace tint::text {
utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point) {
if (code_point < 0x7f) {
// See https://en.cppreference.com/w/cpp/language/escape
@ -52,4 +48,4 @@ utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point)
return out << "'U+" << std::hex << code_point.value << "'";
}
} // namespace tint::text
} // namespace tint::utils

View File

@ -23,7 +23,7 @@
#include <string>
#include <utility>
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
namespace tint::utils {
@ -183,16 +183,12 @@ class StringStream {
std::stringstream sstream_;
};
} // namespace tint::utils
namespace tint::text {
/// Writes the CodePoint to the stream.
/// @param out the stream to write to
/// @param codepoint the CodePoint to write
/// @returns out so calls can be chained
utils::StringStream& operator<<(utils::StringStream& out, CodePoint codepoint);
} // namespace tint::text
} // namespace tint::utils
#endif // SRC_TINT_UTILS_STRING_STREAM_H_

View File

@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
#include <algorithm>
namespace tint::text {
namespace tint::utils {
namespace {
struct CodePointRange {
@ -418,4 +418,4 @@ bool IsASCII(std::string_view str) {
} // namespace utf8
} // namespace tint::text
} // namespace tint::utils

View File

@ -12,15 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SRC_TINT_TEXT_UNICODE_H_
#define SRC_TINT_TEXT_UNICODE_H_
#ifndef SRC_TINT_UTILS_UNICODE_H_
#define SRC_TINT_UTILS_UNICODE_H_
#include <cstddef>
#include <cstdint>
#include <string_view>
#include <utility>
namespace tint::text {
namespace tint::utils {
/// CodePoint is a unicode code point.
struct CodePoint {
@ -75,6 +75,6 @@ bool IsASCII(std::string_view);
} // namespace utf8
} // namespace tint::text
} // namespace tint::utils
#endif // SRC_TINT_TEXT_UNICODE_H_
#endif // SRC_TINT_UTILS_UNICODE_H_

View File

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/tint/text/unicode.h"
#include "src/tint/utils/unicode.h"
#include <string>
#include <vector>
@ -22,7 +22,7 @@
/// Helper for constructing a CodePoint
#define C(x) CodePoint(x)
namespace tint::text {
namespace tint::utils {
////////////////////////////////////////////////////////////////////////////////
// CodePoint character set tests
@ -335,16 +335,16 @@ INSTANTIATE_TEST_SUITE_P(Hindi,
{C(0x0928), 3}, // न
{C(0x092e), 3}, // म
{C(0x0938), 3}, // स
{C(0x094d), 3}, // ्
{C(0x094d), 3}, // ् //
{C(0x0924), 3}, // त
{C(0x0947), 3}, // े
{C(0x0947), 3}, // े //
{C(' '), 1},
{C(0x0926), 3}, // द
{C(0x0941), 3}, // ु
{C(0x0941), 3}, // ु //
{C(0x0928), 3}, // न
{C(0x093f), 3}, // ि
{C(0x093f), 3}, // ि //
{C(0x092f), 3}, // य
{C(0x093e), 3}, // ा
{C(0x093e), 3}, // ा //
},
}}));
@ -487,4 +487,4 @@ INSTANTIATE_TEST_SUITE_P(Invalid,
} // namespace
} // namespace tint::text
} // namespace tint::utils