Move text/unicode into utils.
This CL consolidates the unicode code into utils. These are utility libraries, so the extra namespace doesn't add much. Change-Id: Id0de612b6be036392a3cb018bfe66733f2f1ebcb Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127403 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Reviewed-by: Ben Clayton <bclayton@google.com>
This commit is contained in:
parent
9e9c456075
commit
517278ac08
|
@ -24,7 +24,6 @@
|
|||
#include "src/tint/diagnostic/printer.h"
|
||||
#include "src/tint/inspector/inspector.h"
|
||||
#include "src/tint/reader/reader.h"
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/transform/first_index_offset.h"
|
||||
#include "src/tint/transform/manager.h"
|
||||
#include "src/tint/transform/renamer.h"
|
||||
|
@ -32,6 +31,7 @@
|
|||
#include "src/tint/transform/substitute_override.h"
|
||||
#include "src/tint/transform/vertex_pulling.h"
|
||||
#include "src/tint/type/manager.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
#include "src/tint/writer/array_length_from_uniform_options.h"
|
||||
#include "src/tint/writer/binding_point.h"
|
||||
#include "src/tint/writer/binding_remapper_options.h"
|
||||
|
|
|
@ -37,14 +37,14 @@ WGPUCompilationMessageType tintSeverityToMessageType(tint::diag::Severity severi
|
|||
} // anonymous namespace
|
||||
|
||||
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) {
|
||||
if (tint::text::utf8::IsASCII(utf8String)) {
|
||||
if (tint::utils::utf8::IsASCII(utf8String)) {
|
||||
return utf8String.size();
|
||||
}
|
||||
|
||||
uint64_t numberOfUTF16CodeUnits = 0;
|
||||
std::string_view remaining = utf8String;
|
||||
while (!remaining.empty()) {
|
||||
auto [codePoint, utf8CharacterByteLength] = tint::text::utf8::Decode(remaining);
|
||||
auto [codePoint, utf8CharacterByteLength] = tint::utils::utf8::Decode(remaining);
|
||||
// Directly return as something wrong has happened during the UTF-8 decoding.
|
||||
if (utf8CharacterByteLength == 0) {
|
||||
return DAWN_INTERNAL_ERROR("Fail to decode the unicode string");
|
||||
|
@ -87,7 +87,7 @@ void OwnedCompilationMessages::AddMessageForTesting(std::string message,
|
|||
ASSERT(mCompilationInfo.messages == nullptr);
|
||||
|
||||
// Message can only contain ascii characters.
|
||||
ASSERT(tint::text::utf8::IsASCII(message));
|
||||
ASSERT(tint::utils::utf8::IsASCII(message));
|
||||
|
||||
mMessageStrings.push_back(message);
|
||||
mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum,
|
||||
|
|
|
@ -23,7 +23,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
|
|||
uint64_t lengthInUTF16;
|
||||
};
|
||||
|
||||
// Referenced from src/tint/text/unicode_test.cc
|
||||
// Referenced from src/tint/utils/unicode_test.cc
|
||||
constexpr std::array<TestCase, 12> kTestCases = {{
|
||||
{"", 0},
|
||||
{"abc", 3},
|
||||
|
@ -48,7 +48,7 @@ TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
|
|||
}
|
||||
|
||||
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) {
|
||||
// Referenced from src/tint/text/unicode_test.cc
|
||||
// Referenced from src/tint/utils/unicode_test.cc
|
||||
constexpr std::array<const char*, 12> kTestCases = {{
|
||||
"\xed\xa0\x80", // CodePoint == 0xD7FF + 1
|
||||
"\xed\xbf\xbf", // CodePoint == 0xE000 - 1
|
||||
|
|
|
@ -243,6 +243,8 @@ libtint_source_set("libtint_base_src") {
|
|||
"utils/string_stream.cc",
|
||||
"utils/string_stream.h",
|
||||
"utils/traits.h",
|
||||
"utils/unicode.cc",
|
||||
"utils/unicode.h",
|
||||
"utils/unique_allocator.h",
|
||||
"utils/unique_vector.h",
|
||||
"utils/vector.h",
|
||||
|
@ -255,8 +257,6 @@ libtint_source_set("libtint_base_src") {
|
|||
} else {
|
||||
sources += [ "diagnostic/printer_other.cc" ]
|
||||
}
|
||||
|
||||
deps = [ ":libtint_text_src" ]
|
||||
}
|
||||
|
||||
libtint_source_set("libtint_clone_context_hdrs") {
|
||||
|
@ -303,7 +303,6 @@ libtint_source_set("libtint_program_src") {
|
|||
":libtint_builtins_src",
|
||||
":libtint_constant_src",
|
||||
":libtint_sem_src",
|
||||
":libtint_text_src",
|
||||
":libtint_type_src",
|
||||
]
|
||||
}
|
||||
|
@ -333,13 +332,6 @@ libtint_source_set("libtint_inspector_src") {
|
|||
]
|
||||
}
|
||||
|
||||
libtint_source_set("libtint_text_src") {
|
||||
sources = [
|
||||
"text/unicode.cc",
|
||||
"text/unicode.h",
|
||||
]
|
||||
}
|
||||
|
||||
libtint_source_set("libtint_transform_src") {
|
||||
sources = [
|
||||
"transform/add_block_attribute.cc",
|
||||
|
@ -451,7 +443,6 @@ libtint_source_set("libtint_transform_src") {
|
|||
":libtint_builtins_src",
|
||||
":libtint_program_src",
|
||||
":libtint_sem_src",
|
||||
":libtint_text_src",
|
||||
":libtint_type_src",
|
||||
]
|
||||
}
|
||||
|
@ -974,7 +965,6 @@ libtint_source_set("libtint_wgsl_reader_src") {
|
|||
":libtint_builtins_src",
|
||||
":libtint_program_src",
|
||||
":libtint_reader_src",
|
||||
":libtint_text_src",
|
||||
":libtint_type_src",
|
||||
]
|
||||
}
|
||||
|
@ -1089,7 +1079,6 @@ source_set("libtint") {
|
|||
":libtint_inspector_src",
|
||||
":libtint_program_src",
|
||||
":libtint_sem_src",
|
||||
":libtint_text_src",
|
||||
":libtint_transform_src",
|
||||
":libtint_type_src",
|
||||
":libtint_writer_src",
|
||||
|
@ -1511,11 +1500,6 @@ if (tint_build_unittests) {
|
|||
]
|
||||
}
|
||||
|
||||
tint_unittests_source_set("tint_unittests_text_src") {
|
||||
sources = [ "text/unicode_test.cc" ]
|
||||
deps = [ ":libtint_text_src" ]
|
||||
}
|
||||
|
||||
tint_unittests_source_set("tint_unittests_transform_src") {
|
||||
sources = [
|
||||
"transform/add_block_attribute_test.cc",
|
||||
|
@ -1609,6 +1593,7 @@ if (tint_build_unittests) {
|
|||
"utils/string_test.cc",
|
||||
"utils/traits_test.cc",
|
||||
"utils/transform_test.cc",
|
||||
"utils/unicode_test.cc",
|
||||
"utils/unique_allocator_test.cc",
|
||||
"utils/unique_vector_test.cc",
|
||||
"utils/vector_test.cc",
|
||||
|
@ -2036,7 +2021,6 @@ if (tint_build_unittests) {
|
|||
":tint_unittests_inspector_src",
|
||||
":tint_unittests_resolver_src",
|
||||
":tint_unittests_sem_src",
|
||||
":tint_unittests_text_src",
|
||||
":tint_unittests_transform_src",
|
||||
":tint_unittests_type_src",
|
||||
":tint_unittests_utils_src",
|
||||
|
|
|
@ -66,10 +66,10 @@ add_library(tint_diagnostic_utils
|
|||
diagnostic/formatter.h
|
||||
diagnostic/printer.cc
|
||||
diagnostic/printer.h
|
||||
text/unicode.cc
|
||||
text/unicode.h
|
||||
utils/debugger.cc
|
||||
utils/debugger.h
|
||||
utils/unicode.cc
|
||||
utils/unicode.h
|
||||
)
|
||||
tint_default_compile_options(tint_diagnostic_utils)
|
||||
|
||||
|
@ -967,7 +967,6 @@ if(TINT_BUILD_TESTS)
|
|||
symbol_table_test.cc
|
||||
symbol_test.cc
|
||||
test_main.cc
|
||||
text/unicode_test.cc
|
||||
transform/transform_test.cc
|
||||
type/array_test.cc
|
||||
type/atomic_test.cc
|
||||
|
@ -1014,6 +1013,7 @@ if(TINT_BUILD_TESTS)
|
|||
utils/string_test.cc
|
||||
utils/traits_test.cc
|
||||
utils/transform_test.cc
|
||||
utils/unicode_test.cc
|
||||
utils/unique_allocator_test.cc
|
||||
utils/unique_vector_test.cc
|
||||
utils/vector_test.cc
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "absl/strings/charconv.h"
|
||||
#include "src/tint/debug.h"
|
||||
#include "src/tint/number.h"
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
namespace tint::reader::wgsl {
|
||||
namespace {
|
||||
|
@ -45,16 +45,16 @@ bool read_blankspace(std::string_view str, size_t i, bool* is_blankspace, size_t
|
|||
// See https://www.w3.org/TR/WGSL/#blankspace
|
||||
|
||||
auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
|
||||
auto [cp, n] = text::utf8::Decode(utf8, str.size() - i);
|
||||
auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);
|
||||
|
||||
if (n == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static const auto kSpace = text::CodePoint(0x0020); // space
|
||||
static const auto kHTab = text::CodePoint(0x0009); // horizontal tab
|
||||
static const auto kL2R = text::CodePoint(0x200E); // left-to-right mark
|
||||
static const auto kR2L = text::CodePoint(0x200F); // right-to-left mark
|
||||
static const auto kSpace = utils::CodePoint(0x0020); // space
|
||||
static const auto kHTab = utils::CodePoint(0x0009); // horizontal tab
|
||||
static const auto kL2R = utils::CodePoint(0x200E); // left-to-right mark
|
||||
static const auto kR2L = utils::CodePoint(0x200F); // right-to-left mark
|
||||
|
||||
if (cp == kSpace || cp == kHTab || cp == kL2R || cp == kR2L) {
|
||||
*is_blankspace = true;
|
||||
|
@ -959,12 +959,12 @@ Token Lexer::try_ident() {
|
|||
// Must begin with an XID_Start unicode character, or underscore
|
||||
{
|
||||
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
|
||||
auto [code_point, n] = text::utf8::Decode(utf8, length() - pos());
|
||||
auto [code_point, n] = utils::utf8::Decode(utf8, length() - pos());
|
||||
if (n == 0) {
|
||||
advance(); // Skip the bad byte.
|
||||
return {Token::Type::kError, source, "invalid UTF-8"};
|
||||
}
|
||||
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
|
||||
if (code_point != utils::CodePoint('_') && !code_point.IsXIDStart()) {
|
||||
return {};
|
||||
}
|
||||
// Consume start codepoint
|
||||
|
@ -974,7 +974,7 @@ Token Lexer::try_ident() {
|
|||
while (!is_eol()) {
|
||||
// Must continue with an XID_Continue unicode character
|
||||
auto* utf8 = reinterpret_cast<const uint8_t*>(&at(pos()));
|
||||
auto [code_point, n] = text::utf8::Decode(utf8, line().size() - pos());
|
||||
auto [code_point, n] = utils::utf8::Decode(utf8, line().size() - pos());
|
||||
if (n == 0) {
|
||||
advance(); // Skip the bad byte.
|
||||
return {Token::Type::kError, source, "invalid UTF-8"};
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
namespace tint {
|
||||
namespace {
|
||||
|
@ -27,19 +27,19 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
|
|||
// See https://www.w3.org/TR/WGSL/#blankspace
|
||||
|
||||
auto* utf8 = reinterpret_cast<const uint8_t*>(&str[i]);
|
||||
auto [cp, n] = text::utf8::Decode(utf8, str.size() - i);
|
||||
auto [cp, n] = utils::utf8::Decode(utf8, str.size() - i);
|
||||
|
||||
if (n == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static const auto kLF = text::CodePoint(0x000A); // line feed
|
||||
static const auto kVTab = text::CodePoint(0x000B); // vertical tab
|
||||
static const auto kFF = text::CodePoint(0x000C); // form feed
|
||||
static const auto kNL = text::CodePoint(0x0085); // next line
|
||||
static const auto kCR = text::CodePoint(0x000D); // carriage return
|
||||
static const auto kLS = text::CodePoint(0x2028); // line separator
|
||||
static const auto kPS = text::CodePoint(0x2029); // paragraph separator
|
||||
static const auto kLF = utils::CodePoint(0x000A); // line feed
|
||||
static const auto kVTab = utils::CodePoint(0x000B); // vertical tab
|
||||
static const auto kFF = utils::CodePoint(0x000C); // form feed
|
||||
static const auto kNL = utils::CodePoint(0x0085); // next line
|
||||
static const auto kCR = utils::CodePoint(0x000D); // carriage return
|
||||
static const auto kLS = utils::CodePoint(0x2028); // line separator
|
||||
static const auto kPS = utils::CodePoint(0x2029); // paragraph separator
|
||||
|
||||
if (cp == kLF || cp == kVTab || cp == kFF || cp == kNL || cp == kPS || cp == kLS) {
|
||||
*is_line_break = true;
|
||||
|
@ -54,7 +54,7 @@ bool ParseLineBreak(std::string_view str, size_t i, bool* is_line_break, size_t*
|
|||
|
||||
if (auto next_i = i + n; next_i < str.size()) {
|
||||
auto* next_utf8 = reinterpret_cast<const uint8_t*>(&str[next_i]);
|
||||
auto [next_cp, next_n] = text::utf8::Decode(next_utf8, str.size() - next_i);
|
||||
auto [next_cp, next_n] = utils::utf8::Decode(next_utf8, str.size() - next_i);
|
||||
|
||||
if (next_n == 0) {
|
||||
return false;
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "src/tint/sem/value_constructor.h"
|
||||
#include "src/tint/sem/value_conversion.h"
|
||||
#include "src/tint/switch.h"
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
|
||||
|
@ -1333,7 +1333,7 @@ Transform::ApplyResult Renamer::Apply(const Program* src,
|
|||
return true;
|
||||
}
|
||||
auto name = symbol.Name();
|
||||
if (!text::utf8::IsASCII(name)) {
|
||||
if (!utils::utf8::IsASCII(name)) {
|
||||
// name is non-ascii. All of the backend keywords are ascii, so rename if we're not
|
||||
// preserving unicode symbols.
|
||||
return !preserve_unicode;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "src/tint/sem/member_accessor_expression.h"
|
||||
#include "src/tint/sem/statement.h"
|
||||
#include "src/tint/sem/variable.h"
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables);
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::TruncateInterstageVariables::Config);
|
||||
|
|
|
@ -24,10 +24,6 @@ StringStream::StringStream() {
|
|||
|
||||
StringStream::~StringStream() = default;
|
||||
|
||||
} // namespace tint::utils
|
||||
|
||||
namespace tint::text {
|
||||
|
||||
utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point) {
|
||||
if (code_point < 0x7f) {
|
||||
// See https://en.cppreference.com/w/cpp/language/escape
|
||||
|
@ -52,4 +48,4 @@ utils::StringStream& operator<<(utils::StringStream& out, CodePoint code_point)
|
|||
return out << "'U+" << std::hex << code_point.value << "'";
|
||||
}
|
||||
|
||||
} // namespace tint::text
|
||||
} // namespace tint::utils
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
namespace tint::utils {
|
||||
|
||||
|
@ -183,16 +183,12 @@ class StringStream {
|
|||
std::stringstream sstream_;
|
||||
};
|
||||
|
||||
} // namespace tint::utils
|
||||
|
||||
namespace tint::text {
|
||||
|
||||
/// Writes the CodePoint to the stream.
|
||||
/// @param out the stream to write to
|
||||
/// @param codepoint the CodePoint to write
|
||||
/// @returns out so calls can be chained
|
||||
utils::StringStream& operator<<(utils::StringStream& out, CodePoint codepoint);
|
||||
|
||||
} // namespace tint::text
|
||||
} // namespace tint::utils
|
||||
|
||||
#endif // SRC_TINT_UTILS_STRING_STREAM_H_
|
||||
|
|
|
@ -12,11 +12,11 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace tint::text {
|
||||
namespace tint::utils {
|
||||
namespace {
|
||||
|
||||
struct CodePointRange {
|
||||
|
@ -418,4 +418,4 @@ bool IsASCII(std::string_view str) {
|
|||
|
||||
} // namespace utf8
|
||||
|
||||
} // namespace tint::text
|
||||
} // namespace tint::utils
|
|
@ -12,15 +12,15 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef SRC_TINT_TEXT_UNICODE_H_
|
||||
#define SRC_TINT_TEXT_UNICODE_H_
|
||||
#ifndef SRC_TINT_UTILS_UNICODE_H_
|
||||
#define SRC_TINT_UTILS_UNICODE_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
namespace tint::text {
|
||||
namespace tint::utils {
|
||||
|
||||
/// CodePoint is a unicode code point.
|
||||
struct CodePoint {
|
||||
|
@ -75,6 +75,6 @@ bool IsASCII(std::string_view);
|
|||
|
||||
} // namespace utf8
|
||||
|
||||
} // namespace tint::text
|
||||
} // namespace tint::utils
|
||||
|
||||
#endif // SRC_TINT_TEXT_UNICODE_H_
|
||||
#endif // SRC_TINT_UTILS_UNICODE_H_
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/utils/unicode.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -22,7 +22,7 @@
|
|||
/// Helper for constructing a CodePoint
|
||||
#define C(x) CodePoint(x)
|
||||
|
||||
namespace tint::text {
|
||||
namespace tint::utils {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// CodePoint character set tests
|
||||
|
@ -335,16 +335,16 @@ INSTANTIATE_TEST_SUITE_P(Hindi,
|
|||
{C(0x0928), 3}, // न
|
||||
{C(0x092e), 3}, // म
|
||||
{C(0x0938), 3}, // स
|
||||
{C(0x094d), 3}, // ्
|
||||
{C(0x094d), 3}, // ् //
|
||||
{C(0x0924), 3}, // त
|
||||
{C(0x0947), 3}, // े
|
||||
{C(0x0947), 3}, // े //
|
||||
{C(' '), 1},
|
||||
{C(0x0926), 3}, // द
|
||||
{C(0x0941), 3}, // ु
|
||||
{C(0x0941), 3}, // ु //
|
||||
{C(0x0928), 3}, // न
|
||||
{C(0x093f), 3}, // ि
|
||||
{C(0x093f), 3}, // ि //
|
||||
{C(0x092f), 3}, // य
|
||||
{C(0x093e), 3}, // ा
|
||||
{C(0x093e), 3}, // ा //
|
||||
},
|
||||
}}));
|
||||
|
||||
|
@ -487,4 +487,4 @@ INSTANTIATE_TEST_SUITE_P(Invalid,
|
|||
|
||||
} // namespace
|
||||
|
||||
} // namespace tint::text
|
||||
} // namespace tint::utils
|
Loading…
Reference in New Issue