reader/wgsl: Support unicode identifiers

Bug: tint:1437
Change-Id: Ie00ccb3e93d207111e55117dfc989f79b76164bf
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/80844
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: David Neto <dneto@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
This commit is contained in:
Ben Clayton 2022-02-18 20:39:43 +00:00 committed by Tint LUCI CQ
parent 1c6c6b19ab
commit d29d3d5740
29 changed files with 483 additions and 95 deletions

View File

@ -12,6 +12,7 @@
* Module-scope declarations can now be declared in any order. [tint:1266](crbug.com/tint/1266) * Module-scope declarations can now be declared in any order. [tint:1266](crbug.com/tint/1266)
* The `override` keyword and `@id()` attribute for pipeline-overridable constants are now supported, replacing the `@override` attribute. [tint:1403](crbug.com/tint/1403) * The `override` keyword and `@id()` attribute for pipeline-overridable constants are now supported, replacing the `@override` attribute. [tint:1403](crbug.com/tint/1403)
* Tint now supports unicode identifiers. [tint:1437](crbug.com/tint/1437)
## Changes for M99 ## Changes for M99

View File

@ -1119,7 +1119,8 @@ int main(int argc, const char** argv) {
case Format::kMsl: { case Format::kMsl: {
#if TINT_BUILD_MSL_WRITER #if TINT_BUILD_MSL_WRITER
transform_inputs.Add<tint::transform::Renamer::Config>( transform_inputs.Add<tint::transform::Renamer::Config>(
tint::transform::Renamer::Target::kMslKeywords); tint::transform::Renamer::Target::kMslKeywords,
/* preserve_unicode */ false);
transform_manager.Add<tint::transform::Renamer>(); transform_manager.Add<tint::transform::Renamer>();
#endif // TINT_BUILD_MSL_WRITER #endif // TINT_BUILD_MSL_WRITER
break; break;
@ -1132,7 +1133,8 @@ int main(int argc, const char** argv) {
case Format::kHlsl: { case Format::kHlsl: {
#if TINT_BUILD_HLSL_WRITER #if TINT_BUILD_HLSL_WRITER
transform_inputs.Add<tint::transform::Renamer::Config>( transform_inputs.Add<tint::transform::Renamer::Config>(
tint::transform::Renamer::Target::kHlslKeywords); tint::transform::Renamer::Target::kHlslKeywords,
/* preserve_unicode */ false);
transform_manager.Add<tint::transform::Renamer>(); transform_manager.Add<tint::transform::Renamer>();
#endif // TINT_BUILD_HLSL_WRITER #endif // TINT_BUILD_HLSL_WRITER
break; break;

View File

@ -21,6 +21,7 @@
#include <utility> #include <utility>
#include "src/debug.h" #include "src/debug.h"
#include "src/text/unicode.h"
namespace tint { namespace tint {
namespace reader { namespace reader {
@ -113,18 +114,10 @@ bool Lexer::is_null() const {
return (pos_ < len_) && (file_->content.data[pos_] == 0); return (pos_ < len_) && (file_->content.data[pos_] == 0);
} }
bool Lexer::is_alpha(char ch) const {
return std::isalpha(ch);
}
bool Lexer::is_digit(char ch) const { bool Lexer::is_digit(char ch) const {
return std::isdigit(ch); return std::isdigit(ch);
} }
bool Lexer::is_alphanum_underscore(char ch) const {
return is_alpha(ch) || is_digit(ch) || ch == '_';
}
bool Lexer::is_hex(char ch) const { bool Lexer::is_hex(char ch) const {
return std::isxdigit(ch); return std::isxdigit(ch);
} }
@ -733,31 +726,52 @@ Token Lexer::try_integer() {
} }
Token Lexer::try_ident() { Token Lexer::try_ident() {
// Must begin with an a-zA-Z_ auto source = begin_source();
if (!(is_alpha(file_->content.data[pos_]) || auto start = pos_;
file_->content.data[pos_] == '_')) {
// This below assumes that the size of a single std::string element is 1 byte.
static_assert(sizeof(file_->content.data[0]) == sizeof(uint8_t),
"tint::reader::wgsl requires the size of a std::string element "
"to be a single byte");
// Must begin with an XID_Start unicode character, or underscore
{
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
auto [code_point, n] =
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
return {}; return {};
} }
// Consume start codepoint
auto source = begin_source(); pos_ += n;
location_.column += n;
auto s = pos_;
while (!is_eof() && is_alphanum_underscore(file_->content.data[pos_])) {
pos_++;
location_.column++;
} }
if (file_->content.data[s] == '_') { while (!is_eof()) {
// Must continue with an XID_Continue unicode character
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
auto [code_point, n] =
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
if (!code_point.IsXIDContinue()) {
break;
}
// Consume continuing codepoint
pos_ += n;
location_.column += n;
}
if (file_->content.data[start] == '_') {
// Check for an underscore on its own (special token), or a // Check for an underscore on its own (special token), or a
// double-underscore (not allowed). // double-underscore (not allowed).
if ((pos_ == s + 1) || (file_->content.data[s + 1] == '_')) { if ((pos_ == start + 1) || (file_->content.data[start + 1] == '_')) {
location_.column -= (pos_ - s); location_.column -= (pos_ - start);
pos_ = s; pos_ = start;
return {}; return {};
} }
} }
auto str = file_->content.data_view.substr(s, pos_ - s); auto str = file_->content.data_view.substr(start, pos_ - start);
end_source(source); end_source(source);
auto t = check_keyword(source, str); auto t = check_keyword(source, str);

View File

@ -76,25 +76,18 @@ class Lexer {
/// it is not null. /// it is not null.
bool is_null() const; bool is_null() const;
/// @param ch a character /// @param ch a character
/// @returns true if 'ch' is an alphabetic character
bool is_alpha(char ch) const;
/// @param ch a character
/// @returns true if 'ch' is a decimal digit /// @returns true if 'ch' is a decimal digit
bool is_digit(char ch) const; bool is_digit(char ch) const;
/// @param ch a character /// @param ch a character
/// @returns true if 'ch' is a hexadecimal digit /// @returns true if 'ch' is a hexadecimal digit
bool is_hex(char ch) const; bool is_hex(char ch) const;
/// @param ch a character
/// @returns true if 'ch' is a digit, an alphabetic character,
/// or an underscore.
bool is_alphanum_underscore(char ch) const;
bool matches(size_t pos, std::string_view substr); bool matches(size_t pos, std::string_view substr);
/// The source file content /// The source file content
Source::File const* const file_; Source::File const* const file_;
/// The length of the input /// The length of the input
uint32_t len_ = 0; uint32_t len_ = 0;
/// The current position within the input /// The current position in utf-8 code units (bytes) within the input
uint32_t pos_ = 0; uint32_t pos_ = 0;
/// The current location within the input /// The current location within the input
Source::Location location_; Source::Location location_;

View File

@ -315,8 +315,8 @@ INSTANTIATE_TEST_SUITE_P(
"2.5E+ 123", "2.5E+ 123",
"2.5E- 123")); "2.5E- 123"));
using IdentifierTest = testing::TestWithParam<const char*>; using AsciiIdentifierTest = testing::TestWithParam<const char*>;
TEST_P(IdentifierTest, Parse) { TEST_P(AsciiIdentifierTest, Parse) {
Source::File file("", GetParam()); Source::File file("", GetParam());
Lexer l(&file); Lexer l(&file);
@ -329,7 +329,7 @@ TEST_P(IdentifierTest, Parse) {
EXPECT_EQ(t.to_str(), GetParam()); EXPECT_EQ(t.to_str(), GetParam());
} }
INSTANTIATE_TEST_SUITE_P(LexerTest, INSTANTIATE_TEST_SUITE_P(LexerTest,
IdentifierTest, AsciiIdentifierTest,
testing::Values("a", testing::Values("a",
"test", "test",
"test01", "test01",
@ -342,6 +342,57 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"alldigits_0123456789")); "alldigits_0123456789"));
/// A single parameter for UnicodeIdentifierTest: an identifier and its
/// expected length.
struct UnicodeCase {
/// The identifier source text, passed directly to Source::File as the file
/// content.
const char* utf8;
/// The expected length of the identifier. The test expects the token's end
/// column to be `1 + code_units`.
size_t code_units;
};
using UnicodeIdentifierTest = testing::TestWithParam<UnicodeCase>;

// Lexes the parameter's text and checks that the first token is an identifier
// that starts at line 1, column 1, ends `code_units` columns later on the same
// line, and round-trips to the original text.
TEST_P(UnicodeIdentifierTest, Parse) {
  const UnicodeCase& param = GetParam();
  Source::File file("", param.utf8);
  Lexer lexer(&file);

  auto token = lexer.next();
  EXPECT_TRUE(token.IsIdentifier());

  const auto range = token.source().range;
  EXPECT_EQ(range.begin.line, 1u);
  EXPECT_EQ(range.begin.column, 1u);
  EXPECT_EQ(range.end.line, 1u);
  EXPECT_EQ(range.end.column, 1u + param.code_units);
  EXPECT_EQ(token.to_str(), param.utf8);
}
// Each case pairs an identifier, written as escaped utf-8 bytes, with its
// length in bytes. The comment on each case shows the identifier as rendered
// text.
INSTANTIATE_TEST_SUITE_P(
LexerTest,
UnicodeIdentifierTest,
testing::ValuesIn({
UnicodeCase{// "𝐢𝐝𝐞𝐧𝐭𝐢𝐟𝐢𝐞𝐫"
"\xf0\x9d\x90\xa2\xf0\x9d\x90\x9d\xf0\x9d\x90\x9e\xf0\x9d"
"\x90\xa7\xf0\x9d\x90\xad\xf0\x9d\x90\xa2\xf0\x9d\x90\x9f"
"\xf0\x9d\x90\xa2\xf0\x9d\x90\x9e\xf0\x9d\x90\xab",
40},
UnicodeCase{// "𝑖𝑑𝑒𝑛𝑡𝑖𝑓𝑖𝑒𝑟"
"\xf0\x9d\x91\x96\xf0\x9d\x91\x91\xf0\x9d\x91\x92\xf0\x9d"
"\x91\x9b\xf0\x9d\x91\xa1\xf0\x9d\x91\x96\xf0\x9d\x91\x93"
"\xf0\x9d\x91\x96\xf0\x9d\x91\x92\xf0\x9d\x91\x9f",
40},
UnicodeCase{
// "ｉｄｅｎｔｉｆｉｅｒ"
"\xef\xbd\x89\xef\xbd\x84\xef\xbd\x85\xef\xbd\x8e\xef\xbd\x94\xef"
"\xbd\x89\xef\xbd\x86\xef\xbd\x89\xef\xbd\x85\xef\xbd\x92",
30},
UnicodeCase{// "𝕚𝕕𝕖𝕟𝕥𝕚𝕗𝕚𝕖𝕣𝟙𝟚𝟛"
"\xf0\x9d\x95\x9a\xf0\x9d\x95\x95\xf0\x9d\x95\x96\xf0\x9d"
"\x95\x9f\xf0\x9d\x95\xa5\xf0\x9d\x95\x9a\xf0\x9d\x95\x97"
"\xf0\x9d\x95\x9a\xf0\x9d\x95\x96\xf0\x9d\x95\xa3\xf0\x9d"
"\x9f\x99\xf0\x9d\x9f\x9a\xf0\x9d\x9f\x9b",
52},
UnicodeCase{
// "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
"\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
"\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
"\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33",
43},
}));
TEST_F(LexerTest, IdentifierTest_SingleUnderscoreDoesNotMatch) { TEST_F(LexerTest, IdentifierTest_SingleUnderscoreDoesNotMatch) {
Source::File file("", "_"); Source::File file("", "_");
Lexer l(&file); Lexer l(&file);

View File

@ -14,6 +14,7 @@
#include "src/ast/workgroup_attribute.h" #include "src/ast/workgroup_attribute.h"
#include "src/reader/wgsl/parser_impl_test_helper.h" #include "src/reader/wgsl/parser_impl_test_helper.h"
#include "src/utils/string.h"
namespace tint { namespace tint {
namespace reader { namespace reader {
@ -48,6 +49,51 @@ TEST_F(ParserImplTest, FunctionDecl) {
EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>()); EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>());
} }
// Parses a function declaration whose function name and both parameter names
// are non-ASCII identifiers, and checks that the resulting symbols match the
// original utf-8 text.
TEST_F(ParserImplTest, FunctionDecl_Unicode) {
  const std::string function_ident =  // "𝗳𝘂𝗻𝗰𝘁𝗶𝗼𝗻"
      "\xf0\x9d\x97\xb3\xf0\x9d\x98\x82\xf0\x9d\x97\xbb\xf0\x9d\x97\xb0\xf0\x9d"
      "\x98\x81\xf0\x9d\x97\xb6\xf0\x9d\x97\xbc\xf0\x9d\x97\xbb";
  const std::string param_a_ident =  // "𝓹𝓪𝓻𝓪𝓶_𝓪"
      "\xf0\x9d\x93\xb9\xf0\x9d\x93\xaa\xf0\x9d\x93\xbb\xf0\x9d\x93\xaa\xf0\x9d"
      "\x93\xb6\x5f\xf0\x9d\x93\xaa";
  const std::string param_b_ident =  // "𝕡𝕒𝕣𝕒𝕞_𝕓"
      "\xf0\x9d\x95\xa1\xf0\x9d\x95\x92\xf0\x9d\x95\xa3\xf0\x9d\x95\x92\xf0\x9d"
      "\x95\x9e\x5f\xf0\x9d\x95\x93";

  // Substitute the placeholders with the unicode identifiers so the WGSL
  // template itself stays readable.
  std::string src = "fn $function($param_a : i32, $param_b : f32) { return; }";
  src = utils::ReplaceAll(src, "$function", function_ident);
  src = utils::ReplaceAll(src, "$param_a", param_a_ident);
  src = utils::ReplaceAll(src, "$param_b", param_b_ident);

  auto p = parser(src);
  auto attrs = p->attribute_list();
  EXPECT_FALSE(p->has_error()) << p->error();
  ASSERT_FALSE(attrs.errored);
  EXPECT_FALSE(attrs.matched);

  auto f = p->function_decl(attrs.value);
  EXPECT_FALSE(p->has_error()) << p->error();
  EXPECT_FALSE(f.errored);
  EXPECT_TRUE(f.matched);
  ASSERT_NE(f.value, nullptr);

  EXPECT_EQ(f->symbol, p->builder().Symbols().Get(function_ident));
  ASSERT_NE(f->return_type, nullptr);
  EXPECT_TRUE(f->return_type->Is<ast::Void>());

  ASSERT_EQ(f->params.size(), 2u);
  EXPECT_EQ(f->params[0]->symbol, p->builder().Symbols().Get(param_a_ident));
  EXPECT_EQ(f->params[1]->symbol, p->builder().Symbols().Get(param_b_ident));

  // Stop here rather than dereferencing a null body below.
  auto* body = f->body;
  ASSERT_NE(body, nullptr);
  ASSERT_EQ(body->statements.size(), 1u);
  EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>());
}
TEST_F(ParserImplTest, FunctionDecl_AttributeList) { TEST_F(ParserImplTest, FunctionDecl_AttributeList) {
auto p = parser("@workgroup_size(2, 3, 4) fn main() { return; }"); auto p = parser("@workgroup_size(2, 3, 4) fn main() { return; }");
auto attrs = p->attribute_list(); auto attrs = p->attribute_list();

View File

@ -14,6 +14,7 @@
#include "src/ast/struct_block_attribute.h" #include "src/ast/struct_block_attribute.h"
#include "src/reader/wgsl/parser_impl_test_helper.h" #include "src/reader/wgsl/parser_impl_test_helper.h"
#include "src/utils/string.h"
namespace tint { namespace tint {
namespace reader { namespace reader {
@ -42,6 +43,46 @@ struct S {
EXPECT_EQ(s->members[1]->symbol, p->builder().Symbols().Register("b")); EXPECT_EQ(s->members[1]->symbol, p->builder().Symbols().Register("b"));
} }
// Parses a struct declaration whose struct name and both member names are
// non-ASCII identifiers, and checks the resulting symbols against the original
// utf-8 text.
TEST_F(ParserImplTest, StructDecl_Unicode_Parses) {
const std::string struct_ident = // "𝓼𝓽𝓻𝓾𝓬𝓽𝓾𝓻𝓮"
"\xf0\x9d\x93\xbc\xf0\x9d\x93\xbd\xf0\x9d\x93\xbb\xf0\x9d\x93\xbe\xf0\x9d"
"\x93\xac\xf0\x9d\x93\xbd\xf0\x9d\x93\xbe\xf0\x9d\x93\xbb\xf0\x9d\x93"
"\xae";
const std::string member_a_ident = // "𝕞𝕖𝕞𝕓𝕖𝕣_𝕒"
"\xf0\x9d\x95\x9e\xf0\x9d\x95\x96\xf0\x9d\x95\x9e\xf0\x9d\x95\x93\xf0\x9d"
"\x95\x96\xf0\x9d\x95\xa3\x5f\xf0\x9d\x95\x92";
const std::string member_b_ident = // "𝔪𝔢𝔪𝔟𝔢𝔯_𝔟"
"\xf0\x9d\x94\xaa\xf0\x9d\x94\xa2\xf0\x9d\x94\xaa\xf0\x9d\x94\x9f\xf0\x9d"
"\x94\xa2\xf0\x9d\x94\xaf\x5f\xf0\x9d\x94\x9f";
// WGSL template; the `$` placeholders are substituted with the unicode
// identifiers below.
std::string src = R"(
struct $struct {
$member_a : i32;
$member_b : f32;
})";
src = utils::ReplaceAll(src, "$struct", struct_ident);
src = utils::ReplaceAll(src, "$member_a", member_a_ident);
src = utils::ReplaceAll(src, "$member_b", member_b_ident);
auto p = parser(src);
// No attributes precede the struct, so the attribute list must be empty.
auto attrs = p->attribute_list();
EXPECT_FALSE(attrs.errored);
EXPECT_FALSE(attrs.matched);
ASSERT_EQ(attrs.value.size(), 0u);
auto s = p->struct_decl(attrs.value);
EXPECT_FALSE(p->has_error());
EXPECT_FALSE(s.errored);
EXPECT_TRUE(s.matched);
ASSERT_NE(s.value, nullptr);
// The struct and member symbols are compared against symbols registered
// from the same utf-8 names.
ASSERT_EQ(s->name, p->builder().Symbols().Register(struct_ident));
ASSERT_EQ(s->members.size(), 2u);
EXPECT_EQ(s->members[0]->symbol,
p->builder().Symbols().Register(member_a_ident));
EXPECT_EQ(s->members[1]->symbol,
p->builder().Symbols().Register(member_b_ident));
}
TEST_F(ParserImplTest, StructDecl_ParsesWithAttribute) { TEST_F(ParserImplTest, StructDecl_ParsesWithAttribute) {
auto p = parser(R"( auto p = parser(R"(
[[block]] struct B { [[block]] struct B {

View File

@ -34,7 +34,7 @@ TEST_F(ParserImplTest, TypeDecl_ParsesType) {
EXPECT_EQ(t.value->source.range, (Source::Range{{1u, 1u}, {1u, 13u}})); EXPECT_EQ(t.value->source.range, (Source::Range{{1u, 1u}, {1u, 13u}}));
} }
TEST_F(ParserImplTest, TypeDecl_ParsesStruct_Ident) { TEST_F(ParserImplTest, TypeDecl_Parses_Ident) {
auto p = parser("type a = B"); auto p = parser("type a = B");
auto t = p->type_alias(); auto t = p->type_alias();
@ -49,6 +49,25 @@ TEST_F(ParserImplTest, TypeDecl_ParsesStruct_Ident) {
EXPECT_EQ(alias->source.range, (Source::Range{{1u, 1u}, {1u, 11u}})); EXPECT_EQ(alias->source.range, (Source::Range{{1u, 1u}, {1u, 11u}}));
} }
// Parses a type alias whose name is a non-ASCII identifier and checks the
// alias name, aliased type and source range.
TEST_F(ParserImplTest, TypeDecl_Unicode_Parses_Ident) {
  // "𝓶𝔂_𝓽𝔂𝓹𝓮"
  const std::string ident =
      "\xf0\x9d\x93\xb6\xf0\x9d\x94\x82\x5f\xf0\x9d\x93\xbd\xf0\x9d\x94\x82\xf0"
      "\x9d\x93\xb9\xf0\x9d\x93\xae";

  std::string wgsl = "type ";
  wgsl += ident;
  wgsl += " = i32";

  auto p = parser(wgsl);
  auto result = p->type_alias();
  EXPECT_FALSE(p->has_error());
  EXPECT_FALSE(result.errored);
  EXPECT_TRUE(result.matched);
  ASSERT_NE(result.value, nullptr);
  ASSERT_TRUE(result.value->Is<ast::Alias>());

  auto* alias = result.value->As<ast::Alias>();
  EXPECT_EQ(p->builder().Symbols().NameFor(alias->name), ident);
  EXPECT_TRUE(alias->type->Is<ast::I32>());

  // Columns are counted in utf-8 bytes, so the 36-byte declaration ends at
  // column 37.
  const Source::Range expected_range{{1u, 1u}, {1u, 37u}};
  EXPECT_EQ(alias->source.range, expected_range);
}
TEST_F(ParserImplTest, TypeDecl_MissingIdent) { TEST_F(ParserImplTest, TypeDecl_MissingIdent) {
auto p = parser("type = i32"); auto p = parser("type = i32");
auto t = p->type_alias(); auto t = p->type_alias();

View File

@ -18,7 +18,6 @@ namespace tint {
namespace reader { namespace reader {
namespace wgsl { namespace wgsl {
namespace { namespace {
TEST_F(ParserImplTest, VariableDecl_Parses) { TEST_F(ParserImplTest, VariableDecl_Parses) {
auto p = parser("var my_var : f32"); auto p = parser("var my_var : f32");
auto v = p->variable_decl(); auto v = p->variable_decl();
@ -33,6 +32,25 @@ TEST_F(ParserImplTest, VariableDecl_Parses) {
EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 14u}, {1u, 17u}})); EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 14u}, {1u, 17u}}));
} }
// Parses a variable declaration whose name is a non-ASCII identifier and
// checks the name, type and source ranges.
TEST_F(ParserImplTest, VariableDecl_Unicode_Parses) {
  const std::string ident =  // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
      "\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
      "\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
      "\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33";

  auto p = parser("var " + ident + " : f32");
  auto v = p->variable_decl();
  EXPECT_FALSE(p->has_error()) << p->error();
  ASSERT_TRUE(v.matched);
  EXPECT_FALSE(v.errored);
  EXPECT_EQ(v->name, ident);

  // Fatal assertion: the type is dereferenced on the following lines.
  ASSERT_NE(v->type, nullptr);
  EXPECT_TRUE(v->type->Is<ast::F32>());

  // Columns are counted in utf-8 bytes: the 43-byte identifier starts at
  // column 5 and therefore ends at column 48.
  EXPECT_EQ(v->source.range, (Source::Range{{1u, 5u}, {1u, 48u}}));
  EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 51u}, {1u, 54u}}));
}
TEST_F(ParserImplTest, VariableDecl_Inferred_Parses) { TEST_F(ParserImplTest, VariableDecl_Inferred_Parses) {
auto p = parser("var my_var = 1.0"); auto p = parser("var my_var = 1.0");
auto v = p->variable_decl(/*allow_inferred = */ true); auto v = p->variable_decl(/*allow_inferred = */ true);

View File

@ -27,7 +27,7 @@ namespace tint {
/// Source describes a range of characters within a source file. /// Source describes a range of characters within a source file.
class Source { class Source {
public: public:
/// FileContent describes the content of a source file. /// FileContent describes the content of a source file encoded using utf-8.
class FileContent { class FileContent {
public: public:
/// Constructs the FileContent with the given file content. /// Constructs the FileContent with the given file content.
@ -78,7 +78,8 @@ class Source {
public: public:
/// the 1-based line number. 0 represents no line information. /// the 1-based line number. 0 represents no line information.
size_t line = 0; size_t line = 0;
/// the 1-based column number. 0 represents no column information. /// the 1-based column number in utf8-code units (bytes).
/// 0 represents no column information.
size_t column = 0; size_t column = 0;
/// Returns true of `this` location is lexicographically less than `rhs` /// Returns true of `this` location is lexicographically less than `rhs`

View File

@ -427,7 +427,9 @@ std::ostream& operator<<(std::ostream& out, CodePoint code_point) {
return out << "'U+" << std::hex << code_point.value << "'"; return out << "'U+" << std::hex << code_point.value << "'";
} }
std::pair<CodePoint, size_t> utf8::Decode(const uint8_t* ptr, size_t len) { namespace utf8 {
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len) {
if (len < 1) { if (len < 1) {
return {}; return {};
} }
@ -490,4 +492,15 @@ std::pair<CodePoint, size_t> utf8::Decode(const uint8_t* ptr, size_t len) {
return {c, n}; return {c, n};
} }
/// @param str the string to inspect
/// @returns true if every element of `str` is in the range 0x00..0x7f, which
/// includes the empty string; false as soon as an element with its top bit
/// set is found.
bool IsASCII(std::string_view str) {
  for (size_t i = 0; i < str.size(); ++i) {
    // Any byte with bit 7 set lies outside 0x00..0x7f.
    if ((str[i] & 0x80) != 0) {
      return false;
    }
  }
  return true;
}
} // namespace utf8
} // namespace tint::text } // namespace tint::text

View File

@ -69,6 +69,10 @@ namespace utf8 {
/// If the next code point cannot be decoded then returns [0,0]. /// If the next code point cannot be decoded then returns [0,0].
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len); std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);
/// @returns true if all the utf-8 code points in the string are ASCII
/// (code-points 0x00..0x7f).
bool IsASCII(std::string_view);
} // namespace utf8 } // namespace utf8
} // namespace tint::text } // namespace tint::text

View File

@ -56,7 +56,8 @@ Output Glsl::Run(const Program* in, const DataMap& inputs) const {
data.Add<SingleEntryPoint::Config>(cfg->entry_point); data.Add<SingleEntryPoint::Config>(cfg->entry_point);
} }
manager.Add<Renamer>(); manager.Add<Renamer>();
data.Add<Renamer::Config>(Renamer::Target::kGlslKeywords); data.Add<Renamer::Config>(Renamer::Target::kGlslKeywords,
/* preserve_unicode */ false);
manager.Add<Unshadow>(); manager.Add<Unshadow>();
// Attempt to convert `loop`s into for-loops. This is to try and massage the // Attempt to convert `loop`s into for-loops. This is to try and massage the

View File

@ -21,13 +21,13 @@
#include "src/program_builder.h" #include "src/program_builder.h"
#include "src/sem/call.h" #include "src/sem/call.h"
#include "src/sem/member_accessor_expression.h" #include "src/sem/member_accessor_expression.h"
#include "src/text/unicode.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer); TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data); TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Config); TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Config);
namespace tint { namespace tint::transform {
namespace transform {
namespace { namespace {
@ -1245,7 +1245,7 @@ Renamer::Data::Data(Remappings&& r) : remappings(std::move(r)) {}
Renamer::Data::Data(const Data&) = default; Renamer::Data::Data(const Data&) = default;
Renamer::Data::~Data() = default; Renamer::Data::~Data() = default;
Renamer::Config::Config(Target t) : target(t) {} Renamer::Config::Config(Target t, bool pu) : target(t), preserve_unicode(pu) {}
Renamer::Config::Config(const Config&) = default; Renamer::Config::Config(const Config&) = default;
Renamer::Config::~Config() = default; Renamer::Config::~Config() = default;
@ -1293,13 +1293,16 @@ Output Renamer::Run(const Program* in, const DataMap& inputs) const {
Data::Remappings remappings; Data::Remappings remappings;
Target target = Target::kAll; Target target = Target::kAll;
bool preserve_unicode = false;
if (auto* cfg = inputs.Get<Config>()) { if (auto* cfg = inputs.Get<Config>()) {
target = cfg->target; target = cfg->target;
preserve_unicode = cfg->preserve_unicode;
} }
ctx.ReplaceAll([&](Symbol sym_in) { ctx.ReplaceAll([&](Symbol sym_in) {
auto name_in = ctx.src->Symbols().NameFor(sym_in); auto name_in = ctx.src->Symbols().NameFor(sym_in);
if (preserve_unicode || text::utf8::IsASCII(name_in)) {
switch (target) { switch (target) {
case Target::kAll: case Target::kAll:
// Always rename. // Always rename.
@ -1336,6 +1339,7 @@ Output Renamer::Run(const Program* in, const DataMap& inputs) const {
} }
break; break;
} }
}
auto sym_out = ctx.dst->Sym(); auto sym_out = ctx.dst->Sym();
remappings.emplace(name_in, ctx.dst->Symbols().NameFor(sym_out)); remappings.emplace(name_in, ctx.dst->Symbols().NameFor(sym_out));
@ -1359,5 +1363,4 @@ Output Renamer::Run(const Program* in, const DataMap& inputs) const {
std::make_unique<Data>(std::move(remappings))); std::make_unique<Data>(std::move(remappings)));
} }
} // namespace transform } // namespace tint::transform
} // namespace tint

View File

@ -20,8 +20,7 @@
#include "src/transform/transform.h" #include "src/transform/transform.h"
namespace tint { namespace tint::transform {
namespace transform {
/// Renamer is a Transform that renames all the symbols in a program. /// Renamer is a Transform that renames all the symbols in a program.
class Renamer : public Castable<Renamer, Transform> { class Renamer : public Castable<Renamer, Transform> {
@ -63,7 +62,9 @@ class Renamer : public Castable<Renamer, Transform> {
struct Config : public Castable<Config, transform::Data> { struct Config : public Castable<Config, transform::Data> {
/// Constructor /// Constructor
/// @param tgt the targets to rename /// @param tgt the targets to rename
explicit Config(Target tgt); /// @param keep_unicode if false, symbols with non-ascii code-points are
/// renamed
explicit Config(Target tgt, bool keep_unicode = false);
/// Copy constructor /// Copy constructor
Config(const Config&); Config(const Config&);
@ -73,6 +74,9 @@ class Renamer : public Castable<Renamer, Transform> {
/// The targets to rename /// The targets to rename
Target const target = Target::kAll; Target const target = Target::kAll;
/// If false, symbols with non-ascii code-points are renamed.
bool preserve_unicode = false;
}; };
/// Constructor using a the configuration provided in the input Data /// Constructor using a the configuration provided in the input Data
@ -88,7 +92,6 @@ class Renamer : public Castable<Renamer, Transform> {
Output Run(const Program* program, const DataMap& data = {}) const override; Output Run(const Program* program, const DataMap& data = {}) const override;
}; };
} // namespace transform } // namespace tint::transform
} // namespace tint
#endif // SRC_TRANSFORM_RENAMER_H_ #endif // SRC_TRANSFORM_RENAMER_H_

View File

@ -23,6 +23,11 @@ namespace tint {
namespace transform { namespace transform {
namespace { namespace {
/// A utf-8 encoded identifier containing non-ASCII code points followed by the
/// ASCII digits "123". Used by the PreserveUnicode test and appended to the
/// GLSL, HLSL and MSL keyword test lists.
constexpr const char kUnicodeIdentifier[] = // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
"\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
"\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
"\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33";
using ::testing::ContainerEq; using ::testing::ContainerEq;
using RenamerTest = TransformTest; using RenamerTest = TransformTest;
@ -184,6 +189,25 @@ fn tint_symbol() {
EXPECT_THAT(data->remappings, ContainerEq(expected_remappings)); EXPECT_THAT(data->remappings, ContainerEq(expected_remappings));
} }
// With preserve_unicode set to true and the MSL keyword target, a variable
// named with a non-ASCII identifier must pass through the Renamer unchanged.
TEST_F(RenamerTest, PreserveUnicode) {
auto src = R"(
@stage(fragment)
fn frag_main() {
var )" + std::string(kUnicodeIdentifier) +
R"( : i32;
}
)";
// The transform is expected to be a no-op, so the expected output is the
// input itself.
auto expect = src;
DataMap inputs;
inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords,
/* preserve_unicode */ true);
auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got));
}
TEST_F(RenamerTest, AttemptSymbolCollision) { TEST_F(RenamerTest, AttemptSymbolCollision) {
auto* src = R"( auto* src = R"(
@stage(vertex) @stage(vertex)
@ -244,7 +268,8 @@ fn frag_main() {
)"; )";
DataMap inputs; DataMap inputs;
inputs.Add<Renamer::Config>(Renamer::Target::kGlslKeywords); inputs.Add<Renamer::Config>(Renamer::Target::kGlslKeywords,
/* preserve_unicode */ false);
auto got = Run<Renamer>(src, inputs); auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got)); EXPECT_EQ(expect, str(got));
@ -269,7 +294,8 @@ fn frag_main() {
)"; )";
DataMap inputs; DataMap inputs;
inputs.Add<Renamer::Config>(Renamer::Target::kHlslKeywords); inputs.Add<Renamer::Config>(Renamer::Target::kHlslKeywords,
/* preserve_unicode */ false);
auto got = Run<Renamer>(src, inputs); auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got)); EXPECT_EQ(expect, str(got));
@ -294,7 +320,8 @@ fn frag_main() {
)"; )";
DataMap inputs; DataMap inputs;
inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords); inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords,
/* preserve_unicode */ false);
auto got = Run<Renamer>(src, inputs); auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got)); EXPECT_EQ(expect, str(got));
@ -528,7 +555,8 @@ INSTANTIATE_TEST_SUITE_P(RenamerTestGlsl,
// "void", // WGSL keyword // "void", // WGSL keyword
"volatile", "volatile",
// "while", // WGSL keyword // "while", // WGSL keyword
"writeonly")); "writeonly",
kUnicodeIdentifier));
INSTANTIATE_TEST_SUITE_P(RenamerTestHlsl, INSTANTIATE_TEST_SUITE_P(RenamerTestHlsl,
RenamerTestHlsl, RenamerTestHlsl,
@ -1142,8 +1170,9 @@ INSTANTIATE_TEST_SUITE_P(RenamerTestHlsl,
"vertexshader", "vertexshader",
"virtual", "virtual",
// "void", // WGSL keyword // "void", // WGSL keyword
"volatile")); "volatile",
// "while" // WGSL reserved keyword // "while" // WGSL reserved keyword
kUnicodeIdentifier));
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
RenamerTestMsl, RenamerTestMsl,
@ -1425,7 +1454,9 @@ INSTANTIATE_TEST_SUITE_P(
"M_2_PI_H", "M_2_PI_H",
"M_2_SQRTPI_H", "M_2_SQRTPI_H",
"M_SQRT2_H", "M_SQRT2_H",
"M_SQRT1_2_H")); "M_SQRT1_2_H",
// "while" // WGSL reserved keyword
kUnicodeIdentifier));
} // namespace } // namespace
} // namespace transform } // namespace transform

View File

@ -89,7 +89,8 @@ class TestHelperBase : public BODY, public ProgramBuilder {
transform::Manager transform_manager; transform::Manager transform_manager;
transform::DataMap transform_data; transform::DataMap transform_data;
transform_data.Add<transform::Renamer::Config>( transform_data.Add<transform::Renamer::Config>(
transform::Renamer::Target::kHlslKeywords); transform::Renamer::Target::kHlslKeywords,
/* preserve_unicode */ true);
transform_manager.Add<tint::transform::Renamer>(); transform_manager.Add<tint::transform::Renamer>();
auto result = auto result =
transform_manager.Run(&sanitized_result.program, transform_data); transform_manager.Run(&sanitized_result.program, transform_data);

View File

@ -0,0 +1,11 @@
// Øꚫ쁹Ǵ𐌒岾🥍又ᮗ
/*
/* 👋🌎 */
नमस्ते दुनिया
*/
@stage(fragment)
fn /* こんにちは世界 */ main( /* 你好世界 */ ) {
// 안녕하세요 세계
}

View File

@ -0,0 +1,10 @@
#version 310 es
precision mediump float;
void tint_symbol() {
}
void main() {
tint_symbol();
return;
}

View File

@ -0,0 +1,3 @@
void main() {
return;
}

View File

@ -0,0 +1,7 @@
#include <metal_stdlib>
using namespace metal;
fragment void tint_symbol() {
return;
}

View File

@ -0,0 +1,16 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 5
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
OpExecutionMode %main OriginUpperLeft
OpName %main "main"
%void = OpTypeVoid
%1 = OpTypeFunction %void
%main = OpFunction %void None %1
%4 = OpLabel
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,3 @@
@stage(fragment)
fn main() {
}

View File

@ -0,0 +1,12 @@
type 𝓉𝓎𝓅_𝒶 = i32;
type 𝐭𝐲𝐩𝐞_𝐛 = f32;
fn 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(ᵖᵃʳᵃᵐ : 𝓉𝓎𝓅_𝒶) -> 𝐭𝐲𝐩𝐞_𝐛 {
return 𝐭𝐲𝐩𝐞_𝐛(ᵖᵃʳᵃᵐ);
}
@stage(fragment)
fn 𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥() {
var 𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚 : 𝓉𝓎𝓅_𝒶;
let 𝖗𝖊𝖘𝖚𝖑𝖙 = 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚);
}

View File

@ -0,0 +1,16 @@
#version 310 es
precision mediump float;
float tint_symbol_2(int tint_symbol_3) {
return float(tint_symbol_3);
}
void tint_symbol_4() {
int tint_symbol_5 = 0;
float tint_symbol_6 = tint_symbol_2(tint_symbol_5);
}
void main() {
tint_symbol_4();
return;
}

View File

@ -0,0 +1,9 @@
float tint_symbol_2(int tint_symbol_3) {
return float(tint_symbol_3);
}
void tint_symbol_4() {
int tint_symbol_5 = 0;
const float tint_symbol_6 = tint_symbol_2(tint_symbol_5);
return;
}

View File

@ -0,0 +1,13 @@
#include <metal_stdlib>
using namespace metal;
float tint_symbol_2(int tint_symbol_3) {
return float(tint_symbol_3);
}
fragment void tint_symbol_4() {
int tint_symbol_5 = 0;
float const tint_symbol_6 = tint_symbol_2(tint_symbol_5);
return;
}

View File

@ -0,0 +1,33 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 17
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %________________________________________ "𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥"
OpExecutionMode %________________________________________ OriginUpperLeft
OpName %________________________________ "𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷"
OpName %______________ "ᵖᵃʳᵃᵐ"
OpName %________________________________________ "𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥"
OpName %_________________________________0 "𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚"
%float = OpTypeFloat 32
%int = OpTypeInt 32 1
%1 = OpTypeFunction %float %int
%void = OpTypeVoid
%8 = OpTypeFunction %void
%_ptr_Function_int = OpTypePointer Function %int
%14 = OpConstantNull %int
%________________________________ = OpFunction %float None %1
%______________ = OpFunctionParameter %int
%6 = OpLabel
%7 = OpConvertSToF %float %______________
OpReturnValue %7
OpFunctionEnd
%________________________________________ = OpFunction %void None %8
%11 = OpLabel
%_________________________________0 = OpVariable %_ptr_Function_int Function %14
%16 = OpLoad %int %_________________________________0
%15 = OpFunctionCall %float %________________________________ %16
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,13 @@
type 𝓉𝓎𝓅_𝒶 = i32;
type 𝐭𝐲𝐩𝐞_𝐛 = f32;
fn 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(ᵖᵃʳᵃᵐ : 𝓉𝓎𝓅_𝒶) -> 𝐭𝐲𝐩𝐞_𝐛 {
return 𝐭𝐲𝐩𝐞_𝐛(ᵖᵃʳᵃᵐ);
}
@stage(fragment)
fn 𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥() {
var 𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚 : 𝓉𝓎𝓅_𝒶;
let 𝖗𝖊𝖘𝖚𝖑𝖙 = 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚);
}