reader/wgsl: Support unicode identifiers
Bug: tint:1437 Change-Id: Ie00ccb3e93d207111e55117dfc989f79b76164bf Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/80844 Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: David Neto <dneto@google.com> Commit-Queue: Ben Clayton <bclayton@google.com>
This commit is contained in:
parent
1c6c6b19ab
commit
d29d3d5740
|
@ -12,6 +12,7 @@
|
|||
|
||||
* Module-scope declarations can now be declared in any order. [tint:1266](crbug.com/tint/1266)
|
||||
* The `override` keyword and `@id()` attribute for pipeline-overridable constants are now supported, replacing the `@override` attribute. [tint:1403](crbug.com/tint/1403)
|
||||
* Tint now supports unicode identifiers. [tint:1437](crbug.com/tint/1437)
|
||||
|
||||
## Changes for M99
|
||||
|
||||
|
|
|
@ -1119,7 +1119,8 @@ int main(int argc, const char** argv) {
|
|||
case Format::kMsl: {
|
||||
#if TINT_BUILD_MSL_WRITER
|
||||
transform_inputs.Add<tint::transform::Renamer::Config>(
|
||||
tint::transform::Renamer::Target::kMslKeywords);
|
||||
tint::transform::Renamer::Target::kMslKeywords,
|
||||
/* preserve_unicode */ false);
|
||||
transform_manager.Add<tint::transform::Renamer>();
|
||||
#endif // TINT_BUILD_MSL_WRITER
|
||||
break;
|
||||
|
@ -1132,7 +1133,8 @@ int main(int argc, const char** argv) {
|
|||
case Format::kHlsl: {
|
||||
#if TINT_BUILD_HLSL_WRITER
|
||||
transform_inputs.Add<tint::transform::Renamer::Config>(
|
||||
tint::transform::Renamer::Target::kHlslKeywords);
|
||||
tint::transform::Renamer::Target::kHlslKeywords,
|
||||
/* preserve_unicode */ false);
|
||||
transform_manager.Add<tint::transform::Renamer>();
|
||||
#endif // TINT_BUILD_HLSL_WRITER
|
||||
break;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <utility>
|
||||
|
||||
#include "src/debug.h"
|
||||
#include "src/text/unicode.h"
|
||||
|
||||
namespace tint {
|
||||
namespace reader {
|
||||
|
@ -113,18 +114,10 @@ bool Lexer::is_null() const {
|
|||
return (pos_ < len_) && (file_->content.data[pos_] == 0);
|
||||
}
|
||||
|
||||
/// @param ch a character (a single utf-8 code unit)
/// @returns true if 'ch' is an alphabetic character
bool Lexer::is_alpha(char ch) const {
  // The <cctype> classifiers have undefined behavior when given a value that
  // is neither EOF nor representable as unsigned char. Where `char` is signed,
  // bytes >= 0x80 are negative, so convert to unsigned char first.
  return std::isalpha(static_cast<unsigned char>(ch)) != 0;
}
|
||||
|
||||
/// @param ch a character (a single utf-8 code unit)
/// @returns true if 'ch' is a decimal digit
bool Lexer::is_digit(char ch) const {
  // The <cctype> classifiers have undefined behavior when given a value that
  // is neither EOF nor representable as unsigned char. Where `char` is signed,
  // bytes >= 0x80 are negative, so convert to unsigned char first.
  return std::isdigit(static_cast<unsigned char>(ch)) != 0;
}
|
||||
|
||||
bool Lexer::is_alphanum_underscore(char ch) const {
|
||||
return is_alpha(ch) || is_digit(ch) || ch == '_';
|
||||
}
|
||||
|
||||
/// @param ch a character (a single utf-8 code unit)
/// @returns true if 'ch' is a hexadecimal digit
bool Lexer::is_hex(char ch) const {
  // The <cctype> classifiers have undefined behavior when given a value that
  // is neither EOF nor representable as unsigned char. Where `char` is signed,
  // bytes >= 0x80 are negative, so convert to unsigned char first.
  return std::isxdigit(static_cast<unsigned char>(ch)) != 0;
}
|
||||
|
@ -733,31 +726,52 @@ Token Lexer::try_integer() {
|
|||
}
|
||||
|
||||
Token Lexer::try_ident() {
|
||||
// Must begin with an a-zA-Z_
|
||||
if (!(is_alpha(file_->content.data[pos_]) ||
|
||||
file_->content.data[pos_] == '_')) {
|
||||
return {};
|
||||
}
|
||||
|
||||
auto source = begin_source();
|
||||
auto start = pos_;
|
||||
|
||||
auto s = pos_;
|
||||
while (!is_eof() && is_alphanum_underscore(file_->content.data[pos_])) {
|
||||
pos_++;
|
||||
location_.column++;
|
||||
// This below assumes that the size of a single std::string element is 1 byte.
|
||||
static_assert(sizeof(file_->content.data[0]) == sizeof(uint8_t),
|
||||
"tint::reader::wgsl requires the size of a std::string element "
|
||||
"to be a single byte");
|
||||
|
||||
// Must begin with an XID_Start unicode character, or underscore
|
||||
{
|
||||
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
|
||||
auto [code_point, n] =
|
||||
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
|
||||
if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
|
||||
return {};
|
||||
}
|
||||
// Consume start codepoint
|
||||
pos_ += n;
|
||||
location_.column += n;
|
||||
}
|
||||
|
||||
if (file_->content.data[s] == '_') {
|
||||
while (!is_eof()) {
|
||||
// Must continue with an XID_Continue unicode character
|
||||
auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
|
||||
auto [code_point, n] =
|
||||
text::utf8::Decode(utf8, file_->content.data.size() - pos_);
|
||||
if (!code_point.IsXIDContinue()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Consume continuing codepoint
|
||||
pos_ += n;
|
||||
location_.column += n;
|
||||
}
|
||||
|
||||
if (file_->content.data[start] == '_') {
|
||||
// Check for an underscore on its own (special token), or a
|
||||
// double-underscore (not allowed).
|
||||
if ((pos_ == s + 1) || (file_->content.data[s + 1] == '_')) {
|
||||
location_.column -= (pos_ - s);
|
||||
pos_ = s;
|
||||
if ((pos_ == start + 1) || (file_->content.data[start + 1] == '_')) {
|
||||
location_.column -= (pos_ - start);
|
||||
pos_ = start;
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
auto str = file_->content.data_view.substr(s, pos_ - s);
|
||||
auto str = file_->content.data_view.substr(start, pos_ - start);
|
||||
end_source(source);
|
||||
|
||||
auto t = check_keyword(source, str);
|
||||
|
|
|
@ -76,25 +76,18 @@ class Lexer {
|
|||
/// it is not null.
|
||||
bool is_null() const;
|
||||
/// @param ch a character
|
||||
/// @returns true if 'ch' is an alphabetic character
|
||||
bool is_alpha(char ch) const;
|
||||
/// @param ch a character
|
||||
/// @returns true if 'ch' is a decimal digit
|
||||
bool is_digit(char ch) const;
|
||||
/// @param ch a character
|
||||
/// @returns true if 'ch' is a hexadecimal digit
|
||||
bool is_hex(char ch) const;
|
||||
/// @param ch a character
|
||||
/// @returns true if 'ch' is a digit, an alphabetic character,
|
||||
/// or an underscore.
|
||||
bool is_alphanum_underscore(char ch) const;
|
||||
bool matches(size_t pos, std::string_view substr);
|
||||
|
||||
/// The source file content
|
||||
Source::File const* const file_;
|
||||
/// The length of the input
|
||||
uint32_t len_ = 0;
|
||||
/// The current position within the input
|
||||
/// The current position in utf-8 code units (bytes) within the input
|
||||
uint32_t pos_ = 0;
|
||||
/// The current location within the input
|
||||
Source::Location location_;
|
||||
|
|
|
@ -315,8 +315,8 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"2.5E+ 123",
|
||||
"2.5E- 123"));
|
||||
|
||||
using IdentifierTest = testing::TestWithParam<const char*>;
|
||||
TEST_P(IdentifierTest, Parse) {
|
||||
using AsciiIdentifierTest = testing::TestWithParam<const char*>;
|
||||
TEST_P(AsciiIdentifierTest, Parse) {
|
||||
Source::File file("", GetParam());
|
||||
Lexer l(&file);
|
||||
|
||||
|
@ -329,7 +329,7 @@ TEST_P(IdentifierTest, Parse) {
|
|||
EXPECT_EQ(t.to_str(), GetParam());
|
||||
}
|
||||
INSTANTIATE_TEST_SUITE_P(LexerTest,
|
||||
IdentifierTest,
|
||||
AsciiIdentifierTest,
|
||||
testing::Values("a",
|
||||
"test",
|
||||
"test01",
|
||||
|
@ -342,6 +342,57 @@ INSTANTIATE_TEST_SUITE_P(LexerTest,
|
|||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
"alldigits_0123456789"));
|
||||
|
||||
/// A single parameter for UnicodeIdentifierTest.
struct UnicodeCase {
  /// The identifier to lex, as a null-terminated utf-8 string
  const char* utf8;
  /// The number of utf-8 code units the identifier is expected to span; the
  /// test adds this to the start column to get the expected end column
  size_t code_units;
};
|
||||
|
||||
using UnicodeIdentifierTest = testing::TestWithParam<UnicodeCase>;
|
||||
TEST_P(UnicodeIdentifierTest, Parse) {
|
||||
Source::File file("", GetParam().utf8);
|
||||
Lexer l(&file);
|
||||
|
||||
auto t = l.next();
|
||||
EXPECT_TRUE(t.IsIdentifier());
|
||||
EXPECT_EQ(t.source().range.begin.line, 1u);
|
||||
EXPECT_EQ(t.source().range.begin.column, 1u);
|
||||
EXPECT_EQ(t.source().range.end.line, 1u);
|
||||
EXPECT_EQ(t.source().range.end.column, 1u + GetParam().code_units);
|
||||
EXPECT_EQ(t.to_str(), GetParam().utf8);
|
||||
}
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
LexerTest,
|
||||
UnicodeIdentifierTest,
|
||||
testing::ValuesIn({
|
||||
UnicodeCase{// "𝐢𝐝𝐞𝐧𝐭𝐢𝐟𝐢𝐞𝐫"
|
||||
"\xf0\x9d\x90\xa2\xf0\x9d\x90\x9d\xf0\x9d\x90\x9e\xf0\x9d"
|
||||
"\x90\xa7\xf0\x9d\x90\xad\xf0\x9d\x90\xa2\xf0\x9d\x90\x9f"
|
||||
"\xf0\x9d\x90\xa2\xf0\x9d\x90\x9e\xf0\x9d\x90\xab",
|
||||
40},
|
||||
UnicodeCase{// "𝑖𝑑𝑒𝑛𝑡𝑖𝑓𝑖𝑒𝑟"
|
||||
"\xf0\x9d\x91\x96\xf0\x9d\x91\x91\xf0\x9d\x91\x92\xf0\x9d"
|
||||
"\x91\x9b\xf0\x9d\x91\xa1\xf0\x9d\x91\x96\xf0\x9d\x91\x93"
|
||||
"\xf0\x9d\x91\x96\xf0\x9d\x91\x92\xf0\x9d\x91\x9f",
|
||||
40},
|
||||
UnicodeCase{
|
||||
// "ｉｄｅｎｔｉｆｉｅｒ"
|
||||
"\xef\xbd\x89\xef\xbd\x84\xef\xbd\x85\xef\xbd\x8e\xef\xbd\x94\xef"
|
||||
"\xbd\x89\xef\xbd\x86\xef\xbd\x89\xef\xbd\x85\xef\xbd\x92",
|
||||
30},
|
||||
UnicodeCase{// "𝕚𝕕𝕖𝕟𝕥𝕚𝕗𝕚𝕖𝕣𝟙𝟚𝟛"
|
||||
"\xf0\x9d\x95\x9a\xf0\x9d\x95\x95\xf0\x9d\x95\x96\xf0\x9d"
|
||||
"\x95\x9f\xf0\x9d\x95\xa5\xf0\x9d\x95\x9a\xf0\x9d\x95\x97"
|
||||
"\xf0\x9d\x95\x9a\xf0\x9d\x95\x96\xf0\x9d\x95\xa3\xf0\x9d"
|
||||
"\x9f\x99\xf0\x9d\x9f\x9a\xf0\x9d\x9f\x9b",
|
||||
52},
|
||||
UnicodeCase{
|
||||
// "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
|
||||
"\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
|
||||
"\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
|
||||
"\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33",
|
||||
43},
|
||||
}));
|
||||
|
||||
TEST_F(LexerTest, IdentifierTest_SingleUnderscoreDoesNotMatch) {
|
||||
Source::File file("", "_");
|
||||
Lexer l(&file);
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "src/ast/workgroup_attribute.h"
|
||||
#include "src/reader/wgsl/parser_impl_test_helper.h"
|
||||
#include "src/utils/string.h"
|
||||
|
||||
namespace tint {
|
||||
namespace reader {
|
||||
|
@ -48,6 +49,51 @@ TEST_F(ParserImplTest, FunctionDecl) {
|
|||
EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>());
|
||||
}
|
||||
|
||||
// Checks that a function declaration whose function name and parameter names
// are non-ASCII (unicode) identifiers is parsed, and that the resulting
// symbols match the utf-8 identifiers used in the source.
TEST_F(ParserImplTest, FunctionDecl_Unicode) {
  const std::string function_ident =  // "𝗳𝘂𝗻𝗰𝘁𝗶𝗼𝗻"
      "\xf0\x9d\x97\xb3\xf0\x9d\x98\x82\xf0\x9d\x97\xbb\xf0\x9d\x97\xb0\xf0\x9d"
      "\x98\x81\xf0\x9d\x97\xb6\xf0\x9d\x97\xbc\xf0\x9d\x97\xbb";

  const std::string param_a_ident =  // "𝓹𝓪𝓻𝓪𝓶_𝓪"
      "\xf0\x9d\x93\xb9\xf0\x9d\x93\xaa\xf0\x9d\x93\xbb\xf0\x9d\x93\xaa\xf0\x9d"
      "\x93\xb6\x5f\xf0\x9d\x93\xaa";

  const std::string param_b_ident =  // "𝕡𝕒𝕣𝕒𝕞_𝕓"
      "\xf0\x9d\x95\xa1\xf0\x9d\x95\x92\xf0\x9d\x95\xa3\xf0\x9d\x95\x92\xf0\x9d"
      "\x95\x9e\x5f\xf0\x9d\x95\x93";

  // Substitute the placeholders with the unicode identifiers.
  std::string src = "fn $function($param_a : i32, $param_b : f32) { return; }";
  src = utils::ReplaceAll(src, "$function", function_ident);
  src = utils::ReplaceAll(src, "$param_a", param_a_ident);
  src = utils::ReplaceAll(src, "$param_b", param_b_ident);

  auto p = parser(src);
  auto attrs = p->attribute_list();
  EXPECT_FALSE(p->has_error()) << p->error();
  ASSERT_FALSE(attrs.errored);
  EXPECT_FALSE(attrs.matched);
  auto f = p->function_decl(attrs.value);
  EXPECT_FALSE(p->has_error()) << p->error();
  EXPECT_FALSE(f.errored);
  EXPECT_TRUE(f.matched);
  ASSERT_NE(f.value, nullptr);

  EXPECT_EQ(f->symbol, p->builder().Symbols().Get(function_ident));

  // The return type only needs to be checked once; the original repeated
  // these two assertions after the parameter checks.
  ASSERT_NE(f->return_type, nullptr);
  EXPECT_TRUE(f->return_type->Is<ast::Void>());

  ASSERT_EQ(f->params.size(), 2u);
  EXPECT_EQ(f->params[0]->symbol, p->builder().Symbols().Get(param_a_ident));
  EXPECT_EQ(f->params[1]->symbol, p->builder().Symbols().Get(param_b_ident));

  auto* body = f->body;
  ASSERT_EQ(body->statements.size(), 1u);
  EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>());
}
|
||||
|
||||
TEST_F(ParserImplTest, FunctionDecl_AttributeList) {
|
||||
auto p = parser("@workgroup_size(2, 3, 4) fn main() { return; }");
|
||||
auto attrs = p->attribute_list();
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "src/ast/struct_block_attribute.h"
|
||||
#include "src/reader/wgsl/parser_impl_test_helper.h"
|
||||
#include "src/utils/string.h"
|
||||
|
||||
namespace tint {
|
||||
namespace reader {
|
||||
|
@ -42,6 +43,46 @@ struct S {
|
|||
EXPECT_EQ(s->members[1]->symbol, p->builder().Symbols().Register("b"));
|
||||
}
|
||||
|
||||
// Checks that a struct declaration whose struct name and member names are
// non-ASCII (unicode) identifiers is parsed, and that the struct and member
// symbols match the utf-8 identifiers used in the source.
TEST_F(ParserImplTest, StructDecl_Unicode_Parses) {
  const std::string struct_ident =  // "𝓼𝓽𝓻𝓾𝓬𝓽𝓾𝓻𝓮"
      "\xf0\x9d\x93\xbc\xf0\x9d\x93\xbd\xf0\x9d\x93\xbb\xf0\x9d\x93\xbe\xf0\x9d"
      "\x93\xac\xf0\x9d\x93\xbd\xf0\x9d\x93\xbe\xf0\x9d\x93\xbb\xf0\x9d\x93"
      "\xae";
  const std::string member_a_ident =  // "𝕞𝕖𝕞𝕓𝕖𝕣_𝕒"
      "\xf0\x9d\x95\x9e\xf0\x9d\x95\x96\xf0\x9d\x95\x9e\xf0\x9d\x95\x93\xf0\x9d"
      "\x95\x96\xf0\x9d\x95\xa3\x5f\xf0\x9d\x95\x92";
  const std::string member_b_ident =  // "𝔪𝔢𝔪𝔟𝔢𝔯_𝔟"
      "\xf0\x9d\x94\xaa\xf0\x9d\x94\xa2\xf0\x9d\x94\xaa\xf0\x9d\x94\x9f\xf0\x9d"
      "\x94\xa2\xf0\x9d\x94\xaf\x5f\xf0\x9d\x94\x9f";

  // WGSL template; each $placeholder is swapped for a unicode identifier.
  std::string wgsl = R"(
struct $struct {
$member_a : i32;
$member_b : f32;
})";
  wgsl = utils::ReplaceAll(wgsl, "$struct", struct_ident);
  wgsl = utils::ReplaceAll(wgsl, "$member_a", member_a_ident);
  wgsl = utils::ReplaceAll(wgsl, "$member_b", member_b_ident);

  auto impl = parser(wgsl);

  // No attributes precede the struct.
  auto attrs = impl->attribute_list();
  EXPECT_FALSE(attrs.errored);
  EXPECT_FALSE(attrs.matched);
  ASSERT_EQ(attrs.value.size(), 0u);

  auto decl = impl->struct_decl(attrs.value);
  EXPECT_FALSE(impl->has_error());
  EXPECT_FALSE(decl.errored);
  EXPECT_TRUE(decl.matched);
  ASSERT_NE(decl.value, nullptr);

  ASSERT_EQ(decl->name, impl->builder().Symbols().Register(struct_ident));
  ASSERT_EQ(decl->members.size(), 2u);
  EXPECT_EQ(decl->members[0]->symbol,
            impl->builder().Symbols().Register(member_a_ident));
  EXPECT_EQ(decl->members[1]->symbol,
            impl->builder().Symbols().Register(member_b_ident));
}
|
||||
|
||||
TEST_F(ParserImplTest, StructDecl_ParsesWithAttribute) {
|
||||
auto p = parser(R"(
|
||||
[[block]] struct B {
|
||||
|
|
|
@ -34,7 +34,7 @@ TEST_F(ParserImplTest, TypeDecl_ParsesType) {
|
|||
EXPECT_EQ(t.value->source.range, (Source::Range{{1u, 1u}, {1u, 13u}}));
|
||||
}
|
||||
|
||||
TEST_F(ParserImplTest, TypeDecl_ParsesStruct_Ident) {
|
||||
TEST_F(ParserImplTest, TypeDecl_Parses_Ident) {
|
||||
auto p = parser("type a = B");
|
||||
|
||||
auto t = p->type_alias();
|
||||
|
@ -49,6 +49,25 @@ TEST_F(ParserImplTest, TypeDecl_ParsesStruct_Ident) {
|
|||
EXPECT_EQ(alias->source.range, (Source::Range{{1u, 1u}, {1u, 11u}}));
|
||||
}
|
||||
|
||||
// Checks that a type alias whose name is a non-ASCII (unicode) identifier is
// parsed as an ast::Alias of i32, and that the alias source range is measured
// in utf-8 code units.
TEST_F(ParserImplTest, TypeDecl_Unicode_Parses_Ident) {
  const std::string ident =  // "𝓶𝔂_𝓽𝔂𝓹𝓮"
      "\xf0\x9d\x93\xb6\xf0\x9d\x94\x82\x5f\xf0\x9d\x93\xbd\xf0\x9d\x94\x82\xf0"
      "\x9d\x93\xb9\xf0\x9d\x93\xae";

  const std::string wgsl = "type " + ident + " = i32";
  auto impl = parser(wgsl);

  auto decl = impl->type_alias();
  EXPECT_FALSE(impl->has_error());
  EXPECT_FALSE(decl.errored);
  EXPECT_TRUE(decl.matched);
  ASSERT_NE(decl.value, nullptr);
  ASSERT_TRUE(decl.value->Is<ast::Alias>());

  auto* alias = decl.value->As<ast::Alias>();
  EXPECT_EQ(impl->builder().Symbols().NameFor(alias->name), ident);
  EXPECT_TRUE(alias->type->Is<ast::I32>());

  const Source::Range expected_range{{1u, 1u}, {1u, 37u}};
  EXPECT_EQ(alias->source.range, expected_range);
}
|
||||
|
||||
TEST_F(ParserImplTest, TypeDecl_MissingIdent) {
|
||||
auto p = parser("type = i32");
|
||||
auto t = p->type_alias();
|
||||
|
|
|
@ -18,7 +18,6 @@ namespace tint {
|
|||
namespace reader {
|
||||
namespace wgsl {
|
||||
namespace {
|
||||
|
||||
TEST_F(ParserImplTest, VariableDecl_Parses) {
|
||||
auto p = parser("var my_var : f32");
|
||||
auto v = p->variable_decl();
|
||||
|
@ -33,6 +32,25 @@ TEST_F(ParserImplTest, VariableDecl_Parses) {
|
|||
EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 14u}, {1u, 17u}}));
|
||||
}
|
||||
|
||||
// Checks that a variable declaration whose name is a non-ASCII (unicode)
// identifier is parsed, and that the source ranges of the name and of the
// type are measured in utf-8 code units.
TEST_F(ParserImplTest, VariableDecl_Unicode_Parses) {
  const std::string ident =  // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
      "\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
      "\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
      "\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33";

  auto p = parser("var " + ident + " : f32");
  auto v = p->variable_decl();
  EXPECT_FALSE(p->has_error());
  EXPECT_TRUE(v.matched);
  EXPECT_FALSE(v.errored);
  EXPECT_EQ(v->name, ident);
  // v->type is dereferenced below, so a null type must abort the test rather
  // than be recorded as a non-fatal failure.
  ASSERT_NE(v->type, nullptr);
  EXPECT_TRUE(v->type->Is<ast::F32>());

  EXPECT_EQ(v->source.range, (Source::Range{{1u, 5u}, {1u, 48u}}));
  EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 51u}, {1u, 54u}}));
}
|
||||
|
||||
TEST_F(ParserImplTest, VariableDecl_Inferred_Parses) {
|
||||
auto p = parser("var my_var = 1.0");
|
||||
auto v = p->variable_decl(/*allow_inferred = */ true);
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace tint {
|
|||
/// Source describes a range of characters within a source file.
|
||||
class Source {
|
||||
public:
|
||||
/// FileContent describes the content of a source file.
|
||||
/// FileContent describes the content of a source file encoded using utf-8.
|
||||
class FileContent {
|
||||
public:
|
||||
/// Constructs the FileContent with the given file content.
|
||||
|
@ -78,7 +78,8 @@ class Source {
|
|||
public:
|
||||
/// the 1-based line number. 0 represents no line information.
|
||||
size_t line = 0;
|
||||
/// the 1-based column number. 0 represents no column information.
|
||||
/// the 1-based column number in utf8-code units (bytes).
|
||||
/// 0 represents no column information.
|
||||
size_t column = 0;
|
||||
|
||||
/// Returns true if `this` location is lexicographically less than `rhs`
|
||||
|
|
|
@ -427,7 +427,9 @@ std::ostream& operator<<(std::ostream& out, CodePoint code_point) {
|
|||
return out << "'U+" << std::hex << code_point.value << "'";
|
||||
}
|
||||
|
||||
std::pair<CodePoint, size_t> utf8::Decode(const uint8_t* ptr, size_t len) {
|
||||
namespace utf8 {
|
||||
|
||||
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len) {
|
||||
if (len < 1) {
|
||||
return {};
|
||||
}
|
||||
|
@ -490,4 +492,15 @@ std::pair<CodePoint, size_t> utf8::Decode(const uint8_t* ptr, size_t len) {
|
|||
return {c, n};
|
||||
}
|
||||
|
||||
/// @param str the utf-8 encoded string to inspect
/// @returns true if no code unit of `str` has its top bit (0x80) set, i.e.
/// every code point is in the ASCII range 0x00..0x7f. An empty string is
/// ASCII.
bool IsASCII(std::string_view str) {
  for (size_t i = 0; i < str.size(); ++i) {
    const bool high_bit_set = (str[i] & 0x80) != 0;
    if (high_bit_set) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
} // namespace tint::text
|
||||
|
|
|
@ -69,6 +69,10 @@ namespace utf8 {
|
|||
/// If the next code point cannot be decoded then returns [0,0].
|
||||
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);
|
||||
|
||||
/// @returns true if all the utf-8 code points in the string are ASCII
|
||||
/// (code-points 0x00..0x7f).
|
||||
bool IsASCII(std::string_view);
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
} // namespace tint::text
|
||||
|
|
|
@ -56,7 +56,8 @@ Output Glsl::Run(const Program* in, const DataMap& inputs) const {
|
|||
data.Add<SingleEntryPoint::Config>(cfg->entry_point);
|
||||
}
|
||||
manager.Add<Renamer>();
|
||||
data.Add<Renamer::Config>(Renamer::Target::kGlslKeywords);
|
||||
data.Add<Renamer::Config>(Renamer::Target::kGlslKeywords,
|
||||
/* preserve_unicode */ false);
|
||||
manager.Add<Unshadow>();
|
||||
|
||||
// Attempt to convert `loop`s into for-loops. This is to try and massage the
|
||||
|
|
|
@ -21,13 +21,13 @@
|
|||
#include "src/program_builder.h"
|
||||
#include "src/sem/call.h"
|
||||
#include "src/sem/member_accessor_expression.h"
|
||||
#include "src/text/unicode.h"
|
||||
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
|
||||
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Config);
|
||||
|
||||
namespace tint {
|
||||
namespace transform {
|
||||
namespace tint::transform {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -1245,7 +1245,7 @@ Renamer::Data::Data(Remappings&& r) : remappings(std::move(r)) {}
|
|||
Renamer::Data::Data(const Data&) = default;
|
||||
Renamer::Data::~Data() = default;
|
||||
|
||||
Renamer::Config::Config(Target t) : target(t) {}
|
||||
Renamer::Config::Config(Target t, bool pu) : target(t), preserve_unicode(pu) {}
|
||||
Renamer::Config::Config(const Config&) = default;
|
||||
Renamer::Config::~Config() = default;
|
||||
|
||||
|
@ -1293,48 +1293,52 @@ Output Renamer::Run(const Program* in, const DataMap& inputs) const {
|
|||
Data::Remappings remappings;
|
||||
|
||||
Target target = Target::kAll;
|
||||
bool preserve_unicode = false;
|
||||
|
||||
if (auto* cfg = inputs.Get<Config>()) {
|
||||
target = cfg->target;
|
||||
preserve_unicode = cfg->preserve_unicode;
|
||||
}
|
||||
|
||||
ctx.ReplaceAll([&](Symbol sym_in) {
|
||||
auto name_in = ctx.src->Symbols().NameFor(sym_in);
|
||||
switch (target) {
|
||||
case Target::kAll:
|
||||
// Always rename.
|
||||
break;
|
||||
case Target::kGlslKeywords:
|
||||
if (!std::binary_search(
|
||||
kReservedKeywordsGLSL,
|
||||
kReservedKeywordsGLSL +
|
||||
sizeof(kReservedKeywordsGLSL) / sizeof(const char*),
|
||||
name_in) &&
|
||||
name_in.compare(0, 3, "gl_")) {
|
||||
// No match, just reuse the original name.
|
||||
return ctx.dst->Symbols().New(name_in);
|
||||
}
|
||||
break;
|
||||
case Target::kHlslKeywords:
|
||||
if (!std::binary_search(
|
||||
kReservedKeywordsHLSL,
|
||||
kReservedKeywordsHLSL +
|
||||
sizeof(kReservedKeywordsHLSL) / sizeof(const char*),
|
||||
name_in)) {
|
||||
// No match, just reuse the original name.
|
||||
return ctx.dst->Symbols().New(name_in);
|
||||
}
|
||||
break;
|
||||
case Target::kMslKeywords:
|
||||
if (!std::binary_search(
|
||||
kReservedKeywordsMSL,
|
||||
kReservedKeywordsMSL +
|
||||
sizeof(kReservedKeywordsMSL) / sizeof(const char*),
|
||||
name_in)) {
|
||||
// No match, just reuse the original name.
|
||||
return ctx.dst->Symbols().New(name_in);
|
||||
}
|
||||
break;
|
||||
if (preserve_unicode || text::utf8::IsASCII(name_in)) {
|
||||
switch (target) {
|
||||
case Target::kAll:
|
||||
// Always rename.
|
||||
break;
|
||||
case Target::kGlslKeywords:
|
||||
if (!std::binary_search(
|
||||
kReservedKeywordsGLSL,
|
||||
kReservedKeywordsGLSL +
|
||||
sizeof(kReservedKeywordsGLSL) / sizeof(const char*),
|
||||
name_in) &&
|
||||
name_in.compare(0, 3, "gl_")) {
|
||||
// No match, just reuse the original name.
|
||||
return ctx.dst->Symbols().New(name_in);
|
||||
}
|
||||
break;
|
||||
case Target::kHlslKeywords:
|
||||
if (!std::binary_search(
|
||||
kReservedKeywordsHLSL,
|
||||
kReservedKeywordsHLSL +
|
||||
sizeof(kReservedKeywordsHLSL) / sizeof(const char*),
|
||||
name_in)) {
|
||||
// No match, just reuse the original name.
|
||||
return ctx.dst->Symbols().New(name_in);
|
||||
}
|
||||
break;
|
||||
case Target::kMslKeywords:
|
||||
if (!std::binary_search(
|
||||
kReservedKeywordsMSL,
|
||||
kReservedKeywordsMSL +
|
||||
sizeof(kReservedKeywordsMSL) / sizeof(const char*),
|
||||
name_in)) {
|
||||
// No match, just reuse the original name.
|
||||
return ctx.dst->Symbols().New(name_in);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto sym_out = ctx.dst->Sym();
|
||||
|
@ -1359,5 +1363,4 @@ Output Renamer::Run(const Program* in, const DataMap& inputs) const {
|
|||
std::make_unique<Data>(std::move(remappings)));
|
||||
}
|
||||
|
||||
} // namespace transform
|
||||
} // namespace tint
|
||||
} // namespace tint::transform
|
||||
|
|
|
@ -20,8 +20,7 @@
|
|||
|
||||
#include "src/transform/transform.h"
|
||||
|
||||
namespace tint {
|
||||
namespace transform {
|
||||
namespace tint::transform {
|
||||
|
||||
/// Renamer is a Transform that renames all the symbols in a program.
|
||||
class Renamer : public Castable<Renamer, Transform> {
|
||||
|
@ -63,7 +62,9 @@ class Renamer : public Castable<Renamer, Transform> {
|
|||
struct Config : public Castable<Config, transform::Data> {
|
||||
/// Constructor
|
||||
/// @param tgt the targets to rename
|
||||
explicit Config(Target tgt);
|
||||
/// @param keep_unicode if false, symbols with non-ascii code-points are
|
||||
/// renamed
|
||||
explicit Config(Target tgt, bool keep_unicode = false);
|
||||
|
||||
/// Copy constructor
|
||||
Config(const Config&);
|
||||
|
@ -73,6 +74,9 @@ class Renamer : public Castable<Renamer, Transform> {
|
|||
|
||||
/// The targets to rename
|
||||
Target const target = Target::kAll;
|
||||
|
||||
/// If false, symbols with non-ascii code-points are renamed.
|
||||
bool preserve_unicode = false;
|
||||
};
|
||||
|
||||
/// Constructor using the configuration provided in the input Data
|
||||
|
@ -88,7 +92,6 @@ class Renamer : public Castable<Renamer, Transform> {
|
|||
Output Run(const Program* program, const DataMap& data = {}) const override;
|
||||
};
|
||||
|
||||
} // namespace transform
|
||||
} // namespace tint
|
||||
} // namespace tint::transform
|
||||
|
||||
#endif // SRC_TRANSFORM_RENAMER_H_
|
||||
|
|
|
@ -23,6 +23,11 @@ namespace tint {
|
|||
namespace transform {
|
||||
namespace {
|
||||
|
||||
constexpr const char kUnicodeIdentifier[] = // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
|
||||
"\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
|
||||
"\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
|
||||
"\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33";
|
||||
|
||||
using ::testing::ContainerEq;
|
||||
|
||||
using RenamerTest = TransformTest;
|
||||
|
@ -184,6 +189,25 @@ fn tint_symbol() {
|
|||
EXPECT_THAT(data->remappings, ContainerEq(expected_remappings));
|
||||
}
|
||||
|
||||
// Runs the Renamer with Target::kMslKeywords and preserve_unicode = true over
// a fragment shader that declares a variable named kUnicodeIdentifier, and
// expects the output to be identical to the input (`expect` is a copy of
// `src`), i.e. the non-ASCII identifier is left unrenamed.
TEST_F(RenamerTest, PreserveUnicode) {
|
||||
auto src = R"(
|
||||
@stage(fragment)
|
||||
fn frag_main() {
|
||||
var )" + std::string(kUnicodeIdentifier) +
|
||||
R"( : i32;
|
||||
}
|
||||
)";
|
||||
|
||||
auto expect = src;
|
||||
|
||||
DataMap inputs;
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords,
|
||||
/* preserve_unicode */ true);
|
||||
auto got = Run<Renamer>(src, inputs);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
}
|
||||
|
||||
TEST_F(RenamerTest, AttemptSymbolCollision) {
|
||||
auto* src = R"(
|
||||
@stage(vertex)
|
||||
|
@ -244,7 +268,8 @@ fn frag_main() {
|
|||
)";
|
||||
|
||||
DataMap inputs;
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kGlslKeywords);
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kGlslKeywords,
|
||||
/* preserve_unicode */ false);
|
||||
auto got = Run<Renamer>(src, inputs);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
|
@ -269,7 +294,8 @@ fn frag_main() {
|
|||
)";
|
||||
|
||||
DataMap inputs;
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kHlslKeywords);
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kHlslKeywords,
|
||||
/* preserve_unicode */ false);
|
||||
auto got = Run<Renamer>(src, inputs);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
|
@ -294,7 +320,8 @@ fn frag_main() {
|
|||
)";
|
||||
|
||||
DataMap inputs;
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords);
|
||||
inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords,
|
||||
/* preserve_unicode */ false);
|
||||
auto got = Run<Renamer>(src, inputs);
|
||||
|
||||
EXPECT_EQ(expect, str(got));
|
||||
|
@ -528,7 +555,8 @@ INSTANTIATE_TEST_SUITE_P(RenamerTestGlsl,
|
|||
// "void", // WGSL keyword
|
||||
"volatile",
|
||||
// "while", // WGSL keyword
|
||||
"writeonly"));
|
||||
"writeonly",
|
||||
kUnicodeIdentifier));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(RenamerTestHlsl,
|
||||
RenamerTestHlsl,
|
||||
|
@ -1142,8 +1170,9 @@ INSTANTIATE_TEST_SUITE_P(RenamerTestHlsl,
|
|||
"vertexshader",
|
||||
"virtual",
|
||||
// "void", // WGSL keyword
|
||||
"volatile"));
|
||||
// "while" // WGSL reserved keyword
|
||||
"volatile",
|
||||
// "while" // WGSL reserved keyword
|
||||
kUnicodeIdentifier));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
RenamerTestMsl,
|
||||
|
@ -1425,7 +1454,9 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"M_2_PI_H",
|
||||
"M_2_SQRTPI_H",
|
||||
"M_SQRT2_H",
|
||||
"M_SQRT1_2_H"));
|
||||
"M_SQRT1_2_H",
|
||||
// "while" // WGSL reserved keyword
|
||||
kUnicodeIdentifier));
|
||||
|
||||
} // namespace
|
||||
} // namespace transform
|
||||
|
|
|
@ -89,7 +89,8 @@ class TestHelperBase : public BODY, public ProgramBuilder {
|
|||
transform::Manager transform_manager;
|
||||
transform::DataMap transform_data;
|
||||
transform_data.Add<transform::Renamer::Config>(
|
||||
transform::Renamer::Target::kHlslKeywords);
|
||||
transform::Renamer::Target::kHlslKeywords,
|
||||
/* preserve_unicode */ true);
|
||||
transform_manager.Add<tint::transform::Renamer>();
|
||||
auto result =
|
||||
transform_manager.Run(&sanitized_result.program, transform_data);
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
// Øⓑꚫ쁹Ǵ𐌒岾🥍ⴵ㍨又ᮗ
|
||||
|
||||
/*
|
||||
/* 👋🌎 */
|
||||
नमस्ते दुनिया
|
||||
*/
|
||||
|
||||
@stage(fragment)
|
||||
fn /* こんにちは世界 */ main( /* 你好世界 */ ) {
|
||||
// 안녕하세요 세계
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
#version 310 es
|
||||
precision mediump float;
|
||||
|
||||
void tint_symbol() {
|
||||
}
|
||||
|
||||
void main() {
|
||||
tint_symbol();
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
void main() {
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
#include <metal_stdlib>
|
||||
|
||||
using namespace metal;
|
||||
fragment void tint_symbol() {
|
||||
return;
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 5
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main"
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpName %main "main"
|
||||
%void = OpTypeVoid
|
||||
%1 = OpTypeFunction %void
|
||||
%main = OpFunction %void None %1
|
||||
%4 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
|
@ -0,0 +1,3 @@
|
|||
@stage(fragment)
|
||||
fn main() {
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
type 𝓉𝓎𝓅ℯ_𝒶 = i32;
|
||||
type 𝐭𝐲𝐩𝐞_𝐛 = f32;
|
||||
|
||||
fn 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(ᵖᵃʳᵃᵐ : 𝓉𝓎𝓅ℯ_𝒶) -> 𝐭𝐲𝐩𝐞_𝐛 {
|
||||
return 𝐭𝐲𝐩𝐞_𝐛(ᵖᵃʳᵃᵐ);
|
||||
}
|
||||
|
||||
@stage(fragment)
|
||||
fn 𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥() {
|
||||
var 𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚 : 𝓉𝓎𝓅ℯ_𝒶;
|
||||
let 𝖗𝖊𝖘𝖚𝖑𝖙 = 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚);
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
#version 310 es
|
||||
precision mediump float;
|
||||
|
||||
float tint_symbol_2(int tint_symbol_3) {
|
||||
return float(tint_symbol_3);
|
||||
}
|
||||
|
||||
void tint_symbol_4() {
|
||||
int tint_symbol_5 = 0;
|
||||
float tint_symbol_6 = tint_symbol_2(tint_symbol_5);
|
||||
}
|
||||
|
||||
void main() {
|
||||
tint_symbol_4();
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
float tint_symbol_2(int tint_symbol_3) {
|
||||
return float(tint_symbol_3);
|
||||
}
|
||||
|
||||
void tint_symbol_4() {
|
||||
int tint_symbol_5 = 0;
|
||||
const float tint_symbol_6 = tint_symbol_2(tint_symbol_5);
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
#include <metal_stdlib>
|
||||
|
||||
using namespace metal;
|
||||
float tint_symbol_2(int tint_symbol_3) {
|
||||
return float(tint_symbol_3);
|
||||
}
|
||||
|
||||
fragment void tint_symbol_4() {
|
||||
int tint_symbol_5 = 0;
|
||||
float const tint_symbol_6 = tint_symbol_2(tint_symbol_5);
|
||||
return;
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
; SPIR-V
|
||||
; Version: 1.3
|
||||
; Generator: Google Tint Compiler; 0
|
||||
; Bound: 17
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %________________________________________ "𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥"
|
||||
OpExecutionMode %________________________________________ OriginUpperLeft
|
||||
OpName %________________________________ "𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷"
|
||||
OpName %______________ "ᵖᵃʳᵃᵐ"
|
||||
OpName %________________________________________ "𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥"
|
||||
OpName %_________________________________0 "𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚"
|
||||
%float = OpTypeFloat 32
|
||||
%int = OpTypeInt 32 1
|
||||
%1 = OpTypeFunction %float %int
|
||||
%void = OpTypeVoid
|
||||
%8 = OpTypeFunction %void
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%14 = OpConstantNull %int
|
||||
%________________________________ = OpFunction %float None %1
|
||||
%______________ = OpFunctionParameter %int
|
||||
%6 = OpLabel
|
||||
%7 = OpConvertSToF %float %______________
|
||||
OpReturnValue %7
|
||||
OpFunctionEnd
|
||||
%________________________________________ = OpFunction %void None %8
|
||||
%11 = OpLabel
|
||||
%_________________________________0 = OpVariable %_ptr_Function_int Function %14
|
||||
%16 = OpLoad %int %_________________________________0
|
||||
%15 = OpFunctionCall %float %________________________________ %16
|
||||
OpReturn
|
||||
OpFunctionEnd
|
|
@ -0,0 +1,13 @@
|
|||
type 𝓉𝓎𝓅ℯ_𝒶 = i32;
|
||||
|
||||
type 𝐭𝐲𝐩𝐞_𝐛 = f32;
|
||||
|
||||
fn 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(ᵖᵃʳᵃᵐ : 𝓉𝓎𝓅ℯ_𝒶) -> 𝐭𝐲𝐩𝐞_𝐛 {
|
||||
return 𝐭𝐲𝐩𝐞_𝐛(ᵖᵃʳᵃᵐ);
|
||||
}
|
||||
|
||||
@stage(fragment)
|
||||
fn 𝕖𝕟𝕥𝕣𝕪𝕡𝕠𝕚𝕟𝕥() {
|
||||
var 𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚 : 𝓉𝓎𝓅ℯ_𝒶;
|
||||
let 𝖗𝖊𝖘𝖚𝖑𝖙 = 𝓯𝓾𝓷𝓬𝓽𝓲𝓸𝓷(𝙫𝙖𝙧𝙞𝙖𝙗𝙡𝙚);
|
||||
}
|
Loading…
Reference in New Issue