dawn-cmake/src/tint/reader/wgsl/lexer_test.cc
Ben Clayton c98d57d662 tint: Rename 'static_assert' to 'const_assert'
Bug: tint:1807
Change-Id: I2c2a205ada01ad14d0bf6620a3dc3ec84dd7ee67
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/117212
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Dan Sinclair <dsinclair@chromium.org>
Commit-Queue: Ben Clayton <bclayton@google.com>
2023-01-24 14:59:43 +00:00

1129 lines
46 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2020 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/tint/reader/wgsl/lexer.h"
#include <limits>
#include <tuple>
#include <vector>
#include "gtest/gtest.h"
#include "src/tint/number.h"
namespace tint::reader::wgsl {
namespace {
using LexerTest = testing::Test;
// Blankspace constants. These are macros on purpose to be able to easily build
// up string literals with them.
//
// Same line code points
#define kSpace " "
#define kHTab "\t"
#define kL2R "\xE2\x80\x8E"
#define kR2L "\xE2\x80\x8F"
// Line break code points
#define kCR "\r"
#define kLF "\n"
#define kVTab "\x0B"
#define kFF "\x0C"
#define kNL "\xC2\x85"
#define kLS "\xE2\x80\xA8"
#define kPS "\xE2\x80\xA9"
TEST_F(LexerTest, Empty) {
Source::File file("", "");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(1u, list.size());
EXPECT_TRUE(list[0].IsEof());
}
TEST_F(LexerTest, Skips_Blankspace_Basic) {
Source::File file("", "\t\r\n\t ident\t\n\t \r ");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 2u);
EXPECT_EQ(t.source().range.begin.column, 6u);
EXPECT_EQ(t.source().range.end.line, 2u);
EXPECT_EQ(t.source().range.end.column, 11u);
EXPECT_EQ(t.to_str(), "ident");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
TEST_F(LexerTest, Skips_Blankspace_Exotic) {
Source::File file("", //
kVTab kFF kNL kLS kPS kL2R kR2L //
"ident" //
kVTab kFF kNL kLS kPS kL2R kR2L);
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 6u);
EXPECT_EQ(t.source().range.begin.column, 7u);
EXPECT_EQ(t.source().range.end.line, 6u);
EXPECT_EQ(t.source().range.end.column, 12u);
EXPECT_EQ(t.to_str(), "ident");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
TEST_F(LexerTest, Skips_Comments_Line) {
Source::File file("", R"(//starts with comment
ident1 //ends with comment
// blank line
ident2)");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(3u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 2u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 2u);
EXPECT_EQ(t.source().range.end.column, 7u);
EXPECT_EQ(t.to_str(), "ident1");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 4u);
EXPECT_EQ(t.source().range.begin.column, 2u);
EXPECT_EQ(t.source().range.end.line, 4u);
EXPECT_EQ(t.source().range.end.column, 8u);
EXPECT_EQ(t.to_str(), "ident2");
}
{
auto& t = list[2];
EXPECT_TRUE(t.IsEof());
}
}
TEST_F(LexerTest, Skips_Comments_Unicode) {
Source::File file("", R"(// starts with 🙂🙂🙂
ident1 //ends with 🙂🙂🙂
// blank line
ident2)");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(3u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 2u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 2u);
EXPECT_EQ(t.source().range.end.column, 7u);
EXPECT_EQ(t.to_str(), "ident1");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 4u);
EXPECT_EQ(t.source().range.begin.column, 2u);
EXPECT_EQ(t.source().range.end.line, 4u);
EXPECT_EQ(t.source().range.end.column, 8u);
EXPECT_EQ(t.to_str(), "ident2");
}
{
auto& t = list[2];
EXPECT_TRUE(t.IsEof());
}
}
using LineCommentTerminatorTest = testing::TestWithParam<const char*>;
TEST_P(LineCommentTerminatorTest, Terminators) {
// Test that line comments are ended by blankspace characters other than
// space, horizontal tab, left-to-right mark, and right-to-left mark.
auto* c = GetParam();
std::string src = "const// This is a comment";
src += c;
src += "ident";
Source::File file("", src);
Lexer l(&file);
auto is_same_line = [](std::string_view v) {
return v == kSpace || v == kHTab || v == kL2R || v == kR2L;
};
auto list = l.Lex();
ASSERT_EQ(is_same_line(c) ? 2u : 3u, list.size());
size_t idx = 0;
{
auto& t = list[idx++];
EXPECT_TRUE(t.Is(Token::Type::kConst));
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 6u);
}
if (!is_same_line(c)) {
size_t line = is_same_line(c) ? 1u : 2u;
size_t col = is_same_line(c) ? 25u : 1u;
auto& t = list[idx++];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, line);
EXPECT_EQ(t.source().range.begin.column, col);
EXPECT_EQ(t.source().range.end.line, line);
EXPECT_EQ(t.source().range.end.column, col + 5);
EXPECT_EQ(t.to_str(), "ident");
}
{
auto& t = list[idx];
EXPECT_TRUE(t.IsEof());
}
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
LineCommentTerminatorTest,
testing::Values(
// same line
kSpace,
kHTab,
kCR,
kL2R,
kR2L,
// line break
kLF,
kVTab,
kFF,
kNL,
kLS,
kPS));
TEST_F(LexerTest, Skips_Comments_Block) {
Source::File file("", R"(/* comment
text */ident)");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 2u);
EXPECT_EQ(t.source().range.begin.column, 8u);
EXPECT_EQ(t.source().range.end.line, 2u);
EXPECT_EQ(t.source().range.end.column, 13u);
EXPECT_EQ(t.to_str(), "ident");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
TEST_F(LexerTest, Skips_Comments_Block_Nested) {
Source::File file("", R"(/* comment
text // nested line comments are ignored /* more text
/////**/ */*/ident)");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 3u);
EXPECT_EQ(t.source().range.begin.column, 14u);
EXPECT_EQ(t.source().range.end.line, 3u);
EXPECT_EQ(t.source().range.end.column, 19u);
EXPECT_EQ(t.to_str(), "ident");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
TEST_F(LexerTest, Skips_Comments_Block_Unterminated) {
// I had to break up the /* because otherwise the clang readability check
// errored out saying it could not find the end of a multi-line comment.
Source::File file("", R"(
/)"
R"(*
abcd)");
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(1u, list.size());
auto& t = list[0];
ASSERT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(), "unterminated block comment");
EXPECT_EQ(t.source().range.begin.line, 2u);
EXPECT_EQ(t.source().range.begin.column, 3u);
EXPECT_EQ(t.source().range.end.line, 2u);
EXPECT_EQ(t.source().range.end.column, 4u);
}
TEST_F(LexerTest, Null_InBlankspace_IsError) {
Source::File file("", std::string{' ', 0, ' '});
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(1u, list.size());
auto& t = list[0];
EXPECT_TRUE(t.IsError());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 2u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 2u);
EXPECT_EQ(t.to_str(), "null character found");
}
TEST_F(LexerTest, Null_InLineComment_IsError) {
Source::File file("", std::string{'/', '/', ' ', 0, ' '});
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(1u, list.size());
auto& t = list[0];
EXPECT_TRUE(t.IsError());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 4u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 4u);
EXPECT_EQ(t.to_str(), "null character found");
}
TEST_F(LexerTest, Null_InBlockComment_IsError) {
Source::File file("", std::string{'/', '*', ' ', 0, '*', '/'});
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(1u, list.size());
auto& t = list[0];
EXPECT_TRUE(t.IsError());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 4u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 4u);
EXPECT_EQ(t.to_str(), "null character found");
}
TEST_F(LexerTest, Null_InIdentifier_IsError) {
// Try inserting a null in an identifier. Other valid token
// kinds will behave similarly, so use the identifier case
// as a representative.
Source::File file("", std::string{'a', 0, 'c'});
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.to_str(), "a");
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsError());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 2u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 2u);
EXPECT_EQ(t.to_str(), "null character found");
}
}
struct FloatData {
const char* input;
double result;
};
inline std::ostream& operator<<(std::ostream& out, FloatData data) {
out << std::string(data.input);
return out;
}
using FloatTest = testing::TestWithParam<FloatData>;
TEST_P(FloatTest, Parse) {
auto params = GetParam();
Source::File file("", params.input);
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
if (std::string(params.input).back() == 'f') {
EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral_F));
} else if (std::string(params.input).back() == 'h') {
EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral_H));
} else {
EXPECT_TRUE(t.Is(Token::Type::kFloatLiteral));
}
EXPECT_EQ(t.to_f64(), params.result);
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(params.input));
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
FloatTest,
testing::Values(
// No decimal, with 'f' suffix
FloatData{"0f", 0.0},
FloatData{"1f", 1.0},
// No decimal, with 'h' suffix
FloatData{"0h", 0.0},
FloatData{"1h", 1.0},
// Zero, with decimal.
FloatData{"0.0", 0.0},
FloatData{"0.", 0.0},
FloatData{".0", 0.0},
// Zero, with decimal and 'f' suffix
FloatData{"0.0f", 0.0},
FloatData{"0.f", 0.0},
FloatData{".0f", 0.0},
// Zero, with decimal and 'h' suffix
FloatData{"0.0h", 0.0},
FloatData{"0.h", 0.0},
FloatData{".0h", 0.0},
// Non-zero with decimal
FloatData{"5.7", 5.7},
FloatData{"5.", 5.},
FloatData{".7", .7},
// Non-zero with decimal and 'f' suffix
FloatData{"5.7f", static_cast<double>(5.7f)},
FloatData{"5.f", static_cast<double>(5.f)},
FloatData{".7f", static_cast<double>(.7f)},
// Non-zero with decimal and 'h' suffix
FloatData{"5.7h", static_cast<double>(f16::Quantize(5.7f))},
FloatData{"5.h", static_cast<double>(f16::Quantize(5.f))},
FloatData{".7h", static_cast<double>(f16::Quantize(.7f))},
// No decimal, with exponent
FloatData{"1e5", 1e5},
FloatData{"1E5", 1e5},
FloatData{"1e-5", 1e-5},
FloatData{"1E-5", 1e-5},
// No decimal, with exponent and 'f' suffix
FloatData{"1e5f", static_cast<double>(1e5f)},
FloatData{"1E5f", static_cast<double>(1e5f)},
FloatData{"1e-5f", static_cast<double>(1e-5f)},
FloatData{"1E-5f", static_cast<double>(1e-5f)},
// No decimal, with exponent and 'h' suffix
FloatData{"6e4h", static_cast<double>(f16::Quantize(6e4f))},
FloatData{"6E4h", static_cast<double>(f16::Quantize(6e4f))},
FloatData{"1e-5h", static_cast<double>(f16::Quantize(1e-5f))},
FloatData{"1E-5h", static_cast<double>(f16::Quantize(1e-5f))},
// With decimal and exponents
FloatData{"0.2e+12", 0.2e12},
FloatData{"1.2e-5", 1.2e-5},
FloatData{"2.57e23", 2.57e23},
FloatData{"2.5e+0", 2.5},
FloatData{"2.5e-0", 2.5},
// With decimal and exponents and 'f' suffix
FloatData{"0.2e+12f", static_cast<double>(0.2e12f)},
FloatData{"1.2e-5f", static_cast<double>(1.2e-5f)},
FloatData{"2.57e23f", static_cast<double>(2.57e23f)},
FloatData{"2.5e+0f", static_cast<double>(2.5f)},
FloatData{"2.5e-0f", static_cast<double>(2.5f)},
// With decimal and exponents and 'h' suffix
FloatData{"0.2e+5h", static_cast<double>(f16::Quantize(0.2e5f))},
FloatData{"1.2e-5h", static_cast<double>(f16::Quantize(1.2e-5f))},
FloatData{"6.55e4h", static_cast<double>(f16::Quantize(6.55e4f))},
FloatData{"2.5e+0h", static_cast<double>(f16::Quantize(2.5f))},
FloatData{"2.5e-0h", static_cast<double>(f16::Quantize(2.5f))},
// Quantization
FloatData{"3.141592653589793", 3.141592653589793}, // no quantization
FloatData{"3.141592653589793f", 3.1415927410125732}, // f32 quantized
FloatData{"3.141592653589793h", 3.140625} // f16 quantized
));
using FloatTest_Invalid = testing::TestWithParam<const char*>;
TEST_P(FloatTest_Invalid, Handles) {
Source::File file("", GetParam());
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_FALSE(t.Is(Token::Type::kFloatLiteral) || t.Is(Token::Type::kFloatLiteral_F) ||
t.Is(Token::Type::kFloatLiteral_H));
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
FloatTest_Invalid,
testing::Values(".",
// Need a mantissa digit
".e5",
".E5",
// Need exponent digits
".e",
".e+",
".e-",
".E",
".e+",
".e-",
// Overflow
"2.5e+256f",
"6.5520e+4h",
// Decimal exponent must immediately
// follow the 'e'.
"2.5e 12",
"2.5e +12",
"2.5e -12",
"2.5e+ 123",
"2.5e- 123",
"2.5E 12",
"2.5E +12",
"2.5E -12",
"2.5E+ 123",
"2.5E- 123"));
using AsciiIdentifierTest = testing::TestWithParam<const char*>;
TEST_P(AsciiIdentifierTest, Parse) {
Source::File file("", GetParam());
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(GetParam()));
EXPECT_EQ(t.to_str(), GetParam());
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
AsciiIdentifierTest,
testing::Values("a",
"test",
"test01",
"test_",
"_test",
"test_01",
"ALLCAPS",
"MiXeD_CaSe",
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"alldigits_0123456789"));
struct UnicodeCase {
const char* utf8;
size_t count;
};
using ValidUnicodeIdentifierTest = testing::TestWithParam<UnicodeCase>;
TEST_P(ValidUnicodeIdentifierTest, Parse) {
Source::File file("", GetParam().utf8);
Lexer l(&file);
auto list = l.Lex();
ASSERT_EQ(2u, list.size());
{
auto& t = list[0];
EXPECT_TRUE(t.IsIdentifier());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + GetParam().count);
EXPECT_EQ(t.to_str(), GetParam().utf8);
}
{
auto& t = list[1];
EXPECT_TRUE(t.IsEof());
}
}
INSTANTIATE_TEST_SUITE_P(
LexerTest,
ValidUnicodeIdentifierTest,
testing::ValuesIn({
UnicodeCase{// "𝐢𝐝𝐞𝐧𝐭𝐢𝐟𝐢𝐞𝐫"
"\xf0\x9d\x90\xa2\xf0\x9d\x90\x9d\xf0\x9d\x90\x9e\xf0\x9d"
"\x90\xa7\xf0\x9d\x90\xad\xf0\x9d\x90\xa2\xf0\x9d\x90\x9f"
"\xf0\x9d\x90\xa2\xf0\x9d\x90\x9e\xf0\x9d\x90\xab",
40},
UnicodeCase{// "𝑖𝑑𝑒𝑛𝑡𝑖𝑓𝑖𝑒𝑟"
"\xf0\x9d\x91\x96\xf0\x9d\x91\x91\xf0\x9d\x91\x92\xf0\x9d"
"\x91\x9b\xf0\x9d\x91\xa1\xf0\x9d\x91\x96\xf0\x9d\x91\x93"
"\xf0\x9d\x91\x96\xf0\x9d\x91\x92\xf0\x9d\x91\x9f",
40},
UnicodeCase{// ""
"\xef\xbd\x89\xef\xbd\x84\xef\xbd\x85\xef\xbd\x8e\xef\xbd\x94\xef"
"\xbd\x89\xef\xbd\x86\xef\xbd\x89\xef\xbd\x85\xef\xbd\x92",
30},
UnicodeCase{// "𝕚𝕕𝕖𝕟𝕥𝕚𝕗𝕚𝕖𝕣𝟙𝟚𝟛"
"\xf0\x9d\x95\x9a\xf0\x9d\x95\x95\xf0\x9d\x95\x96\xf0\x9d"
"\x95\x9f\xf0\x9d\x95\xa5\xf0\x9d\x95\x9a\xf0\x9d\x95\x97"
"\xf0\x9d\x95\x9a\xf0\x9d\x95\x96\xf0\x9d\x95\xa3\xf0\x9d"
"\x9f\x99\xf0\x9d\x9f\x9a\xf0\x9d\x9f\x9b",
52},
UnicodeCase{// "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
"\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
"\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
"\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33",
43},
}));
using InvalidUnicodeIdentifierTest = testing::TestWithParam<const char*>;
TEST_P(InvalidUnicodeIdentifierTest, Parse) {
Source::File file("", GetParam());
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_TRUE(t.IsError());
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u);
EXPECT_EQ(t.to_str(), "invalid UTF-8");
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
InvalidUnicodeIdentifierTest,
testing::ValuesIn({
"\x80\x80\x80\x80", // 10000000
"\x81\x80\x80\x80", // 10000001
"\x8f\x80\x80\x80", // 10001111
"\x90\x80\x80\x80", // 10010000
"\x91\x80\x80\x80", // 10010001
"\x9f\x80\x80\x80", // 10011111
"\xa0\x80\x80\x80", // 10100000
"\xa1\x80\x80\x80", // 10100001
"\xaf\x80\x80\x80", // 10101111
"\xb0\x80\x80\x80", // 10110000
"\xb1\x80\x80\x80", // 10110001
"\xbf\x80\x80\x80", // 10111111
"\xc0\x80\x80\x80", // 11000000
"\xc1\x80\x80\x80", // 11000001
"\xf5\x80\x80\x80", // 11110101
"\xf6\x80\x80\x80", // 11110110
"\xf7\x80\x80\x80", // 11110111
"\xf8\x80\x80\x80", // 11111000
"\xfe\x80\x80\x80", // 11111110
"\xff\x80\x80\x80", // 11111111
"\xd0", // 2-bytes, missing second byte
"\xe8\x8f", // 3-bytes, missing third byte
"\xf4\x8f\x8f", // 4-bytes, missing fourth byte
"\xd0\x7f", // 2-bytes, second byte MSB unset
"\xe8\x7f\x8f", // 3-bytes, second byte MSB unset
"\xe8\x8f\x7f", // 3-bytes, third byte MSB unset
"\xf4\x7f\x8f\x8f", // 4-bytes, second byte MSB unset
"\xf4\x8f\x7f\x8f", // 4-bytes, third byte MSB unset
"\xf4\x8f\x8f\x7f", // 4-bytes, fourth byte MSB unset
}));
TEST_F(LexerTest, IdentifierTest_SingleUnderscoreDoesNotMatch) {
Source::File file("", "_");
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_FALSE(t.IsIdentifier());
}
TEST_F(LexerTest, IdentifierTest_DoesNotStartWithDoubleUnderscore) {
Source::File file("", "__test");
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_FALSE(t.IsIdentifier());
}
TEST_F(LexerTest, IdentifierTest_DoesNotStartWithNumber) {
Source::File file("", "01test");
Lexer l(&file);
auto list = l.Lex();
EXPECT_FALSE(list.empty());
auto& t = list[0];
EXPECT_FALSE(t.IsIdentifier());
}
////////////////////////////////////////////////////////////////////////////////
// ParseIntegerTest
////////////////////////////////////////////////////////////////////////////////
struct ParseIntegerCase {
const char* input;
int64_t result;
};
inline std::ostream& operator<<(std::ostream& out, ParseIntegerCase data) {
out << std::string(data.input);
return out;
}
using ParseIntegerTest = testing::TestWithParam<std::tuple<char, ParseIntegerCase>>;
TEST_P(ParseIntegerTest, Parse) {
auto suffix = std::get<0>(GetParam());
auto params = std::get<1>(GetParam());
Source::File file("", params.input);
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
switch (suffix) {
case 'i':
EXPECT_TRUE(t.Is(Token::Type::kIntLiteral_I));
break;
case 'u':
EXPECT_TRUE(t.Is(Token::Type::kIntLiteral_U));
break;
case 0:
EXPECT_TRUE(t.Is(Token::Type::kIntLiteral));
break;
}
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(params.input));
ASSERT_FALSE(t.IsError()) << t.to_str();
EXPECT_EQ(t.to_i64(), params.result);
}
INSTANTIATE_TEST_SUITE_P(Dec_AInt,
ParseIntegerTest,
testing::Combine(testing::Values('\0'), // No suffix
testing::ValuesIn(std::vector<ParseIntegerCase>{
{"0", 0},
{"2", 2},
{"123", 123},
{"2147483647", 2147483647},
})));
INSTANTIATE_TEST_SUITE_P(Dec_u32,
ParseIntegerTest,
testing::Combine(testing::Values('u'), // Suffix
testing::ValuesIn(std::vector<ParseIntegerCase>{
{"0u", 0},
{"123u", 123},
{"4294967295u", 4294967295ll},
})));
INSTANTIATE_TEST_SUITE_P(Dec_i32,
ParseIntegerTest,
testing::Combine(testing::Values('i'), // Suffix
testing::ValuesIn(std::vector<ParseIntegerCase>{
{"0i", 0u},
{"123i", 123},
{"2147483647i", 2147483647},
})));
INSTANTIATE_TEST_SUITE_P(Hex_AInt,
ParseIntegerTest,
testing::Combine(testing::Values('\0'), // No suffix
testing::ValuesIn(std::vector<ParseIntegerCase>{
{"0x0", 0},
{"0X0", 0},
{"0x42", 66},
{"0X42", 66},
{"0xeF1Abc9", 0xeF1Abc9},
{"0XeF1Abc9", 0xeF1Abc9},
{"0x80000000", 0x80000000},
{"0X80000000", 0X80000000},
{"0x7FFFFFFF", 0x7fffffff},
{"0X7FFFFFFF", 0x7fffffff},
{"0x7fffffff", 0x7fffffff},
{"0x7fffffff", 0x7fffffff},
{"0x7FfFfFfF", 0x7fffffff},
{"0X7FfFfFfF", 0x7fffffff},
{"0x7fffffffffffffff", 0x7fffffffffffffffll},
})));
INSTANTIATE_TEST_SUITE_P(Hex_u32,
ParseIntegerTest,
testing::Combine(testing::Values('u'), // Suffix
testing::ValuesIn(std::vector<ParseIntegerCase>{
{"0x0u", 0},
{"0x42u", 66},
{"0xeF1Abc9u", 250719177},
{"0xFFFFFFFFu", 0xffffffff},
{"0XFFFFFFFFu", 0xffffffff},
{"0xffffffffu", 0xffffffff},
{"0Xffffffffu", 0xffffffff},
{"0xfFfFfFfFu", 0xffffffff},
{"0XfFfFfFfFu", 0xffffffff},
})));
INSTANTIATE_TEST_SUITE_P(Hex_i32,
ParseIntegerTest,
testing::Combine(testing::Values('i'), // Suffix
testing::ValuesIn(std::vector<ParseIntegerCase>{
{"0x0i", 0},
{"0x42i", 66},
{"0xeF1Abc9i", 250719177},
{"0x7FFFFFFFi", 0x7fffffff},
{"0X7FFFFFFFi", 0x7fffffff},
{"0x7fffffffi", 0x7fffffff},
{"0X7fffffffi", 0x7fffffff},
{"0x7FfFfFfFi", 0x7fffffff},
{"0X7FfFfFfFi", 0x7fffffff},
})));
////////////////////////////////////////////////////////////////////////////////
// ParseIntegerTest_CannotBeRepresented
////////////////////////////////////////////////////////////////////////////////
using ParseIntegerTest_CannotBeRepresented =
testing::TestWithParam<std::tuple<const char*, const char*>>;
TEST_P(ParseIntegerTest_CannotBeRepresented, Parse) {
auto type = std::get<0>(GetParam());
auto source = std::get<1>(GetParam());
Source::File file("", source);
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_TRUE(t.Is(Token::Type::kError));
auto expect = "value cannot be represented as '" + std::string(type) + "'";
EXPECT_EQ(t.to_str(), expect);
}
INSTANTIATE_TEST_SUITE_P(AbstractInt,
ParseIntegerTest_CannotBeRepresented,
testing::Combine(testing::Values("abstract-int"),
testing::Values("9223372036854775808",
"0xFFFFFFFFFFFFFFFF",
"0xffffffffffffffff",
"0x8000000000000000")));
INSTANTIATE_TEST_SUITE_P(i32,
ParseIntegerTest_CannotBeRepresented,
testing::Combine(testing::Values("i32"), // type
testing::Values("2147483648i")));
INSTANTIATE_TEST_SUITE_P(u32,
ParseIntegerTest_CannotBeRepresented,
testing::Combine(testing::Values("u32"), // type
testing::Values("4294967296u")));
////////////////////////////////////////////////////////////////////////////////
// ParseIntegerTest_LeadingZeros
////////////////////////////////////////////////////////////////////////////////
using ParseIntegerTest_LeadingZeros = testing::TestWithParam<const char*>;
TEST_P(ParseIntegerTest_LeadingZeros, Parse) {
Source::File file("", GetParam());
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(), "integer literal cannot have leading 0s");
}
INSTANTIATE_TEST_SUITE_P(LeadingZero,
ParseIntegerTest_LeadingZeros,
testing::Values("01234", "0000", "00u"));
////////////////////////////////////////////////////////////////////////////////
// ParseIntegerTest_NoSignificantDigits
////////////////////////////////////////////////////////////////////////////////
using ParseIntegerTest_NoSignificantDigits = testing::TestWithParam<const char*>;
TEST_P(ParseIntegerTest_NoSignificantDigits, Parse) {
Source::File file("", GetParam());
Lexer l(&file);
auto list = l.Lex();
ASSERT_FALSE(list.empty());
auto& t = list[0];
EXPECT_TRUE(t.Is(Token::Type::kError));
EXPECT_EQ(t.to_str(), "integer or float hex literal has no significant digits");
}
INSTANTIATE_TEST_SUITE_P(LeadingZero,
ParseIntegerTest_NoSignificantDigits,
testing::Values("0x", "0X", "0xu", "0Xu", "0xi", "0Xi"));
struct TokenData {
const char* input;
Token::Type type;
};
inline std::ostream& operator<<(std::ostream& out, TokenData data) {
out << std::string(data.input);
return out;
}
using PunctuationTest = testing::TestWithParam<TokenData>;
TEST_P(PunctuationTest, Parses) {
auto params = GetParam();
Source::File file("", params.input);
Lexer l(&file);
auto list = l.Lex();
ASSERT_GE(list.size(), 2u);
{
auto& t = list[0];
EXPECT_TRUE(t.Is(params.type));
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(params.input));
}
{
auto& t = list[1];
EXPECT_EQ(t.source().range.begin.column, 1 + std::string(params.input).size());
}
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
PunctuationTest,
testing::Values(TokenData{"&", Token::Type::kAnd},
TokenData{"->", Token::Type::kArrow},
TokenData{"@", Token::Type::kAttr},
TokenData{"/", Token::Type::kForwardSlash},
TokenData{"!", Token::Type::kBang},
TokenData{"[", Token::Type::kBracketLeft},
TokenData{"]", Token::Type::kBracketRight},
TokenData{"{", Token::Type::kBraceLeft},
TokenData{"}", Token::Type::kBraceRight},
TokenData{":", Token::Type::kColon},
TokenData{",", Token::Type::kComma},
TokenData{"=", Token::Type::kEqual},
TokenData{"==", Token::Type::kEqualEqual},
TokenData{">", Token::Type::kGreaterThan},
TokenData{"<", Token::Type::kLessThan},
TokenData{"<=", Token::Type::kLessThanEqual},
TokenData{"<<", Token::Type::kShiftLeft},
TokenData{"%", Token::Type::kMod},
TokenData{"!=", Token::Type::kNotEqual},
TokenData{"-", Token::Type::kMinus},
TokenData{".", Token::Type::kPeriod},
TokenData{"+", Token::Type::kPlus},
TokenData{"++", Token::Type::kPlusPlus},
TokenData{"|", Token::Type::kOr},
TokenData{"||", Token::Type::kOrOr},
TokenData{"(", Token::Type::kParenLeft},
TokenData{")", Token::Type::kParenRight},
TokenData{";", Token::Type::kSemicolon},
TokenData{"*", Token::Type::kStar},
TokenData{"~", Token::Type::kTilde},
TokenData{"_", Token::Type::kUnderscore},
TokenData{"^", Token::Type::kXor},
TokenData{"+=", Token::Type::kPlusEqual},
TokenData{"-=", Token::Type::kMinusEqual},
TokenData{"*=", Token::Type::kTimesEqual},
TokenData{"/=", Token::Type::kDivisionEqual},
TokenData{"%=", Token::Type::kModuloEqual},
TokenData{"&=", Token::Type::kAndEqual},
TokenData{"|=", Token::Type::kOrEqual},
TokenData{"^=", Token::Type::kXorEqual},
TokenData{"<<=", Token::Type::kShiftLeftEqual}));
using SplittablePunctuationTest = testing::TestWithParam<TokenData>;
TEST_P(SplittablePunctuationTest, Parses) {
auto params = GetParam();
Source::File file("", params.input);
Lexer l(&file);
auto list = l.Lex();
ASSERT_GE(list.size(), 3u);
{
auto& t = list[0];
EXPECT_TRUE(t.Is(params.type));
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(params.input));
}
const size_t num_placeholders = list[0].NumPlaceholders();
EXPECT_GT(num_placeholders, 0u);
ASSERT_EQ(list.size(), 2u + num_placeholders);
for (size_t i = 0; i < num_placeholders; i++) {
auto& t = list[1 + i];
EXPECT_TRUE(t.Is(Token::Type::kPlaceholder));
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 2u + i);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(params.input));
}
{
auto& t = list.back();
EXPECT_TRUE(t.Is(Token::Type::kEOF));
EXPECT_EQ(t.source().range.begin.column, 2u + num_placeholders);
}
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
SplittablePunctuationTest,
testing::Values(TokenData{"&&", Token::Type::kAndAnd},
TokenData{">=", Token::Type::kGreaterThanEqual},
TokenData{"--", Token::Type::kMinusMinus},
TokenData{">>", Token::Type::kShiftRight},
TokenData{">>=", Token::Type::kShiftRightEqual}));
using KeywordTest = testing::TestWithParam<TokenData>;
TEST_P(KeywordTest, Parses) {
auto params = GetParam();
Source::File file("", params.input);
Lexer l(&file);
auto list = l.Lex();
ASSERT_GE(list.size(), 2u);
auto& t = list[0];
EXPECT_TRUE(t.Is(params.type)) << params.input;
EXPECT_EQ(t.source().range.begin.line, 1u);
EXPECT_EQ(t.source().range.begin.column, 1u);
EXPECT_EQ(t.source().range.end.line, 1u);
EXPECT_EQ(t.source().range.end.column, 1u + strlen(params.input));
EXPECT_EQ(list[1].source().range.begin.column, 1 + std::string(params.input).size());
}
INSTANTIATE_TEST_SUITE_P(
LexerTest,
KeywordTest,
testing::Values(TokenData{"alias", Token::Type::kAlias},
TokenData{"array", Token::Type::kArray},
TokenData{"bitcast", Token::Type::kBitcast},
TokenData{"bool", Token::Type::kBool},
TokenData{"break", Token::Type::kBreak},
TokenData{"case", Token::Type::kCase},
TokenData{"const", Token::Type::kConst},
TokenData{"const_assert", Token::Type::kConstAssert},
TokenData{"continue", Token::Type::kContinue},
TokenData{"continuing", Token::Type::kContinuing},
TokenData{"default", Token::Type::kDefault},
TokenData{"diagnostic", Token::Type::kDiagnostic},
TokenData{"discard", Token::Type::kDiscard},
TokenData{"else", Token::Type::kElse},
TokenData{"enable", Token::Type::kEnable},
TokenData{"f32", Token::Type::kF32},
TokenData{"fallthrough", Token::Type::kFallthrough},
TokenData{"false", Token::Type::kFalse},
TokenData{"fn", Token::Type::kFn},
TokenData{"for", Token::Type::kFor},
TokenData{"i32", Token::Type::kI32},
TokenData{"if", Token::Type::kIf},
TokenData{"let", Token::Type::kLet},
TokenData{"loop", Token::Type::kLoop},
TokenData{"mat2x2", Token::Type::kMat2x2},
TokenData{"mat2x3", Token::Type::kMat2x3},
TokenData{"mat2x4", Token::Type::kMat2x4},
TokenData{"mat3x2", Token::Type::kMat3x2},
TokenData{"mat3x3", Token::Type::kMat3x3},
TokenData{"mat3x4", Token::Type::kMat3x4},
TokenData{"mat4x2", Token::Type::kMat4x2},
TokenData{"mat4x3", Token::Type::kMat4x3},
TokenData{"mat4x4", Token::Type::kMat4x4},
TokenData{"override", Token::Type::kOverride},
TokenData{"ptr", Token::Type::kPtr},
TokenData{"return", Token::Type::kReturn},
TokenData{"sampler", Token::Type::kSampler},
TokenData{"sampler_comparison", Token::Type::kComparisonSampler},
TokenData{"static_assert", Token::Type::kStaticAssert},
TokenData{"struct", Token::Type::kStruct},
TokenData{"switch", Token::Type::kSwitch},
TokenData{"texture_1d", Token::Type::kTextureSampled1d},
TokenData{"texture_2d", Token::Type::kTextureSampled2d},
TokenData{"texture_2d_array", Token::Type::kTextureSampled2dArray},
TokenData{"texture_3d", Token::Type::kTextureSampled3d},
TokenData{"texture_cube", Token::Type::kTextureSampledCube},
TokenData{"texture_cube_array", Token::Type::kTextureSampledCubeArray},
TokenData{"texture_depth_2d", Token::Type::kTextureDepth2d},
TokenData{"texture_depth_2d_array", Token::Type::kTextureDepth2dArray},
TokenData{"texture_depth_cube", Token::Type::kTextureDepthCube},
TokenData{"texture_depth_cube_array", Token::Type::kTextureDepthCubeArray},
TokenData{"texture_depth_multisampled_2d",
Token::Type::kTextureDepthMultisampled2d},
TokenData{"texture_multisampled_2d", Token::Type::kTextureMultisampled2d},
TokenData{"texture_storage_1d", Token::Type::kTextureStorage1d},
TokenData{"texture_storage_2d", Token::Type::kTextureStorage2d},
TokenData{"texture_storage_2d_array", Token::Type::kTextureStorage2dArray},
TokenData{"texture_storage_3d", Token::Type::kTextureStorage3d},
TokenData{"true", Token::Type::kTrue},
TokenData{"type", Token::Type::kType},
TokenData{"u32", Token::Type::kU32},
TokenData{"var", Token::Type::kVar},
TokenData{"vec2", Token::Type::kVec2},
TokenData{"vec3", Token::Type::kVec3},
TokenData{"vec4", Token::Type::kVec4},
TokenData{"while", Token::Type::kWhile}));
} // namespace
} // namespace tint::reader::wgsl