Count the line pos, offset and size of compilation message in UTF-16
This patch counts the line position, offset and length of each compilation message in UTF-16 code units and saves them to WGPUCompilationMessage to align with the latest WebGPU specification. Bug: dawn:1357 Change-Id: If8f4026bd5b4a64a078e100762b6d1f61da50053 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/115640 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Jiawei Shao <jiawei.shao@intel.com> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
parent
3d2caaae47
commit
f7beb85fd1
|
@ -749,7 +749,10 @@
|
|||
{"name": "line num", "type": "uint64_t"},
|
||||
{"name": "line pos", "type": "uint64_t"},
|
||||
{"name": "offset", "type": "uint64_t"},
|
||||
{"name": "length", "type": "uint64_t"}
|
||||
{"name": "length", "type": "uint64_t"},
|
||||
{"name": "utf16 line pos", "type": "uint64_t"},
|
||||
{"name": "utf16 offset", "type": "uint64_t"},
|
||||
{"name": "utf16 length", "type": "uint64_t"}
|
||||
]
|
||||
},
|
||||
"compilation message type": {
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "src/tint/diagnostic/printer.h"
|
||||
#include "src/tint/inspector/inspector.h"
|
||||
#include "src/tint/reader/reader.h"
|
||||
#include "src/tint/text/unicode.h"
|
||||
#include "src/tint/transform/binding_remapper.h"
|
||||
#include "src/tint/transform/clamp_frag_depth.h"
|
||||
#include "src/tint/transform/first_index_offset.h"
|
||||
|
|
|
@ -36,6 +36,39 @@ WGPUCompilationMessageType tintSeverityToMessageType(tint::diag::Severity severi
|
|||
|
||||
} // anonymous namespace
|
||||
|
||||
// Returns how many UTF-16 code units are needed to encode |utf8String|.
//
// An internal error is returned instead when a UTF-8 sequence cannot be decoded, or when a decoded
// code point lies in the U+D800..U+DFFF range, which UTF-16 cannot encode.
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) {
    // Fast path: each ASCII character is a single UTF-16 code unit, so the byte count is the
    // answer.
    if (tint::text::utf8::IsASCII(utf8String)) {
        return utf8String.size();
    }

    uint64_t utf16CodeUnitCount = 0;
    for (std::string_view rest = utf8String; !rest.empty();) {
        const auto [codePoint, byteLength] = tint::text::utf8::Decode(rest);

        // The decoder reports a sequence it cannot decode with a zero byte length.
        if (byteLength == 0) {
            return DAWN_INTERNAL_ERROR("Fail to decode the unicode string");
        }
        rest = rest.substr(byteLength);

        // See https://en.wikipedia.org/wiki/UTF-16 for the encoding rules applied below.
        const bool isSurrogate = codePoint.value >= 0xD800 && codePoint.value <= 0xDFFF;
        if (isSurrogate) {
            // UTF-16 cannot encode the code points from U+D800 to U+DFFF.
            return DAWN_INTERNAL_ERROR("The unicode string contains illegal unicode code point.");
        }

        // U+0000..U+D7FF and U+E000..U+FFFF take one 16-bit code unit; U+010000..U+10FFFF take
        // two.
        utf16CodeUnitCount += (codePoint.value >= 0x10000) ? 2 : 1;
    }

    return utf16CodeUnitCount;
}
|
||||
|
||||
OwnedCompilationMessages::OwnedCompilationMessages() {
|
||||
mCompilationInfo.nextInChain = 0;
|
||||
mCompilationInfo.messageCount = 0;
|
||||
|
@ -53,23 +86,29 @@ void OwnedCompilationMessages::AddMessageForTesting(std::string message,
|
|||
// Cannot add messages after GetCompilationInfo has been called.
|
||||
ASSERT(mCompilationInfo.messages == nullptr);
|
||||
|
||||
// Message can only contain ascii characters.
|
||||
ASSERT(tint::text::utf8::IsASCII(message));
|
||||
|
||||
mMessageStrings.push_back(message);
|
||||
mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum,
|
||||
linePos, offset, length});
|
||||
linePos, offset, length, linePos, offset, length});
|
||||
}
|
||||
|
||||
void OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnostic) {
|
||||
MaybeError OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnostic) {
|
||||
// Cannot add messages after GetCompilationInfo has been called.
|
||||
ASSERT(mCompilationInfo.messages == nullptr);
|
||||
|
||||
// Tint line and column values are 1-based.
|
||||
uint64_t lineNum = diagnostic.source.range.begin.line;
|
||||
uint64_t lineCol = diagnostic.source.range.begin.column;
|
||||
uint64_t linePosInBytes = diagnostic.source.range.begin.column;
|
||||
// The offset is 0-based.
|
||||
uint64_t offset = 0;
|
||||
uint64_t length = 0;
|
||||
uint64_t offsetInBytes = 0;
|
||||
uint64_t lengthInBytes = 0;
|
||||
uint64_t linePosInUTF16 = 0;
|
||||
uint64_t offsetInUTF16 = 0;
|
||||
uint64_t lengthInUTF16 = 0;
|
||||
|
||||
if (lineNum && lineCol && diagnostic.source.file) {
|
||||
if (lineNum && linePosInBytes && diagnostic.source.file) {
|
||||
const tint::Source::FileContent& content = diagnostic.source.file->content;
|
||||
|
||||
// Tint stores line as std::string_view in a complete source std::string that's in the
|
||||
|
@ -78,23 +117,38 @@ void OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnost
|
|||
// range starts at 1 while the array of lines start at 0 (hence the -1).
|
||||
const char* fileStart = content.data.data();
|
||||
const char* lineStart = content.lines[lineNum - 1].data();
|
||||
offset = static_cast<uint64_t>(lineStart - fileStart) + lineCol - 1;
|
||||
offsetInBytes = static_cast<uint64_t>(lineStart - fileStart) + linePosInBytes - 1;
|
||||
|
||||
// The linePosInBytes is 1-based.
|
||||
uint64_t linePosOffsetInUTF16 = 0;
|
||||
DAWN_TRY_ASSIGN(linePosOffsetInUTF16, CountUTF16CodeUnitsFromUTF8String(
|
||||
std::string_view(lineStart, linePosInBytes - 1)));
|
||||
linePosInUTF16 = linePosOffsetInUTF16 + 1;
|
||||
|
||||
// The offset is 0-based.
|
||||
uint64_t lineStartToFileStartOffsetInUTF16 = 0;
|
||||
DAWN_TRY_ASSIGN(lineStartToFileStartOffsetInUTF16,
|
||||
CountUTF16CodeUnitsFromUTF8String(std::string_view(
|
||||
fileStart, static_cast<uint64_t>(lineStart - fileStart))));
|
||||
offsetInUTF16 = lineStartToFileStartOffsetInUTF16 + linePosInUTF16 - 1;
|
||||
|
||||
// If the range has a valid start but the end is not specified, clamp it to the start.
|
||||
uint64_t endLineNum = diagnostic.source.range.end.line;
|
||||
uint64_t endLineCol = diagnostic.source.range.end.column;
|
||||
if (endLineNum == 0 || endLineCol == 0) {
|
||||
endLineNum = lineNum;
|
||||
endLineCol = lineCol;
|
||||
endLineCol = linePosInBytes;
|
||||
}
|
||||
|
||||
const char* endLineStart = content.lines[endLineNum - 1].data();
|
||||
uint64_t endOffset = static_cast<uint64_t>(endLineStart - fileStart) + endLineCol - 1;
|
||||
|
||||
uint64_t endOffsetInBytes =
|
||||
static_cast<uint64_t>(endLineStart - fileStart) + endLineCol - 1;
|
||||
// The length of the message is the difference between the starting offset and the
|
||||
// ending offset. Negative ranges aren't allowed
|
||||
ASSERT(endOffset >= offset);
|
||||
length = endOffset - offset;
|
||||
// ending offset. Negative ranges aren't allowed.
|
||||
ASSERT(endOffsetInBytes >= offsetInBytes);
|
||||
lengthInBytes = endOffsetInBytes - offsetInBytes;
|
||||
DAWN_TRY_ASSIGN(lengthInUTF16, CountUTF16CodeUnitsFromUTF8String(std::string_view(
|
||||
fileStart + offsetInBytes, lengthInBytes)));
|
||||
}
|
||||
|
||||
if (diagnostic.code) {
|
||||
|
@ -104,18 +158,23 @@ void OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnost
|
|||
}
|
||||
|
||||
mMessages.push_back({nullptr, nullptr, tintSeverityToMessageType(diagnostic.severity), lineNum,
|
||||
lineCol, offset, length});
|
||||
linePosInBytes, offsetInBytes, lengthInBytes, linePosInUTF16,
|
||||
offsetInUTF16, lengthInUTF16});
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void OwnedCompilationMessages::AddMessages(const tint::diag::List& diagnostics) {
|
||||
MaybeError OwnedCompilationMessages::AddMessages(const tint::diag::List& diagnostics) {
|
||||
// Cannot add messages after GetCompilationInfo has been called.
|
||||
ASSERT(mCompilationInfo.messages == nullptr);
|
||||
|
||||
for (const auto& diag : diagnostics) {
|
||||
AddMessage(diag);
|
||||
DAWN_TRY(AddMessage(diag));
|
||||
}
|
||||
|
||||
AddFormattedTintMessages(diagnostics);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void OwnedCompilationMessages::ClearMessages() {
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "dawn/native/Error.h"
|
||||
#include "dawn/native/dawn_platform.h"
|
||||
|
||||
#include "dawn/common/NonCopyable.h"
|
||||
|
@ -29,6 +30,8 @@ class List;
|
|||
|
||||
namespace dawn::native {
|
||||
|
||||
// Counts the UTF-16 code units needed to encode the UTF-8 string |utf8String|. The result is an
// error when the string cannot be decoded as UTF-8 or when it contains a code point in the
// U+D800..U+DFFF range, which UTF-16 cannot encode.
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String);
|
||||
|
||||
class OwnedCompilationMessages : public NonCopyable {
|
||||
public:
|
||||
OwnedCompilationMessages();
|
||||
|
@ -41,14 +44,14 @@ class OwnedCompilationMessages : public NonCopyable {
|
|||
uint64_t linePos = 0,
|
||||
uint64_t offset = 0,
|
||||
uint64_t length = 0);
|
||||
void AddMessages(const tint::diag::List& diagnostics);
|
||||
MaybeError AddMessages(const tint::diag::List& diagnostics);
|
||||
void ClearMessages();
|
||||
|
||||
const WGPUCompilationInfo* GetCompilationInfo();
|
||||
const std::vector<std::string>& GetFormattedTintMessages();
|
||||
|
||||
private:
|
||||
void AddMessage(const tint::diag::Diagnostic& diagnostic);
|
||||
MaybeError AddMessage(const tint::diag::Diagnostic& diagnostic);
|
||||
void AddFormattedTintMessages(const tint::diag::List& diagnostics);
|
||||
|
||||
WGPUCompilationInfo mCompilationInfo;
|
||||
|
|
|
@ -299,7 +299,7 @@ ResultOrError<tint::Program> ParseWGSL(const tint::Source::File* file,
|
|||
#if TINT_BUILD_WGSL_READER
|
||||
tint::Program program = tint::reader::wgsl::Parse(file);
|
||||
if (outMessages != nullptr) {
|
||||
outMessages->AddMessages(program.Diagnostics());
|
||||
DAWN_TRY(outMessages->AddMessages(program.Diagnostics()));
|
||||
}
|
||||
if (!program.IsValid()) {
|
||||
return DAWN_VALIDATION_ERROR("Tint WGSL reader failure: %s\n", program.Diagnostics().str());
|
||||
|
@ -316,7 +316,7 @@ ResultOrError<tint::Program> ParseSPIRV(const std::vector<uint32_t>& spirv,
|
|||
#if TINT_BUILD_SPV_READER
|
||||
tint::Program program = tint::reader::spirv::Parse(spirv);
|
||||
if (outMessages != nullptr) {
|
||||
outMessages->AddMessages(program.Diagnostics());
|
||||
DAWN_TRY(outMessages->AddMessages(program.Diagnostics()));
|
||||
}
|
||||
if (!program.IsValid()) {
|
||||
return DAWN_VALIDATION_ERROR("Tint SPIR-V reader failure:\nParser: %s\n",
|
||||
|
@ -789,7 +789,7 @@ MaybeError ValidateWGSLProgramExtension(const DeviceBase* device,
|
|||
|
||||
if (hasDisallowedExtension) {
|
||||
if (outMessages != nullptr) {
|
||||
outMessages->AddMessages(messages);
|
||||
DAWN_TRY(outMessages->AddMessages(messages));
|
||||
}
|
||||
return DAWN_MAKE_ERROR(InternalErrorType::Validation,
|
||||
"Shader module uses extension(s) not enabled for its device.");
|
||||
|
@ -983,7 +983,7 @@ ResultOrError<tint::Program> RunTransforms(tint::transform::Transform* transform
|
|||
OwnedCompilationMessages* outMessages) {
|
||||
tint::transform::Output output = transform->Run(program, inputs);
|
||||
if (outMessages != nullptr) {
|
||||
outMessages->AddMessages(output.program.Diagnostics());
|
||||
DAWN_TRY(outMessages->AddMessages(output.program.Diagnostics()));
|
||||
}
|
||||
DAWN_INVALID_IF(!output.program.IsValid(), "Tint program failure: %s\n",
|
||||
output.program.Diagnostics().str());
|
||||
|
|
|
@ -301,6 +301,7 @@ dawn_test("dawn_unittests") {
|
|||
"unittests/SystemUtilsTests.cpp",
|
||||
"unittests/ToBackendTests.cpp",
|
||||
"unittests/TypedIntegerTests.cpp",
|
||||
"unittests/UnicodeTests.cpp",
|
||||
"unittests/native/BlobTests.cpp",
|
||||
"unittests/native/CacheRequestTests.cpp",
|
||||
"unittests/native/CommandBufferEncodingTests.cpp",
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2022 The Dawn Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dawn/native/ShaderModule.h"
|
||||
#include "dawn/tests/unittests/validation/ValidationTest.h"
|
||||
|
||||
// Test fixture for dawn::native::CountUTF16CodeUnitsFromUTF8String. It adds no members of its own
// on top of ValidationTest.
class CountUTF16CodeUnitsFromUTF8StringTest : public ValidationTest {};
|
||||
|
||||
// Checks that decodable UTF-8 strings produce the expected number of UTF-16 code units, including
// the code points on either side of the one-unit / two-unit boundaries.
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
    struct Expectation {
        const char* utf8;
        uint64_t utf16CodeUnits;
    };

    // Inputs referenced from src/tint/text/unicode_test.cc
    constexpr std::array<Expectation, 12> kExpectations = {{
        {"", 0},
        {"abc", 3},
        // Four 3-byte sequences, one code unit each.
        {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c", 4},
        // Three ASCII characters followed by two 4-byte sequences (two code units each).
        {"def\xf0\x9f\x91\x8b\xf0\x9f\x8c\x8e", 7},
        {"\xed\x9f\xbf", 1},      // U+D7FF
        {"\xed\x9f\xbe", 1},      // U+D7FF - 1
        {"\xee\x80\x80", 1},      // U+E000
        {"\xee\x80\x81", 1},      // U+E000 + 1
        {"\xef\xbf\xbf", 1},      // U+FFFF
        {"\xef\xbf\xbe", 1},      // U+FFFF - 1
        {"\xf0\x90\x80\x80", 2},  // U+10000
        {"\xf0\x90\x80\x81", 2},  // U+10000 + 1
    }};

    for (const auto& [utf8, utf16CodeUnits] : kExpectations) {
        dawn::native::ResultOrError<uint64_t> counted =
            dawn::native::CountUTF16CodeUnitsFromUTF8String(std::string_view(utf8));
        ASSERT_TRUE(counted.IsSuccess());
        ASSERT_EQ(utf16CodeUnits, counted.AcquireSuccess());
    }
}
|
||||
|
||||
// Checks that strings holding surrogate code points or malformed UTF-8 sequences are reported as
// errors rather than counted.
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) {
    // Inputs referenced from src/tint/text/unicode_test.cc
    constexpr std::array<const char*, 12> kMalformedInputs = {{
        "\xed\xa0\x80",      // U+D800 (0xD7FF + 1)
        "\xed\xbf\xbf",      // U+DFFF (0xE000 - 1)
        "ab\xed\xa0\x80",    // U+D800 after two ASCII characters
        "\xd0",              // 2-bytes, missing second byte
        "\xe8\x8f",          // 3-bytes, missing third byte
        "\xf4\x8f\x8f",      // 4-bytes, missing fourth byte
        "\xd0\x7f",          // 2-bytes, second byte MSB unset
        "\xe8\x7f\x8f",      // 3-bytes, second byte MSB unset
        "\xe8\x8f\x7f",      // 3-bytes, third byte MSB unset
        "\xf4\x7f\x8f\x8f",  // 4-bytes, second byte MSB unset
        "\xf4\x8f\x7f\x8f",  // 4-bytes, third byte MSB unset
        "\xf4\x8f\x8f\x7f",  // 4-bytes, fourth byte MSB unset
    }};

    for (const char* malformed : kMalformedInputs) {
        dawn::native::ResultOrError<uint64_t> outcome =
            dawn::native::CountUTF16CodeUnitsFromUTF8String(std::string_view(malformed));
        ASSERT_TRUE(outcome.IsError());
        // Take the error out of the result and discard it.
        std::ignore = outcome.AcquireError();
    }
}
|
|
@ -93,7 +93,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfo) {
|
|||
wgpuShaderModuleGetCompilationInfo(shaderModule, ToMockGetCompilationInfoCallback, nullptr);
|
||||
|
||||
WGPUCompilationMessage message = {
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
|
||||
WGPUCompilationInfo compilationInfo;
|
||||
compilationInfo.nextInChain = nullptr;
|
||||
compilationInfo.messageCount = 1;
|
||||
|
@ -133,7 +133,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfoBeforeDisconnect) {
|
|||
wgpuShaderModuleGetCompilationInfo(shaderModule, ToMockGetCompilationInfoCallback, nullptr);
|
||||
|
||||
WGPUCompilationMessage message = {
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
|
||||
WGPUCompilationInfo compilationInfo;
|
||||
compilationInfo.nextInChain = nullptr;
|
||||
compilationInfo.messageCount = 1;
|
||||
|
@ -193,7 +193,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfoInsideCallbackBeforeDisconnect)
|
|||
&testData);
|
||||
|
||||
WGPUCompilationMessage message = {
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
|
||||
WGPUCompilationInfo compilationInfo;
|
||||
compilationInfo.nextInChain = nullptr;
|
||||
compilationInfo.messageCount = 1;
|
||||
|
@ -220,7 +220,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfoInsideCallbackBeforeDestruction)
|
|||
&testData);
|
||||
|
||||
WGPUCompilationMessage message = {
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
|
||||
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
|
||||
WGPUCompilationInfo compilationInfo;
|
||||
compilationInfo.nextInChain = nullptr;
|
||||
compilationInfo.messageCount = 1;
|
||||
|
|
|
@ -427,6 +427,10 @@ std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len) {
|
|||
return {c, n};
|
||||
}
|
||||
|
||||
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string) {
|
||||
return Decode(reinterpret_cast<const uint8_t*>(utf8_string.data()), utf8_string.size());
|
||||
}
|
||||
|
||||
bool IsASCII(std::string_view str) {
|
||||
for (auto c : str) {
|
||||
if (c & 0x80) {
|
||||
|
|
|
@ -69,6 +69,12 @@ namespace utf8 {
|
|||
/// If the next code point cannot be decoded then returns [0,0].
|
||||
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);
|
||||
|
||||
/// Decodes the first code point in the utf8 string.
|
||||
/// @param utf8_string the string view that contains the utf8 sequence
|
||||
/// @returns a pair of CodePoint and width in code units (bytes).
|
||||
/// If the next code point cannot be decoded then returns [0,0].
|
||||
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string);
|
||||
|
||||
/// @returns true if all the utf-8 code points in the string are ASCII
|
||||
/// (code-points 0x00..0x7f).
|
||||
bool IsASCII(std::string_view);
|
||||
|
|
Loading…
Reference in New Issue