Count the line pos, offset and size of compilation message in UTF-16

This patch counts the line position, offset and length of each compilation
message in UTF-16 code units and saves them to WGPUCompilationMessage to
align with the latest WebGPU specification.

Bug: dawn:1357
Change-Id: If8f4026bd5b4a64a078e100762b6d1f61da50053
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/115640
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Jiawei Shao 2023-01-10 00:03:24 +00:00 committed by Dawn LUCI CQ
parent 3d2caaae47
commit f7beb85fd1
10 changed files with 177 additions and 27 deletions

View File

@ -749,7 +749,10 @@
{"name": "line num", "type": "uint64_t"},
{"name": "line pos", "type": "uint64_t"},
{"name": "offset", "type": "uint64_t"},
{"name": "length", "type": "uint64_t"}
{"name": "length", "type": "uint64_t"},
{"name": "utf16 line pos", "type": "uint64_t"},
{"name": "utf16 offset", "type": "uint64_t"},
{"name": "utf16 length", "type": "uint64_t"}
]
},
"compilation message type": {

View File

@ -25,6 +25,7 @@
#include "src/tint/diagnostic/printer.h"
#include "src/tint/inspector/inspector.h"
#include "src/tint/reader/reader.h"
#include "src/tint/text/unicode.h"
#include "src/tint/transform/binding_remapper.h"
#include "src/tint/transform/clamp_frag_depth.h"
#include "src/tint/transform/first_index_offset.h"

View File

@ -36,6 +36,39 @@ WGPUCompilationMessageType tintSeverityToMessageType(tint::diag::Severity severi
} // anonymous namespace
// Returns the number of UTF-16 code units needed to encode the UTF-8 string |utf8String|.
//
// Returns an internal error when a UTF-8 sequence in |utf8String| cannot be decoded, or when a
// decoded code point has no UTF-16 encoding, i.e. it lies in the surrogate range U+D800..U+DFFF
// or is greater than U+10FFFF.
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String) {
    // Fast path: an ASCII character is one byte in UTF-8 and one code unit in UTF-16, so the byte
    // count is already the answer.
    if (tint::text::utf8::IsASCII(utf8String)) {
        return utf8String.size();
    }

    uint64_t numberOfUTF16CodeUnits = 0;
    std::string_view remaining = utf8String;
    while (!remaining.empty()) {
        auto [codePoint, utf8CharacterByteLength] = tint::text::utf8::Decode(remaining);
        // Directly return as something wrong has happened during the UTF-8 decoding. Bailing out
        // here also guarantees that the loop always consumes at least one byte per iteration.
        if (utf8CharacterByteLength == 0) {
            return DAWN_INTERNAL_ERROR("Fail to decode the unicode string");
        }
        remaining = remaining.substr(utf8CharacterByteLength);

        // Count the number of code units in UTF-16. See https://en.wikipedia.org/wiki/UTF-16 for
        // more details.
        if (codePoint.value <= 0xD7FF || (codePoint.value >= 0xE000 && codePoint.value <= 0xFFFF)) {
            // Code points from U+0000 to U+D7FF and U+E000 to U+FFFF are encoded as single 16-bit
            // code units.
            ++numberOfUTF16CodeUnits;
        } else if (codePoint.value >= 0x10000 && codePoint.value <= 0x10FFFF) {
            // Code points from U+010000 to U+10FFFF are encoded as two 16-bit code units (a
            // surrogate pair).
            numberOfUTF16CodeUnits += 2;
        } else {
            // UTF-16 cannot encode the code points from U+D800 to U+DFFF (they are reserved for
            // surrogates), nor any value above U+10FFFF (outside of the Unicode code space).
            return DAWN_INTERNAL_ERROR("The unicode string contains illegal unicode code point.");
        }
    }

    return numberOfUTF16CodeUnits;
}
OwnedCompilationMessages::OwnedCompilationMessages() {
mCompilationInfo.nextInChain = 0;
mCompilationInfo.messageCount = 0;
@ -53,23 +86,29 @@ void OwnedCompilationMessages::AddMessageForTesting(std::string message,
// Cannot add messages after GetCompilationInfo has been called.
ASSERT(mCompilationInfo.messages == nullptr);
// Message can only contain ascii characters.
ASSERT(tint::text::utf8::IsASCII(message));
mMessageStrings.push_back(message);
mMessages.push_back({nullptr, nullptr, static_cast<WGPUCompilationMessageType>(type), lineNum,
linePos, offset, length});
linePos, offset, length, linePos, offset, length});
}
void OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnostic) {
MaybeError OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnostic) {
// Cannot add messages after GetCompilationInfo has been called.
ASSERT(mCompilationInfo.messages == nullptr);
// Tint line and column values are 1-based.
uint64_t lineNum = diagnostic.source.range.begin.line;
uint64_t lineCol = diagnostic.source.range.begin.column;
uint64_t linePosInBytes = diagnostic.source.range.begin.column;
// The offset is 0-based.
uint64_t offset = 0;
uint64_t length = 0;
uint64_t offsetInBytes = 0;
uint64_t lengthInBytes = 0;
uint64_t linePosInUTF16 = 0;
uint64_t offsetInUTF16 = 0;
uint64_t lengthInUTF16 = 0;
if (lineNum && lineCol && diagnostic.source.file) {
if (lineNum && linePosInBytes && diagnostic.source.file) {
const tint::Source::FileContent& content = diagnostic.source.file->content;
// Tint stores line as std::string_view in a complete source std::string that's in the
@ -78,23 +117,38 @@ void OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnost
// range starts at 1 while the array of lines start at 0 (hence the -1).
const char* fileStart = content.data.data();
const char* lineStart = content.lines[lineNum - 1].data();
offset = static_cast<uint64_t>(lineStart - fileStart) + lineCol - 1;
offsetInBytes = static_cast<uint64_t>(lineStart - fileStart) + linePosInBytes - 1;
// The linePosInBytes is 1-based.
uint64_t linePosOffsetInUTF16 = 0;
DAWN_TRY_ASSIGN(linePosOffsetInUTF16, CountUTF16CodeUnitsFromUTF8String(
std::string_view(lineStart, linePosInBytes - 1)));
linePosInUTF16 = linePosOffsetInUTF16 + 1;
// The offset is 0-based.
uint64_t lineStartToFileStartOffsetInUTF16 = 0;
DAWN_TRY_ASSIGN(lineStartToFileStartOffsetInUTF16,
CountUTF16CodeUnitsFromUTF8String(std::string_view(
fileStart, static_cast<uint64_t>(lineStart - fileStart))));
offsetInUTF16 = lineStartToFileStartOffsetInUTF16 + linePosInUTF16 - 1;
// If the range has a valid start but the end is not specified, clamp it to the start.
uint64_t endLineNum = diagnostic.source.range.end.line;
uint64_t endLineCol = diagnostic.source.range.end.column;
if (endLineNum == 0 || endLineCol == 0) {
endLineNum = lineNum;
endLineCol = lineCol;
endLineCol = linePosInBytes;
}
const char* endLineStart = content.lines[endLineNum - 1].data();
uint64_t endOffset = static_cast<uint64_t>(endLineStart - fileStart) + endLineCol - 1;
uint64_t endOffsetInBytes =
static_cast<uint64_t>(endLineStart - fileStart) + endLineCol - 1;
// The length of the message is the difference between the starting offset and the
// ending offset. Negative ranges aren't allowed
ASSERT(endOffset >= offset);
length = endOffset - offset;
// ending offset. Negative ranges aren't allowed.
ASSERT(endOffsetInBytes >= offsetInBytes);
lengthInBytes = endOffsetInBytes - offsetInBytes;
DAWN_TRY_ASSIGN(lengthInUTF16, CountUTF16CodeUnitsFromUTF8String(std::string_view(
fileStart + offsetInBytes, lengthInBytes)));
}
if (diagnostic.code) {
@ -104,18 +158,23 @@ void OwnedCompilationMessages::AddMessage(const tint::diag::Diagnostic& diagnost
}
mMessages.push_back({nullptr, nullptr, tintSeverityToMessageType(diagnostic.severity), lineNum,
lineCol, offset, length});
linePosInBytes, offsetInBytes, lengthInBytes, linePosInUTF16,
offsetInUTF16, lengthInUTF16});
return {};
}
void OwnedCompilationMessages::AddMessages(const tint::diag::List& diagnostics) {
MaybeError OwnedCompilationMessages::AddMessages(const tint::diag::List& diagnostics) {
// Cannot add messages after GetCompilationInfo has been called.
ASSERT(mCompilationInfo.messages == nullptr);
for (const auto& diag : diagnostics) {
AddMessage(diag);
DAWN_TRY(AddMessage(diag));
}
AddFormattedTintMessages(diagnostics);
return {};
}
void OwnedCompilationMessages::ClearMessages() {

View File

@ -18,6 +18,7 @@
#include <string>
#include <vector>
#include "dawn/native/Error.h"
#include "dawn/native/dawn_platform.h"
#include "dawn/common/NonCopyable.h"
@ -29,6 +30,8 @@ class List;
namespace dawn::native {
// Counts how many UTF-16 code units are needed to represent the UTF-8 string |utf8String|.
// Returns an internal error if the string cannot be decoded as UTF-8 or if it contains a code
// point in the surrogate range U+D800..U+DFFF, which UTF-16 cannot encode.
ResultOrError<uint64_t> CountUTF16CodeUnitsFromUTF8String(const std::string_view& utf8String);
class OwnedCompilationMessages : public NonCopyable {
public:
OwnedCompilationMessages();
@ -41,14 +44,14 @@ class OwnedCompilationMessages : public NonCopyable {
uint64_t linePos = 0,
uint64_t offset = 0,
uint64_t length = 0);
void AddMessages(const tint::diag::List& diagnostics);
MaybeError AddMessages(const tint::diag::List& diagnostics);
void ClearMessages();
const WGPUCompilationInfo* GetCompilationInfo();
const std::vector<std::string>& GetFormattedTintMessages();
private:
void AddMessage(const tint::diag::Diagnostic& diagnostic);
MaybeError AddMessage(const tint::diag::Diagnostic& diagnostic);
void AddFormattedTintMessages(const tint::diag::List& diagnostics);
WGPUCompilationInfo mCompilationInfo;

View File

@ -299,7 +299,7 @@ ResultOrError<tint::Program> ParseWGSL(const tint::Source::File* file,
#if TINT_BUILD_WGSL_READER
tint::Program program = tint::reader::wgsl::Parse(file);
if (outMessages != nullptr) {
outMessages->AddMessages(program.Diagnostics());
DAWN_TRY(outMessages->AddMessages(program.Diagnostics()));
}
if (!program.IsValid()) {
return DAWN_VALIDATION_ERROR("Tint WGSL reader failure: %s\n", program.Diagnostics().str());
@ -316,7 +316,7 @@ ResultOrError<tint::Program> ParseSPIRV(const std::vector<uint32_t>& spirv,
#if TINT_BUILD_SPV_READER
tint::Program program = tint::reader::spirv::Parse(spirv);
if (outMessages != nullptr) {
outMessages->AddMessages(program.Diagnostics());
DAWN_TRY(outMessages->AddMessages(program.Diagnostics()));
}
if (!program.IsValid()) {
return DAWN_VALIDATION_ERROR("Tint SPIR-V reader failure:\nParser: %s\n",
@ -789,7 +789,7 @@ MaybeError ValidateWGSLProgramExtension(const DeviceBase* device,
if (hasDisallowedExtension) {
if (outMessages != nullptr) {
outMessages->AddMessages(messages);
DAWN_TRY(outMessages->AddMessages(messages));
}
return DAWN_MAKE_ERROR(InternalErrorType::Validation,
"Shader module uses extension(s) not enabled for its device.");
@ -983,7 +983,7 @@ ResultOrError<tint::Program> RunTransforms(tint::transform::Transform* transform
OwnedCompilationMessages* outMessages) {
tint::transform::Output output = transform->Run(program, inputs);
if (outMessages != nullptr) {
outMessages->AddMessages(output.program.Diagnostics());
DAWN_TRY(outMessages->AddMessages(output.program.Diagnostics()));
}
DAWN_INVALID_IF(!output.program.IsValid(), "Tint program failure: %s\n",
output.program.Diagnostics().str());

View File

@ -301,6 +301,7 @@ dawn_test("dawn_unittests") {
"unittests/SystemUtilsTests.cpp",
"unittests/ToBackendTests.cpp",
"unittests/TypedIntegerTests.cpp",
"unittests/UnicodeTests.cpp",
"unittests/native/BlobTests.cpp",
"unittests/native/CacheRequestTests.cpp",
"unittests/native/CommandBufferEncodingTests.cpp",

View File

@ -0,0 +1,73 @@
// Copyright 2022 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn/native/ShaderModule.h"
#include "dawn/tests/unittests/validation/ValidationTest.h"
// Fixture for the CountUTF16CodeUnitsFromUTF8String tests. It adds nothing on top of
// ValidationTest.
class CountUTF16CodeUnitsFromUTF8StringTest : public ValidationTest {};
// Well-formed UTF-8 inputs must be counted successfully and yield the expected number of UTF-16
// code units.
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, ValidUnicodeString) {
    // A UTF-8 encoded input together with the UTF-16 code unit count expected for it.
    struct Expectation {
        const char* utf8;
        uint64_t utf16CodeUnits;
    };

    // Inputs borrowed from src/tint/text/unicode_test.cc
    constexpr std::array<Expectation, 12> kExpectations = {{
        {"", 0},
        {"abc", 3},
        {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c", 4},
        {"def\xf0\x9f\x91\x8b\xf0\x9f\x8c\x8e", 7},
        {"\xed\x9f\xbf", 1},      // U+D7FF, last code point before the surrogate range
        {"\xed\x9f\xbe", 1},      // U+D7FE
        {"\xee\x80\x80", 1},      // U+E000, first code point after the surrogate range
        {"\xee\x80\x81", 1},      // U+E001
        {"\xef\xbf\xbf", 1},      // U+FFFF, last code point needing a single code unit
        {"\xef\xbf\xbe", 1},      // U+FFFE
        {"\xf0\x90\x80\x80", 2},  // U+10000, first code point needing two code units
        {"\xf0\x90\x80\x81", 2},  // U+10001
    }};

    for (const auto& [utf8, utf16CodeUnits] : kExpectations) {
        const std::string_view input(utf8);
        auto countOrError = dawn::native::CountUTF16CodeUnitsFromUTF8String(input);
        ASSERT_TRUE(countOrError.IsSuccess());
        ASSERT_EQ(utf16CodeUnits, countOrError.AcquireSuccess());
    }
}
// Inputs that hold a surrogate code point or a malformed UTF-8 sequence must be reported as an
// error.
TEST_F(CountUTF16CodeUnitsFromUTF8StringTest, InvalidUnicodeString) {
    // Inputs borrowed from src/tint/text/unicode_test.cc
    constexpr std::array<const char*, 12> kMalformedInputs = {{
        "\xed\xa0\x80",      // U+D800, first code point of the surrogate range
        "\xed\xbf\xbf",      // U+DFFF, last code point of the surrogate range
        "ab\xed\xa0\x80",    // U+D800 preceded by valid ASCII
        "\xd0",              // 2-byte sequence without its second byte
        "\xe8\x8f",          // 3-byte sequence without its third byte
        "\xf4\x8f\x8f",      // 4-byte sequence without its fourth byte
        "\xd0\x7f",          // 2-byte sequence, MSB of second byte unset
        "\xe8\x7f\x8f",      // 3-byte sequence, MSB of second byte unset
        "\xe8\x8f\x7f",      // 3-byte sequence, MSB of third byte unset
        "\xf4\x7f\x8f\x8f",  // 4-byte sequence, MSB of second byte unset
        "\xf4\x8f\x7f\x8f",  // 4-byte sequence, MSB of third byte unset
        "\xf4\x8f\x8f\x7f",  // 4-byte sequence, MSB of fourth byte unset
    }};

    for (const char* malformed : kMalformedInputs) {
        const std::string_view input(malformed);
        auto countOrError = dawn::native::CountUTF16CodeUnitsFromUTF8String(input);
        ASSERT_TRUE(countOrError.IsError());
        // Take the error out of the result and discard it.
        std::ignore = countOrError.AcquireError();
    }
}

View File

@ -93,7 +93,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfo) {
wgpuShaderModuleGetCompilationInfo(shaderModule, ToMockGetCompilationInfoCallback, nullptr);
WGPUCompilationMessage message = {
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
WGPUCompilationInfo compilationInfo;
compilationInfo.nextInChain = nullptr;
compilationInfo.messageCount = 1;
@ -133,7 +133,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfoBeforeDisconnect) {
wgpuShaderModuleGetCompilationInfo(shaderModule, ToMockGetCompilationInfoCallback, nullptr);
WGPUCompilationMessage message = {
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
WGPUCompilationInfo compilationInfo;
compilationInfo.nextInChain = nullptr;
compilationInfo.messageCount = 1;
@ -193,7 +193,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfoInsideCallbackBeforeDisconnect)
&testData);
WGPUCompilationMessage message = {
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
WGPUCompilationInfo compilationInfo;
compilationInfo.nextInChain = nullptr;
compilationInfo.messageCount = 1;
@ -220,7 +220,7 @@ TEST_F(WireShaderModuleTests, GetCompilationInfoInsideCallbackBeforeDestruction)
&testData);
WGPUCompilationMessage message = {
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8};
nullptr, "Test Message", WGPUCompilationMessageType_Info, 2, 4, 6, 8, 4, 6, 8};
WGPUCompilationInfo compilationInfo;
compilationInfo.nextInChain = nullptr;
compilationInfo.messageCount = 1;

View File

@ -427,6 +427,10 @@ std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len) {
return {c, n};
}
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string) {
return Decode(reinterpret_cast<const uint8_t*>(utf8_string.data()), utf8_string.size());
}
bool IsASCII(std::string_view str) {
for (auto c : str) {
if (c & 0x80) {

View File

@ -69,6 +69,12 @@ namespace utf8 {
/// If the next code point cannot be decoded then returns [0,0].
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);
/// Decodes the first code point in the utf8 string.
/// This overload forwards the string's data pointer and size to
/// Decode(const uint8_t*, size_t).
/// @param utf8_string the string view that contains the utf8 sequence
/// @returns a pair of CodePoint and width in code units (bytes).
/// If the next code point cannot be decoded then returns [0,0], so a width of
/// 0 signals a decoding failure.
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string);
/// @returns true if all the utf-8 code points in the string are ASCII
/// (code-points 0x00..0x7f).
bool IsASCII(std::string_view);