From b9d1540b3129eb698ec267ad5ecf1dd921ffa0ab Mon Sep 17 00:00:00 2001 From: Brandon Jones Date: Tue, 25 Jan 2022 17:15:37 +0000 Subject: [PATCH] Make use of std::string_view when parsing There may very well be more places it can be used, but this updates the easiest to identify cases that could be switched over with minimal restructuring. Change-Id: I5100f398731cc4e031c82548ac826d713d0a4cda Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/76640 Reviewed-by: Ben Clayton Commit-Queue: Brandon Jones Kokoro: Kokoro --- src/CMakeLists.txt | 1 + src/reader/wgsl/lexer.cc | 10 +++--- src/reader/wgsl/lexer.h | 4 +-- src/reader/wgsl/parser_impl.cc | 57 +++++++++++++++-------------- src/reader/wgsl/parser_impl.h | 35 +++++++++--------- src/reader/wgsl/token.cc | 2 +- src/reader/wgsl/token.h | 7 ++-- src/source.cc | 41 +++++++++++++++++---- src/source.h | 12 +++++-- src/source_test.cc | 66 ++++++++++++++++++++++++++++++++++ test/BUILD.gn | 1 + 11 files changed, 170 insertions(+), 66 deletions(-) create mode 100644 src/source_test.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2b211d6f05..3498fd2d79 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -750,6 +750,7 @@ if(TINT_BUILD_TESTS) sem/type_manager_test.cc sem/u32_type_test.cc sem/vector_type_test.cc + source_test.cc symbol_table_test.cc symbol_test.cc test_main.cc diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc index 9aa3ae688a..94d8ced7cc 100644 --- a/src/reader/wgsl/lexer.cc +++ b/src/reader/wgsl/lexer.cc @@ -138,10 +138,10 @@ bool Lexer::is_hex(char ch) const { return std::isxdigit(ch); } -bool Lexer::matches(size_t pos, const std::string& substr) { +bool Lexer::matches(size_t pos, std::string_view substr) { if (pos >= len_) return false; - return content_->data.substr(pos, substr.size()) == substr; + return content_->data_view.substr(pos, substr.size()) == substr; } Token Lexer::skip_whitespace_and_comments() { @@ -763,7 +763,7 @@ Token Lexer::try_ident() { } } - auto str = content_->data.substr(s, pos_ - s); + auto str = content_->data_view.substr(s, pos_ - s); end_source(source); auto t = check_keyword(source, str); @@ -771,7 +771,7 @@ Token Lexer::try_ident() { return t; } - return {Token::Type::kIdentifier, source, str}; + return {Token::Type::kIdentifier, source, std::string(str)}; } Token Lexer::try_punctuation() { @@ -937,7 +937,7 @@ Token Lexer::try_punctuation() { return {type, source}; } -Token Lexer::check_keyword(const Source& source, const std::string& str) { +Token Lexer::check_keyword(const Source& source, std::string_view str) { if (str == "array") return {Token::Type::kArray, source, "array"}; if (str == "atomic") diff --git a/src/reader/wgsl/lexer.h b/src/reader/wgsl/lexer.h index c823f593ae..872e16a8bb 100644 --- a/src/reader/wgsl/lexer.h +++ b/src/reader/wgsl/lexer.h @@ -51,7 +51,7 @@ class Lexer { size_t start, size_t end, int32_t base); - Token check_keyword(const Source&, const std::string&); + Token check_keyword(const Source&, std::string_view); /// The try_* methods have the following in common: /// - They assume there is at least one character to be consumed, @@ -89,7 +89,7 @@ class Lexer { /// @returns true if 'ch' is a digit, an alphabetic character, /// or an underscore. bool is_alphanum_underscore(char ch) const; - bool matches(size_t pos, const std::string& substr); + bool matches(size_t pos, std::string_view substr); /// The source file path std::string const file_path_; diff --git a/src/reader/wgsl/parser_impl.cc b/src/reader/wgsl/parser_impl.cc index 6570e431aa..f219eba425 100644 --- a/src/reader/wgsl/parser_impl.cc +++ b/src/reader/wgsl/parser_impl.cc @@ -70,7 +70,7 @@ const char kReadAccess[] = "read"; const char kWriteAccess[] = "write"; const char kReadWriteAccess[] = "read_write"; -ast::Builtin ident_to_builtin(const std::string& str) { +ast::Builtin ident_to_builtin(std::string_view str) { if (str == "position") { return ast::Builtin::kPosition; } @@ -266,8 +266,8 @@ ParserImpl::ParserImpl(Source::File const* file) ParserImpl::~ParserImpl() = default; ParserImpl::Failure::Errored ParserImpl::add_error(const Source& source, - const std::string& err, - const std::string& use) { + std::string_view err, + std::string_view use) { std::stringstream msg; msg << err; if (!use.empty()) { @@ -759,8 +759,7 @@ Maybe ParserImpl::depth_texture_type() { // | 'rgba32uint' // | 'rgba32sint' // | 'rgba32float' -Expect ParserImpl::expect_texel_format( - const std::string& use) { +Expect ParserImpl::expect_texel_format(std::string_view use) { auto tok = next(); if (tok.IsIdentifier()) { auto s = tok.to_str(); @@ -819,7 +818,7 @@ Expect ParserImpl::expect_texel_format( // variable_ident_decl // : IDENT COLON variable_decoration_list* type_decl Expect ParserImpl::expect_variable_ident_decl( - const std::string& use, + std::string_view use, bool allow_inferred) { auto ident = expect_ident(use); if (ident.errored) @@ -849,7 +848,7 @@ Expect ParserImpl::expect_variable_ident_decl( return TypedIdentifier{type.value, ident.value, ident.source}; } -Expect ParserImpl::expect_access(const std::string& use) { +Expect ParserImpl::expect_access(std::string_view use) { auto ident = expect_ident(use); if (ident.errored) return Failure::kErrored; @@ -1016,7 +1015,7 @@ Maybe ParserImpl::type_decl(ast::DecorationList& decos) { return Failure::kNoMatch; } -Expect ParserImpl::expect_type(const std::string& use) { +Expect ParserImpl::expect_type(std::string_view use) { auto type = type_decl(); if (type.errored) return Failure::kErrored; @@ -1170,7 +1169,7 @@ Expect ParserImpl::expect_type_decl_matrix(Token t) { // | PRIVATE // | FUNCTION Expect ParserImpl::expect_storage_class( - const std::string& use) { + std::string_view use) { auto source = peek().source(); if (match(Token::Type::kUniform)) @@ -2228,7 +2227,7 @@ Maybe ParserImpl::singular_expression() { // : PAREN_LEFT ((logical_or_expression COMMA)* logical_or_expression COMMA?)? // PAREN_RIGHT Expect ParserImpl::expect_argument_expression_list( - const std::string& use) { + std::string_view use) { return expect_paren_block(use, [&]() -> Expect { ast::ExpressionList ret; while (continue_parsing()) { @@ -2296,8 +2295,8 @@ Maybe ParserImpl::unary_expression() { return Failure::kErrored; } if (!expr.matched) { - return add_error( - peek(), "unable to parse right side of " + t.to_name() + " expression"); + return add_error(peek(), "unable to parse right side of " + + std::string(t.to_name()) + " expression"); } return create(t.source(), op, expr.value); @@ -2329,8 +2328,8 @@ Expect ParserImpl::expect_multiplicative_expr( if (rhs.errored) return Failure::kErrored; if (!rhs.matched) { - return add_error(peek(), - "unable to parse right side of " + name + " expression"); + return add_error(peek(), "unable to parse right side of " + + std::string(name) + " expression"); } lhs = create(source, op, lhs, rhs.value); @@ -2466,8 +2465,8 @@ Expect ParserImpl::expect_relational_expr( if (rhs.errored) return Failure::kErrored; if (!rhs.matched) { - return add_error(peek(), - "unable to parse right side of " + name + " expression"); + return add_error(peek(), "unable to parse right side of " + + std::string(name) + " expression"); } lhs = create(source, op, lhs, rhs.value); @@ -2510,8 +2509,8 @@ Expect ParserImpl::expect_equality_expr( if (rhs.errored) return Failure::kErrored; if (!rhs.matched) { - return add_error(peek(), - "unable to parse right side of " + name + " expression"); + return add_error(peek(), "unable to parse right side of " + + std::string(name) + " expression"); } lhs = create(source, op, lhs, rhs.value); @@ -3147,7 +3146,7 @@ bool ParserImpl::match(Token::Type tok, Source* source /*= nullptr*/) { return false; } -bool ParserImpl::expect(const std::string& use, Token::Type tok) { +bool ParserImpl::expect(std::string_view use, Token::Type tok) { auto t = peek(); if (t.Is(tok)) { next(); @@ -3195,7 +3194,7 @@ bool ParserImpl::expect(const std::string& use, Token::Type tok) { return false; } -Expect ParserImpl::expect_sint(const std::string& use) { +Expect ParserImpl::expect_sint(std::string_view use) { auto t = peek(); if (!t.Is(Token::Type::kSintLiteral)) return add_error(t.source(), "expected signed integer literal", use); @@ -3204,30 +3203,30 @@ Expect ParserImpl::expect_sint(const std::string& use) { return {t.to_i32(), t.source()}; } -Expect ParserImpl::expect_positive_sint(const std::string& use) { +Expect ParserImpl::expect_positive_sint(std::string_view use) { auto sint = expect_sint(use); if (sint.errored) return Failure::kErrored; if (sint.value < 0) - return add_error(sint.source, use + " must be positive"); + return add_error(sint.source, std::string(use) + " must be positive"); return {static_cast(sint.value), sint.source}; } Expect ParserImpl::expect_nonzero_positive_sint( - const std::string& use) { + std::string_view use) { auto sint = expect_sint(use); if (sint.errored) return Failure::kErrored; if (sint.value <= 0) - return add_error(sint.source, use + " must be greater than 0"); + return add_error(sint.source, std::string(use) + " must be greater than 0"); return {static_cast(sint.value), sint.source}; } -Expect ParserImpl::expect_ident(const std::string& use) { +Expect ParserImpl::expect_ident(std::string_view use) { auto t = peek(); if (t.IsIdentifier()) { synchronized_ = true; @@ -3247,7 +3246,7 @@ Expect ParserImpl::expect_ident(const std::string& use) { template T ParserImpl::expect_block(Token::Type start, Token::Type end, - const std::string& use, + std::string_view use, F&& body) { if (!expect(use, start)) { return Failure::kErrored; @@ -3267,19 +3266,19 @@ T ParserImpl::expect_block(Token::Type start, } template -T ParserImpl::expect_paren_block(const std::string& use, F&& body) { +T ParserImpl::expect_paren_block(std::string_view use, F&& body) { return expect_block(Token::Type::kParenLeft, Token::Type::kParenRight, use, std::forward(body)); } template -T ParserImpl::expect_brace_block(const std::string& use, F&& body) { +T ParserImpl::expect_brace_block(std::string_view use, F&& body) { return expect_block(Token::Type::kBraceLeft, Token::Type::kBraceRight, use, std::forward(body)); } template -T ParserImpl::expect_lt_gt_block(const std::string& use, F&& body) { +T ParserImpl::expect_lt_gt_block(std::string_view use, F&& body) { return expect_block(Token::Type::kLessThan, Token::Type::kGreaterThan, use, std::forward(body)); } diff --git a/src/reader/wgsl/parser_impl.h b/src/reader/wgsl/parser_impl.h index 6c202a77e7..6352526602 100644 --- a/src/reader/wgsl/parser_impl.h +++ b/src/reader/wgsl/parser_impl.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -367,8 +368,8 @@ class ParserImpl { /// @return `Failure::Errored::kError` so that you can combine an add_error() /// call and return on the same line. Failure::Errored add_error(const Source& source, - const std::string& msg, - const std::string& use); + std::string_view msg, + std::string_view use); /// Appends an error at `source` with the message `msg` /// @param source the source to associate the error with /// @param msg the error message @@ -407,7 +408,7 @@ class ParserImpl { /// specify type /// @returns the identifier and type parsed or empty otherwise Expect expect_variable_ident_decl( - const std::string& use, + std::string_view use, bool allow_inferred = false); /// Parses a `variable_qualifier` grammar element /// @returns the variable qualifier information @@ -426,7 +427,7 @@ class ParserImpl { /// Parses a `storage_class` grammar element, erroring on parse failure. /// @param use a description of what was being parsed if an error was raised. /// @returns the storage class or StorageClass::kNone if none matched - Expect expect_storage_class(const std::string& use); + Expect expect_storage_class(std::string_view use); /// Parses a `struct_decl` grammar element with the initial /// `struct_decoration_decl*` provided as `decos`. /// @returns the struct type or nullptr on error @@ -472,7 +473,7 @@ class ParserImpl { /// Parses a `texel_format` grammar element /// @param use a description of what was being parsed if an error was raised /// @returns returns the texel format or kNone if none matched. - Expect expect_texel_format(const std::string& use); + Expect expect_texel_format(std::string_view use); /// Parses a `function_header` grammar element /// @returns the parsed function header Maybe function_header(); @@ -490,7 +491,7 @@ class ParserImpl { /// match a valid access control. /// @param use a description of what was being parsed if an error was raised /// @returns the parsed access control. - Expect expect_access(const std::string& use); + Expect expect_access(std::string_view use); /// Parses a builtin identifier, erroring if the next token does not match a /// valid builtin name. /// @returns the parsed builtin. @@ -566,7 +567,7 @@ class ParserImpl { /// @param use a description of what was being parsed if an error was raised /// @returns the list of arguments Expect expect_argument_expression_list( - const std::string& use); + std::string_view use); /// Parses the recursive portion of the postfix_expression /// @param prefix the left side of the expression /// @returns the parsed expression or nullptr @@ -712,32 +713,32 @@ class ParserImpl { /// @param use a description of what was being parsed if an error was raised. /// @param tok the token to test against /// @returns true if the next token equals `tok` - bool expect(const std::string& use, Token::Type tok); + bool expect(std::string_view use, Token::Type tok); /// Parses a signed integer from the next token in the stream, erroring if the /// next token is not a signed integer. /// Consumes the next token on match. /// @param use a description of what was being parsed if an error was raised /// @returns the parsed integer. - Expect expect_sint(const std::string& use); + Expect expect_sint(std::string_view use); /// Parses a signed integer from the next token in the stream, erroring if /// the next token is not a signed integer or is negative. /// Consumes the next token if it is a signed integer (not necessarily /// negative). /// @param use a description of what was being parsed if an error was raised /// @returns the parsed integer. - Expect expect_positive_sint(const std::string& use); + Expect expect_positive_sint(std::string_view use); /// Parses a non-zero signed integer from the next token in the stream, /// erroring if the next token is not a signed integer or is less than 1. /// Consumes the next token if it is a signed integer (not necessarily /// >= 1). /// @param use a description of what was being parsed if an error was raised /// @returns the parsed integer. - Expect expect_nonzero_positive_sint(const std::string& use); + Expect expect_nonzero_positive_sint(std::string_view use); /// Errors if the next token is not an identifier. /// Consumes the next token on match. /// @param use a description of what was being parsed if an error was raised /// @returns the parsed identifier. - Expect expect_ident(const std::string& use); + Expect expect_ident(std::string_view use); /// Parses a lexical block starting with the token `start` and ending with /// the token `end`. `body` is called to parse the lexical block body /// between the `start` and `end` tokens. If the `start` or `end` tokens @@ -754,7 +755,7 @@ class ParserImpl { template > T expect_block(Token::Type start, Token::Type end, - const std::string& use, + std::string_view use, F&& body); /// A convenience function that calls expect_block() passing /// `Token::Type::kParenLeft` and `Token::Type::kParenRight` for the `start` @@ -765,7 +766,7 @@ class ParserImpl { /// @return the value returned by `body` if no errors are raised, otherwise /// an Expect with error state. template > - T expect_paren_block(const std::string& use, F&& body); + T expect_paren_block(std::string_view use, F&& body); /// A convenience function that calls `expect_block` passing /// `Token::Type::kBraceLeft` and `Token::Type::kBraceRight` for the `start` /// and `end` arguments, respectively. @@ -775,7 +776,7 @@ class ParserImpl { /// @return the value returned by `body` if no errors are raised, otherwise /// an Expect with error state. template > - T expect_brace_block(const std::string& use, F&& body); + T expect_brace_block(std::string_view use, F&& body); /// A convenience function that calls `expect_block` passing /// `Token::Type::kLessThan` and `Token::Type::kGreaterThan` for the `start` /// and `end` arguments, respectively. @@ -785,7 +786,7 @@ class ParserImpl { /// @return the value returned by `body` if no errors are raised, otherwise /// an Expect with error state. template > - T expect_lt_gt_block(const std::string& use, F&& body); + T expect_lt_gt_block(std::string_view use, F&& body); /// sync() calls the function `func`, and attempts to resynchronize the /// parser to the next found resynchronization token if `func` fails. If the @@ -853,7 +854,7 @@ class ParserImpl { ast::DecorationList decos); Expect expect_type_decl_matrix(Token t); - Expect expect_type(const std::string& use); + Expect expect_type(std::string_view use); Maybe non_block_statement(); Maybe for_header_initializer(); diff --git a/src/reader/wgsl/token.cc b/src/reader/wgsl/token.cc index 8ba2cffdb3..82147d550d 100644 --- a/src/reader/wgsl/token.cc +++ b/src/reader/wgsl/token.cc @@ -19,7 +19,7 @@ namespace reader { namespace wgsl { // static -std::string Token::TypeToName(Type type) { +std::string_view Token::TypeToName(Type type) { switch (type) { case Token::Type::kError: return "kError"; diff --git a/src/reader/wgsl/token.h b/src/reader/wgsl/token.h index 8fdc87ea4b..bc955dd81f 100644 --- a/src/reader/wgsl/token.h +++ b/src/reader/wgsl/token.h @@ -16,6 +16,7 @@ #define SRC_READER_WGSL_TOKEN_H_ #include +#include #include "src/source.h" @@ -260,7 +261,7 @@ class Token { /// Converts a token type to a name /// @param type the type to convert /// @returns the token type as as string - static std::string TypeToName(Type type); + static std::string_view TypeToName(Type type); /// Creates an uninitialized token Token(); @@ -354,7 +355,7 @@ class Token { Source source() const { return source_; } /// Returns the string value of the token - /// @return const std::string& + /// @return std::string std::string to_str() const; /// Returns the float value of the token. 0 is returned if the token does not /// contain a float value. @@ -370,7 +371,7 @@ class Token { int32_t to_i32() const; /// @returns the token type as string - std::string to_name() const { return Token::TypeToName(type_); } + std::string_view to_name() const { return Token::TypeToName(type_); } private: /// The Token::Type of the token diff --git a/src/source.cc b/src/source.cc index c9e5076366..84b6606145 100644 --- a/src/source.cc +++ b/src/source.cc @@ -16,23 +16,50 @@ #include #include +#include #include namespace tint { namespace { -std::vector split_lines(const std::string& str) { - std::stringstream stream(str); - std::string line; - std::vector lines; - while (std::getline(stream, line, '\n')) { - lines.emplace_back(std::move(line)); +std::vector SplitLines(std::string_view str) { + std::vector lines; + + size_t lineStart = 0; + for (size_t i = 0; i < str.size(); ++i) { + if (str[i] == '\n') { + lines.push_back(str.substr(lineStart, i - lineStart)); + lineStart = i + 1; + } } + if (lineStart < str.size()) { + lines.push_back(str.substr(lineStart)); + } + return lines; } + +std::vector CopyRelativeStringViews( + const std::vector& src_list, + const std::string_view& src_view, + const std::string_view& dst_view) { + std::vector out(src_list.size()); + for (size_t i = 0; i < src_list.size(); i++) { + auto offset = static_cast(&src_list[i].front() - &src_view.front()); + auto count = src_list[i].length(); + out[i] = dst_view.substr(offset, count); + } + return out; +} + } // namespace Source::FileContent::FileContent(const std::string& body) - : data(body), lines(split_lines(body)) {} + : data(body), data_view(data), lines(SplitLines(data_view)) {} + +Source::FileContent::FileContent(const FileContent& rhs) + : data(rhs.data), + data_view(data), + lines(CopyRelativeStringViews(rhs.lines, rhs.data_view, data_view)) {} Source::FileContent::~FileContent() = default; diff --git a/src/source.h b/src/source.h index b74d20eee3..60f8c1594a 100644 --- a/src/source.h +++ b/src/source.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -33,13 +34,19 @@ class Source { /// @param data the file contents explicit FileContent(const std::string& data); + /// Copy constructor + /// @param rhs the FileContent to copy + FileContent(const FileContent& rhs); + /// Destructor ~FileContent(); - /// un-split file content + /// The original un-split file content const std::string data; + /// A string_view over #data + const std::string_view data_view; /// #data split by lines - const std::vector lines; + const std::vector lines; }; /// File describes a source file, including path and content. @@ -51,6 +58,7 @@ class Source { inline File(const std::string& p, const std::string& c) : path(p), content(c) {} + /// Destructor ~File(); /// file path (optional) diff --git a/src/source_test.cc b/src/source_test.cc new file mode 100644 index 0000000000..57ef4c440f --- /dev/null +++ b/src/source_test.cc @@ -0,0 +1,66 @@ +// Copyright 2022 The Tint Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/source.h" + +#include +#include + +#include "gtest/gtest.h" + +namespace tint { +namespace { + +static constexpr const char* kSource = R"(line one +line two +line three)"; + +using SourceFileContentTest = testing::Test; + +TEST_F(SourceFileContentTest, Ctor) { + Source::FileContent fc(kSource); + EXPECT_EQ(fc.data, kSource); + EXPECT_EQ(fc.data_view, kSource); + ASSERT_EQ(fc.lines.size(), 3u); + EXPECT_EQ(fc.lines[0], "line one"); + EXPECT_EQ(fc.lines[1], "line two"); + EXPECT_EQ(fc.lines[2], "line three"); +} + +TEST_F(SourceFileContentTest, CopyCtor) { + auto src = std::make_unique(kSource); + Source::FileContent fc{*src}; + src.reset(); + EXPECT_EQ(fc.data, kSource); + EXPECT_EQ(fc.data_view, kSource); + ASSERT_EQ(fc.lines.size(), 3u); + EXPECT_EQ(fc.lines[0], "line one"); + EXPECT_EQ(fc.lines[1], "line two"); + EXPECT_EQ(fc.lines[2], "line three"); +} + +TEST_F(SourceFileContentTest, MoveCtor) { + auto src = std::make_unique(kSource); + Source::FileContent fc{std::move(*src)}; + src.reset(); + EXPECT_EQ(fc.data, kSource); + EXPECT_EQ(fc.data_view, kSource); + ASSERT_EQ(fc.lines.size(), 3u); + EXPECT_EQ(fc.lines[0], "line one"); + EXPECT_EQ(fc.lines[1], "line two"); + EXPECT_EQ(fc.lines[2], "line three"); +} + +} // namespace +} // namespace tint diff --git a/test/BUILD.gn b/test/BUILD.gn index 4812293f86..6f5e092aa9 100644 --- a/test/BUILD.gn +++ b/test/BUILD.gn @@ -699,6 +699,7 @@ tint_unittests_source_set("tint_unittests_core_src") { "../src/program_builder_test.cc", "../src/program_test.cc", "../src/scope_stack_test.cc", + "../src/source_test.cc", "../src/symbol_table_test.cc", "../src/symbol_test.cc", "../src/traits_test.cc",