diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc index 9fe2a6f997..83aac1eb63 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc @@ -37,6 +37,7 @@ enum class MutationKind { kReplaceIdentifier, kReplaceLiteral, kInsertReturnStatement, + kReplaceOperator, kNumMutationKinds }; @@ -102,6 +103,11 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data, } break; + case MutationKind::kReplaceOperator: + if (!mutator.ReplaceRandomOperator(wgsl_code)) { + return 0; + } + break; default: assert(false && "Unreachable"); return 0; diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc index 76856a9281..32a6619ad8 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include "gtest/gtest.h" @@ -28,6 +29,7 @@ class WgslMutatorTest : public WgslMutator { using WgslMutator::DeleteInterval; using WgslMutator::DuplicateInterval; using WgslMutator::FindClosingBrace; + using WgslMutator::FindOperatorOccurrence; using WgslMutator::GetFunctionBodyPositions; using WgslMutator::GetIdentifiers; using WgslMutator::GetIntLiterals; @@ -543,5 +545,46 @@ TEST(TestInsertReturn, TestMissingSemicolon) { ASSERT_EQ(expected_wgsl_code, wgsl_code); } +TEST(TestReplaceOperator, TestIdentifyOperators) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string code = + R"( +x += 2; +y = a + b; +z = -a; +x *= b / c; +t = t && t | t || t; +b = b > c ^ c <= d; +a >>= b; +b <<= a; +a = a << 2; +b = b >> 3; +c = a % 3; +d %= e; +)"; + // These are the operator occurrences that will be observed by going through the file character + // by character. This includes, for example, identifying the ">" operator if search starts after + // the first character of ">>". + std::vector> operator_occurrences = { + {3, 2}, {4, 1}, {11, 1}, {15, 1}, {22, 1}, {24, 1}, {30, 2}, {31, 1}, + {35, 1}, {42, 1}, {46, 2}, {47, 1}, {51, 1}, {55, 2}, {56, 1}, {63, 1}, + {67, 1}, {71, 1}, {75, 2}, {76, 1}, {83, 3}, {84, 2}, {85, 1}, {92, 3}, + {93, 2}, {94, 1}, {101, 1}, {105, 2}, {106, 1}, {113, 1}, {117, 2}, {118, 1}, + {125, 1}, {129, 1}, {136, 2}, {137, 1}, {3, 2}}; + uint32_t operator_occurrence_index = 0; + for (size_t i = 0; i < code.length(); i++) { + // Move on to the next operator occurrence if the current index into the code string exceeds + // the index associated with that operator occurrence. Exception: stay with the last + // operator occurrence if search has already passed the last operator in the file. + if (i < code.length() - 2 && i > operator_occurrences[operator_occurrence_index].first) { + operator_occurrence_index = + (operator_occurrence_index + 1) % operator_occurrences.size(); + } + ASSERT_EQ(operator_occurrences[operator_occurrence_index], + mutator.FindOperatorOccurrence(code, static_cast(i)).value()); + } +} + } // namespace } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc index f525caa701..773eea61d7 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc @@ -318,4 +318,119 @@ bool WgslMutator::ReplaceRandomIntLiteral(std::string& wgsl_code) { return true; } +std::string WgslMutator::ChooseRandomReplacementForOperator(const std::string& existing_operator) { + // Operators are partitioned into three classes: assignment, expression and increment. The regex + // mutator will swap operators in the same class. The hypothesis is that this should exercise a + // number of type-safe swaps (e.g. changing += to *=), as well as some badly-typed yet + // interesting swaps (e.g. changing + to ^ when the operators are matrices), while avoiding + // making totally nonsensical replacements (such as changing ++ too /). + std::vector assignment_operators{ + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="}; + std::vector expression_operators{"+", "-", "*", "/", "%", "&&", "||", + "&", "|", "^", "<<", ">>", "<", ">", + "<=", ">=", "!", "!=", "~"}; + std::vector increment_operators{"++", "--"}; + for (auto operators : {assignment_operators, expression_operators, increment_operators}) { + auto it = std::find(operators.begin(), operators.end(), existing_operator); + if (it != operators.end()) { + // The operator falls into this category, so select another operator from the category. + operators.erase(it); + return generator_.GetRandomElement(operators); + } + } + assert(false && "Unknown operator"); + return ""; +} + +bool WgslMutator::ReplaceRandomOperator(std::string& wgsl_code) { + // Choose an index into the code at random. + const uint32_t start_index = generator_.GetUInt32(static_cast(wgsl_code.size())); + // Find the first operator occurrence from the chosen point, wrapping back to the start of the + // file if needed. + auto maybe_operator_occurrence = FindOperatorOccurrence(wgsl_code, start_index); + if (!maybe_operator_occurrence.has_value()) { + // It is unlikely that there will be *no* operators in the file, but if this is the case + // then this mutation cannot be applied. + return false; + } + std::string existing_operator = + wgsl_code.substr(maybe_operator_occurrence->first, maybe_operator_occurrence->second); + // Replace the identified operator with a randomly-chosen alternative. + wgsl_code.replace(maybe_operator_occurrence->first, maybe_operator_occurrence->second, + ChooseRandomReplacementForOperator(existing_operator)); + return true; +} + +std::optional> WgslMutator::FindOperatorOccurrence( + const std::string& wgsl_code, + uint32_t start_index) { + // Loops through the characters of the code in a wrap-around fashion, looking for the first + // encountered token that is a WGSL operator. + + for (size_t i = 0; i < wgsl_code.size(); i++) { + uint32_t current_index = static_cast((start_index + i) % wgsl_code.size()); + + // To cater for multi-character operator tokens, get the three consecutive characters from + // the code string starting at the current index. Use null characters to account for the + // case where search has reached the end of the code string. + char first_character = wgsl_code[current_index]; + char second_character = + current_index == wgsl_code.size() - 1 ? '\0' : wgsl_code[current_index + 1]; + char third_character = + current_index >= wgsl_code.size() - 2 ? '\0' : wgsl_code[current_index + 2]; + + // This uses the extracted characters to match for the various WGSL operators. + switch (first_character) { + case '!': + case '^': + switch (second_character) { + case '=': + return {{current_index, 2}}; + default: + return {{current_index, 1}}; + } + case '|': + case '&': + case '+': + case '-': + if (second_character == first_character || second_character == '=') { + return {{current_index, 2}}; + } + return {{current_index, 1}}; + case '*': + case '/': + case '%': + switch (second_character) { + case '=': + return {{current_index, 2}}; + default: + return {{current_index, 1}}; + } + case '=': + if (second_character == '=') { + return {{current_index, 2}}; + } + return {{current_index, 1}}; + case '<': + case '>': + if (second_character == '=') { + return {{current_index, 2}}; + } + if (second_character == first_character) { + if (third_character == '=') { + return {{current_index, 3}}; + } + return {{current_index, 2}}; + } + return {{current_index, 1}}; + case '~': + return {{current_index, 1}}; + default: + break; + } + } + // No operator was found, so empty is returned. + return {}; +} + } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h index f04aea1b07..c5ae900aaa 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h @@ -15,6 +15,7 @@ #ifndef SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_ #define SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_ +#include #include #include #include @@ -72,6 +73,12 @@ class WgslMutator { /// @return true if the mutation was succesful or false otherwise. bool InsertReturnStatement(std::string& wgsl_code); + /// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one + /// randomly-chosen operator in the original string with another operator. + /// @param wgsl_code - the initial WGSL-like string that will be mutated. + /// @return true if an operator replacement happened or false otherwise. + bool ReplaceRandomOperator(std::string& wgsl_code); + protected: /// Given index idx1 it delets the region of length interval_len /// starting at index idx1; @@ -147,6 +154,22 @@ class WgslMutator { size_t reg2_len, std::string& wgsl_code); + /// Finds the next occurrence of an operator in a WGSL-like string from a given starting + /// position, wrapping around to the start of the string if no operator is found before reaching + /// the end, and returning empty of no operator is found at all. There is no guarantee that the + /// result will correspond to a WGSL operator token, e.g. the identified characters could be + /// part of a comment, or e.g. the file might contain >>=, in which case the operator + /// >= will be identified should it happen that the starting index corresponds to the second > + /// character of this operator. Given that the regex mutator does not aim to guarantee + /// well-formed programs, these issues are acceptable. + /// @param wgsl_code - the WGSL-like string in which operator occurrences will be found. + /// @param start_index - the index at which search should start + /// @return empty if no operator is found, otherwise a pair comprising the index at which the + /// operator starts and the character length of the operator. + std::optional> FindOperatorOccurrence( + const std::string& wgsl_code, + uint32_t start_index); + private: /// A function that given a delimiter, returns a vector that contains /// all the positions of the delimiter in the WGSL code. @@ -167,6 +190,13 @@ class WgslMutator { std::string replacement_text, std::string& wgsl_code); + /// Given a string representing a WGSL operator, randomly returns a different WGSL operator in + /// the same category as the original, where the three categories are assignment operators (such + /// as = and +=), expression operators (such as + and ^) and increment operators (++ and --). + /// @param existing_operator - the characters comprising some WGSL operator + /// @return another WGSL operator falling into the same category. + std::string ChooseRandomReplacementForOperator(const std::string& existing_operator); + RandomGenerator& generator_; };