Regex fuzzer: replace operators

A mutation that replaces operators in a WGSL-like string at random.

Fixes: tint:1092.

Change-Id: I912825365f338266d34a1bffb5c8a96cecaba179
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96404
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
This commit is contained in:
Alastair F. Donaldson 2022-07-19 15:55:43 +00:00 committed by Dawn LUCI CQ
parent 7bcf410d0b
commit 84f7830874
4 changed files with 194 additions and 0 deletions

View File

@ -37,6 +37,7 @@ enum class MutationKind {
kReplaceIdentifier,
kReplaceLiteral,
kInsertReturnStatement,
kReplaceOperator,
kNumMutationKinds
};
@ -102,6 +103,11 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
}
break;
case MutationKind::kReplaceOperator:
if (!mutator.ReplaceRandomOperator(wgsl_code)) {
return 0;
}
break;
default:
assert(false && "Unreachable");
return 0;

View File

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <optional>
#include <string>
#include "gtest/gtest.h"
@ -28,6 +29,7 @@ class WgslMutatorTest : public WgslMutator {
using WgslMutator::DeleteInterval;
using WgslMutator::DuplicateInterval;
using WgslMutator::FindClosingBrace;
using WgslMutator::FindOperatorOccurrence;
using WgslMutator::GetFunctionBodyPositions;
using WgslMutator::GetIdentifiers;
using WgslMutator::GetIntLiterals;
@ -543,5 +545,46 @@ TEST(TestInsertReturn, TestMissingSemicolon) {
ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
TEST(TestReplaceOperator, TestIdentifyOperators) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string code =
R"(
x += 2;
y = a + b;
z = -a;
x *= b / c;
t = t && t | t || t;
b = b > c ^ c <= d;
a >>= b;
b <<= a;
a = a << 2;
b = b >> 3;
c = a % 3;
d %= e;
)";
// These are the operator occurrences that will be observed by going through the file character
// by character. This includes, for example, identifying the ">" operator if search starts after
// the first character of ">>".
std::vector<std::pair<uint32_t, uint32_t>> operator_occurrences = {
{3, 2}, {4, 1}, {11, 1}, {15, 1}, {22, 1}, {24, 1}, {30, 2}, {31, 1},
{35, 1}, {42, 1}, {46, 2}, {47, 1}, {51, 1}, {55, 2}, {56, 1}, {63, 1},
{67, 1}, {71, 1}, {75, 2}, {76, 1}, {83, 3}, {84, 2}, {85, 1}, {92, 3},
{93, 2}, {94, 1}, {101, 1}, {105, 2}, {106, 1}, {113, 1}, {117, 2}, {118, 1},
{125, 1}, {129, 1}, {136, 2}, {137, 1}, {3, 2}};
uint32_t operator_occurrence_index = 0;
for (size_t i = 0; i < code.length(); i++) {
// Move on to the next operator occurrence if the current index into the code string exceeds
// the index associated with that operator occurrence. Exception: stay with the last
// operator occurrence if search has already passed the last operator in the file.
if (i < code.length() - 2 && i > operator_occurrences[operator_occurrence_index].first) {
operator_occurrence_index =
(operator_occurrence_index + 1) % operator_occurrences.size();
}
ASSERT_EQ(operator_occurrences[operator_occurrence_index],
mutator.FindOperatorOccurrence(code, static_cast<uint32_t>(i)).value());
}
}
} // namespace
} // namespace tint::fuzzers::regex_fuzzer

View File

@ -318,4 +318,119 @@ bool WgslMutator::ReplaceRandomIntLiteral(std::string& wgsl_code) {
return true;
}
std::string WgslMutator::ChooseRandomReplacementForOperator(const std::string& existing_operator) {
// Operators are partitioned into three classes: assignment, expression and increment. The regex
// mutator will swap operators in the same class. The hypothesis is that this should exercise a
// number of type-safe swaps (e.g. changing += to *=), as well as some badly-typed yet
// interesting swaps (e.g. changing + to ^ when the operators are matrices), while avoiding
// making totally nonsensical replacements (such as changing ++ too /).
std::vector<std::string> assignment_operators{
"=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="};
std::vector<std::string> expression_operators{"+", "-", "*", "/", "%", "&&", "||",
"&", "|", "^", "<<", ">>", "<", ">",
"<=", ">=", "!", "!=", "~"};
std::vector<std::string> increment_operators{"++", "--"};
for (auto operators : {assignment_operators, expression_operators, increment_operators}) {
auto it = std::find(operators.begin(), operators.end(), existing_operator);
if (it != operators.end()) {
// The operator falls into this category, so select another operator from the category.
operators.erase(it);
return generator_.GetRandomElement(operators);
}
}
assert(false && "Unknown operator");
return "";
}
bool WgslMutator::ReplaceRandomOperator(std::string& wgsl_code) {
// Choose an index into the code at random.
const uint32_t start_index = generator_.GetUInt32(static_cast<uint32_t>(wgsl_code.size()));
// Find the first operator occurrence from the chosen point, wrapping back to the start of the
// file if needed.
auto maybe_operator_occurrence = FindOperatorOccurrence(wgsl_code, start_index);
if (!maybe_operator_occurrence.has_value()) {
// It is unlikely that there will be *no* operators in the file, but if this is the case
// then this mutation cannot be applied.
return false;
}
std::string existing_operator =
wgsl_code.substr(maybe_operator_occurrence->first, maybe_operator_occurrence->second);
// Replace the identified operator with a randomly-chosen alternative.
wgsl_code.replace(maybe_operator_occurrence->first, maybe_operator_occurrence->second,
ChooseRandomReplacementForOperator(existing_operator));
return true;
}
std::optional<std::pair<uint32_t, uint32_t>> WgslMutator::FindOperatorOccurrence(
const std::string& wgsl_code,
uint32_t start_index) {
// Loops through the characters of the code in a wrap-around fashion, looking for the first
// encountered token that is a WGSL operator.
for (size_t i = 0; i < wgsl_code.size(); i++) {
uint32_t current_index = static_cast<uint32_t>((start_index + i) % wgsl_code.size());
// To cater for multi-character operator tokens, get the three consecutive characters from
// the code string starting at the current index. Use null characters to account for the
// case where search has reached the end of the code string.
char first_character = wgsl_code[current_index];
char second_character =
current_index == wgsl_code.size() - 1 ? '\0' : wgsl_code[current_index + 1];
char third_character =
current_index >= wgsl_code.size() - 2 ? '\0' : wgsl_code[current_index + 2];
// This uses the extracted characters to match for the various WGSL operators.
switch (first_character) {
case '!':
case '^':
switch (second_character) {
case '=':
return {{current_index, 2}};
default:
return {{current_index, 1}};
}
case '|':
case '&':
case '+':
case '-':
if (second_character == first_character || second_character == '=') {
return {{current_index, 2}};
}
return {{current_index, 1}};
case '*':
case '/':
case '%':
switch (second_character) {
case '=':
return {{current_index, 2}};
default:
return {{current_index, 1}};
}
case '=':
if (second_character == '=') {
return {{current_index, 2}};
}
return {{current_index, 1}};
case '<':
case '>':
if (second_character == '=') {
return {{current_index, 2}};
}
if (second_character == first_character) {
if (third_character == '=') {
return {{current_index, 3}};
}
return {{current_index, 2}};
}
return {{current_index, 1}};
case '~':
return {{current_index, 1}};
default:
break;
}
}
// No operator was found, so empty is returned.
return {};
}
} // namespace tint::fuzzers::regex_fuzzer

View File

@ -15,6 +15,7 @@
#ifndef SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
#define SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
#include <optional>
#include <string>
#include <utility>
#include <vector>
@ -72,6 +73,12 @@ class WgslMutator {
/// @return true if the mutation was succesful or false otherwise.
bool InsertReturnStatement(std::string& wgsl_code);
/// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one
/// randomly-chosen operator in the original string with another operator.
/// @param wgsl_code - the initial WGSL-like string that will be mutated.
/// @return true if an operator replacement happened or false otherwise.
bool ReplaceRandomOperator(std::string& wgsl_code);
protected:
/// Given index idx1 it delets the region of length interval_len
/// starting at index idx1;
@ -147,6 +154,22 @@ class WgslMutator {
size_t reg2_len,
std::string& wgsl_code);
/// Finds the next occurrence of an operator in a WGSL-like string from a given starting
/// position, wrapping around to the start of the string if no operator is found before reaching
/// the end, and returning empty of no operator is found at all. There is no guarantee that the
/// result will correspond to a WGSL operator token, e.g. the identified characters could be
/// part of a comment, or e.g. the file might contain >>=, in which case the operator
/// >= will be identified should it happen that the starting index corresponds to the second >
/// character of this operator. Given that the regex mutator does not aim to guarantee
/// well-formed programs, these issues are acceptable.
/// @param wgsl_code - the WGSL-like string in which operator occurrences will be found.
/// @param start_index - the index at which search should start
/// @return empty if no operator is found, otherwise a pair comprising the index at which the
/// operator starts and the character length of the operator.
std::optional<std::pair<uint32_t, uint32_t>> FindOperatorOccurrence(
const std::string& wgsl_code,
uint32_t start_index);
private:
/// A function that given a delimiter, returns a vector that contains
/// all the positions of the delimiter in the WGSL code.
@ -167,6 +190,13 @@ class WgslMutator {
std::string replacement_text,
std::string& wgsl_code);
/// Given a string representing a WGSL operator, randomly returns a different WGSL operator in
/// the same category as the original, where the three categories are assignment operators (such
/// as = and +=), expression operators (such as + and ^) and increment operators (++ and --).
/// @param existing_operator - the characters comprising some WGSL operator
/// @return another WGSL operator falling into the same category.
std::string ChooseRandomReplacementForOperator(const std::string& existing_operator);
RandomGenerator& generator_;
};