Regex fuzzer: add swizzles

Adds random swizzle operations to candidate vector expressions.

Also excludes certain common keywords from the set of identifiers that
are matched by the regex fuzzer, which will serve to make other
mutations performed by the regex fuzzer more precise.

Fixes: tint:1619
Change-Id: I10b6937f2c6f7341ec4a85d3b7ab56b3a36ef169
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96780
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
This commit is contained in:
Alastair F. Donaldson 2022-07-29 16:29:21 +00:00 committed by Dawn LUCI CQ
parent ff4be33617
commit 00cc485add
4 changed files with 224 additions and 22 deletions

View File

@ -40,6 +40,7 @@ enum class MutationKind {
kReplaceOperator,
kInsertBreakOrContinue,
kReplaceFunctionCallWithBuiltin,
kAddSwizzle,
kNumMutationKinds
};
@ -121,6 +122,11 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
return 0;
}
break;
case MutationKind::kAddSwizzle:
if (!mutator.AddSwizzle(wgsl_code)) {
return 0;
}
break;
default:
assert(false && "Unreachable");
return 0;

View File

@ -28,13 +28,15 @@ class WgslMutatorTest : public WgslMutator {
using WgslMutator::DeleteInterval;
using WgslMutator::DuplicateInterval;
using WgslMutator::FindClosingBrace;
using WgslMutator::FindClosingBracket;
using WgslMutator::FindOperatorOccurrence;
using WgslMutator::GetFunctionBodyPositions;
using WgslMutator::GetFunctionCallIdentifiers;
using WgslMutator::GetIdentifiers;
using WgslMutator::GetIntLiterals;
using WgslMutator::GetLoopBodyPositions;
using WgslMutator::GetSwizzles;
using WgslMutator::GetVectorConstructors;
using WgslMutator::ReplaceRegion;
using WgslMutator::SwapIntervals;
};
@ -238,11 +240,8 @@ TEST(GetIdentifierTest, GetIdentifierTest1) {
std::vector<std::pair<size_t, size_t>> identifiers_pos = mutator.GetIdentifiers(wgsl_code);
std::vector<std::pair<size_t, size_t>> ground_truth = {
{0, 2}, {3, 12}, {28, 3}, {32, 3}, {37, 4}, {42, 3}, {49, 5}, {55, 4},
{60, 3}, {68, 4}, {73, 3}, {81, 4}, {86, 3}, {110, 6}, {123, 2}, {126, 11},
{144, 7}, {152, 8}, {162, 4}, {167, 3}, {183, 12}, {209, 6}, {216, 4}, {221, 3},
{244, 8}, {259, 2}, {262, 13}, {288, 12}, {319, 7}, {328, 14}, {352, 2}, {355, 12},
{381, 3}, {385, 7}, {394, 3}, {399, 3}, {418, 12}};
{3, 12}, {32, 3}, {49, 5}, {126, 11}, {144, 7}, {152, 8}, {183, 12},
{262, 13}, {288, 12}, {328, 14}, {355, 12}, {385, 7}, {394, 3}, {418, 12}};
ASSERT_EQ(ground_truth, identifiers_pos);
}
@ -316,7 +315,8 @@ TEST(InsertReturnTest, FindClosingBrace) {
var foo_3 : i32 = -20;
)";
size_t opening_bracket_pos = 18;
size_t closing_bracket_pos = mutator.FindClosingBrace(opening_bracket_pos, wgsl_code);
size_t closing_bracket_pos =
mutator.FindClosingBracket(opening_bracket_pos, wgsl_code, '{', '}');
// The -1 is needed since the function body starts after the left bracket.
std::string function_body =
@ -363,7 +363,8 @@ TEST(InsertReturnTest, FindClosingBraceFailing) {
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
size_t opening_bracket_pos = 18;
size_t closing_bracket_pos = mutator.FindClosingBrace(opening_bracket_pos, wgsl_code);
size_t closing_bracket_pos =
mutator.FindClosingBracket(opening_bracket_pos, wgsl_code, '{', '}');
// The -1 is needed since the function body starts after the left bracket.
std::string function_body =
@ -639,5 +640,30 @@ TEST(TestReplaceFunctionCallWithBuiltin, FindFunctionCalls) {
ASSERT_EQ(ground_truth, call_identifiers);
}
// Verifies that GetSwizzles reports every ".xyzw"/".rgba"-style run in the input, including
// chained swizzles, and ignores the bare identifier "x" on the first line.
TEST(TestAddSwizzle, FindSwizzles) {
    RandomGenerator rng(0);
    WgslMutatorTest wgsl_mutator(rng);
    const std::string input = R"(x
v.xxyy.wz.x;
u.rgba.rrg.b)";

    const std::vector<std::pair<size_t, size_t>> actual = wgsl_mutator.GetSwizzles(input);

    // Each entry is {offset of the leading '.', length of the swizzle including the '.'}.
    const std::vector<std::pair<size_t, size_t>> expected{
        {3, 5}, {8, 3}, {11, 2}, {16, 5}, {21, 4}, {25, 2},
    };
    ASSERT_EQ(expected, actual);
}
// Verifies that GetVectorConstructors reports both outer and nested vector constructors, each as
// a {start offset, length} pair covering "vecN<type>(" through the matching ")".
TEST(TestAddSwizzle, FindVectorConstructors) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string code = R"(
vec4<f32>(vec2<f32>(1, 2), vec2<f32>(3))
vec2<i32>(1, abs(abs(2)))
)";
// Note: despite its name, `swizzles` holds the vector constructor positions returned by
// GetVectorConstructors.
std::vector<std::pair<size_t, size_t>> swizzles = mutator.GetVectorConstructors(code);
// Expected order follows the order in which the constructor prefixes appear in the text: the
// outer vec4, its two nested vec2 arguments, then the vec2<i32> on the following line.
// NOTE(review): with the literal exactly as shown here the last constructor would start at
// offset 42, not 43 - confirm whether a blank line between the two constructor lines was lost
// when this listing was rendered.
std::vector<std::pair<size_t, size_t>> ground_truth{{1, 40}, {11, 15}, {28, 12}, {43, 25}};
ASSERT_EQ(ground_truth, swizzles);
}
} // namespace
} // namespace tint::fuzzers::regex_fuzzer

View File

@ -19,6 +19,7 @@
#include <map>
#include <regex>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
@ -39,9 +40,19 @@ std::vector<size_t> WgslMutator::FindDelimiterIndices(const std::string& delimit
return result;
}
// Builds the fixed set of frequently-occurring WGSL keywords that identifier-based mutations
// should leave alone. A fresh set is constructed and returned by value on every call.
std::unordered_set<std::string> WgslMutator::GetCommonKeywords() {
    std::unordered_set<std::string> keywords;
    for (const char* keyword : {
             "array",  "bool", "break", "compute", "continue", "f32",    "fn",     "fragment",
             "i32",    "if",   "for",   "let",     "location", "loop",   "ptr",    "return",
             "struct", "u32",  "var",   "vec2",    "vec3",     "vec4",   "vertex", "while",
         }) {
        keywords.insert(keyword);
    }
    return keywords;
}
std::vector<std::pair<size_t, size_t>> WgslMutator::GetIdentifiers(const std::string& wgsl_code) {
std::vector<std::pair<size_t, size_t>> result;
// To reduce the rate that invalid programs are produced, common keywords will be excluded from
// the identifiers that are returned.
std::unordered_set<std::string> common_keywords = GetCommonKeywords();
// This regular expression works by looking for a character that
// is not part of an identifier followed by a WGSL identifier, followed
// by a character which cannot be part of a WGSL identifier. The regex
@ -54,6 +65,10 @@ std::vector<std::pair<size_t, size_t>> WgslMutator::GetIdentifiers(const std::st
auto identifiers_end = std::sregex_iterator();
for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) {
if (common_keywords.count(i->str()) > 0) {
// This is a common keyword, so skip it.
continue;
}
result.push_back(
{static_cast<size_t>(i->prefix().second - wgsl_code.cbegin()), i->str().size()});
}
@ -99,13 +114,16 @@ std::vector<std::pair<size_t, size_t>> WgslMutator::GetIntLiterals(const std::st
return result;
}
size_t WgslMutator::FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code) {
size_t WgslMutator::FindClosingBracket(size_t opening_bracket_pos,
const std::string& wgsl_code,
char opening_bracket_character,
char closing_bracket_character) {
size_t open_bracket_count = 1;
size_t pos = opening_bracket_pos + 1;
while (open_bracket_count >= 1 && pos < wgsl_code.size()) {
if (wgsl_code[pos] == '{') {
if (wgsl_code[pos] == opening_bracket_character) {
++open_bracket_count;
} else if (wgsl_code[pos] == '}') {
} else if (wgsl_code[pos] == closing_bracket_character) {
--open_bracket_count;
}
++pos;
@ -160,7 +178,7 @@ bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) {
// function body.
size_t left_bracket_pos = function.first;
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}');
if (right_bracket_pos == 0) {
return false;
@ -206,7 +224,7 @@ bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) {
// bracket, and find a semi-colon within the loop body.
size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions);
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}');
if (right_bracket_pos == 0) {
return false;
@ -515,11 +533,10 @@ bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) {
// Pick a random function
auto function = generator_.GetRandomElement(function_body_positions);
// Find the corresponding closing bracket for the function, and find a semi-colon within the
// function body.
// Find the corresponding closing bracket for the function.
size_t left_bracket_pos = function.first;
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}');
if (right_bracket_pos == 0) {
return false;
@ -652,4 +669,127 @@ bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) {
return true;
}
bool WgslMutator::AddSwizzle(std::string& wgsl_code) {
std::vector<std::pair<size_t, bool>> function_body_positions =
GetFunctionBodyPositions(wgsl_code);
// No function was found in wgsl_code.
if (function_body_positions.empty()) {
return false;
}
// Pick a random function
auto function = generator_.GetRandomElement(function_body_positions);
// Find the corresponding closing bracket for the function.
size_t left_bracket_pos = function.first;
size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}');
if (right_bracket_pos == 0) {
return false;
}
std::string function_body(
wgsl_code.substr(left_bracket_pos, right_bracket_pos - left_bracket_pos));
// It makes sense to try applying swizzles to:
// - identifiers, because they might be vectors
auto identifiers = GetIdentifiers(function_body);
// - existing swizzles, e.g. to turn v.xy into v.xy.xx
auto swizzles = GetSwizzles(function_body);
// - vector constructors, e.g. to turn vec3<f32>(...) into vec3<f32>(...).yyz
auto vector_constructors = GetVectorConstructors(function_body);
// Create a combined vector of all the possibilities for swizzling, so that they can be sampled
// from as a whole.
std::vector<std::pair<size_t, size_t>> combined;
combined.insert(combined.end(), identifiers.begin(), identifiers.end());
combined.insert(combined.end(), swizzles.begin(), swizzles.end());
combined.insert(combined.end(), vector_constructors.begin(), vector_constructors.end());
if (combined.empty()) {
// No opportunities for swizzling: give up.
return false;
}
// Randomly create a swizzle operation. This is done without checking the potential length of
// the target vector. For identifiers this isn't possible without proper context. For existing
// swizzles and vector constructors it would be possible to check the length, but it is anyway
// good to stress-test swizzle validation code paths.
std::string swizzle = ".";
{
// Choose a swizzle length between 1 and 4, inclusive.
uint32_t swizzle_length = generator_.GetUInt32(1, 5);
// Decide whether to use xyzw or rgba as convenience names.
bool use_xyzw = generator_.GetBool();
// Randomly choose a convenience name for each component of the swizzle.
for (uint32_t i = 0; i < swizzle_length; i++) {
switch (generator_.GetUInt32(4)) {
case 0:
swizzle += use_xyzw ? "x" : "r";
break;
case 1:
swizzle += use_xyzw ? "y" : "g";
break;
case 2:
swizzle += use_xyzw ? "z" : "b";
break;
case 3:
swizzle += use_xyzw ? "w" : "a";
break;
default:
assert(false && "Unreachable");
break;
}
}
}
// Choose a random opportunity for swizzling and add the swizzle right after it.
auto target = generator_.GetRandomElement(combined);
wgsl_code.insert(left_bracket_pos + target.first + target.second, swizzle);
return true;
}
// Scans wgsl_code for swizzle-like text: a '.' followed by one or more characters drawn
// entirely from "xyzw" or entirely from "rgba". Each hit is reported as
// {offset of the '.', length of the match including the '.'}.
std::vector<std::pair<size_t, size_t>> WgslMutator::GetSwizzles(const std::string& wgsl_code) {
    const std::regex pattern("\\.(([xyzw]+)|([rgba]+))");
    std::vector<std::pair<size_t, size_t>> found;

    const std::sregex_iterator no_more_matches;
    std::sregex_iterator match(wgsl_code.begin(), wgsl_code.end(), pattern);
    while (match != no_more_matches) {
        const std::smatch& hit = *match;
        const size_t offset = static_cast<size_t>(hit[0].first - wgsl_code.cbegin());
        const size_t length = static_cast<size_t>(hit[0].length());
        found.emplace_back(offset, length);
        ++match;
    }
    return found;
}
// Finds text that looks like a complete vector constructor, i.e. "vecN<type>(" followed
// eventually by the ")" that balances its opening bracket. Each hit is reported as
// {offset of the 'v' of "vec", length up to and including the closing ")"}. Nested constructors
// are reported as separate entries, in the order in which their prefixes appear.
std::vector<std::pair<size_t, size_t>> WgslMutator::GetVectorConstructors(
    const std::string& wgsl_code) {
    // This regex recognises the prefixes of vector constructors, which have the form:
    // "vecn<type>(", with possible whitespace between tokens.
    std::regex vector_constructor_prefix_regex("vec\\d[ \\n]*<[ \\n]*[a-z0-9_]+[ \\n]*>[^\\(]*\\(");
    std::vector<std::pair<size_t, size_t>> result;
    auto vector_constructor_prefixes_begin =
        std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), vector_constructor_prefix_regex);
    auto vector_constructor_prefixes_end = std::sregex_iterator();

    // Look through all of the vector constructor prefixes and see whether each one appears to
    // correspond to a complete vector construction.
    for (std::sregex_iterator i = vector_constructor_prefixes_begin;
         i != vector_constructor_prefixes_end; ++i) {
        size_t start = static_cast<size_t>(i->prefix().second - wgsl_code.cbegin());

        // The prefix regex always ends with "(", so the opening bracket is the last character of
        // the match, i.e. one position before the start of the suffix. FindClosingBracket expects
        // the position of the opening bracket itself and begins scanning one character after it;
        // passing the suffix start instead would skip the first character inside the brackets,
        // which mishandles inputs such as "vec2<f32>()" or "vec2<f32>((1), 2)".
        size_t opening_bracket =
            static_cast<size_t>(i->suffix().first - wgsl_code.cbegin()) - 1;

        // A prefix is deemed to correspond to a complete vector construction if it is possible to
        // find a corresponding closing bracket for the "(" at the end of the prefix.
        // FindClosingBracket yields 0 when there is no such bracket.
        size_t closing_bracket = FindClosingBracket(opening_bracket, wgsl_code, '(', ')');
        if (closing_bracket != 0) {
            // A closing bracket was found, so record the start and size of the entire vector
            // constructor.
            result.push_back({start, closing_bracket - start + 1});
        }
    }
    return result;
}
} // namespace tint::fuzzers::regex_fuzzer

View File

@ -17,6 +17,7 @@
#include <optional>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
@ -92,6 +93,12 @@ class WgslMutator {
/// @return true if a function call replacement happened or false otherwise.
bool ReplaceFunctionCallWithBuiltin(std::string& wgsl_code);
/// Given a WGSL-like string, adds a swizzle operation to either (a) an identifier, (b) a vector
/// constructor, or (c) an existing swizzle.
/// @param wgsl_code - the initial WGSL-like string that will be mutated.
/// @return true if a swizzle operation is added or false otherwise.
bool AddSwizzle(std::string& wgsl_code);
protected:
/// Given index idx1 it deletes the region of length interval_len
/// starting at index idx1;
@ -108,13 +115,18 @@ class WgslMutator {
/// @param wgsl_code - the string where the swap will occur.
void DuplicateInterval(size_t idx1, size_t reg1_len, size_t idx2, std::string& wgsl_code);
/// Finds a possible closing brace corresponding to the opening
/// brace at position opening_bracket_pos.
/// @param opening_bracket_pos - the position of the opening brace.
/// @param wgsl_code - the WGSL-like string where the closing brace.
/// Finds a possible closing bracket corresponding to the opening
/// bracket at position opening_bracket_pos.
/// @param opening_bracket_pos - the position of the opening bracket.
/// @param wgsl_code - the WGSL-like string in which the closing bracket is searched for.
/// @param opening_bracket_character - the opening bracket character, e.g. (, {, <, or [
/// @param closing_bracket_character - the closing bracket character, e.g. ), }, >, or ]
/// @return the position of the closing bracket or 0 if there is no closing
/// brace.
size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code);
/// bracket.
size_t FindClosingBracket(size_t opening_bracket_pos,
const std::string& wgsl_code,
char opening_bracket_character,
char closing_bracket_character);
/// Returns the starting position of the bodies of the functions identified by an appropriate
/// function, together with a boolean indicating whether the function returns a value or not.
@ -195,6 +207,16 @@ class WgslMutator {
const std::string& wgsl_code,
uint32_t start_index);
/// Finds all the swizzle operations in a WGSL-like string.
/// @param wgsl_code - the WGSL-like string where the swizzles will be found.
/// @return a vector with the positions and lengths of all the swizzles in wgsl_code.
std::vector<std::pair<size_t, size_t>> GetSwizzles(const std::string& wgsl_code);
/// Finds all the vector constructors in a WGSL-like string.
/// @param wgsl_code - the WGSL-like string where the vector constructors will be found.
/// @return a vector with the positions and lengths of all the vector constructors in wgsl_code.
std::vector<std::pair<size_t, size_t>> GetVectorConstructors(const std::string& wgsl_code);
private:
/// A function that given a delimiter, returns a vector that contains
/// all the positions of the delimiter in the WGSL code.
@ -222,6 +244,14 @@ class WgslMutator {
/// @return another WGSL operator falling into the same category.
std::string ChooseRandomReplacementForOperator(const std::string& existing_operator);
/// Yields a fixed set of commonly-used WGSL keywords. The regex fuzzer relies heavily on
/// recognizing possible identifiers via regular expressions. There is a high chance that
/// keywords will be recognized as identifiers, which will lead to invalid code. It is valuable
/// for this to occur to some extent (to stress test validation), but it is useful to be able to
/// exclude the most common keywords so that invalidity does not occur too often.
/// @return a set of commonly-used WGSL keywords.
static std::unordered_set<std::string> GetCommonKeywords();
RandomGenerator& generator_;
};