Regex fuzzer: replace function calls with builtins

Replaces random identifiers used as function names in calls with the
names of builtin functions.

Fixes: tint:1617.
Change-Id: I4e70276c9023bcb35b860c98fca6a95dc284f60a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96580
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
This commit is contained in:
Alastair F. Donaldson 2022-07-20 16:05:14 +00:00 committed by Dawn LUCI CQ
parent ca21fa019f
commit eb02cd3301
4 changed files with 235 additions and 23 deletions

View File

@ -39,6 +39,7 @@ enum class MutationKind {
kInsertReturnStatement,
kReplaceOperator,
kInsertBreakOrContinue,
kReplaceFunctionCallWithBuiltin,
kNumMutationKinds
};
@ -115,6 +116,11 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
return 0;
}
break;
case MutationKind::kReplaceFunctionCallWithBuiltin:
if (!mutator.ReplaceFunctionCallWithBuiltin(wgsl_code)) {
return 0;
}
break;
default:
assert(false && "Unreachable");
return 0;

View File

@ -31,6 +31,7 @@ class WgslMutatorTest : public WgslMutator {
using WgslMutator::FindClosingBrace;
using WgslMutator::FindOperatorOccurrence;
using WgslMutator::GetFunctionBodyPositions;
using WgslMutator::GetFunctionCallIdentifiers;
using WgslMutator::GetIdentifiers;
using WgslMutator::GetIntLiterals;
using WgslMutator::GetLoopBodyPositions;
@ -236,18 +237,12 @@ TEST(GetIdentifierTest, GetIdentifierTest1) {
})";
std::vector<std::pair<size_t, size_t>> identifiers_pos = mutator.GetIdentifiers(wgsl_code);
std::vector<std::pair<size_t, size_t>> ground_truth = {
std::make_pair(3, 12), std::make_pair(28, 3), std::make_pair(37, 4),
std::make_pair(49, 5), std::make_pair(60, 3), std::make_pair(68, 4),
std::make_pair(81, 4), std::make_pair(110, 6), std::make_pair(123, 2),
std::make_pair(133, 4), std::make_pair(144, 7), std::make_pair(162, 4),
std::make_pair(183, 12), std::make_pair(209, 6), std::make_pair(221, 3),
std::make_pair(244, 8), std::make_pair(259, 2), std::make_pair(271, 4),
std::make_pair(288, 12), std::make_pair(319, 7), std::make_pair(328, 14),
std::make_pair(352, 2), std::make_pair(363, 4), std::make_pair(381, 3),
std::make_pair(394, 3), std::make_pair(399, 3), std::make_pair(418, 12)};
{0, 2}, {3, 12}, {28, 3}, {32, 3}, {37, 4}, {42, 3}, {49, 5}, {55, 4},
{60, 3}, {68, 4}, {73, 3}, {81, 4}, {86, 3}, {110, 6}, {123, 2}, {126, 11},
{144, 7}, {152, 8}, {162, 4}, {167, 3}, {183, 12}, {209, 6}, {216, 4}, {221, 3},
{244, 8}, {259, 2}, {262, 13}, {288, 12}, {319, 7}, {328, 14}, {352, 2}, {355, 12},
{381, 3}, {385, 7}, {394, 3}, {399, 3}, {418, 12}};
ASSERT_EQ(ground_truth, identifiers_pos);
}
@ -603,13 +598,46 @@ TEST(TestInsertBreakOrContinue, TestLoopPositions3) {
// Checks that GetLoopBodyPositions reports no loop bodies for input that contains no real
// loop constructs.
// NOTE(review): in this listing `wgsl_code` is declared twice (the one-line "unifor ..." string
// and the multi-line shader below). That looks like lines from two different tests interleaved
// by the diff rendering - confirm against the repository before relying on this text.
TEST(TestInsertBreakOrContinue, TestLoopPositions4) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
// This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
// intended to work well on semi-valid code).
std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )";
std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
@vertex
fn vertex_main() -> @builtin(position) vec4<f32> {
clamp_0acf8f();"
return vec4<f32>();
}
@fragment
fn fragment_main() {
clamp_0acf8f();
}
@compute @workgroup_size(1)
fn compute_main() {"
var<private> foo: f32 = 0.0;
clamp_0acf8f ();
})";
// The expectation is simply that nothing is found.
std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
ASSERT_TRUE(loop_positions.empty());
}
// Verifies that GetFunctionCallIdentifiers reports the identifiers that precede a '(' - also
// when spaces or a newline sit between the name and the parenthesis - and that the
// parenthesised expression in `j = (i * 30)` is not reported as a call.
TEST(TestReplaceFunctionCallWithBuiltin, FindFunctionCalls) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
// Do not re-indent or reflow this literal: the expected offsets below are character
// positions inside it, so every space and newline counts.
std::string function_body = R"({
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f ();
_0acf8f();
f
();
j = (i * 30);
})";
std::vector<std::pair<size_t, size_t>> call_identifiers =
mutator.GetFunctionCallIdentifiers(function_body);
// Each entry is (offset, length); the lengths 12, 7 and 1 correspond to `clamp_0acf8f`,
// `_0acf8f` and `f`.
// NOTE(review): the offsets 82/110/131 presuppose leading indentation inside the raw string
// that this listing does not show - confirm against the repository.
std::vector<std::pair<size_t, size_t>> ground_truth{{82, 12}, {110, 7}, {131, 1}};
ASSERT_EQ(ground_truth, call_identifiers);
}
} // namespace
} // namespace tint::fuzzers::regex_fuzzer

View File

@ -47,18 +47,32 @@ std::vector<std::pair<size_t, size_t>> WgslMutator::GetIdentifiers(const std::st
// by a character which cannot be part of a WGSL identifier. The regex
// for the WGSL identifier is obtained from:
// https://www.w3.org/TR/WGSL/#identifiers.
std::regex wgsl_identifier_regex("[^a-zA-Z]([a-zA-Z][0-9a-zA-Z_]*)[^0-9a-zA-Z_]");
std::regex identifier_regex("[_a-zA-Z][0-9a-zA-Z_]*");
std::smatch match;
auto identifiers_begin =
std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), identifier_regex);
auto identifiers_end = std::sregex_iterator();
std::string::const_iterator search_start(wgsl_code.cbegin());
std::string prefix;
for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) {
result.push_back(
{static_cast<size_t>(i->prefix().second - wgsl_code.cbegin()), i->str().size()});
}
return result;
}
while (regex_search(search_start, wgsl_code.cend(), match, wgsl_identifier_regex) == true) {
prefix += match.prefix();
result.push_back(std::make_pair(prefix.size() + 1, match.str(1).size()));
prefix += match.str(0);
search_start = match.suffix().first;
// Finds every identifier in `wgsl_code` that looks like the callee of a function call, i.e. an
// identifier followed - after optional blank space - by an opening parenthesis.
//
// The match is purely lexical: nothing checks that the identifier names a function, so any
// identifier-shaped token in front of '(' (for example a keyword such as `if`) is reported too.
//
// @param wgsl_code - the WGSL-like string to scan.
// @return one (offset, length) pair per match, in order of appearance. The offset is measured
// from the start of `wgsl_code` and both values cover the identifier only, not the blank space
// or the parenthesis.
std::vector<std::pair<size_t, size_t>> WgslMutator::GetFunctionCallIdentifiers(
    const std::string& wgsl_code) {
    // Group 1 captures the identifier. `\s*` accepts any blank space between the name and '(',
    // so tab-separated and CRLF-separated calls are found as well as space/newline-separated
    // ones. The regex is built once and reused, because constructing a std::regex is costly.
    static const std::regex kCallRegex("([_a-zA-Z][0-9a-zA-Z_]*)\\s*\\(");

    std::vector<std::pair<size_t, size_t>> result;
    const std::sregex_iterator matches_end;
    for (std::sregex_iterator it(wgsl_code.cbegin(), wgsl_code.cend(), kCallRegex);
         it != matches_end; ++it) {
        const std::ssub_match& identifier = (*it)[1];
        result.emplace_back(static_cast<size_t>(identifier.first - wgsl_code.cbegin()),
                            static_cast<size_t>(identifier.length()));
    }
    return result;
}
@ -489,4 +503,153 @@ std::optional<std::pair<uint32_t, uint32_t>> WgslMutator::FindOperatorOccurrence
return {};
}
bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) {
std::vector<std::pair<size_t, bool>> function_body_positions =
GetFunctionBodyPositions(wgsl_code);
// No function was found in wgsl_code.
if (function_body_positions.empty()) {
return false;
}
// Pick a random function
auto function = generator_.GetRandomElement(function_body_positions);
// Find the corresponding closing bracket for the function, and find a semi-colon within the
// function body.
size_t left_bracket_pos = function.first;
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
if (right_bracket_pos == 0) {
return false;
}
std::string function_body(
wgsl_code.substr(left_bracket_pos, right_bracket_pos - left_bracket_pos));
std::vector<std::pair<size_t, size_t>> function_call_identifiers =
GetFunctionCallIdentifiers(function_body);
if (function_call_identifiers.empty()) {
return false;
}
auto function_call_identifier = generator_.GetRandomElement(function_call_identifiers);
std::vector<std::string> builtin_functions{"all",
"any",
"select",
"arrayLength",
"abs",
"acos",
"acosh",
"asin",
"asinh",
"atan",
"atanh",
"atan2",
"ceil",
"clamp",
"cos",
"cosh",
"cross",
"degrees",
"distance",
"exp",
"exp2",
"faceForward",
"floor",
"fma",
"fract",
"frexp",
"inverseSqrt",
"ldexp",
"length",
"log",
"log2",
"max",
"min",
"mix",
"modf",
"normalize",
"pow",
"quantizeToF16",
"radians",
"reflect",
"refract",
"round",
"saturate",
"sign",
"sin",
"sinh",
"smoothstep",
"sqrt",
"step",
"tan",
"tanh",
"trunc",
"abs",
"clamp",
"countLeadingZeros",
"countOneBits",
"countTrailingZeros",
"extractBits",
"firstLeadingBit",
"firstTrailingBit",
"insertBits",
"max",
"min",
"reverseBits",
"determinant",
"transpose",
"dot",
"dpdx",
"dpdxCoarse",
"dpdxFine",
"dpdy",
"dpdyCoarse",
"dpdyFine",
"fwidth",
"fwidthCoarse",
"fwidthFine",
"textureDimensions",
"textureGather",
"textureGatherCompare",
"textureLoad",
"textureNumLayers",
"textureNumLevels",
"textureNumSamples",
"textureSample",
"textureSampleBias",
"textureSampleCompare",
"textureSampleCompareLevel",
"textureSampleGrad",
"textureSampleLevel",
"textureStore",
"atomicLoad",
"atomicStore",
"atomicAdd",
"atomicSub",
"atomicMax",
"atomicMin",
"atomicAnd",
"atomicOr",
"atomicXor",
"pack4x8snorm",
"pack4x8unorm",
"pack2x16snorm",
"pack2x16unorm",
"pack2x16float",
"unpack4x8snorm",
"unpack4x8unorm",
"unpack2x16snorm",
"unpack2x16unorm",
"unpack2x16float",
"storageBarrier",
"workgroupBarrier"};
wgsl_code.replace(left_bracket_pos + function_call_identifier.first,
function_call_identifier.second,
generator_.GetRandomElement(builtin_functions));
return true;
}
} // namespace tint::fuzzers::regex_fuzzer

View File

@ -84,6 +84,14 @@ class WgslMutator {
/// @return true if an operator replacement happened or false otherwise.
bool ReplaceRandomOperator(std::string& wgsl_code);
/// Given a WGSL-like string, replaces a random identifier that appears to be a function call
/// with the name of a built-in function. Only calls inside one randomly chosen function body
/// are considered. This will often lead to an invalid module, as the mutation does not aim to
/// check whether the original and replacement function have the same number or types of
/// arguments.
/// @param wgsl_code - the initial WGSL-like string that will be mutated.
/// @return true if a function call replacement happened or false otherwise (no function body
/// was found, the chosen body's closing brace could not be located, or the chosen body contains
/// nothing that looks like a function call).
bool ReplaceFunctionCallWithBuiltin(std::string& wgsl_code);
protected:
/// Given index idx1 it deletes the region of length interval_len
/// starting at index idx1;
@ -128,6 +136,13 @@ class WgslMutator {
/// identifiers in wgsl_code.
std::vector<std::pair<size_t, size_t>> GetIdentifiers(const std::string& wgsl_code);
/// A function that finds the identifiers in a WGSL-like string that appear to be used as
/// function names in function call expressions, i.e. identifiers that precede an opening
/// parenthesis. The check is purely textual.
/// @param wgsl_code - the WGSL-like string where the identifiers will be found.
/// @return a vector of (position, length) pairs, one per such identifier, in order of
/// appearance; each position is an offset from the start of wgsl_code.
std::vector<std::pair<size_t, size_t>> GetFunctionCallIdentifiers(const std::string& wgsl_code);
/// A function that returns the starting position
/// and the length of all the integer literals in a WGSL-like string.
/// @param wgsl_code - the WGSL-like string where the int literals