Regex fuzzer: Add break and continue statements

Adds break and continue statements to randomly-chosen loops. Also
overhauls support for adding return statements to functions.

Fixes: tint:1125.
Change-Id: Ib1a82b49e3fbb0b5520c725c8b8459d68383bed2
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96543
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
Alastair F. Donaldson 2022-07-19 16:59:33 +00:00 committed by Dawn LUCI CQ
parent 84f7830874
commit e4df87fd01
4 changed files with 192 additions and 94 deletions

View File

@ -38,6 +38,7 @@ enum class MutationKind {
kReplaceLiteral, kReplaceLiteral,
kInsertReturnStatement, kInsertReturnStatement,
kReplaceOperator, kReplaceOperator,
kInsertBreakOrContinue,
kNumMutationKinds kNumMutationKinds
}; };
@ -108,6 +109,12 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
return 0; return 0;
} }
break; break;
case MutationKind::kInsertBreakOrContinue:
if (!mutator.InsertBreakOrContinue(wgsl_code)) {
return 0;
}
break;
default: default:
assert(false && "Unreachable"); assert(false && "Unreachable");
return 0; return 0;

View File

@ -33,6 +33,7 @@ class WgslMutatorTest : public WgslMutator {
using WgslMutator::GetFunctionBodyPositions; using WgslMutator::GetFunctionBodyPositions;
using WgslMutator::GetIdentifiers; using WgslMutator::GetIdentifiers;
using WgslMutator::GetIntLiterals; using WgslMutator::GetIntLiterals;
using WgslMutator::GetLoopBodyPositions;
using WgslMutator::ReplaceRegion; using WgslMutator::ReplaceRegion;
using WgslMutator::SwapIntervals; using WgslMutator::SwapIntervals;
}; };
@ -382,69 +383,7 @@ TEST(InsertReturnTest, FindClosingBraceFailing) {
ASSERT_NE(expected, function_body); ASSERT_NE(expected, function_body);
} }
TEST(TestInsertReturn, TestInsertReturn1) { TEST(TestInsertReturn, TestFunctionPositions1) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
@vertex
fn vertex_main() -> @builtin(position) vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
@fragment
fn fragment_main() {
clamp_0acf8f();
}
@compute @workgroup_size(1)
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
std::vector<size_t> semicolon_pos;
for (size_t pos = wgsl_code.find(";", 0); pos != std::string::npos;
pos = wgsl_code.find(";", pos + 1)) {
semicolon_pos.push_back(pos);
}
// should insert a return true statement after the first semicolon of the
first function in the WGSL-like string above.
wgsl_code.insert(semicolon_pos[0] + 1, "return true;");
std::string expected_wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());return true;
}
@vertex
fn vertex_main() -> @builtin(position) vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
@fragment
fn fragment_main() {
clamp_0acf8f();
}
@compute @workgroup_size(1)
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
TEST(TestInsertReturn, TestFunctionPositions) {
RandomGenerator generator(0); RandomGenerator generator(0);
WgslMutatorTest mutator(generator); WgslMutatorTest mutator(generator);
std::string wgsl_code = std::string wgsl_code =
@ -475,8 +414,32 @@ TEST(TestInsertReturn, TestFunctionPositions) {
foo_1 = 5 + 7; foo_1 = 5 + 7;
var foo_3 : i32 = -20;)"; var foo_3 : i32 = -20;)";
std::vector<size_t> function_positions = mutator.GetFunctionBodyPositions(wgsl_code); std::vector<std::pair<size_t, bool>> function_positions =
std::vector<size_t> expected_positions = {180, 586}; mutator.GetFunctionBodyPositions(wgsl_code);
std::vector<std::pair<size_t, bool>> expected_positions = {
{18, false}, {180, true}, {323, false}, {423, false}, {586, true}};
ASSERT_EQ(expected_positions, function_positions);
}
TEST(TestInsertReturn, TestFunctionPositions2) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string wgsl_code =
R"(fn some_loop_body() {
}
fn f() {
var j : i32; i = (i + 1)) {
some_loop_body(); ((i < 5) && (j < 10));
for(var i : i32 = 0;
j = (i * 30);
}
}
)";
std::vector<std::pair<size_t, bool>> function_positions =
mutator.GetFunctionBodyPositions(wgsl_code);
std::vector<std::pair<size_t, bool>> expected_positions = {{20, false}, {32, false}};
ASSERT_EQ(expected_positions, function_positions); ASSERT_EQ(expected_positions, function_positions);
} }
@ -586,5 +549,67 @@ d %= e;
} }
} }
TEST(TestInsertBreakOrContinue, TestLoopPositions1) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string wgsl_code = " loop { } loop { } loop { }";
std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
std::vector<size_t> expected_positions = {6, 15, 24};
ASSERT_EQ(expected_positions, loop_positions);
}
TEST(TestInsertBreakOrContinue, TestLoopPositions2) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string wgsl_code = R"( loop { } loop
{ } loop { })";
std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
std::vector<size_t> expected_positions = {6, 15, 24};
ASSERT_EQ(expected_positions, loop_positions);
}
TEST(TestInsertBreakOrContinue, TestLoopPositions3) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
// This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
// intended to work well on semi-valid code).
std::string wgsl_code =
R"(fn compute_main() {
loop {
var twice: i32 = 2 * i;
i++;
if i == 5 { break; }
loop
{
var twice: i32 = 2 * i;
i++;
while (i < 100) { i++; }
if i == 5 { break; }
}
}
for (a = 0; a < 100; a++) {
if (a > 50) {
break;
}
while (i < 100) { i++; }
}
})";
std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
std::vector<size_t> expected_positions = {27, 108, 173, 249, 310};
ASSERT_EQ(expected_positions, loop_positions);
}
TEST(TestInsertBreakOrContinue, TestLoopPositions4) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
// This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
// intended to work well on semi-valid code).
std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )";
std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
ASSERT_TRUE(loop_positions.empty());
}
} // namespace } // namespace
} // namespace tint::fuzzers::regex_fuzzer } // namespace tint::fuzzers::regex_fuzzer

View File

@ -99,33 +99,98 @@ size_t WgslMutator::FindClosingBrace(size_t opening_bracket_pos, const std::stri
return (pos == wgsl_code.size() && open_bracket_count >= 1) ? 0 : pos - 1; return (pos == wgsl_code.size() && open_bracket_count >= 1) ? 0 : pos - 1;
} }
std::vector<size_t> WgslMutator::GetFunctionBodyPositions(const std::string& wgsl_code) { std::vector<std::pair<size_t, bool>> WgslMutator::GetFunctionBodyPositions(
const std::string& wgsl_code) {
// Finds all the functions with a non-void return value. // Finds all the functions with a non-void return value.
std::regex function_regex("fn.*?->.*?\\{"); std::regex function_regex("fn[^a-zA-Z_0-9][^\\{]*\\{");
std::smatch match; std::vector<std::pair<size_t, bool>> result;
auto functions_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), function_regex);
auto functions_end = std::sregex_iterator();
for (std::sregex_iterator i = functions_begin; i != functions_end; ++i) {
bool returns_value = i->str().find("->") != std::string::npos;
result.push_back(
{static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1), returns_value});
}
return result;
}
std::vector<size_t> WgslMutator::GetLoopBodyPositions(const std::string& wgsl_code) {
// Finds all loops.
std::regex loop_regex("[^a-zA-Z_0-9](for|while|loop)[^\\{]*\\{");
std::vector<size_t> result; std::vector<size_t> result;
auto search_start(wgsl_code.cbegin()); auto loops_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), loop_regex);
std::string prefix = ""; auto loops_end = std::sregex_iterator();
while (std::regex_search(search_start, wgsl_code.cend(), match, function_regex)) { for (std::sregex_iterator i = loops_begin; i != loops_end; ++i) {
result.push_back(static_cast<size_t>(match.suffix().first - wgsl_code.cbegin() - 1L)); result.push_back(static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1));
search_start = match.suffix().first;
} }
return result; return result;
} }
bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) { bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) {
std::vector<size_t> function_body_positions = GetFunctionBodyPositions(wgsl_code); std::vector<std::pair<size_t, bool>> function_body_positions =
GetFunctionBodyPositions(wgsl_code);
// No function was found in wgsl_code. // No function was found in wgsl_code.
if (function_body_positions.empty()) { if (function_body_positions.empty()) {
return false; return false;
} }
// Pick a random function's opening bracket, find the corresponding closing // Pick a random function
// bracket, and find a semi-colon within the function body. auto function = generator_.GetRandomElement(function_body_positions);
size_t left_bracket_pos = generator_.GetRandomElement(function_body_positions);
// Find the corresponding closing bracket for the function, and find a semi-colon within the
// function body.
size_t left_bracket_pos = function.first;
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
if (right_bracket_pos == 0) {
return false;
}
std::vector<size_t> semicolon_positions;
for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1); pos < right_bracket_pos;
pos = wgsl_code.find(";", pos + 1)) {
semicolon_positions.push_back(pos);
}
if (semicolon_positions.empty()) {
return false;
}
std::string return_statement = "return";
if (function.second) {
// The function returns a value. Get all identifiers and integer literals to use as
// potential return values.
std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code);
auto return_values = identifiers;
std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code);
return_values.insert(return_values.end(), int_literals.begin(), int_literals.end());
std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values);
return_statement += " " + wgsl_code.substr(return_value.first, return_value.second);
}
return_statement += ";";
// Insert the return statement immediately after the semicolon.
wgsl_code.insert(generator_.GetRandomElement(semicolon_positions) + 1, return_statement);
return true;
}
bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) {
std::vector<size_t> loop_body_positions = GetLoopBodyPositions(wgsl_code);
// No loop was found in wgsl_code.
if (loop_body_positions.empty()) {
return false;
}
// Pick a random loop's opening bracket, find the corresponding closing
// bracket, and find a semi-colon within the loop body.
size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions);
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
@ -145,17 +210,8 @@ bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) {
size_t semicolon_position = generator_.GetRandomElement(semicolon_positions); size_t semicolon_position = generator_.GetRandomElement(semicolon_positions);
// Get all identifiers and integer literals to use as potential return values. // Insert a break or continue immediately after the semicolon.
std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code); wgsl_code.insert(semicolon_position + 1, generator_.GetBool() ? "break;" : "continue;");
auto return_values = identifiers;
std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code);
return_values.insert(return_values.end(), int_literals.begin(), int_literals.end());
std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values);
std::string return_statement =
"return " + wgsl_code.substr(return_value.first, return_value.second) + ";";
// Insert the return statement immediately after the semicolon.
wgsl_code.insert(semicolon_position + 1, return_statement);
return true; return true;
} }

View File

@ -73,6 +73,11 @@ class WgslMutator {
/// @return true if the mutation was successful or false otherwise. /// @return true if the mutation was successful or false otherwise.
bool InsertReturnStatement(std::string& wgsl_code); bool InsertReturnStatement(std::string& wgsl_code);
/// Inserts a break or continue statement in a randomly chosen loop of a WGSL-like string.
/// @param wgsl_code - WGSL-like string that will be mutated.
/// @return true if the mutation was successful or false otherwise.
bool InsertBreakOrContinue(std::string& wgsl_code);
/// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one /// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one
/// randomly-chosen operator in the original string with another operator. /// randomly-chosen operator in the original string with another operator.
/// @param wgsl_code - the initial WGSL-like string that will be mutated. /// @param wgsl_code - the initial WGSL-like string that will be mutated.
@ -103,14 +108,19 @@ class WgslMutator {
/// brace. /// brace.
size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code); size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code);
/// Returns the starting_position of the bodies of the functions /// Returns the starting position of the bodies of the functions identified by an appropriate
/// that follow the regular expression: fn.*?->.*?\\{, which searches for the /// regular expression, together with a boolean indicating whether the function returns a value or not.
/// keyword fn followed by the function name, its return type and opening brace.
/// @param wgsl_code - the WGSL-like string where the functions will be /// @param wgsl_code - the WGSL-like string where the functions will be
/// searched. /// searched.
/// @return a vector with the starting position of the function bodies in /// @return a vector of pairs, where each pair provides the starting position of the function
/// wgsl_code. /// body, and the value true if and only if the function returns a value.
std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code); std::vector<std::pair<size_t, bool>> GetFunctionBodyPositions(const std::string& wgsl_code);
/// Returns the starting position of the bodies of the loops identified by an appropriate
/// regular expression.
/// @param wgsl_code - the WGSL-like string in which loops will be searched for.
/// @return a vector with the starting position of the loop bodies in wgsl_code.
std::vector<size_t> GetLoopBodyPositions(const std::string& wgsl_code);
/// A function that finds all the identifiers in a WGSL-like string. /// A function that finds all the identifiers in a WGSL-like string.
/// @param wgsl_code - the WGSL-like string where the identifiers will be found. /// @param wgsl_code - the WGSL-like string where the identifiers will be found.