From e4df87fd017cd1d3eacbfbed57e521f6f7733936 Mon Sep 17 00:00:00 2001 From: "Alastair F. Donaldson" Date: Tue, 19 Jul 2022 16:59:33 +0000 Subject: [PATCH] Regex fuzzer: Add break and continue statements Adds a break and continue statements to randomly-chosen loops. Also overhauls support for adding return statements to functions. Fixes: tint:1125. Change-Id: Ib1a82b49e3fbb0b5520c725c8b8459d68383bed2 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96543 Reviewed-by: Ryan Harrison Commit-Queue: Alastair Donaldson Kokoro: Kokoro --- src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc | 7 + .../tint_regex_fuzzer/regex_fuzzer_tests.cc | 155 ++++++++++-------- .../fuzzers/tint_regex_fuzzer/wgsl_mutator.cc | 102 +++++++++--- .../fuzzers/tint_regex_fuzzer/wgsl_mutator.h | 22 ++- 4 files changed, 192 insertions(+), 94 deletions(-) diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc index 83aac1eb63..c5d2aba8cd 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc @@ -38,6 +38,7 @@ enum class MutationKind { kReplaceLiteral, kInsertReturnStatement, kReplaceOperator, + kInsertBreakOrContinue, kNumMutationKinds }; @@ -108,6 +109,12 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data, return 0; } break; + + case MutationKind::kInsertBreakOrContinue: + if (!mutator.InsertBreakOrContinue(wgsl_code)) { + return 0; + } + break; default: assert(false && "Unreachable"); return 0; diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc index 32a6619ad8..55ec02885e 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc @@ -33,6 +33,7 @@ class WgslMutatorTest : public WgslMutator { using WgslMutator::GetFunctionBodyPositions; using WgslMutator::GetIdentifiers; using WgslMutator::GetIntLiterals; + using WgslMutator::GetLoopBodyPositions; using WgslMutator::ReplaceRegion; using WgslMutator::SwapIntervals; }; @@ -382,69 +383,7 @@ TEST(InsertReturnTest, FindClosingBraceFailing) { ASSERT_NE(expected, function_body); } -TEST(TestInsertReturn, TestInsertReturn1) { - RandomGenerator generator(0); - WgslMutatorTest mutator(generator); - std::string wgsl_code = - R"(fn clamp_0acf8f() { - var res: vec2 = clamp(vec2(), vec2(), vec2()); - } - @vertex - fn vertex_main() -> @builtin(position) vec4 { - clamp_0acf8f(); - var foo_1: i32 = 3; - return vec4(); - } - @fragment - fn fragment_main() { - clamp_0acf8f(); - } - @compute @workgroup_size(1) - fn compute_main() { - var foo: f32 = 0.0; - var foo_2: i32 = 10; - clamp_0acf8f(); - } - foo_1 = 5 + 7; - var foo_3 : i32 = -20;)"; - - std::vector semicolon_pos; - for (size_t pos = wgsl_code.find(";", 0); pos != std::string::npos; - pos = wgsl_code.find(";", pos + 1)) { - semicolon_pos.push_back(pos); - } - - // should insert a return true statement after the first semicolon of the - // first function the the WGSL-like string above. - wgsl_code.insert(semicolon_pos[0] + 1, "return true;"); - - std::string expected_wgsl_code = - R"(fn clamp_0acf8f() { - var res: vec2 = clamp(vec2(), vec2(), vec2());return true; - } - @vertex - fn vertex_main() -> @builtin(position) vec4 { - clamp_0acf8f(); - var foo_1: i32 = 3; - return vec4(); - } - @fragment - fn fragment_main() { - clamp_0acf8f(); - } - @compute @workgroup_size(1) - fn compute_main() { - var foo: f32 = 0.0; - var foo_2: i32 = 10; - clamp_0acf8f(); - } - foo_1 = 5 + 7; - var foo_3 : i32 = -20;)"; - - ASSERT_EQ(expected_wgsl_code, wgsl_code); -} - -TEST(TestInsertReturn, TestFunctionPositions) { +TEST(TestInsertReturn, TestFunctionPositions1) { RandomGenerator generator(0); WgslMutatorTest mutator(generator); std::string wgsl_code = @@ -475,8 +414,32 @@ TEST(TestInsertReturn, TestFunctionPositions) { foo_1 = 5 + 7; var foo_3 : i32 = -20;)"; - std::vector function_positions = mutator.GetFunctionBodyPositions(wgsl_code); - std::vector expected_positions = {180, 586}; + std::vector> function_positions = + mutator.GetFunctionBodyPositions(wgsl_code); + std::vector> expected_positions = { + {18, false}, {180, true}, {323, false}, {423, false}, {586, true}}; + ASSERT_EQ(expected_positions, function_positions); +} + +TEST(TestInsertReturn, TestFunctionPositions2) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string wgsl_code = + R"(fn some_loop_body() { +} + +fn f() { + var j : i32; i = (i + 1)) { + some_loop_body(); ((i < 5) && (j < 10)); + for(var i : i32 = 0; + j = (i * 30); + } +} +)"; + + std::vector> function_positions = + mutator.GetFunctionBodyPositions(wgsl_code); + std::vector> expected_positions = {{20, false}, {32, false}}; ASSERT_EQ(expected_positions, function_positions); } @@ -586,5 +549,67 @@ d %= e; } } +TEST(TestInsertBreakOrContinue, TestLoopPositions1) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string wgsl_code = " loop { } loop { } loop { }"; + std::vector loop_positions = mutator.GetLoopBodyPositions(wgsl_code); + std::vector expected_positions = {6, 15, 24}; + ASSERT_EQ(expected_positions, loop_positions); +} + +TEST(TestInsertBreakOrContinue, TestLoopPositions2) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string wgsl_code = R"( loop { } loop +{ } loop { })"; + std::vector loop_positions = mutator.GetLoopBodyPositions(wgsl_code); + std::vector expected_positions = {6, 15, 24}; + ASSERT_EQ(expected_positions, loop_positions); +} + +TEST(TestInsertBreakOrContinue, TestLoopPositions3) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is + // intended to work well on semi-valid code). + std::string wgsl_code = + R"(fn compute_main() { + loop { + var twice: i32 = 2 * i; + i++; + if i == 5 { break; } + loop + { + var twice: i32 = 2 * i; + i++; + while (i < 100) { i++; } + if i == 5 { break; } + } + } + for (a = 0; a < 100; a++) { + if (a > 50) { + break; + } + while (i < 100) { i++; } + } +})"; + + std::vector loop_positions = mutator.GetLoopBodyPositions(wgsl_code); + std::vector expected_positions = {27, 108, 173, 249, 310}; + ASSERT_EQ(expected_positions, loop_positions); +} + +TEST(TestInsertBreakOrContinue, TestLoopPositions4) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is + // intended to work well on semi-valid code). + std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )"; + + std::vector loop_positions = mutator.GetLoopBodyPositions(wgsl_code); + ASSERT_TRUE(loop_positions.empty()); +} + } // namespace } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc index 773eea61d7..0d2083119d 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc @@ -99,33 +99,98 @@ size_t WgslMutator::FindClosingBrace(size_t opening_bracket_pos, const std::stri return (pos == wgsl_code.size() && open_bracket_count >= 1) ? 0 : pos - 1; } -std::vector WgslMutator::GetFunctionBodyPositions(const std::string& wgsl_code) { +std::vector> WgslMutator::GetFunctionBodyPositions( + const std::string& wgsl_code) { // Finds all the functions with a non-void return value. - std::regex function_regex("fn.*?->.*?\\{"); - std::smatch match; + std::regex function_regex("fn[^a-zA-Z_0-9][^\\{]*\\{"); + std::vector> result; + + auto functions_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), function_regex); + auto functions_end = std::sregex_iterator(); + + for (std::sregex_iterator i = functions_begin; i != functions_end; ++i) { + bool returns_value = i->str().find("->") != std::string::npos; + result.push_back( + {static_cast(i->suffix().first - wgsl_code.cbegin() - 1), returns_value}); + } + return result; +} + +std::vector WgslMutator::GetLoopBodyPositions(const std::string& wgsl_code) { + // Finds all loops. + std::regex loop_regex("[^a-zA-Z_0-9](for|while|loop)[^\\{]*\\{"); std::vector result; - auto search_start(wgsl_code.cbegin()); - std::string prefix = ""; + auto loops_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), loop_regex); + auto loops_end = std::sregex_iterator(); - while (std::regex_search(search_start, wgsl_code.cend(), match, function_regex)) { - result.push_back(static_cast(match.suffix().first - wgsl_code.cbegin() - 1L)); - search_start = match.suffix().first; + for (std::sregex_iterator i = loops_begin; i != loops_end; ++i) { + result.push_back(static_cast(i->suffix().first - wgsl_code.cbegin() - 1)); } return result; } bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) { - std::vector function_body_positions = GetFunctionBodyPositions(wgsl_code); + std::vector> function_body_positions = + GetFunctionBodyPositions(wgsl_code); // No function was found in wgsl_code. if (function_body_positions.empty()) { return false; } - // Pick a random function's opening bracket, find the corresponding closing - // bracket, and find a semi-colon within the function body. - size_t left_bracket_pos = generator_.GetRandomElement(function_body_positions); + // Pick a random function + auto function = generator_.GetRandomElement(function_body_positions); + + // Find the corresponding closing bracket for the function, and find a semi-colon within the + // function body. + size_t left_bracket_pos = function.first; + + size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); + + if (right_bracket_pos == 0) { + return false; + } + + std::vector semicolon_positions; + for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1); pos < right_bracket_pos; + pos = wgsl_code.find(";", pos + 1)) { + semicolon_positions.push_back(pos); + } + + if (semicolon_positions.empty()) { + return false; + } + + std::string return_statement = "return"; + if (function.second) { + // The function returns a value. Get all identifiers and integer literals to use as + // potential return values. + std::vector> identifiers = GetIdentifiers(wgsl_code); + auto return_values = identifiers; + std::vector> int_literals = GetIntLiterals(wgsl_code); + return_values.insert(return_values.end(), int_literals.begin(), int_literals.end()); + std::pair return_value = generator_.GetRandomElement(return_values); + return_statement += " " + wgsl_code.substr(return_value.first, return_value.second); + } + return_statement += ";"; + + // Insert the return statement immediately after the semicolon. + wgsl_code.insert(generator_.GetRandomElement(semicolon_positions) + 1, return_statement); + return true; +} + +bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) { + std::vector loop_body_positions = GetLoopBodyPositions(wgsl_code); + + // No loop was found in wgsl_code. + if (loop_body_positions.empty()) { + return false; + } + + // Pick a random loop's opening bracket, find the corresponding closing + // bracket, and find a semi-colon within the loop body. + size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions); size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); @@ -145,17 +210,8 @@ bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) { size_t semicolon_position = generator_.GetRandomElement(semicolon_positions); - // Get all identifiers and integer literals to use as potential return values. - std::vector> identifiers = GetIdentifiers(wgsl_code); - auto return_values = identifiers; - std::vector> int_literals = GetIntLiterals(wgsl_code); - return_values.insert(return_values.end(), int_literals.begin(), int_literals.end()); - std::pair return_value = generator_.GetRandomElement(return_values); - std::string return_statement = - "return " + wgsl_code.substr(return_value.first, return_value.second) + ";"; - - // Insert the return statement immediately after the semicolon. - wgsl_code.insert(semicolon_position + 1, return_statement); + // Insert a break or continue immediately after the semicolon. + wgsl_code.insert(semicolon_position + 1, generator_.GetBool() ? "break;" : "continue;"); return true; } diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h index c5ae900aaa..5308bf84a5 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h @@ -73,6 +73,11 @@ class WgslMutator { /// @return true if the mutation was succesful or false otherwise. bool InsertReturnStatement(std::string& wgsl_code); + /// Inserts a break or continue statement in a randomly chosen loop of a WGSL-like string. + /// @param wgsl_code - WGSL-like string that will be mutated. + /// @return true if the mutation was succesful or false otherwise. + bool InsertBreakOrContinue(std::string& wgsl_code); + /// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one /// randomly-chosen operator in the original string with another operator. /// @param wgsl_code - the initial WGSL-like string that will be mutated. @@ -103,14 +108,19 @@ class WgslMutator { /// brace. size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code); - /// Returns the starting_position of the bodies of the functions - /// that follow the regular expression: fn.*?->.*?\\{, which searches for the - /// keyword fn followed by the function name, its return type and opening brace. + /// Returns the starting position of the bodies of the functions identified by an appropriate + /// function, together with a boolean indicating whether the function returns a value or not. /// @param wgsl_code - the WGSL-like string where the functions will be /// searched. - /// @return a vector with the starting position of the function bodies in - /// wgsl_code. - std::vector GetFunctionBodyPositions(const std::string& wgsl_code); + /// @return a vector of pairs, where each pair provides the starting position of the function + /// body, and the value true if and only if the function returns a value. + std::vector> GetFunctionBodyPositions(const std::string& wgsl_code); + + /// Returns the starting position of the bodies of the loops identified by an appropriate + /// regular expressions. + /// @param wgsl_code - the WGSL-like string in which loops will be searched for. + /// @return a vector with the starting position of the loop bodies in wgsl_code. + std::vector GetLoopBodyPositions(const std::string& wgsl_code); /// A function that finds all the identifiers in a WGSL-like string. /// @param wgsl_code - the WGSL-like string where the identifiers will be found.